public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
From: Tom Tromey <tom@tromey.com>
To: gdb-patches@sourceware.org
Cc: Tom Tromey <tom@tromey.com>
Subject: [PATCH 13/30] Statically examine abbrev properties
Date: Wed, 25 Aug 2021 20:19:20 -0600	[thread overview]
Message-ID: <20210826021937.1490292-14-tom@tromey.com> (raw)
In-Reply-To: <20210826021937.1490292-1-tom@tromey.com>

The new DIE scanner works more or less along the lines indicated by
the text for the .debug_names section, disregarding the bugs in the
specification.

While working on this, I noticed that whether a DIE is interesting is
a static property of the DIE's abbrev.  It also turns out that many
abbrevs imply a static size for the DIE data, and additionally that
for many abbrevs, the sibling offset is stored at a constant offset
from the start of the DIE.

This patch changes the abbrev reader to analyze each abbrev and stash
the results on the abbrev.  These combine to speed up the new indexer.
If the "interesting" flag is false, GDB knows to skip the DIE
immediately.  If the sibling offset is statically known, skipping can
be done without reading any attributes; and in some other cases, the
DIE can be skipped using simple arithmetic.
---
 gdb/dwarf2/abbrev.c | 158 ++++++++++++++++++++++++++++++++++++++++++++
 gdb/dwarf2/abbrev.h |   7 +-
 2 files changed, 163 insertions(+), 2 deletions(-)

diff --git a/gdb/dwarf2/abbrev.c b/gdb/dwarf2/abbrev.c
index c84f21256fd..658868aceba 100644
--- a/gdb/dwarf2/abbrev.c
+++ b/gdb/dwarf2/abbrev.c
@@ -76,6 +76,43 @@ abbrev_table::add_abbrev (struct abbrev_info *abbrev)
   *slot = abbrev;
 }
 
+/* Helper function that returns true if a DIE with the given tag might
+   plausibly be indexed.  */
+
+static bool
+tag_interesting_for_index (dwarf_tag tag)
+{
+  switch (tag)
+    {
+    case DW_TAG_array_type:
+    case DW_TAG_base_type:
+    case DW_TAG_class_type:
+    case DW_TAG_constant:
+    case DW_TAG_enumeration_type:
+    case DW_TAG_enumerator:
+    case DW_TAG_imported_declaration:
+    case DW_TAG_imported_unit:
+    case DW_TAG_inlined_subroutine:
+    case DW_TAG_interface_type:
+    case DW_TAG_module:
+    case DW_TAG_namespace:
+    case DW_TAG_ptr_to_member_type:
+    case DW_TAG_set_type:
+    case DW_TAG_string_type:
+    case DW_TAG_structure_type:
+    case DW_TAG_subprogram:
+    case DW_TAG_subrange_type:
+    case DW_TAG_subroutine_type:
+    case DW_TAG_typedef:
+    case DW_TAG_union_type:
+    case DW_TAG_unspecified_type:
+    case DW_TAG_variable:
+      return true;
+    }
+
+  return false;
+}
+
 /* Read in an abbrev table.  */
 
 abbrev_table_up
@@ -116,6 +153,17 @@ abbrev_table::read (struct dwarf2_section_info *section,
       cur_abbrev->has_children = read_1_byte (abfd, abbrev_ptr);
       abbrev_ptr += 1;
 
+      unsigned int size = 0;
+      unsigned int sibling_offset = -1;
+      bool is_csize = true;
+
+      bool has_hardcoded_declaration = false;
+      bool has_specification_or_origin = false;
+      bool has_name = false;
+      bool has_linkage_name = false;
+      bool has_location = false;
+      bool has_external = false;
+
       /* Now read in declarations.  */
       int num_attrs = 0;
       for (;;)
@@ -142,12 +190,122 @@ abbrev_table::read (struct dwarf2_section_info *section,
 	  if (cur_attr.name == 0)
 	    break;
 
+	  switch (cur_attr.name)
+	    {
+	    case DW_AT_declaration:
+	      if (cur_attr.form == DW_FORM_flag_present)
+		has_hardcoded_declaration = true;
+	      break;
+
+	    case DW_AT_external:
+	      has_external = true;
+	      break;
+
+	    case DW_AT_specification:
+	    case DW_AT_abstract_origin:
+	    case DW_AT_extension:
+	      has_specification_or_origin = true;
+	      break;
+
+	    case DW_AT_name:
+	      has_name = true;
+	      break;
+
+	    case DW_AT_MIPS_linkage_name:
+	    case DW_AT_linkage_name:
+	      has_linkage_name = true;
+	      break;
+
+	    case DW_AT_const_value:
+	    case DW_AT_location:
+	      has_location = true;
+	      break;
+
+	    case DW_AT_sibling:
+	      if (is_csize && cur_attr.form == DW_FORM_ref4)
+		sibling_offset = size;
+	      break;
+	    }
+
+	  switch (cur_attr.form)
+	    {
+	    case DW_FORM_data1:
+	    case DW_FORM_ref1:
+	    case DW_FORM_flag:
+	    case DW_FORM_strx1:
+	      size += 1;
+	      break;
+	    case DW_FORM_flag_present:
+	    case DW_FORM_implicit_const:
+	      break;
+	    case DW_FORM_data2:
+	    case DW_FORM_ref2:
+	    case DW_FORM_strx2:
+	      size += 2;
+	      break;
+	    case DW_FORM_strx3:
+	      size += 3;
+	      break;
+	    case DW_FORM_data4:
+	    case DW_FORM_ref4:
+	    case DW_FORM_strx4:
+	      size += 4;
+	      break;
+	    case DW_FORM_data8:
+	    case DW_FORM_ref8:
+	    case DW_FORM_ref_sig8:
+	      size += 8;
+	      break;
+	    case DW_FORM_data16:
+	      size += 16;
+	      break;
+
+	    default:
+	      is_csize = false;
+	      break;
+	    case DW_FORM_ref_addr:
+	    case DW_FORM_GNU_ref_alt:
+	    case DW_FORM_addr:
+	      is_csize = false;
+	      break;
+	    }
+
 	  ++num_attrs;
 	  obstack_grow (obstack, &cur_attr, sizeof (cur_attr));
 	}
 
       cur_abbrev = (struct abbrev_info *) obstack_finish (obstack);
       cur_abbrev->num_attrs = num_attrs;
+
+      if (!has_name && !has_linkage_name && !has_specification_or_origin)
+	{
+	  /* Some anonymous DIEs are worth examining.  */
+	  cur_abbrev->interesting
+	    = (cur_abbrev->tag == DW_TAG_namespace
+	       || cur_abbrev->tag == DW_TAG_enumeration_type);
+	}
+      else if (has_hardcoded_declaration
+	       && (cur_abbrev->tag != DW_TAG_variable || !has_external))
+	cur_abbrev->interesting = false;
+      else if (!tag_interesting_for_index (cur_abbrev->tag))
+	cur_abbrev->interesting = false;
+      else if (!has_location && !has_specification_or_origin && !has_external
+	       && cur_abbrev->tag == DW_TAG_variable)
+	cur_abbrev->interesting = false;
+      else
+	cur_abbrev->interesting = true;
+
+      /* If there are no children, and the abbrev has a constant size,
+	 then we don't care about the sibling offset, because it's
+	 simple to just skip the entire DIE without reading a sibling
+	 offset.  */
+      if ((!cur_abbrev->has_children && is_csize)
+	  /* Overflow.  */
+	  || sibling_offset != (unsigned short) sibling_offset)
+	sibling_offset = -1;
+      cur_abbrev->size_if_constant = is_csize ? size : 0;
+      cur_abbrev->sibling_offset = sibling_offset;
+
       abbrev_table->add_abbrev (cur_abbrev);
     }
 
diff --git a/gdb/dwarf2/abbrev.h b/gdb/dwarf2/abbrev.h
index 0a1ca4d39b4..ae02f76885d 100644
--- a/gdb/dwarf2/abbrev.h
+++ b/gdb/dwarf2/abbrev.h
@@ -44,9 +44,12 @@ struct abbrev_info
   /* Number identifying abbrev.  */
   unsigned int number;
   /* DWARF tag.  */
-  enum dwarf_tag tag;
+  ENUM_BITFIELD (dwarf_tag) tag : 16;
   /* True if the DIE has children.  */
-  unsigned short has_children;
+  bool has_children;
+  bool interesting;
+  unsigned short size_if_constant;
+  unsigned short sibling_offset;
   /* Number of attributes.  */
   unsigned short num_attrs;
   /* An array of attribute descriptions, allocated using the struct
-- 
2.31.1


  parent reply	other threads:[~2021-08-26  2:19 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-26  2:19 [PATCH 00/30] Rewrite the DWARF "partial" reader Tom Tromey
2021-08-26  2:19 ` [PATCH 01/30] Introduce make_unique_xstrndup Tom Tromey
2021-08-26  2:19 ` [PATCH 02/30] Split create_addrmap_from_aranges Tom Tromey
2021-08-26  2:19 ` [PATCH 03/30] Add dwarf2_per_cu_data::addresses_seen Tom Tromey
2021-08-26  2:19 ` [PATCH 04/30] Refactor dwarf2_get_pc_bounds Tom Tromey
2021-08-26  2:19 ` [PATCH 05/30] Allow ada_decode not to decode operators Tom Tromey
2021-08-26  2:19 ` [PATCH 06/30] Let skip_one_die not skip children Tom Tromey
2021-08-26  2:19 ` [PATCH 07/30] Add name splitting Tom Tromey
2021-08-26  2:19 ` [PATCH 08/30] Add new overload of dwarf5_djb_hash Tom Tromey
2021-08-26  2:19 ` [PATCH 09/30] Refactor build_type_psymtabs_reader Tom Tromey
2021-08-26  2:19 ` [PATCH 10/30] Add batching parameter to parallel_for_each Tom Tromey
2021-08-26  2:19 ` [PATCH 11/30] Return vector of results from parallel_for_each Tom Tromey
2021-08-27  6:20   ` Tom de Vries
2021-08-28 19:20     ` Tom Tromey
2021-08-26  2:19 ` [PATCH 12/30] Introduce DWARF abbrev cache Tom Tromey
2021-08-26  2:19 ` Tom Tromey [this message]
2021-09-06 22:31   ` [PATCH 13/30] Statically examine abbrev properties Lancelot SIX
2021-11-04 18:00     ` Tom Tromey
2021-08-26  2:19 ` [PATCH 14/30] Update skip_one_die for new " Tom Tromey
2021-08-26  2:19 ` [PATCH 15/30] Introduce the new DWARF index class Tom Tromey
2021-09-09 23:32   ` Lancelot SIX
2021-11-04 18:03     ` Tom Tromey
2021-08-26  2:19 ` [PATCH 16/30] The new DWARF indexer Tom Tromey
2021-08-26  2:19 ` [PATCH 17/30] Implement quick_symbol_functions for cooked DWARF index Tom Tromey
2021-08-26  2:19 ` [PATCH 18/30] Wire in the new DWARF indexer Tom Tromey
2021-08-26  2:19 ` [PATCH 19/30] Pre-read DWARF section data Tom Tromey
2021-08-26  2:19 ` [PATCH 20/30] Parallelize DWARF indexing Tom Tromey
2021-08-26  2:19 ` [PATCH 21/30] "Finalize" the DWARF index in the background Tom Tromey
2021-08-26  2:19 ` [PATCH 22/30] Rename write_psymtabs_to_index Tom Tromey
2021-08-26  2:19 ` [PATCH 23/30] Change the key type in psym_index_map Tom Tromey
2021-08-26  2:19 ` [PATCH 24/30] Change parameters to write_address_map Tom Tromey
2021-08-26  2:19 ` [PATCH 25/30] Genericize addrmap handling in the DWARF index writer Tom Tromey
2021-08-26  2:19 ` [PATCH 26/30] Adapt .gdb_index writer to new DWARF scanner Tom Tromey
2021-08-26  2:19 ` [PATCH 27/30] Adapt .debug_names " Tom Tromey
2021-08-26  2:19 ` [PATCH 28/30] Enable the new DWARF indexer Tom Tromey
2021-08-26  2:19 ` [PATCH 29/30] Delete DWARF psymtab code Tom Tromey
2021-08-26  2:19 ` [PATCH 30/30] Remove dwarf2_per_cu_data::v Tom Tromey
2021-08-26 20:32 ` [PATCH 00/30] Rewrite the DWARF "partial" reader Tom de Vries
2021-08-26 21:29   ` Tom Tromey
2021-08-27  7:31     ` Tom de Vries
2021-08-30 15:04       ` Tom Tromey
2021-09-06 19:46         ` Tom Tromey
2021-09-07 10:58           ` Tom de Vries
2021-09-07 12:16             ` Tom de Vries
2021-10-29 23:06               ` Tom Tromey
2021-09-09 19:00 ` Wei-min Pan
2021-09-11 21:08   ` Tom Tromey
2021-09-13 16:50     ` Weimin Pan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210826021937.1490292-14-tom@tromey.com \
    --to=tom@tromey.com \
    --cc=gdb-patches@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).