From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2126) id 8E6053858C50; Mon, 17 Oct 2022 16:11:05 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8E6053858C50 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1666023065; bh=M3rZlF3nwskmVYu4aiEWqDRCJS77ADkYAElpoL4N8E0=; h=From:To:Subject:Date:From; b=n+EVVMfTtDeVfv8kSV+LTqieY3igi1eJh4DjMT/RMLen3KTEBR/EKFCidwv5CrjXo qhf+oydeSYdcY9ExRHr5jL7+q0Ag7I6F+AEZJG+kYvDZtzl8lQiW2KjFEQNwZKZSpv g4scAdOxDOf89+7n+4ejSr+uee6qFk1tyBD+BYl8= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Tom Tromey To: gdb-cvs@sourceware.org Subject: [binutils-gdb] Change .gdb_index de-duplication implementation X-Act-Checkin: binutils-gdb X-Git-Author: Tom Tromey X-Git-Refname: refs/heads/master X-Git-Oldrev: 5fea97943259a2bd997f92ffa66116b5c0d4eaab X-Git-Newrev: dd05fc7071a6517de13975fcddca861547351266 Message-Id: <20221017161105.8E6053858C50@sourceware.org> Date: Mon, 17 Oct 2022 16:11:05 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3Ddd05fc7071a6= 517de13975fcddca861547351266 commit dd05fc7071a6517de13975fcddca861547351266 Author: Tom Tromey Date: Thu Sep 22 13:10:55 2022 -0600 Change .gdb_index de-duplication implementation =20 While investigating PR symtab/29179, I found that one Ada test failed because, although a certain symbol was present in the index, with the new DWARF reader it pointed to a different CU than was chosen by earlier versions of gdb. =20 This patch changes how symbol de-duplication is done, deferring the process until the entire symbol table has been constructed. This way, it's possible to always choose the lower-numbered CU among duplicates, which is how gdb (implicitly) previously worked. =20 Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=3D29179 Diff: --- gdb/dwarf2/index-write.c | 77 ++++++++++++++++++++++++++------------------= ---- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c index 6b4052c3467..f592734addc 100644 --- a/gdb/dwarf2/index-write.c +++ b/gdb/dwarf2/index-write.c @@ -176,6 +176,10 @@ struct symtab_index_entry /* A sorted vector of the indices of all the CUs that hold an object of this name. */ std::vector cu_indices; + + /* Minimize CU_INDICES, sorting them and removing duplicates as + appropriate. */ + void minimize (); }; =20 /* The symbol table. This is a power-of-2-sized hash table. */ @@ -186,6 +190,13 @@ struct mapped_symtab data.resize (1024); } =20 + /* Minimize each entry in the symbol table, removing duplicates. */ + void minimize () + { + for (symtab_index_entry &item : data) + item.minimize (); + } + offset_type n_elements =3D 0; std::vector data; =20 @@ -271,21 +282,36 @@ add_index_entry (struct mapped_symtab *symtab, const = char *name, slot.cu_indices.push_back (cu_index_and_attrs); } =20 -/* Sort and remove duplicates of all symbols' cu_indices lists. */ +/* See symtab_index_entry. */ =20 -static void -uniquify_cu_indices (struct mapped_symtab *symtab) +void +symtab_index_entry::minimize () { - for (auto &entry : symtab->data) + if (name =3D=3D nullptr || cu_indices.empty ()) + return; + + std::sort (cu_indices.begin (), cu_indices.end ()); + auto from =3D std::unique (cu_indices.begin (), cu_indices.end ()); + cu_indices.erase (from, cu_indices.end ()); + + /* We don't want to enter a variable or type more than once, so + remove any such duplicates from the list as well. When doing + this, we want to keep the entry from the first CU -- but this is + implicit due to the sort. This choice is done because it's + similar to what gdb historically did for partial symbols. */ + std::unordered_set seen; + from =3D std::remove_if (cu_indices.begin (), cu_indices.end (), + [&] (offset_type val) { - if (entry.name !=3D NULL && !entry.cu_indices.empty ()) - { - auto &cu_indices =3D entry.cu_indices; - std::sort (cu_indices.begin (), cu_indices.end ()); - auto from =3D std::unique (cu_indices.begin (), cu_indices.end ()); - cu_indices.erase (from, cu_indices.end ()); - } - } + gdb_index_symbol_kind kind =3D GDB_INDEX_SYMBOL_KIND_VALUE (val); + if (kind !=3D GDB_INDEX_SYMBOL_KIND_TYPE + && kind !=3D GDB_INDEX_SYMBOL_KIND_VARIABLE) + return false; + + val &=3D ~GDB_INDEX_CU_MASK; + return !seen.insert (val).second; + }); + cu_indices.erase (from, cu_indices.end ()); } =20 /* A form of 'const char *' suitable for container keys. Only the @@ -1100,15 +1126,6 @@ write_cooked_index (cooked_index_vector *table, const cu_index_map &cu_index_htab, struct mapped_symtab *symtab) { - /* We track type names and only enter a given type once. */ - htab_up type_names (htab_create_alloc (10, htab_hash_string, htab_eq_str= ing, - nullptr, xcalloc, xfree)); - /* Same with variable names. However, if a type and variable share - a name, we want both, which is why there are two hash tables - here. */ - htab_up var_names (htab_create_alloc (10, htab_hash_string, htab_eq_stri= ng, - nullptr, xcalloc, xfree)); - const char *main_for_ada =3D main_name (); =20 for (const cooked_index_entry *entry : table->all_entries ()) @@ -1157,24 +1174,12 @@ write_cooked_index (cooked_index_vector *table, else if (entry->tag =3D=3D DW_TAG_variable || entry->tag =3D=3D DW_TAG_constant || entry->tag =3D=3D DW_TAG_enumerator) - { - kind =3D GDB_INDEX_SYMBOL_KIND_VARIABLE; - void **slot =3D htab_find_slot (var_names.get (), name, INSERT); - if (*slot !=3D nullptr) - continue; - *slot =3D (void *) name; - } + kind =3D GDB_INDEX_SYMBOL_KIND_VARIABLE; else if (entry->tag =3D=3D DW_TAG_module || entry->tag =3D=3D DW_TAG_common_block) kind =3D GDB_INDEX_SYMBOL_KIND_OTHER; else - { - kind =3D GDB_INDEX_SYMBOL_KIND_TYPE; - void **slot =3D htab_find_slot (type_names.get (), name, INSERT); - if (*slot !=3D nullptr) - continue; - *slot =3D (void *) name; - } + kind =3D GDB_INDEX_SYMBOL_KIND_TYPE; =20 add_index_entry (symtab, name, (entry->flags & IS_STATIC) !=3D 0, kind, it->second); @@ -1252,7 +1257,7 @@ write_gdbindex (dwarf2_per_objfile *per_objfile, =20 /* Now that we've processed all symbols we can shrink their cu_indices lists. */ - uniquify_cu_indices (&symtab); + symtab.minimize (); =20 data_buf symtab_vec, constant_pool; if (symtab.n_elements =3D=3D 0)