From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io1-xd35.google.com (mail-io1-xd35.google.com [IPv6:2607:f8b0:4864:20::d35]) by sourceware.org (Postfix) with ESMTPS id 4D2EC385842F for ; Fri, 14 Oct 2022 16:09:04 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 4D2EC385842F Received: by mail-io1-xd35.google.com with SMTP id 187so4241424iov.10 for ; Fri, 14 Oct 2022 09:09:04 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=e5gDdcbGPtN0JPaK8e+LUrLJpBk0MzefrsWYEx3l2P4=; b=sRTJ1FZ1kk+W6nUvntu3SkcV0BjVJdpOuhDel1lph2paJxC3KvVFE1wTC+6mtx9fYf Whz3hJTARaTa1WGduBODcK38WOnvRi/dobIMU9oF4lfmZmRQmqcZBIcnRpbQowixjEFX 5ursh1aCA19eu170a0J2wDOAPqFB3/tE6yKQvTO7paUGsFcuMbzUsrOK/P3wPA4pAhuE 2SMjx9ftfoGKb5izzYFAvEYC10iQr9nH/waYRL9mXk1wfXWEcZ3nW0Xc8CfEu+y53vOR 4wLdV/QkrED5RHl9bGs8WJeBNFD+CkoCkIxPnrakKVMqPoI74uMIzCrWFkK0qdrBRAU5 r28g== X-Gm-Message-State: ACrzQf2yHTysFA9ZT3SgVGCQ7oTx8j9xoX5pOr+v6DEZ4nl3KCyHaDBz rxvFaucASrFbEvkgOX84F3mzJN4faZn+Xw== X-Google-Smtp-Source: AMsMyM5uhgP0/RCKUyFy8oplqJFZcKCm5XnZejKbWZ/+9JLam24lJD8ikrVIdFksPKc8jkgOOmb0Mw== X-Received: by 2002:a02:cc9c:0:b0:363:e0e6:ec1b with SMTP id s28-20020a02cc9c000000b00363e0e6ec1bmr3142060jap.89.1665763743483; Fri, 14 Oct 2022 09:09:03 -0700 (PDT) Received: from localhost.localdomain (71-211-160-49.hlrn.qwest.net. 
[71.211.160.49]) by smtp.gmail.com with ESMTPSA id b7-20020a026f47000000b00363b0517662sm1275501jae.12.2022.10.14.09.09.02 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 14 Oct 2022 09:09:03 -0700 (PDT) From: Tom Tromey To: gdb-patches@sourceware.org Cc: Tom Tromey Subject: [PATCH v2 4/4] Change .gdb_index de-duplication implementation Date: Fri, 14 Oct 2022 10:08:49 -0600 Message-Id: <20221014160849.919621-5-tromey@adacore.com> X-Mailer: git-send-email 2.34.3 In-Reply-To: <20221014160849.919621-1-tromey@adacore.com> References: <20221014160849.919621-1-tromey@adacore.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-10.6 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gdb-patches@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gdb-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 14 Oct 2022 16:09:09 -0000 While investigating PR symtab/29179, I found that one Ada test failed because, although a certain symbol was present in the index, with the new DWARF reader it pointed to a different CU than was chosen by earlier versions of gdb. This patch changes how symbol de-duplication is done, deferring the process until the entire symbol table has been constructed. This way, it's possible to always choose the lower-numbered CU among duplicates, which is how gdb (implicitly) previously worked. 
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=29179 --- gdb/dwarf2/index-write.c | 77 +++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c index 6b4052c3467..f592734addc 100644 --- a/gdb/dwarf2/index-write.c +++ b/gdb/dwarf2/index-write.c @@ -176,6 +176,10 @@ struct symtab_index_entry /* A sorted vector of the indices of all the CUs that hold an object of this name. */ std::vector<offset_type> cu_indices; + + /* Minimize CU_INDICES, sorting them and removing duplicates as + appropriate. */ + void minimize (); }; /* The symbol table. This is a power-of-2-sized hash table. */ @@ -186,6 +190,13 @@ struct mapped_symtab data.resize (1024); } + /* Minimize each entry in the symbol table, removing duplicates. */ + void minimize () + { + for (symtab_index_entry &item : data) + item.minimize (); + } + offset_type n_elements = 0; std::vector<symtab_index_entry> data; @@ -271,21 +282,36 @@ add_index_entry (struct mapped_symtab *symtab, const char *name, slot.cu_indices.push_back (cu_index_and_attrs); } -/* Sort and remove duplicates of all symbols' cu_indices lists. */ +/* See symtab_index_entry. */ -static void -uniquify_cu_indices (struct mapped_symtab *symtab) +void +symtab_index_entry::minimize () { - for (auto &entry : symtab->data) + if (name == nullptr || cu_indices.empty ()) + return; + + std::sort (cu_indices.begin (), cu_indices.end ()); + auto from = std::unique (cu_indices.begin (), cu_indices.end ()); + cu_indices.erase (from, cu_indices.end ()); + + /* We don't want to enter a variable or type more than once, so + remove any such duplicates from the list as well. When doing + this, we want to keep the entry from the first CU -- but this is + implicit due to the sort. This choice is done because it's + similar to what gdb historically did for partial symbols. 
*/ + std::unordered_set<offset_type> seen; + from = std::remove_if (cu_indices.begin (), cu_indices.end (), + [&] (offset_type val) { - if (entry.name != NULL && !entry.cu_indices.empty ()) - { - auto &cu_indices = entry.cu_indices; - std::sort (cu_indices.begin (), cu_indices.end ()); - auto from = std::unique (cu_indices.begin (), cu_indices.end ()); - cu_indices.erase (from, cu_indices.end ()); - } - } + gdb_index_symbol_kind kind = GDB_INDEX_SYMBOL_KIND_VALUE (val); + if (kind != GDB_INDEX_SYMBOL_KIND_TYPE + && kind != GDB_INDEX_SYMBOL_KIND_VARIABLE) + return false; + + val &= ~GDB_INDEX_CU_MASK; + return !seen.insert (val).second; + }); + cu_indices.erase (from, cu_indices.end ()); } /* A form of 'const char *' suitable for container keys. Only the @@ -1100,15 +1126,6 @@ write_cooked_index (cooked_index_vector *table, const cu_index_map &cu_index_htab, struct mapped_symtab *symtab) { - /* We track type names and only enter a given type once. */ - htab_up type_names (htab_create_alloc (10, htab_hash_string, htab_eq_string, - nullptr, xcalloc, xfree)); - /* Same with variable names. However, if a type and variable share - a name, we want both, which is why there are two hash tables - here. 
*/ - htab_up var_names (htab_create_alloc (10, htab_hash_string, htab_eq_string, - nullptr, xcalloc, xfree)); - const char *main_for_ada = main_name (); for (const cooked_index_entry *entry : table->all_entries ()) @@ -1157,24 +1174,12 @@ write_cooked_index (cooked_index_vector *table, else if (entry->tag == DW_TAG_variable || entry->tag == DW_TAG_constant || entry->tag == DW_TAG_enumerator) - { - kind = GDB_INDEX_SYMBOL_KIND_VARIABLE; - void **slot = htab_find_slot (var_names.get (), name, INSERT); - if (*slot != nullptr) - continue; - *slot = (void *) name; - } + kind = GDB_INDEX_SYMBOL_KIND_VARIABLE; else if (entry->tag == DW_TAG_module || entry->tag == DW_TAG_common_block) kind = GDB_INDEX_SYMBOL_KIND_OTHER; else - { - kind = GDB_INDEX_SYMBOL_KIND_TYPE; - void **slot = htab_find_slot (type_names.get (), name, INSERT); - if (*slot != nullptr) - continue; - *slot = (void *) name; - } + kind = GDB_INDEX_SYMBOL_KIND_TYPE; add_index_entry (symtab, name, (entry->flags & IS_STATIC) != 0, kind, it->second); @@ -1252,7 +1257,7 @@ write_gdbindex (dwarf2_per_objfile *per_objfile, /* Now that we've processed all symbols we can shrink their cu_indices lists. */ - uniquify_cu_indices (&symtab); + symtab.minimize (); data_buf symtab_vec, constant_pool; if (symtab.n_elements == 0) -- 2.34.3