From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lndn.lancelotsix.com (lndn.lancelotsix.com [51.195.220.111]) by sourceware.org (Postfix) with ESMTPS id 00C88385801E for ; Sun, 15 Aug 2021 14:51:53 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 00C88385801E Received: from lsix-M11x-R2 (unknown [IPv6:2a01:e34:ef9a:d0b0:2bc0:2173:dba3:b87b]) by lndn.lancelotsix.com (Postfix) with ESMTPSA id 2E69380D7A; Sun, 15 Aug 2021 14:51:52 +0000 (UTC) Date: Sun, 15 Aug 2021 15:51:47 +0100 From: Lancelot SIX To: Aaron Merey Cc: gdb-patches@sourceware.org, simon.marchi@polymtl.ca, tom@tromey.com Subject: Re: [PATCH 2/3] gdb: Add soname to build-id mapping for corefiles Message-ID: <20210815145147.ap32fq6wji34wjyf@lsix-M11x-R2> References: <20210812042406.75637-1-amerey@redhat.com> <20210812042406.75637-3-amerey@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20210812042406.75637-3-amerey@redhat.com> X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.5.11 (lndn.lancelotsix.com [0.0.0.0]); Sun, 15 Aug 2021 14:51:52 +0000 (UTC) X-Spam-Status: No, score=-11.5 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_SHORT, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gdb-patches@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gdb-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 15 Aug 2021 14:52:04 -0000 Hi, I have a few comments I placed bellow. On Thu, Aug 12, 2021 at 12:24:05AM -0400, Aaron Merey via Gdb-patches wrote: > Since commit aa2d5a422 gdb has been able to read executable and shared > library build-ids within core files. 
> > Expand this functionality so that each program_space maintains a map of > sonames to build-ids for each shared library referenced in the program_space's > core file. > > This feature may be used to verify that gdb has found the correct shared > libraries for core files and to facilitate downloading shared libaries via > debuginfod. > --- > gdb/arch-utils.c | 21 +++++++++---------- > gdb/arch-utils.h | 21 +++++++++---------- > gdb/build-id.h | 2 ++ > gdb/corelow.c | 13 +++++++++++- > gdb/gdbarch.c | 2 +- > gdb/gdbarch.h | 4 ++-- > gdb/gdbarch.sh | 2 +- > gdb/linux-tdep.c | 52 +++++++++++++++++++++++++++++++++++++----------- > gdb/progspace.c | 36 +++++++++++++++++++++++++++++++++ > gdb/progspace.h | 17 ++++++++++++++++ > gdb/solib.c | 35 ++++++++++++++++++++++++++++++++ > gdb/solib.h | 5 +++++ > 12 files changed, 173 insertions(+), 37 deletions(-) > > diff --git a/gdb/arch-utils.c b/gdb/arch-utils.c > index 4290d637ce1..4c7497e6b4c 100644 > --- a/gdb/arch-utils.c > +++ b/gdb/arch-utils.c > @@ -1072,16 +1072,17 @@ default_get_pc_address_flags (frame_info *frame, CORE_ADDR pc) > > /* See arch-utils.h. */ > void > -default_read_core_file_mappings (struct gdbarch *gdbarch, > - struct bfd *cbfd, > - gdb::function_view > - pre_loop_cb, > - gdb::function_view - ULONGEST start, > - ULONGEST end, > - ULONGEST file_ofs, > - const char *filename)> > - loop_cb) > +default_read_core_file_mappings > + (struct gdbarch *gdbarch, > + struct bfd *cbfd, > + gdb::function_view pre_loop_cb, > + gdb::function_view + ULONGEST start, > + ULONGEST end, > + ULONGEST file_ofs, > + const char *filename, > + const bfd_build_id *build_id)> > + loop_cb) It looks like 'loop_cb' could go on the previous line. If the type of the function callbacks are too big, I guess it could be possible to give them a name before declaring the function. 
Something like using loop_cb_ftype = gdb::function_view; > { > } > > diff --git a/gdb/arch-utils.h b/gdb/arch-utils.h > index 03e9082f6d7..9139438c5fd 100644 > --- a/gdb/arch-utils.h > +++ b/gdb/arch-utils.h > @@ -295,14 +295,15 @@ extern std::string default_get_pc_address_flags (frame_info *frame, > CORE_ADDR pc); > > /* Default implementation of gdbarch read_core_file_mappings method. */ > -extern void default_read_core_file_mappings (struct gdbarch *gdbarch, > - struct bfd *cbfd, > - gdb::function_view > - pre_loop_cb, > - gdb::function_view - ULONGEST start, > - ULONGEST end, > - ULONGEST file_ofs, > - const char *filename)> > - loop_cb); > +extern void default_read_core_file_mappings > + (struct gdbarch *gdbarch, > + struct bfd *cbfd, > + gdb::function_view pre_loop_cb, > + gdb::function_view + ULONGEST start, > + ULONGEST end, > + ULONGEST file_ofs, > + const char *filename, > + const bfd_build_id *build_id)> > + loop_cb); loop_cb could also go up one line here. > #endif /* ARCH_UTILS_H */ > diff --git a/gdb/build-id.h b/gdb/build-id.h > index 42f8d57ede1..3c9402ee71b 100644 > --- a/gdb/build-id.h > +++ b/gdb/build-id.h > @@ -20,8 +20,10 @@ > #ifndef BUILD_ID_H > #define BUILD_ID_H > > +#include "defs.h" > #include "gdb_bfd.h" > #include "gdbsupport/rsp-low.h" > +#include > > /* Locate NT_GNU_BUILD_ID from ABFD and return its content. */ > > diff --git a/gdb/corelow.c b/gdb/corelow.c > index eb785a08633..97eadceed84 100644 > --- a/gdb/corelow.c > +++ b/gdb/corelow.c > @@ -214,7 +214,7 @@ core_target::build_file_mappings () > /* read_core_file_mappings will invoke this lambda for each mapping > that it finds. */ > [&] (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, > - const char *filename) > + const char *filename, const bfd_build_id *build_id) > { > /* Architecture-specific read_core_mapping methods are expected to > weed out non-file-backed mappings. */ > @@ -282,6 +282,16 @@ core_target::build_file_mappings () > > /* Set target_section fields. 
*/ > m_core_file_mappings.emplace_back (start, end, sec); > + > + /* If this is a bfd of a shared library, record its soname > + and build id. */ > + if (build_id != nullptr) > + { > + gdb::optional soname = gdb_bfd_read_elf_soname (bfd); > + if (soname) > + current_program_space->set_cbfd_soname_build_id (soname->data (), > + build_id); Here, since set_cbfd_soname_build_id's first argument is a std::string, you could just use '*soname' instead of 'soname->data ()'. > + } > }); > > normalize_mem_ranges (&m_core_unavailable_mappings); > @@ -305,6 +315,7 @@ core_target::close () > comments in clear_solib in solib.c. */ > clear_solib (); > > + current_program_space->clear_cbfd_soname_build_ids (); > current_program_space->cbfd.reset (nullptr); > } > > diff --git a/gdb/gdbarch.c b/gdb/gdbarch.c > index 830a86df89f..b6472bb36d5 100644 > --- a/gdb/gdbarch.c > +++ b/gdb/gdbarch.c > @@ -5411,7 +5411,7 @@ set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch, > } > > void > -gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb) > +gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb) > { > gdb_assert (gdbarch != NULL); > gdb_assert (gdbarch->read_core_file_mappings != NULL); > diff --git a/gdb/gdbarch.h b/gdb/gdbarch.h > index 7db3e36d76a..dbd1fa0afc7 100644 > --- a/gdb/gdbarch.h > +++ b/gdb/gdbarch.h > @@ -1710,8 +1710,8 @@ extern void set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch, gdbarch_g > > /* Read core file mappings */ > > -typedef void (gdbarch_read_core_file_mappings_ftype) (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb); > -extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb); > +typedef void (gdbarch_read_core_file_mappings_ftype) (struct 
gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb); > +extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb); > extern void set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, gdbarch_read_core_file_mappings_ftype *read_core_file_mappings); > > extern struct gdbarch_tdep *gdbarch_tdep (struct gdbarch *gdbarch); > diff --git a/gdb/gdbarch.sh b/gdb/gdbarch.sh > index 9bc9de91c30..56679b8fee6 100755 > --- a/gdb/gdbarch.sh > +++ b/gdb/gdbarch.sh > @@ -1210,7 +1210,7 @@ m;ULONGEST;type_align;struct type *type;type;;default_type_align;;0 > f;std::string;get_pc_address_flags;frame_info *frame, CORE_ADDR pc;frame, pc;;default_get_pc_address_flags;;0 > > # Read core file mappings > -m;void;read_core_file_mappings;struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb;cbfd, pre_loop_cb, loop_cb;;default_read_core_file_mappings;;0 > +m;void;read_core_file_mappings;struct bfd *cbfd, gdb::function_view pre_loop_cb, gdb::function_view loop_cb;cbfd, pre_loop_cb, loop_cb;;default_read_core_file_mappings;;0 > > EOF > } > diff --git a/gdb/linux-tdep.c b/gdb/linux-tdep.c > index 637d3d36a0b..eb35a2b5297 100644 > --- a/gdb/linux-tdep.c > +++ b/gdb/linux-tdep.c > @@ -43,6 +43,7 @@ > #include "gcore-elf.h" > > #include > +#include > > /* This enum represents the values that the user can choose when > informing the Linux kernel about which memory mappings will be > @@ -1096,16 +1097,17 @@ linux_info_proc (struct gdbarch *gdbarch, const char *args, > for each mapping. 
*/ > > static void > -linux_read_core_file_mappings (struct gdbarch *gdbarch, > - struct bfd *cbfd, > - gdb::function_view > - pre_loop_cb, > - gdb::function_view - ULONGEST start, > - ULONGEST end, > - ULONGEST file_ofs, > - const char *filename)> > - loop_cb) > +linux_read_core_file_mappings > + (struct gdbarch *gdbarch, > + struct bfd *cbfd, > + gdb::function_view pre_loop_cb, > + gdb::function_view + ULONGEST start, > + ULONGEST end, > + ULONGEST file_ofs, > + const char *filename, > + const bfd_build_id *build_id)> > + loop_cb) 'loop_cb' could be on the line above. > { > /* Ensure that ULONGEST is big enough for reading 64-bit core files. */ > gdb_static_assert (sizeof (ULONGEST) >= 8); > @@ -1174,6 +1176,23 @@ linux_read_core_file_mappings (struct gdbarch *gdbarch, > if (f != descend) > warning (_("malformed note - filename area is too big")); > > + const bfd_build_id *orig_build_id = cbfd->build_id; > + std::unordered_map vma_map; > + std::unordered_map filename_map; > + > + /* Search for solib build-ids in the core file. Each time one is found, > + map the start vma of the corresponding elf header to the build-id. */ > + for (bfd_section *sec = cbfd->sections; sec != nullptr; sec = sec->next) > + { > + cbfd->build_id = nullptr; > + > + if (sec->flags & SEC_LOAD > + && get_elf_backend_data (cbfd)->elf_backend_core_find_build_id > + (cbfd, (bfd_vma) sec->filepos)) > + vma_map[sec->vma] = cbfd->build_id; > + } > + > + cbfd->build_id = orig_build_id; > pre_loop_cb (count); > > for (int i = 0; i < count; i++) > @@ -1187,8 +1206,17 @@ linux_read_core_file_mappings (struct gdbarch *gdbarch, > descdata += addr_size; > char * filename = filenames; > filenames += strlen ((char *) filenames) + 1; > + const bfd_build_id *build_id = vma_map[start]; > + > + /* Map filename to the build-id associated with this start vma, > + if such a build-id was found. Otherwise use the build-id > + already associated with this filename if it exists. 
*/ > + if (build_id != nullptr) > + filename_map[filename] = build_id; > + else > + build_id = filename_map[filename]; > > - loop_cb (i, start, end, file_ofs, filename); > + loop_cb (i, start, end, file_ofs, filename, build_id); > } > } > > @@ -1217,7 +1245,7 @@ linux_core_info_proc_mappings (struct gdbarch *gdbarch, const char *args) > } > }, > [=] (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, > - const char *filename) > + const char *filename, const bfd_build_id *build_id) > { > if (gdbarch_addr_bit (gdbarch) == 32) > printf_filtered ("\t%10s %10s %10s %10s %s\n", > diff --git a/gdb/progspace.c b/gdb/progspace.c > index 7080bf8ee27..d39bd45fcf4 100644 > --- a/gdb/progspace.c > +++ b/gdb/progspace.c > @@ -17,6 +17,7 @@ > You should have received a copy of the GNU General Public License > along with this program. If not, see . */ > > +#include "build-id.h" > #include "defs.h" > #include "gdbcmd.h" > #include "objfiles.h" > @@ -358,6 +359,41 @@ print_program_space (struct ui_out *uiout, int requested) > } > } > > +/* See progspace.h. */ > + > +void > +program_space::set_cbfd_soname_build_id (std::string soname, This parameter could be 'std::string const &' or... > + const bfd_build_id *build_id) > +{ > + std::string build_id_hex = build_id_to_string (build_id); > + cbfd_soname_to_build_id[soname] = build_id_hex; ... use 'std::move (soname)' here. I guess the more 'usual' approach would be to have the argument as a const reference (but to be honest, the implication of calling one more ctor and copying the soname is negligible, to say the least). > + > + return; I am not sure if the GNU coding standard says something about this, but 'return;' as the last statement of a void function is redundant. > +} > + > +/* See progspace.h. */ > + > +const char * > +program_space::get_cbfd_soname_build_id (const char *soname) With set_cbfd_soname_build_id using a std::string, I would find it more consistent to use std::string here also. 
Is there a reason not to use it that I missed? You could use 'basename (soname.c_str ())' below. The return type could also be 'const std::string *' (the map stores std::string internally), but keeping a const char * is pretty similar. > +{ > + gdb_assert (soname); > + > + auto it = cbfd_soname_to_build_id.find (basename (soname)); > + if (it == cbfd_soname_to_build_id.end ()) > + return nullptr; > + > + return it->second.c_str (); > +} > + > +/* See progspace.h. */ > + > +void > +program_space::clear_cbfd_soname_build_ids () > +{ > + cbfd_soname_to_build_id.clear (); > + return; Same here, I guess 'return;' could be removed. > +} > + > /* Boolean test for an already-known program space id. */ > > static int > diff --git a/gdb/progspace.h b/gdb/progspace.h > index fb348ca7539..b42b3ffc4f1 100644 > --- a/gdb/progspace.h > +++ b/gdb/progspace.h > @@ -30,6 +30,7 @@ > #include "gdbsupport/safe-iterator.h" > #include > #include > +#include > > struct target_ops; > struct bfd; > @@ -324,6 +325,19 @@ struct program_space > /* Binary file diddling handle for the core file. */ > gdb_bfd_ref_ptr cbfd; > > + /* Associate a core file SONAME with BUILD_ID so that it can be retrieved > + with get_cbfd_soname_build_id. */ > + void set_cbfd_soname_build_id (std::string soname, > + const bfd_build_id *build_id); > + > + /* If a core file SONAME had a build-id associated with it by a previous > + call to set_cbfd_soname_build_id then return the build-id as a > + NULL-terminated hex string. */ > + const char *get_cbfd_soname_build_id (const char *soname); > + > + /* Clear all core file soname to build-id mappings. */ > + void clear_cbfd_soname_build_ids (); > + > /* The address space attached to this program space. More than one > program space may be bound to the same address space. In the > traditional unix-like debugging scenario, this will usually > @@ -378,6 +392,9 @@ struct program_space > /* The set of target sections matching the sections mapped into > this program space. 
Managed by both exec_ops and solib.c. */ > target_section_table m_target_sections; > + > + /* Mapping of a core file's library sonames to their respective build-ids. */ > + std::unordered_map cbfd_soname_to_build_id; > }; > > /* An address space. It is used for comparing if > diff --git a/gdb/solib.c b/gdb/solib.c > index e30affbb7e7..8b92cf7db53 100644 > --- a/gdb/solib.c > +++ b/gdb/solib.c > @@ -23,6 +23,7 @@ > #include > #include "symtab.h" > #include "bfd.h" > +#include "build-id.h" > #include "symfile.h" > #include "objfiles.h" > #include "gdbcore.h" > @@ -1585,6 +1586,40 @@ gdb_bfd_scan_elf_dyntag (const int desired_dyntag, bfd *abfd, CORE_ADDR *ptr, > return 0; > } > > +/* See solib.h. */ > + > +gdb::optional > +gdb_bfd_read_elf_soname (struct bfd *bfd) > +{ > + gdb_assert (bfd != nullptr); > + > + gdb_bfd_ref_ptr abfd = gdb_bfd_open (bfd->filename, gnutarget); > + > + if (abfd == nullptr) > + return gdb::optional (); > + > + /* Check that bfd is an ET_DYN ELF file. */ > + bfd_check_format (abfd.get (), bfd_object); > + if (!(bfd_get_file_flags (abfd.get ()) & DYNAMIC)) > + return gdb::optional (); > + > + /* Determine soname of shared library. If found map soname to build-id. */ > + CORE_ADDR idx; > + if (!gdb_bfd_scan_elf_dyntag (DT_SONAME, abfd.get (), &idx, nullptr)) > + return gdb::optional (); > + > + struct bfd_section *dynstr = bfd_get_section_by_name (abfd.get (), ".dynstr"); > + if (dynstr == nullptr) > + return gdb::optional (); > + > + /* Read the soname from the string table. */ > + gdb::byte_vector dynstr_buf; > + if (!gdb_bfd_get_full_section_contents (abfd.get (), dynstr, &dynstr_buf)) > + return gdb::optional (); > + > + return gdb::optional ((char *)dynstr_buf.data () + idx); This will not change much, but you could cast to 'const char *' (this is the type the std::string constructor expects). > +} > + > /* Lookup the value for a specific symbol from symbol table. Look up symbol > from ABFD. 
MATCH_SYM is a callback function to determine whether to pick > up a symbol. DATA is the input of this callback function. Return NULL > diff --git a/gdb/solib.h b/gdb/solib.h > index c50f74e06bf..51cc047463f 100644 > --- a/gdb/solib.h > +++ b/gdb/solib.h > @@ -118,6 +118,11 @@ extern CORE_ADDR gdb_bfd_lookup_symbol_from_symtab (bfd *abfd, > extern int gdb_bfd_scan_elf_dyntag (const int desired_dyntag, bfd *abfd, > CORE_ADDR *ptr, CORE_ADDR *ptr_addr); > > +/* If BFD is an ELF shared object then attempt to return the string > + referred to by its DT_SONAME tag. */ > + > +extern gdb::optional gdb_bfd_read_elf_soname (struct bfd *bfd); > + > /* Enable or disable optional solib event breakpoints as appropriate. */ > > extern void update_solib_breakpoints (void); > -- > 2.31.1 > I hope the comments are helpful. Best, Lancelot.