From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from relay6-d.mail.gandi.net (relay6-d.mail.gandi.net [217.70.183.198]) by sourceware.org (Postfix) with ESMTPS id CAB6B385843D for ; Tue, 13 Sep 2022 09:26:16 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org CAB6B385843D Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=seketeli.org Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=seketeli.org Received: (Authenticated sender: dodji@seketeli.org) by mail.gandi.net (Postfix) with ESMTPSA id 5478AC0015; Tue, 13 Sep 2022 09:26:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=seketeli.org; s=gm1; t=1663061175; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=jJoFCaXP8SfJRu45m3G7dv7qwPpS+Jz0eIAz64Y7rMA=; b=UqmPDtkqSDq763wWbgixdzlRr38zs2PrZYumY94iqE6DaizjoljrQsGXDL01Y2O24mds9c mWFvNvCYE78u4FIxthZvU5+Dq8dc23DScij1kKF3X2Sb34rb4c0dhEQeX6EYNshUMmKQHi VZrt+uy7hYluAVRXlREeqUzhVOb+y8ER+2J7DWd7yoqWtQ+/YuzX8M7m7YyVJ6gVhUNxIm CGbRdM7VkiP5RyYPPhJe2ew1efS1b+nIeJJqLXdayES31ynlcLguKR/pq27Rge4o2dVUY5 /uXbXDmMCXjCF/WYG/xjYJko9JEOWO9YJcwR8tVznxVMpTIVlKRKJhWLTXbPJQ== Received: by localhost (Postfix, from userid 1000) id 833DE5802BD; Tue, 13 Sep 2022 11:26:14 +0200 (CEST) From: Dodji Seketeli To: "Guillermo E. Martinez via Libabigail" Cc: "Guillermo E. Martinez" Subject: Re: [PATCHv v2] ctf-reader: Lookup debug info for symbols in a non default archive member Organization: Me, myself and I References: <20220831151603.915945-1-guillermo.e.martinez@oracle.com> <20220907234042.1610173-1-guillermo.e.martinez@oracle.com> X-Operating-System: Fedora 38 X-URL: http://www.seketeli.net/~dodji Date: Tue, 13 Sep 2022 11:26:14 +0200 In-Reply-To: <20220907234042.1610173-1-guillermo.e.martinez@oracle.com> (Guillermo E. 
Martinez via Libabigail's message of "Wed, 7 Sep 2022 18:40:42 -0500") Message-ID: <87tu5btykp.fsf@seketeli.org> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/27.1 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-9.8 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,JMQ_SPF_NEUTRAL,RCVD_IN_DNSWL_LOW,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Hello Guillermo, Thank you for the explanations and the updated patch. Everything is clear for me now! Thanks again. I have applied the patch to master, but just with some slight obvious changes that I am discussing below. "Guillermo E. Martinez via Libabigail" a =C3=A9crit: [...] > --- a/src/abg-ctf-reader.cc [...] > @@ -1204,6 +1204,61 @@ lookup_type(read_context *ctxt, corpus_sptr corp, > return result; > } >=20=20 > +/// Given a symbol name, lookup the corresponding CTF information in > +/// the default dictionary (CTF archive member provided by the caller) > +/// If the search is not success, the looks for the symbol name > +/// in _all_ archive members. > +/// > +/// @param ctfa the CTF archive. > +/// @param dict the default dictionary to looks for. > +/// @param sym_name the symbol name. > +/// @param corp the IR corpus. > +/// > +/// Note that if @ref sym_name is found in other than its default dictio= nary > +/// @ref ctf_dict will be updated and it must be explicitly closed by its > +/// caller. > +/// > +/// @return a valid CTF type id, if @ref sym_name was found, CTF_ERR oth= erwise. > + > +static ctf_id_t > +lookup_symbol_in_ctf_archive(ctf_archive_t *ctfa, ctf_dict_t **ctf_dict, > + const char *sym_name, corpus_sptr corp) It seems to me that the "corp" parameter is not used in the function, so I removed it. 
I have adjusted the doxygen comment to remove it as well. > +{ > + int ctf_err; > + ctf_dict_t *dict =3D *ctf_dict; > + ctf_id_t ctf_type =3D ctf_lookup_by_symbol_name(dict, sym_name); > + > + if (ctf_type !=3D CTF_ERR) > + return ctf_type; > + > + /* Probably --ctf-variables option was used by ld, so symbol type > + definition must be found in the CTF Variable section. */ > + ctf_type =3D ctf_lookup_variable(dict, sym_name); > + > + /* Not lucky, then, search in whole archive */ > + if (ctf_type =3D=3D CTF_ERR) > + { > + ctf_dict_t *fp; > + ctf_next_t *i =3D NULL; > + const char *arcname; > + > + while ((fp =3D ctf_archive_next(ctfa, &i, &arcname, 1, &ctf_err)) = !=3D NULL) > + { > + if ((ctf_type =3D ctf_lookup_by_symbol_name (fp, sym_name)) = =3D=3D CTF_ERR) > + ctf_type =3D ctf_lookup_variable(fp, sym_name); > + > + if (ctf_type !=3D CTF_ERR) > + { > + *ctf_dict =3D fp; > + break; > + } > + ctf_dict_close(fp); > + } > + } > + > + return ctf_type; > +} > + [...] > for (const auto& symbol : symtab_reader::filtered_symtab(*symtab, filt= er)) > { > std::string sym_name =3D symbol->get_name(); > ctf_id_t ctf_sym_type; >=20=20 > - ctf_sym_type =3D ctf_lookup_variable(ctf_dict, sym_name.c_str()); > - if (ctf_sym_type =3D=3D (ctf_id_t) -1 > - && !(corp->get_origin() & corpus::LINUX_KERNEL_BINARY_ORIGIN)) > - // lookup in function objects > - ctf_sym_type =3D ctf_lookup_by_symbol_name(ctf_dict, sym_name.c_= str()); > - > - if (ctf_sym_type =3D=3D (ctf_id_t) -1) > - continue; > + ctf_sym_type =3D lookup_symbol_in_ctf_archive(ctxt->ctfa, &ctf_dic= t, > + sym_name.c_str(), corp= ); I have adjusted that call to remove the "corp" argument as it's no longer needed. Oh, thanks for adjusting this code. Using lookup_symbol_in_ctf_archive here makes things a lot clearer to me at least! Below is the patch that I have applied. I have slightly amended the introductory text to correct some slight typos. 
>From ad47854627f76c7959ae1a7ae59c9fcda38091c5 Mon Sep 17 00:00:00 2001 From: "Guillermo E. Martinez via Libabigail" Date: Wed, 7 Sep 2022 18:40:42 -0500 Subject: [PATCH] ctf-reader: Lookup debug info for symbols in a non default= archive member The current mechanism used by the ctf reader for looking for debug information given a specific Linux symbol is the following: it opens the dictionary (default) which name matches the binary name being processed in the current corpus, e.g. `vmlinux' or `module-name`.ko. However there are symbols and information that are not located in the default dictionary; this is evident comparing the symbols in `Module.symvers' file with ABI XML file, so for example, the ctf reader is expecting to find the information for `LZ4_decompress_fast' symbol in the CTF `vmlinux' archive member, because this symbols is defined in `vmlinux' binary: 0x4c416eb9 LZ4_decompress_fast vmlinux EXPORT_SYMBOL But, it figures out that it is missing. The correct location is `vmlinux#0' dictionary: CTF archive member: vmlinux: ... Function objects: ... CTF archive member: vmlinux#0: Function objects: ... LZ4_decompress_fast -> 0x80037400: (kind 5) int (*) (const char *, char= *, int) (aligned at 0x8) ... Therefore, ctf reader must be looking for debug information in the whole archive; fortunately `libctf' provides a fast lookup mechanism using cache, dictionary references, etc., so the penalty performance is ~10%. Now, it make use of `ctf_lookup_by_symbol_name' at first instance which is in charge to locate symbol information given a symbol name on either CTF Function or Variable sections; if the symbol isn't found it tries using `ctf_lookup_variable' to look into the CTF Variable section; this could happens due to `ld' operating with the `--ctf-variables' option which makes function types information to reside in the CTF Variable section. * src/abg-ctf-reader.cc (lookup_symbol_in_ctf_archive): New function. 
(process_ctf_archive): Use `lookup_symbol_in_ctf_archive'. Signed-off-by: Guillermo E. Martinez Signed-off-by: Dodji Seketeli --- src/abg-ctf-reader.cc | 74 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/src/abg-ctf-reader.cc b/src/abg-ctf-reader.cc index 71808f9a..e307fcd7 100644 --- a/src/abg-ctf-reader.cc +++ b/src/abg-ctf-reader.cc @@ -1204,6 +1204,61 @@ lookup_type(read_context *ctxt, corpus_sptr corp, return result; } =20 +/// Given a symbol name, lookup the corresponding CTF information in +/// the default dictionary (CTF archive member provided by the caller) +/// If the search is not success, the looks for the symbol name +/// in _all_ archive members. +/// +/// @param ctfa the CTF archive. +/// @param dict the default dictionary to looks for. +/// @param sym_name the symbol name. +/// @param corp the IR corpus. +/// +/// Note that if @ref sym_name is found in other than its default dictiona= ry +/// @ref ctf_dict will be updated and it must be explicitly closed by its +/// caller. +/// +/// @return a valid CTF type id, if @ref sym_name was found, CTF_ERR other= wise. + +static ctf_id_t +lookup_symbol_in_ctf_archive(ctf_archive_t *ctfa, ctf_dict_t **ctf_dict, + const char *sym_name) +{ + int ctf_err; + ctf_dict_t *dict =3D *ctf_dict; + ctf_id_t ctf_type =3D ctf_lookup_by_symbol_name(dict, sym_name); + + if (ctf_type !=3D CTF_ERR) + return ctf_type; + + /* Probably --ctf-variables option was used by ld, so symbol type + definition must be found in the CTF Variable section. 
*/ + ctf_type =3D ctf_lookup_variable(dict, sym_name); + + /* Not lucky, then, search in whole archive */ + if (ctf_type =3D=3D CTF_ERR) + { + ctf_dict_t *fp; + ctf_next_t *i =3D NULL; + const char *arcname; + + while ((fp =3D ctf_archive_next(ctfa, &i, &arcname, 1, &ctf_err)) != =3D NULL) + { + if ((ctf_type =3D ctf_lookup_by_symbol_name (fp, sym_name)) =3D= =3D CTF_ERR) + ctf_type =3D ctf_lookup_variable(fp, sym_name); + + if (ctf_type !=3D CTF_ERR) + { + *ctf_dict =3D fp; + break; + } + ctf_dict_close(fp); + } + } + + return ctf_type; +} + /// Process a CTF archive and create libabigail IR for the types, /// variables and function declarations found in the archive, iterating /// over public symbols. The IR is added to the given corpus. @@ -1222,7 +1277,7 @@ process_ctf_archive(read_context *ctxt, corpus_sptr c= orp) corp->add(ir_translation_unit); =20 int ctf_err; - ctf_dict_t *ctf_dict; + ctf_dict_t *ctf_dict, *dict_tmp; const auto symtab =3D ctxt->symtab; symtab_reader::symtab_filter filter =3D symtab->make_filter(); filter.set_public_symbols(); @@ -1248,19 +1303,17 @@ process_ctf_archive(read_context *ctxt, corpus_sptr= corp) abort(); } =20 + dict_tmp =3D ctf_dict; + for (const auto& symbol : symtab_reader::filtered_symtab(*symtab, filter= )) { std::string sym_name =3D symbol->get_name(); ctf_id_t ctf_sym_type; =20 - ctf_sym_type =3D ctf_lookup_variable(ctf_dict, sym_name.c_str()); - if (ctf_sym_type =3D=3D (ctf_id_t) -1 - && !(corp->get_origin() & corpus::LINUX_KERNEL_BINARY_ORIGIN)) - // lookup in function objects - ctf_sym_type =3D ctf_lookup_by_symbol_name(ctf_dict, sym_name.c_st= r()); - - if (ctf_sym_type =3D=3D (ctf_id_t) -1) - continue; + ctf_sym_type =3D lookup_symbol_in_ctf_archive(ctxt->ctfa, &ctf_dict, + sym_name.c_str()); + if (ctf_sym_type =3D=3D CTF_ERR) + continue; =20 if (ctf_type_kind(ctf_dict, ctf_sym_type) !=3D CTF_K_FUNCTION) { @@ -1298,13 +1351,14 @@ process_ctf_archive(read_context *ctxt, corpus_sptr= corp) func_type, 0 /* is_inline */, 
location())); - func_declaration->set_symbol(symbol); add_decl_to_scope(func_declaration, ir_translation_unit->get_global_scope()); func_declaration->set_is_in_public_symbol_table(true); ctxt->maybe_add_fn_to_exported_decls(func_declaration.get()); } + + ctf_dict =3D dict_tmp; } =20 ctf_dict_close(ctf_dict); --=20 2.37.2 --=20 Dodji