From: "Martin Liška" <mliska@suse.cz>
To: Fangrui Song <maskray@google.com>
Cc: binutils@sourceware.org, Alan Modra <amodra@gmail.com>
Subject: Re: [PATCH] readelf: support zstd compressed debug sections [PR 29640]
Date: Fri, 14 Oct 2022 10:26:54 +0200 [thread overview]
Message-ID: <5eeed442-a153-5805-eae9-44b006506fd1@suse.cz> (raw)
In-Reply-To: <56a23340-a416-0ac9-dc17-4be601edacf7@suse.cz>
On 10/14/22 09:47, Martin Liška wrote:
> On 10/14/22 05:35, Fangrui Song wrote:
>> On 2022-10-13, Martin Liška wrote:
>>> On 10/1/22 08:20, Fangrui Song wrote:
>>>> ---
>>>> binutils/Makefile.am | 6 +--
>>>> binutils/Makefile.in | 6 +--
>>>> binutils/readelf.c | 112 +++++++++++++++++++++++++++----------------
>>>> 3 files changed, 78 insertions(+), 46 deletions(-)
>>>>
>>>> diff --git a/binutils/Makefile.am b/binutils/Makefile.am
>>>> index 751fbacce12..b249af721ae 100644
>>>> --- a/binutils/Makefile.am
>>>> +++ b/binutils/Makefile.am
>>>> @@ -54,8 +54,8 @@ DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
>>>> WARN_CFLAGS = @WARN_CFLAGS@
>>>> WARN_CFLAGS_FOR_BUILD = @WARN_CFLAGS_FOR_BUILD@
>>>> NO_WERROR = @NO_WERROR@
>>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>> LIBICONV = @LIBICONV@
>>>>
>>>> # these two are almost the same program
>>>> @@ -256,7 +256,7 @@ objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>> strings_SOURCES = strings.c $(BULIBS)
>>>>
>>>> readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>>
>>>> elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>>> elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>>> diff --git a/binutils/Makefile.in b/binutils/Makefile.in
>>>> index 6de4e239408..7d9039e0f2f 100644
>>>> --- a/binutils/Makefile.in
>>>> +++ b/binutils/Makefile.in
>>>> @@ -651,8 +651,8 @@ am__skipyacc =
>>>> # case both are empty.
>>>> ZLIB = @zlibdir@ -lz
>>>> ZLIBINC = @zlibinc@
>>>> -AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC)
>>>> -AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC)
>>>> +AM_CFLAGS = $(WARN_CFLAGS) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>> +AM_CFLAGS_FOR_BUILD = $(WARN_CFLAGS_FOR_BUILD) $(ZLIBINC) $(ZSTD_CFLAGS)
>>>>
>>>> # these two are almost the same program
>>>> AR_PROG = ar
>>>> @@ -790,7 +790,7 @@ size_SOURCES = size.c $(BULIBS)
>>>> objcopy_SOURCES = objcopy.c not-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>> strings_SOURCES = strings.c $(BULIBS)
>>>> readelf_SOURCES = readelf.c version.c unwind-ia64.c dwarf.c demanguse.c $(ELFLIBS)
>>>> -readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>> +readelf_LDADD = $(LIBCTF_NOBFD) $(LIBINTL) $(LIBIBERTY) $(ZLIB) $(ZSTD_LIBS) $(DEBUGINFOD_LIBS) $(MSGPACK_LIBS)
>>>> elfedit_SOURCES = elfedit.c version.c $(ELFLIBS)
>>>> elfedit_LDADD = $(LIBINTL) $(LIBIBERTY)
>>>> strip_new_SOURCES = objcopy.c is-strip.c rename.c $(WRITE_DEBUG_SRCS) $(BULIBS)
>>>> diff --git a/binutils/readelf.c b/binutils/readelf.c
>>>> index 351571c8abb..04cda213f33 100644
>>>> --- a/binutils/readelf.c
>>>> +++ b/binutils/readelf.c
>>>> @@ -44,6 +44,9 @@
>>>> #include <assert.h>
>>>> #include <time.h>
>>>> #include <zlib.h>
>>>> +#ifdef HAVE_ZSTD
>>>> +#include <zstd.h>
>>>> +#endif
>>>> #include <wchar.h>
>>>>
>>>> #if defined HAVE_MSGPACK
>>>> @@ -15170,48 +15173,56 @@ get_section_contents (Elf_Internal_Shdr * section, Filedata * filedata)
>>>> _("section contents"));
>>>> }
>>>>
>>>> -/* Uncompresses a section that was compressed using zlib, in place. */
>>>> +/* Uncompresses a section that was compressed using zlib/zstd, in place. */
>>>>
>>>> static bool
>>>> -uncompress_section_contents (unsigned char **buffer,
>>>> - uint64_t uncompressed_size,
>>>> - uint64_t *size)
>>>> +uncompress_section_contents (bool is_zstd, unsigned char **buffer,
>>>> + uint64_t uncompressed_size, uint64_t *size)
>>>> {
>>>> uint64_t compressed_size = *size;
>>>> unsigned char *compressed_buffer = *buffer;
>>>> - unsigned char *uncompressed_buffer;
>>>> + unsigned char *uncompressed_buffer = xmalloc (uncompressed_size);
>>>> z_stream strm;
>>>> int rc;
>>>>
>>>> - /* It is possible the section consists of several compressed
>>>> - buffers concatenated together, so we uncompress in a loop. */
>>>> - /* PR 18313: The state field in the z_stream structure is supposed
>>>> - to be invisible to the user (ie us), but some compilers will
>>>> - still complain about it being used without initialisation. So
>>>> - we first zero the entire z_stream structure and then set the fields
>>>> - that we need. */
>>>> - memset (& strm, 0, sizeof strm);
>>>> - strm.avail_in = compressed_size;
>>>> - strm.next_in = (Bytef *) compressed_buffer;
>>>> - strm.avail_out = uncompressed_size;
>>>> - uncompressed_buffer = (unsigned char *) xmalloc (uncompressed_size);
>>>> + if (is_zstd)
>>>> + {
>>>> +#ifdef HAVE_ZSTD
>>>> + size_t ret = ZSTD_decompress (uncompressed_buffer, uncompressed_size,
>>>> + compressed_buffer, compressed_size);
>>>> + if (ZSTD_isError (ret))
>>>> + goto fail;
>>>> +#endif
>>>> + }
>>>> + else
>>>> + {
>>>> + /* It is possible the section consists of several compressed
>>>> + buffers concatenated together, so we uncompress in a loop. */
>>>> + /* PR 18313: The state field in the z_stream structure is supposed
>>>> + to be invisible to the user (ie us), but some compilers will
>>>> + still complain about it being used without initialisation. So
>>>> + we first zero the entire z_stream structure and then set the fields
>>>> + that we need. */
>>>> + memset (&strm, 0, sizeof strm);
>>>> + strm.avail_in = compressed_size;
>>>> + strm.next_in = (Bytef *)compressed_buffer;
>>>> + strm.avail_out = uncompressed_size;
>>>>
>>>> - rc = inflateInit (& strm);
>>>> - while (strm.avail_in > 0)
>>>> - {
>>>> - if (rc != Z_OK)
>>>> - break;
>>>> - strm.next_out = ((Bytef *) uncompressed_buffer
>>>> - + (uncompressed_size - strm.avail_out));
>>>> - rc = inflate (&strm, Z_FINISH);
>>>> - if (rc != Z_STREAM_END)
>>>> - break;
>>>> - rc = inflateReset (& strm);
>>>> + rc = inflateInit (&strm);
>>>> + while (strm.avail_in > 0)
>>>> + {
>>>> + if (rc != Z_OK)
>>>> + break;
>>>> + strm.next_out = ((Bytef *)uncompressed_buffer
>>>> + + (uncompressed_size - strm.avail_out));
>>>> + rc = inflate (&strm, Z_FINISH);
>>>> + if (rc != Z_STREAM_END)
>>>> + break;
>>>> + rc = inflateReset (&strm);
>>>> + }
>>>> + if (inflateEnd (&strm) != Z_OK || rc != Z_OK || strm.avail_out != 0)
>>>> + goto fail;
>>>> }
>>>> - if (inflateEnd (& strm) != Z_OK
>>>> - || rc != Z_OK
>>>> - || strm.avail_out != 0)
>>>> - goto fail;
>>>>
>>>> *buffer = uncompressed_buffer;
>>>> *size = uncompressed_size;
>>>> @@ -15254,6 +15265,7 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>> {
>>>> uint64_t new_size = num_bytes;
>>>> uint64_t uncompressed_size = 0;
>>>> + bool is_zstd = false;
>>>>
>>>> if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>>> {
>>>> @@ -15266,7 +15278,13 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>> by get_compression_header. */
>>>> goto error_out;
>>>>
>>>> - if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>>> + if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>>> + ;
>>>> +#ifdef HAVE_ZSTD
>>>> + else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>>> + is_zstd = true;
>>>> +#endif
>>>> + else
>>>> {
>>>> warn (_("section '%s' has unsupported compress type: %d\n"),
>>>> printable_section_name (filedata, section), chdr.ch_type);
>>>> @@ -15295,8 +15313,8 @@ dump_section_as_strings (Elf_Internal_Shdr * section, Filedata * filedata)
>>>>
>>>> if (uncompressed_size)
>>>> {
>>>> - if (uncompress_section_contents (& start,
>>>> - uncompressed_size, & new_size))
>>>> + if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>> + &new_size))
>>>> num_bytes = new_size;
>>>> else
>>>> {
>>>> @@ -15470,6 +15488,7 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>> {
>>>> uint64_t new_size = section_size;
>>>> uint64_t uncompressed_size = 0;
>>>> + bool is_zstd = false;
>>>>
>>>> if ((section->sh_flags & SHF_COMPRESSED) != 0)
>>>> {
>>>> @@ -15482,7 +15501,13 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>> by get_compression_header. */
>>>> goto error_out;
>>>>
>>>> - if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>>> + if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>>> + ;
>>>> +#ifdef HAVE_ZSTD
>>>> + else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>>> + is_zstd = true;
>>>> +#endif
>>>> + else
>>>> {
>>>> warn (_("section '%s' has unsupported compress type: %d\n"),
>>>> printable_section_name (filedata, section), chdr.ch_type);
>>>> @@ -15511,8 +15536,8 @@ dump_section_as_bytes (Elf_Internal_Shdr *section,
>>>>
>>>> if (uncompressed_size)
>>>> {
>>>> - if (uncompress_section_contents (& start, uncompressed_size,
>>>> - & new_size))
>>>> + if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>> + &new_size))
>>>> {
>>>> section_size = new_size;
>>>> }
>>>> @@ -15848,6 +15873,7 @@ load_specific_debug_section (enum dwarf_section_display_enum debug,
>>>> unsigned char *start = section->start;
>>>> uint64_t size = sec->sh_size;
>>>> uint64_t uncompressed_size = 0;
>>>> + bool is_zstd = false;
>>>>
>>>> if ((sec->sh_flags & SHF_COMPRESSED) != 0)
>>>> {
>>>> @@ -15869,7 +15895,13 @@ load_specific_debug_section (enum dwarf_section_display_enum debug,
>>>> by get_compression_header. */
>>>> return false;
>>>>
>>>> - if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>>>> + if (chdr.ch_type == ELFCOMPRESS_ZLIB)
>>>> + ;
>>>> +#ifdef HAVE_ZSTD
>>>> + else if (chdr.ch_type == ELFCOMPRESS_ZSTD)
>>>> + is_zstd = true;
>>>> +#endif
>>>> + else
>>>> {
>>>> warn (_("section '%s' has unsupported compress type: %d\n"),
>>>> section->name, chdr.ch_type);
>>>> @@ -15898,7 +15930,7 @@ load_specific_debug_section (enum dwarf_section_display_enum debug,
>>>>
>>>> if (uncompressed_size)
>>>> {
>>>> - if (uncompress_section_contents (&start, uncompressed_size,
>>>> + if (uncompress_section_contents (is_zstd, &start, uncompressed_size,
>>>> &size))
>>>> {
>>>> /* Free the compressed buffer, update the section buffer
>>>
>>> Hi.
>>>
>>> I noticed the following issue (dunno if caused by objcopy or the patches readelf) with this patch:
>>>
>>> $ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c -
>>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
>>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>>> readelf: Error: Unable to decompress section .debug_info
>>> readelf: Error: No comp units in .debug_info section ?
>>>
>>> while zlib is fine:
>>>
>>> $ /home/marxin/Programming/binutils/objdir/ld/../binutils/objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zlib
>>> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a-x32.o >/dev/null
>>>
>>> Can you please take a look?
>>
>> objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd uses
>> ch_type=1 (ELFCOMPRESS_ZLIB), but I don't know where goes wrong.
>>
>> Converting EI_CLASS looks very hacky to me.
>
> Ok, I've got it. It's unrelated to '-O elf32-x86-64', but to the fact we move from zlib-gabi compression of a.o
> to zstd for the objcopy output file.
>
> Simpler reproducer:
>
> $ echo 'int main() {return 0;}' | gcc -O2 -g -c -o a.o -x c - -Wa,--compress-debug-sections=zlib
> $ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=zstd
> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o
> readelf: Error: Unable to decompress section .debug_info
> readelf: Error: No comp units in .debug_info section ?
>
> As seen the .debug_info section in a2.o claims to be compressed:
>
> $ readelf -SW a2.o | grep C
> [ 5] .debug_info PROGBITS 0000000000000000 000043 000050 00 C 0 0 1
>
> But it's not as it's not beneficial and the following code in in bfd/compress.c is taken:
>
> /* If compression didn't make the section smaller, keep it uncompressed. */
> if (compressed_size >= uncompressed_size)
> {
> memcpy (buffer, input_buffer, uncompressed_size);
> sec->compress_status = COMPRESS_SECTION_NONE;
> }
>
> Apparently, we miss removal of the compression header :/ Moreover, the following is fine:
> $ /home/marxin/Programming/binutils/objdir//binutils/objcopy a.o a2.o --compress-debug-sections=none
> $ /home/marxin/Programming/binutils/objdir/binutils/readelf -wi a2.o >/dev/null
>
> Lemme try preparing a fix.
>
> Martin
There's a patch candidate that works for the simple example (all x86_64 ELF), but fails for:
objcopy -O elf32-x86-64 a.o a-x32.o --compress-debug-sections=zstd
likely due to bfd_update_compression_header where compression header is updated.
diff --git a/bfd/compress.c b/bfd/compress.c
index 364df14142b..262e2f5bed2 100644
--- a/bfd/compress.c
+++ b/bfd/compress.c
@@ -202,8 +202,10 @@ bfd_compress_section_contents (bfd *abfd, sec_ptr sec)
compressed_size += new_header_size;
}
- /* If compression didn't make the section smaller, keep it uncompressed. */
- if (compressed_size >= uncompressed_size)
+ /* If compression didn't make the section smaller, keep it uncompressed.
+ Do it only if the original section is not compressed, otherwise we would
+ need to drop the section header. */
+ if (!compressed && compressed_size >= uncompressed_size)
{
memcpy (buffer, input_buffer, uncompressed_size);
sec->compress_status = COMPRESS_SECTION_NONE;
@Alan: Can you please help us?
Thanks,
Martin
next prev parent reply other threads:[~2022-10-14 8:26 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-01 6:20 Fangrui Song
2022-10-06 2:20 ` Fangrui Song
2022-10-20 8:04 ` Martin Liška
2022-10-20 8:05 ` Martin Liška
2022-10-13 11:38 ` Martin Liška
2022-10-14 3:35 ` Fangrui Song
2022-10-14 7:47 ` Martin Liška
2022-10-14 8:26 ` Martin Liška [this message]
2022-10-14 11:28 ` Alan Modra
2022-10-14 12:09 ` Alan Modra
2022-10-16 4:42 ` Alan Modra
2022-10-16 20:46 ` Fangrui Song
2022-10-21 9:16 ` Alan Modra
2022-10-17 7:52 ` Martin Liška
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5eeed442-a153-5805-eae9-44b006506fd1@suse.cz \
--to=mliska@suse.cz \
--cc=amodra@gmail.com \
--cc=binutils@sourceware.org \
--cc=maskray@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).