public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH][RFC] readelf: partial support of ZSTD compression
@ 2022-10-24 11:09 Martin Liška
  2022-10-24 11:41 ` Dmitry V. Levin
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-10-24 11:09 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Mark Wielaard, Fangrui Song

Support decompression of ZSTD sections and add support
for it when -SWz is used:

...
[30] .debug_abbrev        PROGBITS     0000000000000000 00001f9d 00000168  0 C      0   0  1
     [ELF ZSTD (2) 000002fc  1]
...

One TODO I see is that:
+libelf_so_LDLIBS = $(libelf_so_DEPS) -lz -lzstd

should be conditional based on HAVE_ZSTD. But I don't know how to do that?

And the second part should cover elfcompress where it should support ZSTD compression,
leaving that for now.

Martin

libelf/ChangeLog:

	* Makefile.am: Add -lzstd.
	* elf.h (ELFCOMPRESS_ZSTD): Add new value.
	* elf_compress.c (__libelf_decompress): Dispatch based
	on the compression algorithm.
	(__libelf_decompress_zlib): New.
	(__libelf_decompress_zstd): New.
	(__libelf_decompress_elf): Pass type of compression to
	__libelf_decompress.
	* elf_compress_gnu.c (elf_compress_gnu): Use ELFCOMPRESS_ZLIB
	as .z* sections can be only compressed with ZLIB.
	* libelfP.h (__libelf_decompress): Change signature.

m4/ChangeLog:

	* zstd.m4: New file.

src/ChangeLog:

	* readelf.c (elf_ch_type_name): Use switch and support ZSTD.
---
 libelf/Makefile.am        |  2 +-
 libelf/elf.h              |  3 +++
 libelf/elf_compress.c     | 56 ++++++++++++++++++++++++++++++++++++---
 libelf/elf_compress_gnu.c |  2 +-
 libelf/libelfP.h          |  2 +-
 m4/zstd.m4                | 23 ++++++++++++++++
 src/readelf.c             | 18 ++++++++-----
 7 files changed, 93 insertions(+), 13 deletions(-)
 create mode 100644 m4/zstd.m4

diff --git a/libelf/Makefile.am b/libelf/Makefile.am
index 560ed45f..33f3a0a1 100644
--- a/libelf/Makefile.am
+++ b/libelf/Makefile.am
@@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
 am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
 
 libelf_so_DEPS = ../lib/libeu.a
-libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
+libelf_so_LDLIBS = $(libelf_so_DEPS) -lz -lzstd
 if USE_LOCKS
 libelf_so_LDLIBS += -lpthread
 endif
diff --git a/libelf/elf.h b/libelf/elf.h
index 02a1b3f5..f0f0ec7d 100644
--- a/libelf/elf.h
+++ b/libelf/elf.h
@@ -506,6 +506,9 @@ typedef struct
 
 /* Legal values for ch_type (compression algorithm).  */
 #define ELFCOMPRESS_ZLIB	1	   /* ZLIB/DEFLATE algorithm.  */
+#define ELFCOMPRESS_ZSTD	2	   /* Compressed with zstd  */
+					   /* (see http://www.zstandard.org). */
+
 #define ELFCOMPRESS_LOOS	0x60000000 /* Start of OS-specific.  */
 #define ELFCOMPRESS_HIOS	0x6fffffff /* End of OS-specific.  */
 #define ELFCOMPRESS_LOPROC	0x70000000 /* Start of processor-specific.  */
diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
index d7f53af2..62b41b20 100644
--- a/libelf/elf_compress.c
+++ b/libelf/elf_compress.c
@@ -39,6 +39,10 @@
 #include <string.h>
 #include <zlib.h>
 
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
+
 /* Cleanup and return result.  Don't leak memory.  */
 static void *
 do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
@@ -207,7 +211,7 @@ __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
 
 void *
 internal_function
-__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
+__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
 {
   /* Catch highly unlikely compression ratios so we don't allocate
      some giant amount of memory for nothing. The max compression
@@ -260,6 +264,50 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
   return buf_out;
 }
 
+#ifdef USE_ZSTD
+void *
+internal_function
+__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
+{
+  /* Malloc might return NULL when requestion zero size.  This is highly
+     unlikely, it would only happen when the compression was forced.
+     But we do need a non-NULL buffer to return and set as result.
+     Just make sure to always allocate at least 1 byte.  */
+  void *buf_out = malloc (size_out ?: 1);
+  if (unlikely (buf_out == NULL))
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return NULL;
+    }
+
+  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
+  if (ZSTD_isError (ret))
+    {
+      free (buf_out);
+      __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
+      return NULL;
+    }
+  else
+    return buf_out;
+}
+#endif
+
+void *
+internal_function
+__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
+{
+  if (chtype == ELFCOMPRESS_ZLIB)
+    return __libelf_decompress_zlib (buf_in, size_in, size_out);
+  else
+    {
+#ifdef USE_ZSTD
+    return __libelf_decompress_zstd (buf_in, size_in, size_out);
+#else
+    return 0;
+#endif
+    }
+}
+
 void *
 internal_function
 __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
@@ -268,7 +316,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
   if (gelf_getchdr (scn, &chdr) == NULL)
     return NULL;
 
-  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
     {
       __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
       return NULL;
@@ -295,7 +343,9 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
 		  ? sizeof (Elf32_Chdr) : sizeof (Elf64_Chdr));
   size_t size_in = data->d_size - hsize;
   void *buf_in = data->d_buf + hsize;
-  void *buf_out = __libelf_decompress (buf_in, size_in, chdr.ch_size);
+  void *buf_out
+    = __libelf_decompress (chdr.ch_type, buf_in, size_in, chdr.ch_size);
+
   *size_out = chdr.ch_size;
   *addralign = chdr.ch_addralign;
   return buf_out;
diff --git a/libelf/elf_compress_gnu.c b/libelf/elf_compress_gnu.c
index 3d2977e7..be9e990e 100644
--- a/libelf/elf_compress_gnu.c
+++ b/libelf/elf_compress_gnu.c
@@ -178,7 +178,7 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
       size_t size = gsize;
       size_t size_in = data->d_size - hsize;
       void *buf_in = data->d_buf + hsize;
-      void *buf_out = __libelf_decompress (buf_in, size_in, size);
+      void *buf_out = __libelf_decompress (ELFCOMPRESS_ZLIB, buf_in, size_in, size);
       if (buf_out == NULL)
 	return -1;
 
diff --git a/libelf/libelfP.h b/libelf/libelfP.h
index d88a613c..ab82357c 100644
--- a/libelf/libelfP.h
+++ b/libelf/libelfP.h
@@ -577,7 +577,7 @@ extern void * __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
 				 size_t *size, bool force)
      internal_function;
 
-extern void * __libelf_decompress (void *buf_in, size_t size_in,
+extern void * __libelf_decompress (int chtype, void *buf_in, size_t size_in,
 				   size_t size_out) internal_function;
 extern void * __libelf_decompress_elf (Elf_Scn *scn,
 				       size_t *size_out, size_t *addralign)
diff --git a/m4/zstd.m4 b/m4/zstd.m4
new file mode 100644
index 00000000..6da4db68
--- /dev/null
+++ b/m4/zstd.m4
@@ -0,0 +1,23 @@
+dnl Copyright (C) 2022 Free Software Foundation, Inc.
+dnl This file is free software, distributed under the terms of the GNU
+dnl General Public License.  As a special exception to the GNU General
+dnl Public License, this file may be distributed as part of a program
+dnl that contains a configuration script generated by Autoconf, under
+dnl the same distribution terms as the rest of that program.
+
+dnl Enable features using the zstd library.
+AC_DEFUN([AC_ZSTD], [
+AC_ARG_WITH(zstd,
+  [AS_HELP_STRING([--with-zstd], [support zstd compressed debug sections (default=auto)])],
+  [], [with_zstd=auto])
+
+AS_IF([test "$with_zstd" != no],
+  [PKG_CHECK_MODULES(ZSTD, [libzstd], [
+    AC_DEFINE(HAVE_ZSTD, 1, [Define to 1 if zstd is enabled.])
+  ], [
+    if test "$with_zstd" = yes; then
+      AC_MSG_ERROR([--with-zstd was given, but pkgconfig/libzstd.pc is not found])
+    fi
+  ])
+  ])
+])
diff --git a/src/readelf.c b/src/readelf.c
index a206e60e..1af20e35 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -1229,13 +1229,17 @@ get_visibility_type (int value)
 static const char *
 elf_ch_type_name (unsigned int code)
 {
-  if (code == 0)
-    return "NONE";
-
-  if (code == ELFCOMPRESS_ZLIB)
-    return "ZLIB";
-
-  return "UNKNOWN";
+  switch (code)
+    {
+    case 0:
+      return "NONE";
+    case ELFCOMPRESS_ZLIB:
+      return "ZLIB";
+    case ELFCOMPRESS_ZSTD:
+      return "ZSTD";
+    default:
+      return "UNKNOWN";
+    }
 }
 
 /* Print the section headers.  */
-- 
2.38.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-10-24 11:09 [PATCH][RFC] readelf: partial support of ZSTD compression Martin Liška
@ 2022-10-24 11:41 ` Dmitry V. Levin
  2022-10-24 12:17   ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Dmitry V. Levin @ 2022-10-24 11:41 UTC (permalink / raw)
  To: Martin Liška; +Cc: elfutils-devel, Mark Wielaard, Fangrui Song

On Mon, Oct 24, 2022 at 01:09:59PM +0200, Martin Liška wrote:
[...]
> One TODO I see is that:
> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz -lzstd
> 
> should be conditional based on HAVE_ZSTD. But I don't know how to do that?

I suppose you're talking about libzstd_LIBS.

[...]
> diff --git a/m4/zstd.m4 b/m4/zstd.m4
> new file mode 100644
> index 00000000..6da4db68
> --- /dev/null
> +++ b/m4/zstd.m4
> @@ -0,0 +1,23 @@
> +dnl Copyright (C) 2022 Free Software Foundation, Inc.
> +dnl This file is free software, distributed under the terms of the GNU
> +dnl General Public License.  As a special exception to the GNU General
> +dnl Public License, this file may be distributed as part of a program
> +dnl that contains a configuration script generated by Autoconf, under
> +dnl the same distribution terms as the rest of that program.
> +
> +dnl Enable features using the zstd library.
> +AC_DEFUN([AC_ZSTD], [
> +AC_ARG_WITH(zstd,
> +  [AS_HELP_STRING([--with-zstd], [support zstd compressed debug sections (default=auto)])],
> +  [], [with_zstd=auto])

Where does this code come from?
I though the "AC_" prefix is reserved for the GNU Autoconf.
Also, looks like it would be more appropriate to call it --enable-zstd
rather than --with-zstd.


-- 
ldv

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-10-24 11:41 ` Dmitry V. Levin
@ 2022-10-24 12:17   ` Martin Liška
  2022-10-24 16:48     ` Dmitry V. Levin
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-10-24 12:17 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: elfutils-devel, Mark Wielaard, Fangrui Song

On 10/24/22 13:41, Dmitry V. Levin wrote:
> On Mon, Oct 24, 2022 at 01:09:59PM +0200, Martin Liška wrote:
> [...]
>> One TODO I see is that:
>> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz -lzstd
>>
>> should be conditional based on HAVE_ZSTD. But I don't know how to do that?
> 
> I suppose you're talking about libzstd_LIBS.

Hm, can't see it after autoreconf -fi and ./configure.

> 
> [...]
>> diff --git a/m4/zstd.m4 b/m4/zstd.m4
>> new file mode 100644
>> index 00000000..6da4db68
>> --- /dev/null
>> +++ b/m4/zstd.m4
>> @@ -0,0 +1,23 @@
>> +dnl Copyright (C) 2022 Free Software Foundation, Inc.
>> +dnl This file is free software, distributed under the terms of the GNU
>> +dnl General Public License.  As a special exception to the GNU General
>> +dnl Public License, this file may be distributed as part of a program
>> +dnl that contains a configuration script generated by Autoconf, under
>> +dnl the same distribution terms as the rest of that program.
>> +
>> +dnl Enable features using the zstd library.
>> +AC_DEFUN([AC_ZSTD], [
>> +AC_ARG_WITH(zstd,
>> +  [AS_HELP_STRING([--with-zstd], [support zstd compressed debug sections (default=auto)])],
>> +  [], [with_zstd=auto])
> 
> Where does this code come from?
> I though the "AC_" prefix is reserved for the GNU Autoconf.

It comes from binutils './config/zstd.m4' file.

> Also, looks like it would be more appropriate to call it --enable-zstd
> rather than --with-zstd.
> 

Ah, I see.

Thanks,
Martin


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-10-24 12:17   ` Martin Liška
@ 2022-10-24 16:48     ` Dmitry V. Levin
  2022-10-24 18:16       ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Dmitry V. Levin @ 2022-10-24 16:48 UTC (permalink / raw)
  To: Martin Liška; +Cc: elfutils-devel, Mark Wielaard, Fangrui Song

On Mon, Oct 24, 2022 at 02:17:17PM +0200, Martin Liška wrote:
> On 10/24/22 13:41, Dmitry V. Levin wrote:
> > On Mon, Oct 24, 2022 at 01:09:59PM +0200, Martin Liška wrote:
> > [...]
> >> One TODO I see is that:
> >> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz -lzstd
> >>
> >> should be conditional based on HAVE_ZSTD. But I don't know how to do that?
> > 
> > I suppose you're talking about libzstd_LIBS.
> 
> Hm, can't see it after autoreconf -fi and ./configure.

That's because you do
PKG_CHECK_MODULES(ZSTD, [libzstd], ...)
and this defines ZSTD_CFLAGS and ZSTD_LIBS instead of libzstd_CFLAGS
and libzstd_LIBS because PKG_CHECK_MODULES() uses its first argument
as the prefix for these variables.


-- 
ldv

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-10-24 16:48     ` Dmitry V. Levin
@ 2022-10-24 18:16       ` Martin Liška
  2022-10-28 22:21         ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-10-24 18:16 UTC (permalink / raw)
  To: Dmitry V. Levin; +Cc: elfutils-devel, Mark Wielaard, Fangrui Song

[-- Attachment #1: Type: text/plain, Size: 993 bytes --]

On 10/24/22 18:48, Dmitry V. Levin wrote:
> On Mon, Oct 24, 2022 at 02:17:17PM +0200, Martin Liška wrote:
>> On 10/24/22 13:41, Dmitry V. Levin wrote:
>>> On Mon, Oct 24, 2022 at 01:09:59PM +0200, Martin Liška wrote:
>>> [...]
>>>> One TODO I see is that:
>>>> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz -lzstd
>>>>
>>>> should be conditional based on HAVE_ZSTD. But I don't know how to do that?
>>>
>>> I suppose you're talking about libzstd_LIBS.
>>
>> Hm, can't see it after autoreconf -fi and ./configure.
> 
> That's because you do
> PKG_CHECK_MODULES(ZSTD, [libzstd], ...)
> and this defines ZSTD_CFLAGS and ZSTD_LIBS instead of libzstd_CFLAGS
> and libzstd_LIBS because PKG_CHECK_MODULES() uses its first argument
> as the prefix for these variables.
> 
> 

Thank you. Apparently, I collided with the existing:
eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])

Anyway, I'm sending V2 that works fine --with-zstd and --without-zstd as expected.

Ready for master?

Thanks,
Martin

[-- Attachment #2: 0001-readelf-partial-support-of-ZSTD-compression.patch --]
[-- Type: text/x-patch, Size: 8117 bytes --]

From 4aea412783b9b0dcaf0f887947bf2e8ee6c5368b Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 24 Oct 2022 11:53:13 +0200
Subject: [PATCH] readelf: partial support of ZSTD compression

Support decompression of ZSTD sections and add support
for it when -SWz is used:

...
[30] .debug_abbrev        PROGBITS     0000000000000000 00001f9d 00000168  0 C      0   0  1
     [ELF ZSTD (2) 000002fc  1]
...

ChangeLog:

	* configure.ac: Add zstd_LIBS.

libelf/ChangeLog:

	* Makefile.am: Use zstd_LIBS.
	* elf.h (ELFCOMPRESS_ZSTD): Add new value.
	* elf_compress.c (__libelf_decompress): Dispatch based
	on the compression algorithm.
	(__libelf_decompress_zlib): New.
	(__libelf_decompress_zstd): New.
	(__libelf_decompress_elf): Pass type of compression to
	__libelf_decompress.
	* elf_compress_gnu.c (elf_compress_gnu): Use ELFCOMPRESS_ZLIB
	as .z* sections can be only compressed with ZLIB.
	* libelfP.h (__libelf_decompress): Change signature.

src/ChangeLog:

	* readelf.c (elf_ch_type_name): Use switch and support ZSTD.
---
 configure.ac              |  8 +++---
 libelf/Makefile.am        |  2 +-
 libelf/elf.h              |  3 +++
 libelf/elf_compress.c     | 56 ++++++++++++++++++++++++++++++++++++---
 libelf/elf_compress_gnu.c |  2 +-
 libelf/libelfP.h          |  2 +-
 src/readelf.c             | 18 ++++++++-----
 7 files changed, 75 insertions(+), 16 deletions(-)

diff --git a/configure.ac b/configure.ac
index 03b67a9d..803876e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -410,6 +410,11 @@ dnl Test for bzlib and xz/lzma/zstd, gives BZLIB/LZMALIB/ZSTD .am
 dnl conditional and config.h USE_BZLIB/USE_LZMALIB/USE_ZSTD #define.
 save_LIBS="$LIBS"
 LIBS=
+eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
+AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
+AC_SUBST([LIBZSTD])
+zstd_LIBS="$LIBS"
+AC_SUBST([zstd_LIBS])
 eu_ZIPLIB(bzlib,BZLIB,bz2,BZ2_bzdopen,bzip2)
 # We need this since bzip2 doesn't have a pkgconfig file.
 BZ2_LIB="$LIBS"
@@ -417,9 +422,6 @@ AC_SUBST([BZ2_LIB])
 eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
 AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"], [LIBLZMA=""])
 AC_SUBST([LIBLZMA])
-eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
-AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
-AC_SUBST([LIBZSTD])
 zip_LIBS="$LIBS"
 LIBS="$save_LIBS"
 AC_SUBST([zip_LIBS])
diff --git a/libelf/Makefile.am b/libelf/Makefile.am
index 560ed45f..24c25cf8 100644
--- a/libelf/Makefile.am
+++ b/libelf/Makefile.am
@@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
 am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
 
 libelf_so_DEPS = ../lib/libeu.a
-libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
+libelf_so_LDLIBS = $(libelf_so_DEPS) -lz $(zstd_LIBS)
 if USE_LOCKS
 libelf_so_LDLIBS += -lpthread
 endif
diff --git a/libelf/elf.h b/libelf/elf.h
index 02a1b3f5..f0f0ec7d 100644
--- a/libelf/elf.h
+++ b/libelf/elf.h
@@ -506,6 +506,9 @@ typedef struct
 
 /* Legal values for ch_type (compression algorithm).  */
 #define ELFCOMPRESS_ZLIB	1	   /* ZLIB/DEFLATE algorithm.  */
+#define ELFCOMPRESS_ZSTD	2	   /* Compressed with zstd  */
+					   /* (see http://www.zstandard.org). */
+
 #define ELFCOMPRESS_LOOS	0x60000000 /* Start of OS-specific.  */
 #define ELFCOMPRESS_HIOS	0x6fffffff /* End of OS-specific.  */
 #define ELFCOMPRESS_LOPROC	0x70000000 /* Start of processor-specific.  */
diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
index d7f53af2..62b41b20 100644
--- a/libelf/elf_compress.c
+++ b/libelf/elf_compress.c
@@ -39,6 +39,10 @@
 #include <string.h>
 #include <zlib.h>
 
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
+
 /* Cleanup and return result.  Don't leak memory.  */
 static void *
 do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
@@ -207,7 +211,7 @@ __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
 
 void *
 internal_function
-__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
+__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
 {
   /* Catch highly unlikely compression ratios so we don't allocate
      some giant amount of memory for nothing. The max compression
@@ -260,6 +264,50 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
   return buf_out;
 }
 
+#ifdef USE_ZSTD
+void *
+internal_function
+__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
+{
+  /* Malloc might return NULL when requestion zero size.  This is highly
+     unlikely, it would only happen when the compression was forced.
+     But we do need a non-NULL buffer to return and set as result.
+     Just make sure to always allocate at least 1 byte.  */
+  void *buf_out = malloc (size_out ?: 1);
+  if (unlikely (buf_out == NULL))
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return NULL;
+    }
+
+  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
+  if (ZSTD_isError (ret))
+    {
+      free (buf_out);
+      __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
+      return NULL;
+    }
+  else
+    return buf_out;
+}
+#endif
+
+void *
+internal_function
+__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
+{
+  if (chtype == ELFCOMPRESS_ZLIB)
+    return __libelf_decompress_zlib (buf_in, size_in, size_out);
+  else
+    {
+#ifdef USE_ZSTD
+    return __libelf_decompress_zstd (buf_in, size_in, size_out);
+#else
+    return 0;
+#endif
+    }
+}
+
 void *
 internal_function
 __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
@@ -268,7 +316,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
   if (gelf_getchdr (scn, &chdr) == NULL)
     return NULL;
 
-  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
     {
       __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
       return NULL;
@@ -295,7 +343,9 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
 		  ? sizeof (Elf32_Chdr) : sizeof (Elf64_Chdr));
   size_t size_in = data->d_size - hsize;
   void *buf_in = data->d_buf + hsize;
-  void *buf_out = __libelf_decompress (buf_in, size_in, chdr.ch_size);
+  void *buf_out
+    = __libelf_decompress (chdr.ch_type, buf_in, size_in, chdr.ch_size);
+
   *size_out = chdr.ch_size;
   *addralign = chdr.ch_addralign;
   return buf_out;
diff --git a/libelf/elf_compress_gnu.c b/libelf/elf_compress_gnu.c
index 3d2977e7..be9e990e 100644
--- a/libelf/elf_compress_gnu.c
+++ b/libelf/elf_compress_gnu.c
@@ -178,7 +178,7 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
       size_t size = gsize;
       size_t size_in = data->d_size - hsize;
       void *buf_in = data->d_buf + hsize;
-      void *buf_out = __libelf_decompress (buf_in, size_in, size);
+      void *buf_out = __libelf_decompress (ELFCOMPRESS_ZLIB, buf_in, size_in, size);
       if (buf_out == NULL)
 	return -1;
 
diff --git a/libelf/libelfP.h b/libelf/libelfP.h
index d88a613c..ab82357c 100644
--- a/libelf/libelfP.h
+++ b/libelf/libelfP.h
@@ -577,7 +577,7 @@ extern void * __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
 				 size_t *size, bool force)
      internal_function;
 
-extern void * __libelf_decompress (void *buf_in, size_t size_in,
+extern void * __libelf_decompress (int chtype, void *buf_in, size_t size_in,
 				   size_t size_out) internal_function;
 extern void * __libelf_decompress_elf (Elf_Scn *scn,
 				       size_t *size_out, size_t *addralign)
diff --git a/src/readelf.c b/src/readelf.c
index a206e60e..1af20e35 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -1229,13 +1229,17 @@ get_visibility_type (int value)
 static const char *
 elf_ch_type_name (unsigned int code)
 {
-  if (code == 0)
-    return "NONE";
-
-  if (code == ELFCOMPRESS_ZLIB)
-    return "ZLIB";
-
-  return "UNKNOWN";
+  switch (code)
+    {
+    case 0:
+      return "NONE";
+    case ELFCOMPRESS_ZLIB:
+      return "ZLIB";
+    case ELFCOMPRESS_ZSTD:
+      return "ZSTD";
+    default:
+      return "UNKNOWN";
+    }
 }
 
 /* Print the section headers.  */
-- 
2.38.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-10-24 18:16       ` Martin Liška
@ 2022-10-28 22:21         ` Mark Wielaard
  2022-11-28 13:16           ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-10-28 22:21 UTC (permalink / raw)
  To: Martin Liška; +Cc: Dmitry V. Levin, elfutils-devel, Fangrui Song

Hi Martin,

On Mon, Oct 24, 2022 at 08:16:09PM +0200, Martin Liška wrote:
> Anyway, I'm sending V2 that works fine --with-zstd and --without-zstd as expected.
> 
> Ready for master?

The decompression part looks good. Few small nitpicks below.

Although I like to also have compression working.  Then we can also
add support to src/elfcompress, which makes for a good testcase. See
tests/run-compress.sh (which has found some subtle memory issues when
run under valgrind).

> From 4aea412783b9b0dcaf0f887947bf2e8ee6c5368b Mon Sep 17 00:00:00 2001
> From: Martin Liska <mliska@suse.cz>
> Date: Mon, 24 Oct 2022 11:53:13 +0200
> Subject: [PATCH] readelf: partial support of ZSTD compression
> 
> Support decompression of ZSTD sections and add support
> for it when -SWz is used:
> 
> ...
> [30] .debug_abbrev        PROGBITS     0000000000000000 00001f9d 00000168  0 C      0   0  1
>      [ELF ZSTD (2) 000002fc  1]
> ...

And for this it would be nice to have a tests/run-readelf-z.sh testcase.

> ChangeLog:
> 
> 	* configure.ac: Add zstd_LIBS.
> 
> libelf/ChangeLog:
> 
> 	* Makefile.am: Use zstd_LIBS.
> 	* elf.h (ELFCOMPRESS_ZSTD): Add new value.
> 	* elf_compress.c (__libelf_decompress): Dispatch based
> 	on the compression algorithm.
> 	(__libelf_decompress_zlib): New.
> 	(__libelf_decompress_zstd): New.
> 	(__libelf_decompress_elf): Pass type of compression to
> 	__libelf_decompress.
> 	* elf_compress_gnu.c (elf_compress_gnu): Use ELFCOMPRESS_ZLIB
> 	as .z* sections can be only compressed with ZLIB.
> 	* libelfP.h (__libelf_decompress): Change signature.
> 
> src/ChangeLog:
> 
> 	* readelf.c (elf_ch_type_name): Use switch and support ZSTD.

> diff --git a/libelf/elf.h b/libelf/elf.h
> index 02a1b3f5..f0f0ec7d 100644
> --- a/libelf/elf.h
> +++ b/libelf/elf.h

We normally sync elf.h from glibc. I'll do that in a second.

> +#ifdef USE_ZSTD
> +void *
> +internal_function
> +__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
> +{
> +  /* Malloc might return NULL when requestion zero size.  This is highly

requesting

> +     unlikely, it would only happen when the compression was forced.
> +     But we do need a non-NULL buffer to return and set as result.
> +     Just make sure to always allocate at least 1 byte.  */

> +void *
> +internal_function
> +__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
> +{
> +  if (chtype == ELFCOMPRESS_ZLIB)
> +    return __libelf_decompress_zlib (buf_in, size_in, size_out);
> +  else
> +    {
> +#ifdef USE_ZSTD
> +    return __libelf_decompress_zstd (buf_in, size_in, size_out);
> +#else
> +    return 0;
> +#endif

return NULL for consistency?

> +    }
> +}
> +
>  void *
>  internal_function
>  __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
> @@ -268,7 +316,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>    if (gelf_getchdr (scn, &chdr) == NULL)
>      return NULL;
>  
> -  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
> +  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
>      {

What about the ifndef USE_ZSTD case? Should this then not recognize
ELFCOMPRESS_ZSTD?

Thanks,

Mark


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-10-28 22:21         ` Mark Wielaard
@ 2022-11-28 13:16           ` Martin Liška
  2022-11-28 22:29             ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-11-28 13:16 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: Dmitry V. Levin, elfutils-devel, Fangrui Song

[-- Attachment #1: Type: text/plain, Size: 935 bytes --]

On 10/29/22 00:21, Mark Wielaard wrote:
> Although I like to also have compression working.  Then we can also
> add support to src/elfcompress, which makes for a good testcase. See
> tests/run-compress.sh (which has found some subtle memory issues when
> run under valgrind).

Hi.

All right, so I'm preparing a full support for ZSTD (both compression and compression)
and I noticed a refactoring would be handy for compress_section function and callers
of the function.

Note right now, there are basically 3 compression types and process_file
function handles basically all combinations of these (3 x 3 options), while adding ZSTD
support would make it even more complicated. However, ZSTD will behave very similar to ZLIB
(not zlib-gnu), except a different algorithm will be used. Plus, in order to distinguish
ZLIB from ZSTD, we need to read GElf_Chdr.

So what do you think about the refactoring as the first step?

Cheers,
Martin

[-- Attachment #2: 0001-Refactor-elf_compare.patch --]
[-- Type: text/x-patch, Size: 12784 bytes --]

From 45b68678cb4a7135532b0f6c5e667ea3a06a0c35 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 28 Nov 2022 14:10:36 +0100
Subject: [PATCH] Refactor elf_compare

---
 src/elfcompress.c | 164 ++++++++++++++++++++++++++++------------------
 1 file changed, 101 insertions(+), 63 deletions(-)

diff --git a/src/elfcompress.c b/src/elfcompress.c
index 51ff69d2..16898f6a 100644
--- a/src/elfcompress.c
+++ b/src/elfcompress.c
@@ -48,13 +48,20 @@ static bool force = false;
 static bool permissive = false;
 static const char *foutput = NULL;
 
-#define T_UNSET 0
-#define T_DECOMPRESS 1    /* none */
-#define T_COMPRESS_ZLIB 2 /* zlib */
-#define T_COMPRESS_GNU  3 /* zlib-gnu */
+/* Compression algorithm, where all legal values for ch_type
+   (compression algorithm) do match the following enum.  */
+enum ch_type
+{
+  UNSET = -1,
+  NONE,
+  ZLIB,
+
+  ZLIB_GNU = 1 << 16
+};
+
 #define WORD_BITS (8U * sizeof (unsigned int))
 
-static int type = T_UNSET;
+static enum ch_type type = UNSET;
 
 struct section_pattern
 {
@@ -120,22 +127,22 @@ parse_opt (int key, char *arg __attribute__ ((unused)),
       break;
 
     case 't':
-      if (type != T_UNSET)
+      if (type != UNSET)
 	argp_error (state, N_("-t option specified twice"));
 
       if (strcmp ("none", arg) == 0)
-	type = T_DECOMPRESS;
+	type = NONE;
       else if (strcmp ("zlib", arg) == 0 || strcmp ("zlib-gabi", arg) == 0)
-	type = T_COMPRESS_ZLIB;
+	type = ZLIB;
       else if (strcmp ("zlib-gnu", arg) == 0 || strcmp ("gnu", arg) == 0)
-	type = T_COMPRESS_GNU;
+	type = ZLIB_GNU;
       else
 	argp_error (state, N_("unknown compression type '%s'"), arg);
       break;
 
     case ARGP_KEY_SUCCESS:
-      if (type == T_UNSET)
-	type = T_COMPRESS_ZLIB;
+      if (type == UNSET)
+	type = ZLIB;
       if (patterns == NULL)
 	add_pattern (".?(z)debug*");
       break;
@@ -198,14 +205,19 @@ setshdrstrndx (Elf *elf, GElf_Ehdr *ehdr, size_t ndx)
 static int
 compress_section (Elf_Scn *scn, size_t orig_size, const char *name,
 		  const char *newname, size_t ndx,
-		  bool gnu, bool compress, bool report_verbose)
+		  enum ch_type schtype, enum ch_type dchtype,
+		  bool report_verbose)
 {
+  /* We either compress or decompress.  */
+  assert (schtype == NONE || dchtype == NONE);
+  bool compress = dchtype != NONE;
+
   int res;
   unsigned int flags = compress && force ? ELF_CHF_FORCE : 0;
-  if (gnu)
+  if (schtype == ZLIB_GNU || dchtype == ZLIB_GNU)
     res = elf_compress_gnu (scn, compress ? 1 : 0, flags);
   else
-    res = elf_compress (scn, compress ? ELFCOMPRESS_ZLIB : 0, flags);
+    res = elf_compress (scn, dchtype, flags);
 
   if (res < 0)
     error (0, 0, "Couldn't decompress section [%zd] %s: %s",
@@ -446,20 +458,20 @@ process_file (const char *fname)
 
       if (section_name_matches (sname))
 	{
-	  if (!force && type == T_DECOMPRESS
+	  if (!force && type == NONE
 	      && (shdr->sh_flags & SHF_COMPRESSED) == 0
 	      && !startswith (sname, ".zdebug"))
 	    {
 	      if (verbose > 0)
 		printf ("[%zd] %s already decompressed\n", ndx, sname);
 	    }
-	  else if (!force && type == T_COMPRESS_ZLIB
+	  else if (!force && type == ZLIB
 		   && (shdr->sh_flags & SHF_COMPRESSED) != 0)
 	    {
 	      if (verbose > 0)
 		printf ("[%zd] %s already compressed\n", ndx, sname);
 	    }
-	  else if (!force && type == T_COMPRESS_GNU
+	  else if (!force && type == ZLIB_GNU
 		   && startswith (sname, ".zdebug"))
 	    {
 	      if (verbose > 0)
@@ -471,9 +483,9 @@ process_file (const char *fname)
 	      set_section (sections, ndx);
 	      /* Check if we might want to change this section name.  */
 	      if (! adjust_names
-		  && ((type != T_COMPRESS_GNU
+		  && ((type != ZLIB_GNU
 		       && startswith (sname, ".zdebug"))
-		      || (type == T_COMPRESS_GNU
+		      || (type == ZLIB_GNU
 			  && startswith (sname, ".debug"))))
 		adjust_names = true;
 
@@ -634,11 +646,11 @@ process_file (const char *fname)
      and keep track of whether or not to compress them (later in the
      fixup pass).  Also record the original size, so we can report the
      difference later when we do compress.  */
-  int shstrtab_compressed = T_UNSET;
+  enum ch_type shstrtab_compressed = UNSET;
   size_t shstrtab_size = 0;
   char *shstrtab_name = NULL;
   char *shstrtab_newname = NULL;
-  int symtab_compressed = T_UNSET;
+  enum ch_type symtab_compressed = UNSET;
   size_t symtab_size = 0;
   char *symtab_name = NULL;
   char *symtab_newname = NULL;
@@ -677,6 +689,32 @@ process_file (const char *fname)
 	     (de)compressed, invalidating the string pointers.  */
 	  sname = xstrdup (sname);
 
+	  /* Detect source compression that is how is the section compressed
+	     now.  */
+	  GElf_Chdr chdr;
+	  enum ch_type schtype = NONE;
+	  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
+	    {
+	      if (gelf_getchdr (scn, &chdr) != NULL)
+		{
+		  schtype = (enum ch_type)chdr.ch_type;
+		  if (schtype == NONE)
+		    {
+		      error (0, 0, "Compression type for section %zd"
+			     " can't be zero ", ndx);
+		      goto cleanup;
+		    }
+		}
+	      else
+		{
+		  error (0, 0, "Couldn't get chdr for section %zd", ndx);
+		  goto cleanup;
+		}
+	    }
+	  /* Set ZLIB compression manually for .zdebug* sections.  */
+	  else if (startswith (sname, ".zdebug"))
+	    schtype = ZLIB_GNU;
+
 	  /* We might want to decompress (and rename), but not
 	     compress during this pass since we might need the section
 	     data in later passes.  Skip those sections for now and
@@ -687,35 +725,32 @@ process_file (const char *fname)
 
 	  switch (type)
 	    {
-	    case T_DECOMPRESS:
-	      if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
+	    case NONE:
+	      if (schtype != NONE)
 		{
+		  if (schtype == ZLIB_GNU)
+		    {
+		      snamebuf[0] = '.';
+		      strcpy (&snamebuf[1], &sname[2]);
+		      newname = snamebuf;
+		    }
 		  if (compress_section (scn, size, sname, NULL, ndx,
-					false, false, verbose > 0) < 0)
-		    goto cleanup;
-		}
-	      else if (startswith (sname, ".zdebug"))
-		{
-		  snamebuf[0] = '.';
-		  strcpy (&snamebuf[1], &sname[2]);
-		  newname = snamebuf;
-		  if (compress_section (scn, size, sname, newname, ndx,
-					true, false, verbose > 0) < 0)
+					schtype, NONE, verbose > 0) < 0)
 		    goto cleanup;
 		}
 	      else if (verbose > 0)
 		printf ("[%zd] %s already decompressed\n", ndx, sname);
 	      break;
 
-	    case T_COMPRESS_GNU:
+	    case ZLIB_GNU:
 	      if (startswith (sname, ".debug"))
 		{
-		  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
+		  if (schtype == ZLIB)
 		    {
 		      /* First decompress to recompress GNU style.
 			 Don't report even when verbose.  */
 		      if (compress_section (scn, size, sname, NULL, ndx,
-					    false, false, false) < 0)
+					    schtype, NONE, false) < 0)
 			goto cleanup;
 		    }
 
@@ -729,7 +764,7 @@ process_file (const char *fname)
 		      if (ndx == shdrstrndx)
 			{
 			  shstrtab_size = size;
-			  shstrtab_compressed = T_COMPRESS_GNU;
+			  shstrtab_compressed = ZLIB_GNU;
 			  if (shstrtab_name != NULL
 			      || shstrtab_newname != NULL)
 			    {
@@ -745,7 +780,7 @@ process_file (const char *fname)
 		      else
 			{
 			  symtab_size = size;
-			  symtab_compressed = T_COMPRESS_GNU;
+			  symtab_compressed = ZLIB_GNU;
 			  symtab_name = xstrdup (sname);
 			  symtab_newname = xstrdup (newname);
 			}
@@ -753,7 +788,7 @@ process_file (const char *fname)
 		  else
 		    {
 		      int result = compress_section (scn, size, sname, newname,
-						     ndx, true, true,
+						     ndx, NONE, type,
 						     verbose > 0);
 		      if (result < 0)
 			goto cleanup;
@@ -764,7 +799,7 @@ process_file (const char *fname)
 		}
 	      else if (verbose >= 0)
 		{
-		  if (startswith (sname, ".zdebug"))
+		  if (schtype == ZLIB_GNU)
 		    printf ("[%zd] %s unchanged, already GNU compressed\n",
 			    ndx, sname);
 		  else
@@ -773,15 +808,15 @@ process_file (const char *fname)
 		}
 	      break;
 
-	    case T_COMPRESS_ZLIB:
+	    case ZLIB:
 	      if ((shdr->sh_flags & SHF_COMPRESSED) == 0)
 		{
-		  if (startswith (sname, ".zdebug"))
+		  if (schtype == ZLIB_GNU)
 		    {
 		      /* First decompress to recompress zlib style.
 			 Don't report even when verbose.  */
 		      if (compress_section (scn, size, sname, NULL, ndx,
-					    true, false, false) < 0)
+					    schtype, NONE, false) < 0)
 			goto cleanup;
 
 		      snamebuf[0] = '.';
@@ -794,7 +829,7 @@ process_file (const char *fname)
 		      if (ndx == shdrstrndx)
 			{
 			  shstrtab_size = size;
-			  shstrtab_compressed = T_COMPRESS_ZLIB;
+			  shstrtab_compressed = ZLIB;
 			  if (shstrtab_name != NULL
 			      || shstrtab_newname != NULL)
 			    {
@@ -811,19 +846,22 @@ process_file (const char *fname)
 		      else
 			{
 			  symtab_size = size;
-			  symtab_compressed = T_COMPRESS_ZLIB;
+			  symtab_compressed = ZLIB;
 			  symtab_name = xstrdup (sname);
 			  symtab_newname = (newname == NULL
 					    ? NULL : xstrdup (newname));
 			}
 		    }
 		  else if (compress_section (scn, size, sname, newname, ndx,
-					     false, true, verbose > 0) < 0)
+					     NONE, type, verbose > 0) < 0)
 		    goto cleanup;
 		}
 	      else if (verbose > 0)
 		printf ("[%zd] %s already compressed\n", ndx, sname);
 	      break;
+
+	    case UNSET:
+	      break;
 	    }
 
 	  free (sname);
@@ -903,28 +941,28 @@ process_file (const char *fname)
 	      /* If the section is (still) compressed we'll need to
 		 uncompress it first to adjust the data, then
 		 recompress it in the fixup pass.  */
-	      if (symtab_compressed == T_UNSET)
+	      if (symtab_compressed == UNSET)
 		{
 		  size_t size = shdr->sh_size;
 		  if ((shdr->sh_flags == SHF_COMPRESSED) != 0)
 		    {
 		      /* Don't report the (internal) uncompression.  */
 		      if (compress_section (newscn, size, sname, NULL, ndx,
-					    false, false, false) < 0)
+					    ZLIB, NONE, false) < 0)
 			goto cleanup;
 
 		      symtab_size = size;
-		      symtab_compressed = T_COMPRESS_ZLIB;
+		      symtab_compressed = ZLIB;
 		    }
 		  else if (startswith (name, ".zdebug"))
 		    {
 		      /* Don't report the (internal) uncompression.  */
 		      if (compress_section (newscn, size, sname, NULL, ndx,
-					    true, false, false) < 0)
+					    ZLIB_GNU, NONE, false) < 0)
 			goto cleanup;
 
 		      symtab_size = size;
-		      symtab_compressed = T_COMPRESS_GNU;
+		      symtab_compressed = ZLIB_GNU;
 		    }
 		}
 
@@ -1037,7 +1075,7 @@ process_file (const char *fname)
 	 or if the section was already compressed (and the user didn't
 	 ask for decompression).  Note somewhat identical code for
 	 symtab below.  */
-      if (shstrtab_compressed == T_UNSET)
+      if (shstrtab_compressed == UNSET)
 	{
 	  /* The user didn't ask for compression, but maybe it was
 	     compressed in the original ELF file.  */
@@ -1067,18 +1105,18 @@ process_file (const char *fname)
 
 	  shstrtab_size = shdr->sh_size;
 	  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
-	    shstrtab_compressed = T_COMPRESS_ZLIB;
+	    shstrtab_compressed = ZLIB;
 	  else if (startswith (shstrtab_name, ".zdebug"))
-	    shstrtab_compressed = T_COMPRESS_GNU;
+	    shstrtab_compressed = ZLIB_GNU;
 	}
 
       /* Should we (re)compress?  */
-      if (shstrtab_compressed != T_UNSET)
+      if (shstrtab_compressed != UNSET)
 	{
 	  if (compress_section (scn, shstrtab_size, shstrtab_name,
 				shstrtab_newname, shdrstrndx,
-				shstrtab_compressed == T_COMPRESS_GNU,
-				true, verbose > 0) < 0)
+				NONE, shstrtab_compressed,
+				verbose > 0) < 0)
 	    goto cleanup;
 	}
     }
@@ -1178,7 +1216,7 @@ process_file (const char *fname)
 		 us to, or if the section was already compressed (and
 		 the user didn't ask for decompression).  Note
 		 somewhat identical code for shstrtab above.  */
-	      if (symtab_compressed == T_UNSET)
+	      if (symtab_compressed == UNSET)
 		{
 		  /* The user didn't ask for compression, but maybe it was
 		     compressed in the original ELF file.  */
@@ -1208,18 +1246,18 @@ process_file (const char *fname)
 
 		  symtab_size = shdr->sh_size;
 		  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
-		    symtab_compressed = T_COMPRESS_ZLIB;
+		    symtab_compressed = ZLIB;
 		  else if (startswith (symtab_name, ".zdebug"))
-		    symtab_compressed = T_COMPRESS_GNU;
+		    symtab_compressed = ZLIB_GNU;
 		}
 
 	      /* Should we (re)compress?  */
-	      if (symtab_compressed != T_UNSET)
+	      if (symtab_compressed != UNSET)
 		{
 		  if (compress_section (scn, symtab_size, symtab_name,
 					symtab_newname, symtabndx,
-					symtab_compressed == T_COMPRESS_GNU,
-					true, verbose > 0) < 0)
+					NONE, symtab_compressed,
+					verbose > 0) < 0)
 		    goto cleanup;
 		}
 	    }
-- 
2.38.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-11-28 13:16           ` Martin Liška
@ 2022-11-28 22:29             ` Mark Wielaard
  2022-11-29  9:34               ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-11-28 22:29 UTC (permalink / raw)
  To: Martin Liška; +Cc: Dmitry V. Levin, elfutils-devel, Fangrui Song

Hi Martin,

On Mon, Nov 28, 2022 at 02:16:35PM +0100, Martin Liška wrote:
> On 10/29/22 00:21, Mark Wielaard wrote:
> > Although I like to also have compression working.  Then we can also
> > add support to src/elfcompress, which makes for a good testcase. See
> > tests/run-compress.sh (which has found some subtle memory issues when
> > run under valgrind).
> 
> All right, so I'm preparing a full support for ZSTD (both compression and compression)
> and I noticed a refactoring would be handy for compress_section function and callers
> of the function.
> 
> Note right now, there are basically 3 compression types and process_file
> function handles basically all combinations of these (3 x 3 options), while adding ZSTD
> support would make it even more complicated. However, ZSTD will behave very similar to ZLIB
> (not zlib-gnu), except a different algorithm will be used. Plus, in order to distinguish
> ZLIB from ZSTD, we need to read GElf_Chdr.
> 
> So what do you think about the refactoring as the first step?

Looks good. I would just add a sanity check that chdr.ch_type is of a
known compression type (it is only checked to be not NONE atm).

Cheers,

Mark



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH][RFC] readelf: partial support of ZSTD compression
  2022-11-28 22:29             ` Mark Wielaard
@ 2022-11-29  9:34               ` Martin Liška
  2022-11-29 12:05                 ` [PATCHv2] support ZSTD compression algorithm Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-11-29  9:34 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: Dmitry V. Levin, elfutils-devel, Fangrui Song

On 11/28/22 23:29, Mark Wielaard wrote:
> Hi Martin,
> 
> On Mon, Nov 28, 2022 at 02:16:35PM +0100, Martin Liška wrote:
>> On 10/29/22 00:21, Mark Wielaard wrote:
>>> Although I like to also have compression working.  Then we can also
>>> add support to src/elfcompress, which makes for a good testcase. See
>>> tests/run-compress.sh (which has found some subtle memory issues when
>>> run under valgrind).
>>
>> All right, so I'm preparing a full support for ZSTD (both compression and compression)
>> and I noticed a refactoring would be handy for compress_section function and callers
>> of the function.
>>
>> Note right now, there are basically 3 compression types and process_file
>> function handles basically all combinations of these (3 x 3 options), while adding ZSTD
>> support would make it even more complicated. However, ZSTD will behave very similar to ZLIB
>> (not zlib-gnu), except a different algorithm will be used. Plus, in order to distinguish
>> ZLIB from ZSTD, we need to read GElf_Chdr.
>>
>> So what do you think about the refactoring as the first step?
> 
> Looks good. I would just add a sanity check that chdr.ch_type is of a
> known compression type (it is only checked to be not NONE atm).

Thanks, fixes that and pushed as 6bb3e0b5. Now I can finish working on the ZSTD support.

Cheers,
Martin

> 
> Cheers,
> 
> Mark
> 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCHv2] support ZSTD compression algorithm
  2022-11-29  9:34               ` Martin Liška
@ 2022-11-29 12:05                 ` Martin Liška
  2022-12-09 10:17                   ` Martin Liška
                                     ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Martin Liška @ 2022-11-29 12:05 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Mark Wielaard

Hi.

There's second version of the patch that fully support both compression and decompression.

Changes from the v1:
- compression support added
- zstd detection is fixed
- new tests are added
- builds fine w/ and w/o the ZSTD library

What's currently missing and where I need a help:

1) When I build ./configure --without-zstd, I don't have
a reasonable error message (something similar to binutils's readelf:
readelf: Warning: section '.debug_str' has unsupported compress type: 2)
even though, __libelf_decompress returns NULL and __libelf_seterrno).
One can see a garbage in the console.

How to handle that properly?

2) How should I run my newly added tests conditionally if zstd
configuration support is enabled?

Cheers,
Martin

---
  configure.ac               |   8 +-
  libelf/Makefile.am         |   2 +-
  libelf/elf_compress.c      | 294 ++++++++++++++++++++++++++++++-------
  libelf/elf_compress_gnu.c  |   5 +-
  libelf/libelfP.h           |   4 +-
  src/elfcompress.c          | 144 ++++++++++--------
  src/readelf.c              |  18 ++-
  tests/run-compress-test.sh |  24 +++
  8 files changed, 373 insertions(+), 126 deletions(-)

diff --git a/configure.ac b/configure.ac
index 59be27ac..07cfa54b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -410,6 +410,11 @@ dnl Test for bzlib and xz/lzma/zstd, gives BZLIB/LZMALIB/ZSTD .am
  dnl conditional and config.h USE_BZLIB/USE_LZMALIB/USE_ZSTD #define.
  save_LIBS="$LIBS"
  LIBS=
+eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
+AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
+AC_SUBST([LIBZSTD])
+zstd_LIBS="$LIBS"
+AC_SUBST([zstd_LIBS])
  eu_ZIPLIB(bzlib,BZLIB,bz2,BZ2_bzdopen,bzip2)
  # We need this since bzip2 doesn't have a pkgconfig file.
  BZ2_LIB="$LIBS"
@@ -417,9 +422,6 @@ AC_SUBST([BZ2_LIB])
  eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
  AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"], [LIBLZMA=""])
  AC_SUBST([LIBLZMA])
-eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
-AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
-AC_SUBST([LIBZSTD])
  zip_LIBS="$LIBS"
  LIBS="$save_LIBS"
  AC_SUBST([zip_LIBS])
diff --git a/libelf/Makefile.am b/libelf/Makefile.am
index 560ed45f..24c25cf8 100644
--- a/libelf/Makefile.am
+++ b/libelf/Makefile.am
@@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
  am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
  
  libelf_so_DEPS = ../lib/libeu.a
-libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
+libelf_so_LDLIBS = $(libelf_so_DEPS) -lz $(zstd_LIBS)
  if USE_LOCKS
  libelf_so_LDLIBS += -lpthread
  endif
diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
index d7f53af2..7a6e37a4 100644
--- a/libelf/elf_compress.c
+++ b/libelf/elf_compress.c
@@ -39,6 +39,10 @@
  #include <string.h>
  #include <zlib.h>
  
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
+
  /* Cleanup and return result.  Don't leak memory.  */
  static void *
  do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
@@ -54,53 +58,14 @@ do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
  #define deflate_cleanup(result, cdata) \
      do_deflate_cleanup(result, &z, out_buf, cdata)
  
-/* Given a section, uses the (in-memory) Elf_Data to extract the
-   original data size (including the given header size) and data
-   alignment.  Returns a buffer that has at least hsize bytes (for the
-   caller to fill in with a header) plus zlib compressed date.  Also
-   returns the new buffer size in new_size (hsize + compressed data
-   size).  Returns (void *) -1 when FORCE is false and the compressed
-   data would be bigger than the original data.  */
  void *
  internal_function
-__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
-		   size_t *orig_size, size_t *orig_addralign,
-		   size_t *new_size, bool force)
+__libelf_compress_zlib (Elf_Scn *scn, size_t hsize, int ei_data,
+			size_t *orig_size, size_t *orig_addralign,
+			size_t *new_size, bool force,
+			Elf_Data *data, Elf_Data *next_data,
+			void *out_buf, size_t out_size, size_t block)
  {
-  /* The compressed data is the on-disk data.  We simplify the
-     implementation a bit by asking for the (converted) in-memory
-     data (which might be all there is if the user created it with
-     elf_newdata) and then convert back to raw if needed before
-     compressing.  Should be made a bit more clever to directly
-     use raw if that is directly available.  */
-  Elf_Data *data = elf_getdata (scn, NULL);
-  if (data == NULL)
-    return NULL;
-
-  /* When not forced and we immediately know we would use more data by
-     compressing, because of the header plus zlib overhead (five bytes
-     per 16 KB block, plus a one-time overhead of six bytes for the
-     entire stream), don't do anything.  */
-  Elf_Data *next_data = elf_getdata (scn, data);
-  if (next_data == NULL && !force
-      && data->d_size <= hsize + 5 + 6)
-    return (void *) -1;
-
-  *orig_addralign = data->d_align;
-  *orig_size = data->d_size;
-
-  /* Guess an output block size. 1/8th of the original Elf_Data plus
-     hsize.  Make the first chunk twice that size (25%), then increase
-     by a block (12.5%) when necessary.  */
-  size_t block = (data->d_size / 8) + hsize;
-  size_t out_size = 2 * block;
-  void *out_buf = malloc (out_size);
-  if (out_buf == NULL)
-    {
-      __libelf_seterrno (ELF_E_NOMEM);
-      return NULL;
-    }
-
    /* Caller gets to fill in the header at the start.  Just skip it here.  */
    size_t used = hsize;
  
@@ -205,9 +170,186 @@ __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
    return out_buf;
  }
  
+#ifdef USE_ZSTD
+/* Cleanup and return result.  Don't leak memory.  */
+static void *
+do_zstd_cleanup (void *result, ZSTD_CCtx * const cctx, void *out_buf,
+		 Elf_Data *cdatap)
+{
+  ZSTD_freeCCtx (cctx);
+  free (out_buf);
+  if (cdatap != NULL)
+    free (cdatap->d_buf);
+  return result;
+}
+
+#define zstd_cleanup(result, cdata) \
+    do_zstd_cleanup(result, cctx, out_buf, cdata)
+
  void *
  internal_function
-__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
+__libelf_compress_zstd (Elf_Scn *scn, size_t hsize, int ei_data,
+			size_t *orig_size, size_t *orig_addralign,
+			size_t *new_size, bool force,
+			Elf_Data *data, Elf_Data *next_data,
+			void *out_buf, size_t out_size, size_t block)
+{
+  /* Caller gets to fill in the header at the start.  Just skip it here.  */
+  size_t used = hsize;
+
+  ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+  Elf_Data cdata;
+  cdata.d_buf = NULL;
+
+  /* Loop over data buffers.  */
+  ZSTD_EndDirective mode = ZSTD_e_continue;
+
+  do
+    {
+      /* Convert to raw if different endianness.  */
+      cdata = *data;
+      bool convert = ei_data != MY_ELFDATA && data->d_size > 0;
+      if (convert)
+	{
+	  /* Don't do this conversion in place, we might want to keep
+	     the original data around, caller decides.  */
+	  cdata.d_buf = malloc (data->d_size);
+	  if (cdata.d_buf == NULL)
+	    {
+	      __libelf_seterrno (ELF_E_NOMEM);
+	      return zstd_cleanup (NULL, NULL);
+	    }
+	  if (gelf_xlatetof (scn->elf, &cdata, data, ei_data) == NULL)
+	    return zstd_cleanup (NULL, &cdata);
+	}
+
+      ZSTD_inBuffer ib = { cdata.d_buf, cdata.d_size, 0 };
+
+      /* Get next buffer to see if this is the last one.  */
+      data = next_data;
+      if (data != NULL)
+	{
+	  *orig_addralign = MAX (*orig_addralign, data->d_align);
+	  *orig_size += data->d_size;
+	  next_data = elf_getdata (scn, data);
+	}
+      else
+	mode = ZSTD_e_end;
+
+      /* Flush one data buffer.  */
+      for (;;)
+	{
+	  ZSTD_outBuffer ob = { out_buf + used, out_size - used, 0 };
+	  size_t ret = ZSTD_compressStream2 (cctx, &ob, &ib, mode);
+	  if (ZSTD_isError (ret))
+	    {
+	      __libelf_seterrno (ELF_E_COMPRESS_ERROR);
+	      return zstd_cleanup (NULL, convert ? &cdata : NULL);
+	    }
+	  used += ob.pos;
+
+	  /* Bail out if we are sure the user doesn't want the
+	     compression forced and we are using more compressed data
+	     than original data.  */
+	  if (!force && mode == ZSTD_e_end && used >= *orig_size)
+	    return zstd_cleanup ((void *) -1, convert ? &cdata : NULL);
+
+	  if (ret > 0)
+	    {
+	      void *bigger = realloc (out_buf, out_size + block);
+	      if (bigger == NULL)
+		{
+		  __libelf_seterrno (ELF_E_NOMEM);
+		  return zstd_cleanup (NULL, convert ? &cdata : NULL);
+		}
+	      out_buf = bigger;
+	      out_size += block;
+	    }
+	  else
+	    break;
+	}
+
+      if (convert)
+	{
+	  free (cdata.d_buf);
+	  cdata.d_buf = NULL;
+	}
+    }
+  while (mode != ZSTD_e_end); /* More data blocks.  */
+
+  ZSTD_freeCCtx (cctx);
+  *new_size = used;
+  return out_buf;
+}
+#endif
+
+/* Given a section, uses the (in-memory) Elf_Data to extract the
+   original data size (including the given header size) and data
+   alignment.  Returns a buffer that has at least hsize bytes (for the
+   caller to fill in with a header) plus zlib compressed date.  Also
+   returns the new buffer size in new_size (hsize + compressed data
+   size).  Returns (void *) -1 when FORCE is false and the compressed
+   data would be bigger than the original data.  */
+void *
+internal_function
+__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
+		   size_t *orig_size, size_t *orig_addralign,
+		   size_t *new_size, bool force, bool use_zstd)
+{
+  /* The compressed data is the on-disk data.  We simplify the
+     implementation a bit by asking for the (converted) in-memory
+     data (which might be all there is if the user created it with
+     elf_newdata) and then convert back to raw if needed before
+     compressing.  Should be made a bit more clever to directly
+     use raw if that is directly available.  */
+  Elf_Data *data = elf_getdata (scn, NULL);
+  if (data == NULL)
+    return NULL;
+
+  /* When not forced and we immediately know we would use more data by
+     compressing, because of the header plus zlib overhead (five bytes
+     per 16 KB block, plus a one-time overhead of six bytes for the
+     entire stream), don't do anything.
+     Size estimation for ZSTD compression would be similar.  */
+  Elf_Data *next_data = elf_getdata (scn, data);
+  if (next_data == NULL && !force
+      && data->d_size <= hsize + 5 + 6)
+    return (void *) -1;
+
+  *orig_addralign = data->d_align;
+  *orig_size = data->d_size;
+
+  /* Guess an output block size. 1/8th of the original Elf_Data plus
+     hsize.  Make the first chunk twice that size (25%), then increase
+     by a block (12.5%) when necessary.  */
+  size_t block = (data->d_size / 8) + hsize;
+  size_t out_size = 2 * block;
+  void *out_buf = malloc (out_size);
+  if (out_buf == NULL)
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return NULL;
+    }
+
+  if (use_zstd)
+#ifdef USE_ZSTD
+    return __libelf_compress_zstd (scn, hsize, ei_data, orig_size,
+				   orig_addralign, new_size, force,
+				   data, next_data, out_buf, out_size,
+				   block);
+#else
+    return NULL;
+#endif
+  else
+    return __libelf_compress_zlib (scn, hsize, ei_data, orig_size,
+				   orig_addralign, new_size, force,
+				   data, next_data, out_buf, out_size,
+				   block);
+}
+
+void *
+internal_function
+__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
  {
    /* Catch highly unlikely compression ratios so we don't allocate
       some giant amount of memory for nothing. The max compression
@@ -218,7 +360,7 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
        return NULL;
      }
  
-  /* Malloc might return NULL when requestion zero size.  This is highly
+  /* Malloc might return NULL when requesting zero size.  This is highly
       unlikely, it would only happen when the compression was forced.
       But we do need a non-NULL buffer to return and set as result.
       Just make sure to always allocate at least 1 byte.  */
@@ -260,6 +402,51 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
    return buf_out;
  }
  
+#ifdef USE_ZSTD
+void *
+internal_function
+__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
+{
+  /* Malloc might return NULL when requesting zero size.  This is highly
+     unlikely, it would only happen when the compression was forced.
+     But we do need a non-NULL buffer to return and set as result.
+     Just make sure to always allocate at least 1 byte.  */
+  void *buf_out = malloc (size_out ?: 1);
+  if (unlikely (buf_out == NULL))
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return NULL;
+    }
+
+  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
+  if (ZSTD_isError (ret))
+    {
+      free (buf_out);
+      __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
+      return NULL;
+    }
+  else
+    return buf_out;
+}
+#endif
+
+void *
+internal_function
+__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
+{
+  if (chtype == ELFCOMPRESS_ZLIB)
+    return __libelf_decompress_zlib (buf_in, size_in, size_out);
+  else
+    {
+#ifdef USE_ZSTD
+    return __libelf_decompress_zstd (buf_in, size_in, size_out);
+#else
+    __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
+    return NULL;
+#endif
+    }
+}
+
  void *
  internal_function
  __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
@@ -268,7 +455,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
    if (gelf_getchdr (scn, &chdr) == NULL)
      return NULL;
  
-  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
      {
        __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
        return NULL;
@@ -295,7 +482,9 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
  		  ? sizeof (Elf32_Chdr) : sizeof (Elf64_Chdr));
    size_t size_in = data->d_size - hsize;
    void *buf_in = data->d_buf + hsize;
-  void *buf_out = __libelf_decompress (buf_in, size_in, chdr.ch_size);
+  void *buf_out
+    = __libelf_decompress (chdr.ch_type, buf_in, size_in, chdr.ch_size);
+
    *size_out = chdr.ch_size;
    *addralign = chdr.ch_addralign;
    return buf_out;
@@ -394,7 +583,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
      }
  
    int compressed = (sh_flags & SHF_COMPRESSED);
-  if (type == ELFCOMPRESS_ZLIB)
+  if (type == ELFCOMPRESS_ZLIB || type == ELFCOMPRESS_ZSTD)
      {
        /* Compress/Deflate.  */
        if (compressed == 1)
@@ -408,7 +597,8 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
        size_t orig_size, orig_addralign, new_size;
        void *out_buf = __libelf_compress (scn, hsize, elfdata,
  					 &orig_size, &orig_addralign,
-					 &new_size, force);
+					 &new_size, force,
+					 type == ELFCOMPRESS_ZSTD);
  
        /* Compression would make section larger, don't change anything.  */
        if (out_buf == (void *) -1)
@@ -422,7 +612,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
        if (elfclass == ELFCLASS32)
  	{
  	  Elf32_Chdr chdr;
-	  chdr.ch_type = ELFCOMPRESS_ZLIB;
+	  chdr.ch_type = type;
  	  chdr.ch_size = orig_size;
  	  chdr.ch_addralign = orig_addralign;
  	  if (elfdata != MY_ELFDATA)
@@ -436,7 +626,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
        else
  	{
  	  Elf64_Chdr chdr;
-	  chdr.ch_type = ELFCOMPRESS_ZLIB;
+	  chdr.ch_type = type;
  	  chdr.ch_reserved = 0;
  	  chdr.ch_size = orig_size;
  	  chdr.ch_addralign = sh_addralign;
diff --git a/libelf/elf_compress_gnu.c b/libelf/elf_compress_gnu.c
index 3d2977e7..8e20b30e 100644
--- a/libelf/elf_compress_gnu.c
+++ b/libelf/elf_compress_gnu.c
@@ -103,7 +103,8 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
        size_t orig_size, new_size, orig_addralign;
        void *out_buf = __libelf_compress (scn, hsize, elfdata,
  					 &orig_size, &orig_addralign,
-					 &new_size, force);
+					 &new_size, force,
+					 /* use_zstd */ false);
  
        /* Compression would make section larger, don't change anything.  */
        if (out_buf == (void *) -1)
@@ -178,7 +179,7 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
        size_t size = gsize;
        size_t size_in = data->d_size - hsize;
        void *buf_in = data->d_buf + hsize;
-      void *buf_out = __libelf_decompress (buf_in, size_in, size);
+      void *buf_out = __libelf_decompress (ELFCOMPRESS_ZLIB, buf_in, size_in, size);
        if (buf_out == NULL)
  	return -1;
  
diff --git a/libelf/libelfP.h b/libelf/libelfP.h
index d88a613c..6624f38a 100644
--- a/libelf/libelfP.h
+++ b/libelf/libelfP.h
@@ -574,10 +574,10 @@ extern uint32_t __libelf_crc32 (uint32_t crc, unsigned char *buf, size_t len)
  
  extern void * __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
  				 size_t *orig_size, size_t *orig_addralign,
-				 size_t *size, bool force)
+				 size_t *size, bool force, bool use_zstd)
       internal_function;
  
-extern void * __libelf_decompress (void *buf_in, size_t size_in,
+extern void * __libelf_decompress (int chtype, void *buf_in, size_t size_in,
  				   size_t size_out) internal_function;
  extern void * __libelf_decompress_elf (Elf_Scn *scn,
  				       size_t *size_out, size_t *addralign)
diff --git a/src/elfcompress.c b/src/elfcompress.c
index eff765e8..b0f1677c 100644
--- a/src/elfcompress.c
+++ b/src/elfcompress.c
@@ -55,9 +55,10 @@ enum ch_type
    UNSET = -1,
    NONE,
    ZLIB,
+  ZSTD,
  
    /* Maximal supported ch_type.  */
-  MAXIMAL_CH_TYPE = ZLIB,
+  MAXIMAL_CH_TYPE = ZSTD,
  
    ZLIB_GNU = 1 << 16
  };
@@ -139,6 +140,12 @@ parse_opt (int key, char *arg __attribute__ ((unused)),
  	type = ZLIB;
        else if (strcmp ("zlib-gnu", arg) == 0 || strcmp ("gnu", arg) == 0)
  	type = ZLIB_GNU;
+      else if (strcmp ("zstd", arg) == 0)
+#ifdef USE_ZSTD
+	type = ZSTD;
+#else
+	argp_error (state, N_("ZSTD support is not enabled"));
+#endif
        else
  	argp_error (state, N_("unknown compression type '%s'"), arg);
        break;
@@ -281,6 +288,44 @@ get_sections (unsigned int *sections, size_t shnum)
    return s;
  }
  
+/* Return compression type of a given section SHDR.  */
+
+static enum ch_type
+get_section_chtype (Elf_Scn *scn, GElf_Shdr *shdr, const char *sname,
+		    size_t ndx)
+{
+  enum ch_type chtype = UNSET;
+  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
+    {
+      GElf_Chdr chdr;
+      if (gelf_getchdr (scn, &chdr) != NULL)
+	{
+	  chtype = (enum ch_type)chdr.ch_type;
+	  if (chtype == NONE)
+	    {
+	      error (0, 0, "Compression type for section %zd"
+		     " can't be zero ", ndx);
+	      chtype = UNSET;
+	    }
+	  else if (chtype > MAXIMAL_CH_TYPE)
+	    {
+	      error (0, 0, "Compression type (%d) for section %zd"
+		     " is unsupported ", chtype, ndx);
+	      chtype = UNSET;
+	    }
+	}
+      else
+	error (0, 0, "Couldn't get chdr for section %zd", ndx);
+    }
+  /* Set ZLIB_GNU compression manually for .zdebug* sections.  */
+  else if (startswith (sname, ".zdebug"))
+    chtype = ZLIB_GNU;
+  else
+    chtype = NONE;
+
+  return chtype;
+}
+
  static int
  process_file (const char *fname)
  {
@@ -461,26 +506,29 @@ process_file (const char *fname)
  
        if (section_name_matches (sname))
  	{
-	  if (!force && type == NONE
-	      && (shdr->sh_flags & SHF_COMPRESSED) == 0
-	      && !startswith (sname, ".zdebug"))
-	    {
-	      if (verbose > 0)
-		printf ("[%zd] %s already decompressed\n", ndx, sname);
-	    }
-	  else if (!force && type == ZLIB
-		   && (shdr->sh_flags & SHF_COMPRESSED) != 0)
-	    {
-	      if (verbose > 0)
-		printf ("[%zd] %s already compressed\n", ndx, sname);
-	    }
-	  else if (!force && type == ZLIB_GNU
-		   && startswith (sname, ".zdebug"))
+	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
+	  if (!force && verbose > 0)
  	    {
-	      if (verbose > 0)
-		printf ("[%zd] %s already GNU compressed\n", ndx, sname);
+	      /* The current compression matches the final one.  */
+	      if (type == schtype)
+		switch (type)
+		  {
+		  case NONE:
+		    printf ("[%zd] %s already decompressed\n", ndx, sname);
+		    break;
+		  case ZLIB:
+		  case ZSTD:
+		    printf ("[%zd] %s already compressed\n", ndx, sname);
+		    break;
+		  case ZLIB_GNU:
+		    printf ("[%zd] %s already GNU compressed\n", ndx, sname);
+		    break;
+		  default:
+		    abort ();
+		  }
  	    }
-	  else if (shdr->sh_type != SHT_NOBITS
+
+	  if (shdr->sh_type != SHT_NOBITS
  	      && (shdr->sh_flags & SHF_ALLOC) == 0)
  	    {
  	      set_section (sections, ndx);
@@ -692,37 +740,12 @@ process_file (const char *fname)
  	     (de)compressed, invalidating the string pointers.  */
  	  sname = xstrdup (sname);
  
+
  	  /* Detect source compression that is how is the section compressed
  	     now.  */
-	  GElf_Chdr chdr;
-	  enum ch_type schtype = NONE;
-	  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
-	    {
-	      if (gelf_getchdr (scn, &chdr) != NULL)
-		{
-		  schtype = (enum ch_type)chdr.ch_type;
-		  if (schtype == NONE)
-		    {
-		      error (0, 0, "Compression type for section %zd"
-			     " can't be zero ", ndx);
-		      goto cleanup;
-		    }
-		  else if (schtype > MAXIMAL_CH_TYPE)
-		    {
-		      error (0, 0, "Compression type (%d) for section %zd"
-			     " is unsupported ", schtype, ndx);
-		      goto cleanup;
-		    }
-		}
-	      else
-		{
-		  error (0, 0, "Couldn't get chdr for section %zd", ndx);
-		  goto cleanup;
-		}
-	    }
-	  /* Set ZLIB compression manually for .zdebug* sections.  */
-	  else if (startswith (sname, ".zdebug"))
-	    schtype = ZLIB_GNU;
+	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
+	  if (schtype == UNSET)
+	    goto cleanup;
  
  	  /* We might want to decompress (and rename), but not
  	     compress during this pass since we might need the section
@@ -754,7 +777,7 @@ process_file (const char *fname)
  	    case ZLIB_GNU:
  	      if (startswith (sname, ".debug"))
  		{
-		  if (schtype == ZLIB)
+		  if (schtype == ZLIB || schtype == ZSTD)
  		    {
  		      /* First decompress to recompress GNU style.
  			 Don't report even when verbose.  */
@@ -818,19 +841,22 @@ process_file (const char *fname)
  	      break;
  
  	    case ZLIB:
-	      if ((shdr->sh_flags & SHF_COMPRESSED) == 0)
+	    case ZSTD:
+	      if (schtype != type)
  		{
-		  if (schtype == ZLIB_GNU)
+		  if (schtype != NONE)
  		    {
-		      /* First decompress to recompress zlib style.
-			 Don't report even when verbose.  */
+		      /* Decompress first.  */
  		      if (compress_section (scn, size, sname, NULL, ndx,
  					    schtype, NONE, false) < 0)
  			goto cleanup;
  
-		      snamebuf[0] = '.';
-		      strcpy (&snamebuf[1], &sname[2]);
-		      newname = snamebuf;
+		      if (schtype == ZLIB_GNU)
+			{
+			  snamebuf[0] = '.';
+			  strcpy (&snamebuf[1], &sname[2]);
+			  newname = snamebuf;
+			}
  		    }
  
  		  if (skip_compress_section)
@@ -838,7 +864,7 @@ process_file (const char *fname)
  		      if (ndx == shdrstrndx)
  			{
  			  shstrtab_size = size;
-			  shstrtab_compressed = ZLIB;
+			  shstrtab_compressed = type;
  			  if (shstrtab_name != NULL
  			      || shstrtab_newname != NULL)
  			    {
@@ -855,7 +881,7 @@ process_file (const char *fname)
  		      else
  			{
  			  symtab_size = size;
-			  symtab_compressed = ZLIB;
+			  symtab_compressed = type;
  			  symtab_name = xstrdup (sname);
  			  symtab_newname = (newname == NULL
  					    ? NULL : xstrdup (newname));
@@ -1378,7 +1404,7 @@ main (int argc, char **argv)
  	N_("Place (de)compressed output into FILE"),
  	0 },
        { "type", 't', "TYPE", 0,
-	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias) or 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias)"),
+	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias), 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias) or 'zstd'"),
  	0 },
        { "name", 'n', "SECTION", 0,
  	N_("SECTION name to (de)compress, SECTION is an extended wildcard pattern (defaults to '.?(z)debug*')"),
diff --git a/src/readelf.c b/src/readelf.c
index cc3e0229..451f8400 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -1238,13 +1238,17 @@ get_visibility_type (int value)
  static const char *
  elf_ch_type_name (unsigned int code)
  {
-  if (code == 0)
-    return "NONE";
-
-  if (code == ELFCOMPRESS_ZLIB)
-    return "ZLIB";
-
-  return "UNKNOWN";
+  switch (code)
+    {
+    case 0:
+      return "NONE";
+    case ELFCOMPRESS_ZLIB:
+      return "ZLIB";
+    case ELFCOMPRESS_ZSTD:
+      return "ZSTD";
+    default:
+      return "UNKNOWN";
+    }
  }
  
  /* Print the section headers.  */
diff --git a/tests/run-compress-test.sh b/tests/run-compress-test.sh
index a6a298f5..3f9c990e 100755
--- a/tests/run-compress-test.sh
+++ b/tests/run-compress-test.sh
@@ -61,6 +61,30 @@ testrun_elfcompress_file()
      echo "uncompress $elfcompressedfile -> $elfuncompressedfile"
      testrun ${abs_top_builddir}/src/elfcompress -v -t none -o ${elfuncompressedfile} ${elfcompressedfile}
      testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${elfuncompressedfile}
+
+    outputfile="${infile}.gabi.zstd"
+    tempfiles "$outputfile"
+    echo "zstd compress $elfcompressedfile -> $outputfile"
+    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${outputfile} ${elfcompressedfile}
+    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${outputfile}
+    echo "checking compressed section header" $outputfile
+    testrun ${abs_top_builddir}/src/readelf -Sz ${outputfile} | grep "ELF ZSTD" >/dev/null
+
+    zstdfile="${infile}.zstd"
+    tempfiles "$zstdfile"
+    echo "zstd compress $uncompressedfile -> $zstdfile"
+    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${zstdfile} ${elfuncompressedfile}
+    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdfile}
+    echo "checking compressed section header" $zstdfile
+    testrun ${abs_top_builddir}/src/readelf -Sz ${zstdfile} | grep "ELF ZSTD" >/dev/null
+
+    zstdgnufile="${infile}.zstd.gnu"
+    tempfiles "$zstdgnufile"
+    echo "zstd re-compress to GNU ZLIB $zstdfile -> $zstdgnufile"
+    testrun ${abs_top_builddir}/src/elfcompress -v -t zlib-gnu -o ${zstdgnufile} ${zstdfile}
+    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdgnufile}
+    echo "checking .zdebug section name" $zstdgnufile
+    testrun ${abs_top_builddir}/src/readelf -S ${zstdgnufile} | grep ".zdebug" >/dev/null
  }
  
  testrun_elfcompress()
-- 
2.38.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-11-29 12:05                 ` [PATCHv2] support ZSTD compression algorithm Martin Liška
@ 2022-12-09 10:17                   ` Martin Liška
  2022-12-15 13:17                   ` Mark Wielaard
  2022-12-16  0:32                   ` [PATCHv2] support ZSTD compression algorithm Mark Wielaard
  2 siblings, 0 replies; 25+ messages in thread
From: Martin Liška @ 2022-12-09 10:17 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Mark Wielaard

PING^1

On 11/29/22 13:05, Martin Liška wrote:
> Hi.
> 
> There's second version of the patch that fully support both compression and decompression.
> 
> Changes from the v1:
> - compression support added
> - zstd detection is fixed
> - new tests are added
> - builds fine w/ and w/o the ZSTD library
> 
> What's currently missing and where I need a help:
> 
> 1) When I build ./configure --without-zstd, I don't have
> a reasonable error message (something similar to binutils's readelf:
> readelf: Warning: section '.debug_str' has unsupported compress type: 2)
> even though, __libelf_decompress returns NULL and __libelf_seterrno).
> One can see a garbage in the console.
> 
> How to handle that properly?
> 
> 2) How should I run my newly added tests conditionally if zstd
> configuration support is enabled?
> 
> Cheers,
> Martin
> 
> ---
>  configure.ac               |   8 +-
>  libelf/Makefile.am         |   2 +-
>  libelf/elf_compress.c      | 294 ++++++++++++++++++++++++++++++-------
>  libelf/elf_compress_gnu.c  |   5 +-
>  libelf/libelfP.h           |   4 +-
>  src/elfcompress.c          | 144 ++++++++++--------
>  src/readelf.c              |  18 ++-
>  tests/run-compress-test.sh |  24 +++
>  8 files changed, 373 insertions(+), 126 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 59be27ac..07cfa54b 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -410,6 +410,11 @@ dnl Test for bzlib and xz/lzma/zstd, gives BZLIB/LZMALIB/ZSTD .am
>  dnl conditional and config.h USE_BZLIB/USE_LZMALIB/USE_ZSTD #define.
>  save_LIBS="$LIBS"
>  LIBS=
> +eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
> +AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
> +AC_SUBST([LIBZSTD])
> +zstd_LIBS="$LIBS"
> +AC_SUBST([zstd_LIBS])
>  eu_ZIPLIB(bzlib,BZLIB,bz2,BZ2_bzdopen,bzip2)
>  # We need this since bzip2 doesn't have a pkgconfig file.
>  BZ2_LIB="$LIBS"
> @@ -417,9 +422,6 @@ AC_SUBST([BZ2_LIB])
>  eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
>  AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"], [LIBLZMA=""])
>  AC_SUBST([LIBLZMA])
> -eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
> -AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
> -AC_SUBST([LIBZSTD])
>  zip_LIBS="$LIBS"
>  LIBS="$save_LIBS"
>  AC_SUBST([zip_LIBS])
> diff --git a/libelf/Makefile.am b/libelf/Makefile.am
> index 560ed45f..24c25cf8 100644
> --- a/libelf/Makefile.am
> +++ b/libelf/Makefile.am
> @@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
>  am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
>  
>  libelf_so_DEPS = ../lib/libeu.a
> -libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz $(zstd_LIBS)
>  if USE_LOCKS
>  libelf_so_LDLIBS += -lpthread
>  endif
> diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
> index d7f53af2..7a6e37a4 100644
> --- a/libelf/elf_compress.c
> +++ b/libelf/elf_compress.c
> @@ -39,6 +39,10 @@
>  #include <string.h>
>  #include <zlib.h>
>  
> +#ifdef USE_ZSTD
> +#include <zstd.h>
> +#endif
> +
>  /* Cleanup and return result.  Don't leak memory.  */
>  static void *
>  do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
> @@ -54,53 +58,14 @@ do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
>  #define deflate_cleanup(result, cdata) \
>      do_deflate_cleanup(result, &z, out_buf, cdata)
>  
> -/* Given a section, uses the (in-memory) Elf_Data to extract the
> -   original data size (including the given header size) and data
> -   alignment.  Returns a buffer that has at least hsize bytes (for the
> -   caller to fill in with a header) plus zlib compressed date.  Also
> -   returns the new buffer size in new_size (hsize + compressed data
> -   size).  Returns (void *) -1 when FORCE is false and the compressed
> -   data would be bigger than the original data.  */
>  void *
>  internal_function
> -__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
> -           size_t *orig_size, size_t *orig_addralign,
> -           size_t *new_size, bool force)
> +__libelf_compress_zlib (Elf_Scn *scn, size_t hsize, int ei_data,
> +            size_t *orig_size, size_t *orig_addralign,
> +            size_t *new_size, bool force,
> +            Elf_Data *data, Elf_Data *next_data,
> +            void *out_buf, size_t out_size, size_t block)
>  {
> -  /* The compressed data is the on-disk data.  We simplify the
> -     implementation a bit by asking for the (converted) in-memory
> -     data (which might be all there is if the user created it with
> -     elf_newdata) and then convert back to raw if needed before
> -     compressing.  Should be made a bit more clever to directly
> -     use raw if that is directly available.  */
> -  Elf_Data *data = elf_getdata (scn, NULL);
> -  if (data == NULL)
> -    return NULL;
> -
> -  /* When not forced and we immediately know we would use more data by
> -     compressing, because of the header plus zlib overhead (five bytes
> -     per 16 KB block, plus a one-time overhead of six bytes for the
> -     entire stream), don't do anything.  */
> -  Elf_Data *next_data = elf_getdata (scn, data);
> -  if (next_data == NULL && !force
> -      && data->d_size <= hsize + 5 + 6)
> -    return (void *) -1;
> -
> -  *orig_addralign = data->d_align;
> -  *orig_size = data->d_size;
> -
> -  /* Guess an output block size. 1/8th of the original Elf_Data plus
> -     hsize.  Make the first chunk twice that size (25%), then increase
> -     by a block (12.5%) when necessary.  */
> -  size_t block = (data->d_size / 8) + hsize;
> -  size_t out_size = 2 * block;
> -  void *out_buf = malloc (out_size);
> -  if (out_buf == NULL)
> -    {
> -      __libelf_seterrno (ELF_E_NOMEM);
> -      return NULL;
> -    }
> -
>    /* Caller gets to fill in the header at the start.  Just skip it here.  */
>    size_t used = hsize;
>  
> @@ -205,9 +170,186 @@ __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
>    return out_buf;
>  }
>  
> +#ifdef USE_ZSTD
> +/* Cleanup and return result.  Don't leak memory.  */
> +static void *
> +do_zstd_cleanup (void *result, ZSTD_CCtx * const cctx, void *out_buf,
> +         Elf_Data *cdatap)
> +{
> +  ZSTD_freeCCtx (cctx);
> +  free (out_buf);
> +  if (cdatap != NULL)
> +    free (cdatap->d_buf);
> +  return result;
> +}
> +
> +#define zstd_cleanup(result, cdata) \
> +    do_zstd_cleanup(result, cctx, out_buf, cdata)
> +
>  void *
>  internal_function
> -__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
> +__libelf_compress_zstd (Elf_Scn *scn, size_t hsize, int ei_data,
> +            size_t *orig_size, size_t *orig_addralign,
> +            size_t *new_size, bool force,
> +            Elf_Data *data, Elf_Data *next_data,
> +            void *out_buf, size_t out_size, size_t block)
> +{
> +  /* Caller gets to fill in the header at the start.  Just skip it here.  */
> +  size_t used = hsize;
> +
> +  ZSTD_CCtx* const cctx = ZSTD_createCCtx();
> +  Elf_Data cdata;
> +  cdata.d_buf = NULL;
> +
> +  /* Loop over data buffers.  */
> +  ZSTD_EndDirective mode = ZSTD_e_continue;
> +
> +  do
> +    {
> +      /* Convert to raw if different endianness.  */
> +      cdata = *data;
> +      bool convert = ei_data != MY_ELFDATA && data->d_size > 0;
> +      if (convert)
> +    {
> +      /* Don't do this conversion in place, we might want to keep
> +         the original data around, caller decides.  */
> +      cdata.d_buf = malloc (data->d_size);
> +      if (cdata.d_buf == NULL)
> +        {
> +          __libelf_seterrno (ELF_E_NOMEM);
> +          return zstd_cleanup (NULL, NULL);
> +        }
> +      if (gelf_xlatetof (scn->elf, &cdata, data, ei_data) == NULL)
> +        return zstd_cleanup (NULL, &cdata);
> +    }
> +
> +      ZSTD_inBuffer ib = { cdata.d_buf, cdata.d_size, 0 };
> +
> +      /* Get next buffer to see if this is the last one.  */
> +      data = next_data;
> +      if (data != NULL)
> +    {
> +      *orig_addralign = MAX (*orig_addralign, data->d_align);
> +      *orig_size += data->d_size;
> +      next_data = elf_getdata (scn, data);
> +    }
> +      else
> +    mode = ZSTD_e_end;
> +
> +      /* Flush one data buffer.  */
> +      for (;;)
> +    {
> +      ZSTD_outBuffer ob = { out_buf + used, out_size - used, 0 };
> +      size_t ret = ZSTD_compressStream2 (cctx, &ob, &ib, mode);
> +      if (ZSTD_isError (ret))
> +        {
> +          __libelf_seterrno (ELF_E_COMPRESS_ERROR);
> +          return zstd_cleanup (NULL, convert ? &cdata : NULL);
> +        }
> +      used += ob.pos;
> +
> +      /* Bail out if we are sure the user doesn't want the
> +         compression forced and we are using more compressed data
> +         than original data.  */
> +      if (!force && mode == ZSTD_e_end && used >= *orig_size)
> +        return zstd_cleanup ((void *) -1, convert ? &cdata : NULL);
> +
> +      if (ret > 0)
> +        {
> +          void *bigger = realloc (out_buf, out_size + block);
> +          if (bigger == NULL)
> +        {
> +          __libelf_seterrno (ELF_E_NOMEM);
> +          return zstd_cleanup (NULL, convert ? &cdata : NULL);
> +        }
> +          out_buf = bigger;
> +          out_size += block;
> +        }
> +      else
> +        break;
> +    }
> +
> +      if (convert)
> +    {
> +      free (cdata.d_buf);
> +      cdata.d_buf = NULL;
> +    }
> +    }
> +  while (mode != ZSTD_e_end); /* More data blocks.  */
> +
> +  ZSTD_freeCCtx (cctx);
> +  *new_size = used;
> +  return out_buf;
> +}
> +#endif
> +
> +/* Given a section, uses the (in-memory) Elf_Data to extract the
> +   original data size (including the given header size) and data
> +   alignment.  Returns a buffer that has at least hsize bytes (for the
> +   caller to fill in with a header) plus zlib compressed date.  Also
> +   returns the new buffer size in new_size (hsize + compressed data
> +   size).  Returns (void *) -1 when FORCE is false and the compressed
> +   data would be bigger than the original data.  */
> +void *
> +internal_function
> +__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
> +           size_t *orig_size, size_t *orig_addralign,
> +           size_t *new_size, bool force, bool use_zstd)
> +{
> +  /* The compressed data is the on-disk data.  We simplify the
> +     implementation a bit by asking for the (converted) in-memory
> +     data (which might be all there is if the user created it with
> +     elf_newdata) and then convert back to raw if needed before
> +     compressing.  Should be made a bit more clever to directly
> +     use raw if that is directly available.  */
> +  Elf_Data *data = elf_getdata (scn, NULL);
> +  if (data == NULL)
> +    return NULL;
> +
> +  /* When not forced and we immediately know we would use more data by
> +     compressing, because of the header plus zlib overhead (five bytes
> +     per 16 KB block, plus a one-time overhead of six bytes for the
> +     entire stream), don't do anything.
> +     Size estimation for ZSTD compression would be similar.  */
> +  Elf_Data *next_data = elf_getdata (scn, data);
> +  if (next_data == NULL && !force
> +      && data->d_size <= hsize + 5 + 6)
> +    return (void *) -1;
> +
> +  *orig_addralign = data->d_align;
> +  *orig_size = data->d_size;
> +
> +  /* Guess an output block size. 1/8th of the original Elf_Data plus
> +     hsize.  Make the first chunk twice that size (25%), then increase
> +     by a block (12.5%) when necessary.  */
> +  size_t block = (data->d_size / 8) + hsize;
> +  size_t out_size = 2 * block;
> +  void *out_buf = malloc (out_size);
> +  if (out_buf == NULL)
> +    {
> +      __libelf_seterrno (ELF_E_NOMEM);
> +      return NULL;
> +    }
> +
> +  if (use_zstd)
> +#ifdef USE_ZSTD
> +    return __libelf_compress_zstd (scn, hsize, ei_data, orig_size,
> +                   orig_addralign, new_size, force,
> +                   data, next_data, out_buf, out_size,
> +                   block);
> +#else
> +    return NULL;
> +#endif
> +  else
> +    return __libelf_compress_zlib (scn, hsize, ei_data, orig_size,
> +                   orig_addralign, new_size, force,
> +                   data, next_data, out_buf, out_size,
> +                   block);
> +}
> +
> +void *
> +internal_function
> +__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
>  {
>    /* Catch highly unlikely compression ratios so we don't allocate
>       some giant amount of memory for nothing. The max compression
> @@ -218,7 +360,7 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
>        return NULL;
>      }
>  
> -  /* Malloc might return NULL when requestion zero size.  This is highly
> +  /* Malloc might return NULL when requesting zero size.  This is highly
>       unlikely, it would only happen when the compression was forced.
>       But we do need a non-NULL buffer to return and set as result.
>       Just make sure to always allocate at least 1 byte.  */
> @@ -260,6 +402,51 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
>    return buf_out;
>  }
>  
> +#ifdef USE_ZSTD
> +void *
> +internal_function
> +__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
> +{
> +  /* Malloc might return NULL when requesting zero size.  This is highly
> +     unlikely, it would only happen when the compression was forced.
> +     But we do need a non-NULL buffer to return and set as result.
> +     Just make sure to always allocate at least 1 byte.  */
> +  void *buf_out = malloc (size_out ?: 1);
> +  if (unlikely (buf_out == NULL))
> +    {
> +      __libelf_seterrno (ELF_E_NOMEM);
> +      return NULL;
> +    }
> +
> +  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
> +  if (ZSTD_isError (ret))
> +    {
> +      free (buf_out);
> +      __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
> +      return NULL;
> +    }
> +  else
> +    return buf_out;
> +}
> +#endif
> +
> +void *
> +internal_function
> +__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
> +{
> +  if (chtype == ELFCOMPRESS_ZLIB)
> +    return __libelf_decompress_zlib (buf_in, size_in, size_out);
> +  else
> +    {
> +#ifdef USE_ZSTD
> +    return __libelf_decompress_zstd (buf_in, size_in, size_out);
> +#else
> +    __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
> +    return NULL;
> +#endif
> +    }
> +}
> +
>  void *
>  internal_function
>  __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
> @@ -268,7 +455,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>    if (gelf_getchdr (scn, &chdr) == NULL)
>      return NULL;
>  
> -  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
> +  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
>      {
>        __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
>        return NULL;
> @@ -295,7 +482,9 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>            ? sizeof (Elf32_Chdr) : sizeof (Elf64_Chdr));
>    size_t size_in = data->d_size - hsize;
>    void *buf_in = data->d_buf + hsize;
> -  void *buf_out = __libelf_decompress (buf_in, size_in, chdr.ch_size);
> +  void *buf_out
> +    = __libelf_decompress (chdr.ch_type, buf_in, size_in, chdr.ch_size);
> +
>    *size_out = chdr.ch_size;
>    *addralign = chdr.ch_addralign;
>    return buf_out;
> @@ -394,7 +583,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>      }
>  
>    int compressed = (sh_flags & SHF_COMPRESSED);
> -  if (type == ELFCOMPRESS_ZLIB)
> +  if (type == ELFCOMPRESS_ZLIB || type == ELFCOMPRESS_ZSTD)
>      {
>        /* Compress/Deflate.  */
>        if (compressed == 1)
> @@ -408,7 +597,8 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>        size_t orig_size, orig_addralign, new_size;
>        void *out_buf = __libelf_compress (scn, hsize, elfdata,
>                       &orig_size, &orig_addralign,
> -                     &new_size, force);
> +                     &new_size, force,
> +                     type == ELFCOMPRESS_ZSTD);
>  
>        /* Compression would make section larger, don't change anything.  */
>        if (out_buf == (void *) -1)
> @@ -422,7 +612,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>        if (elfclass == ELFCLASS32)
>      {
>        Elf32_Chdr chdr;
> -      chdr.ch_type = ELFCOMPRESS_ZLIB;
> +      chdr.ch_type = type;
>        chdr.ch_size = orig_size;
>        chdr.ch_addralign = orig_addralign;
>        if (elfdata != MY_ELFDATA)
> @@ -436,7 +626,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>        else
>      {
>        Elf64_Chdr chdr;
> -      chdr.ch_type = ELFCOMPRESS_ZLIB;
> +      chdr.ch_type = type;
>        chdr.ch_reserved = 0;
>        chdr.ch_size = orig_size;
>        chdr.ch_addralign = sh_addralign;
> diff --git a/libelf/elf_compress_gnu.c b/libelf/elf_compress_gnu.c
> index 3d2977e7..8e20b30e 100644
> --- a/libelf/elf_compress_gnu.c
> +++ b/libelf/elf_compress_gnu.c
> @@ -103,7 +103,8 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
>        size_t orig_size, new_size, orig_addralign;
>        void *out_buf = __libelf_compress (scn, hsize, elfdata,
>                       &orig_size, &orig_addralign,
> -                     &new_size, force);
> +                     &new_size, force,
> +                     /* use_zstd */ false);
>  
>        /* Compression would make section larger, don't change anything.  */
>        if (out_buf == (void *) -1)
> @@ -178,7 +179,7 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
>        size_t size = gsize;
>        size_t size_in = data->d_size - hsize;
>        void *buf_in = data->d_buf + hsize;
> -      void *buf_out = __libelf_decompress (buf_in, size_in, size);
> +      void *buf_out = __libelf_decompress (ELFCOMPRESS_ZLIB, buf_in, size_in, size);
>        if (buf_out == NULL)
>      return -1;
>  
> diff --git a/libelf/libelfP.h b/libelf/libelfP.h
> index d88a613c..6624f38a 100644
> --- a/libelf/libelfP.h
> +++ b/libelf/libelfP.h
> @@ -574,10 +574,10 @@ extern uint32_t __libelf_crc32 (uint32_t crc, unsigned char *buf, size_t len)
>  
>  extern void * __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
>                   size_t *orig_size, size_t *orig_addralign,
> -                 size_t *size, bool force)
> +                 size_t *size, bool force, bool use_zstd)
>       internal_function;
>  
> -extern void * __libelf_decompress (void *buf_in, size_t size_in,
> +extern void * __libelf_decompress (int chtype, void *buf_in, size_t size_in,
>                     size_t size_out) internal_function;
>  extern void * __libelf_decompress_elf (Elf_Scn *scn,
>                         size_t *size_out, size_t *addralign)
> diff --git a/src/elfcompress.c b/src/elfcompress.c
> index eff765e8..b0f1677c 100644
> --- a/src/elfcompress.c
> +++ b/src/elfcompress.c
> @@ -55,9 +55,10 @@ enum ch_type
>    UNSET = -1,
>    NONE,
>    ZLIB,
> +  ZSTD,
>  
>    /* Maximal supported ch_type.  */
> -  MAXIMAL_CH_TYPE = ZLIB,
> +  MAXIMAL_CH_TYPE = ZSTD,
>  
>    ZLIB_GNU = 1 << 16
>  };
> @@ -139,6 +140,12 @@ parse_opt (int key, char *arg __attribute__ ((unused)),
>      type = ZLIB;
>        else if (strcmp ("zlib-gnu", arg) == 0 || strcmp ("gnu", arg) == 0)
>      type = ZLIB_GNU;
> +      else if (strcmp ("zstd", arg) == 0)
> +#ifdef USE_ZSTD
> +    type = ZSTD;
> +#else
> +    argp_error (state, N_("ZSTD support is not enabled"));
> +#endif
>        else
>      argp_error (state, N_("unknown compression type '%s'"), arg);
>        break;
> @@ -281,6 +288,44 @@ get_sections (unsigned int *sections, size_t shnum)
>    return s;
>  }
>  
> +/* Return compression type of a given section SHDR.  */
> +
> +static enum ch_type
> +get_section_chtype (Elf_Scn *scn, GElf_Shdr *shdr, const char *sname,
> +            size_t ndx)
> +{
> +  enum ch_type chtype = UNSET;
> +  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
> +    {
> +      GElf_Chdr chdr;
> +      if (gelf_getchdr (scn, &chdr) != NULL)
> +    {
> +      chtype = (enum ch_type)chdr.ch_type;
> +      if (chtype == NONE)
> +        {
> +          error (0, 0, "Compression type for section %zd"
> +             " can't be zero ", ndx);
> +          chtype = UNSET;
> +        }
> +      else if (chtype > MAXIMAL_CH_TYPE)
> +        {
> +          error (0, 0, "Compression type (%d) for section %zd"
> +             " is unsupported ", chtype, ndx);
> +          chtype = UNSET;
> +        }
> +    }
> +      else
> +    error (0, 0, "Couldn't get chdr for section %zd", ndx);
> +    }
> +  /* Set ZLIB_GNU compression manually for .zdebug* sections.  */
> +  else if (startswith (sname, ".zdebug"))
> +    chtype = ZLIB_GNU;
> +  else
> +    chtype = NONE;
> +
> +  return chtype;
> +}
> +
>  static int
>  process_file (const char *fname)
>  {
> @@ -461,26 +506,29 @@ process_file (const char *fname)
>  
>        if (section_name_matches (sname))
>      {
> -      if (!force && type == NONE
> -          && (shdr->sh_flags & SHF_COMPRESSED) == 0
> -          && !startswith (sname, ".zdebug"))
> -        {
> -          if (verbose > 0)
> -        printf ("[%zd] %s already decompressed\n", ndx, sname);
> -        }
> -      else if (!force && type == ZLIB
> -           && (shdr->sh_flags & SHF_COMPRESSED) != 0)
> -        {
> -          if (verbose > 0)
> -        printf ("[%zd] %s already compressed\n", ndx, sname);
> -        }
> -      else if (!force && type == ZLIB_GNU
> -           && startswith (sname, ".zdebug"))
> +      enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
> +      if (!force && verbose > 0)
>          {
> -          if (verbose > 0)
> -        printf ("[%zd] %s already GNU compressed\n", ndx, sname);
> +          /* The current compression matches the final one.  */
> +          if (type == schtype)
> +        switch (type)
> +          {
> +          case NONE:
> +            printf ("[%zd] %s already decompressed\n", ndx, sname);
> +            break;
> +          case ZLIB:
> +          case ZSTD:
> +            printf ("[%zd] %s already compressed\n", ndx, sname);
> +            break;
> +          case ZLIB_GNU:
> +            printf ("[%zd] %s already GNU compressed\n", ndx, sname);
> +            break;
> +          default:
> +            abort ();
> +          }
>          }
> -      else if (shdr->sh_type != SHT_NOBITS
> +
> +      if (shdr->sh_type != SHT_NOBITS
>            && (shdr->sh_flags & SHF_ALLOC) == 0)
>          {
>            set_section (sections, ndx);
> @@ -692,37 +740,12 @@ process_file (const char *fname)
>           (de)compressed, invalidating the string pointers.  */
>        sname = xstrdup (sname);
>  
> +
>        /* Detect source compression that is how is the section compressed
>           now.  */
> -      GElf_Chdr chdr;
> -      enum ch_type schtype = NONE;
> -      if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
> -        {
> -          if (gelf_getchdr (scn, &chdr) != NULL)
> -        {
> -          schtype = (enum ch_type)chdr.ch_type;
> -          if (schtype == NONE)
> -            {
> -              error (0, 0, "Compression type for section %zd"
> -                 " can't be zero ", ndx);
> -              goto cleanup;
> -            }
> -          else if (schtype > MAXIMAL_CH_TYPE)
> -            {
> -              error (0, 0, "Compression type (%d) for section %zd"
> -                 " is unsupported ", schtype, ndx);
> -              goto cleanup;
> -            }
> -        }
> -          else
> -        {
> -          error (0, 0, "Couldn't get chdr for section %zd", ndx);
> -          goto cleanup;
> -        }
> -        }
> -      /* Set ZLIB compression manually for .zdebug* sections.  */
> -      else if (startswith (sname, ".zdebug"))
> -        schtype = ZLIB_GNU;
> +      enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
> +      if (schtype == UNSET)
> +        goto cleanup;
>  
>        /* We might want to decompress (and rename), but not
>           compress during this pass since we might need the section
> @@ -754,7 +777,7 @@ process_file (const char *fname)
>          case ZLIB_GNU:
>            if (startswith (sname, ".debug"))
>          {
> -          if (schtype == ZLIB)
> +          if (schtype == ZLIB || schtype == ZSTD)
>              {
>                /* First decompress to recompress GNU style.
>               Don't report even when verbose.  */
> @@ -818,19 +841,22 @@ process_file (const char *fname)
>            break;
>  
>          case ZLIB:
> -          if ((shdr->sh_flags & SHF_COMPRESSED) == 0)
> +        case ZSTD:
> +          if (schtype != type)
>          {
> -          if (schtype == ZLIB_GNU)
> +          if (schtype != NONE)
>              {
> -              /* First decompress to recompress zlib style.
> -             Don't report even when verbose.  */
> +              /* Decompress first.  */
>                if (compress_section (scn, size, sname, NULL, ndx,
>                          schtype, NONE, false) < 0)
>              goto cleanup;
>  
> -              snamebuf[0] = '.';
> -              strcpy (&snamebuf[1], &sname[2]);
> -              newname = snamebuf;
> +              if (schtype == ZLIB_GNU)
> +            {
> +              snamebuf[0] = '.';
> +              strcpy (&snamebuf[1], &sname[2]);
> +              newname = snamebuf;
> +            }
>              }
>  
>            if (skip_compress_section)
> @@ -838,7 +864,7 @@ process_file (const char *fname)
>                if (ndx == shdrstrndx)
>              {
>                shstrtab_size = size;
> -              shstrtab_compressed = ZLIB;
> +              shstrtab_compressed = type;
>                if (shstrtab_name != NULL
>                    || shstrtab_newname != NULL)
>                  {
> @@ -855,7 +881,7 @@ process_file (const char *fname)
>                else
>              {
>                symtab_size = size;
> -              symtab_compressed = ZLIB;
> +              symtab_compressed = type;
>                symtab_name = xstrdup (sname);
>                symtab_newname = (newname == NULL
>                          ? NULL : xstrdup (newname));
> @@ -1378,7 +1404,7 @@ main (int argc, char **argv)
>      N_("Place (de)compressed output into FILE"),
>      0 },
>        { "type", 't', "TYPE", 0,
> -    N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias) or 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias)"),
> +    N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias), 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias) or 'zstd'"),
>      0 },
>        { "name", 'n', "SECTION", 0,
>      N_("SECTION name to (de)compress, SECTION is an extended wildcard pattern (defaults to '.?(z)debug*')"),
> diff --git a/src/readelf.c b/src/readelf.c
> index cc3e0229..451f8400 100644
> --- a/src/readelf.c
> +++ b/src/readelf.c
> @@ -1238,13 +1238,17 @@ get_visibility_type (int value)
>  static const char *
>  elf_ch_type_name (unsigned int code)
>  {
> -  if (code == 0)
> -    return "NONE";
> -
> -  if (code == ELFCOMPRESS_ZLIB)
> -    return "ZLIB";
> -
> -  return "UNKNOWN";
> +  switch (code)
> +    {
> +    case 0:
> +      return "NONE";
> +    case ELFCOMPRESS_ZLIB:
> +      return "ZLIB";
> +    case ELFCOMPRESS_ZSTD:
> +      return "ZSTD";
> +    default:
> +      return "UNKNOWN";
> +    }
>  }
>  
>  /* Print the section headers.  */
> diff --git a/tests/run-compress-test.sh b/tests/run-compress-test.sh
> index a6a298f5..3f9c990e 100755
> --- a/tests/run-compress-test.sh
> +++ b/tests/run-compress-test.sh
> @@ -61,6 +61,30 @@ testrun_elfcompress_file()
>      echo "uncompress $elfcompressedfile -> $elfuncompressedfile"
>      testrun ${abs_top_builddir}/src/elfcompress -v -t none -o ${elfuncompressedfile} ${elfcompressedfile}
>      testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${elfuncompressedfile}
> +
> +    outputfile="${infile}.gabi.zstd"
> +    tempfiles "$outputfile"
> +    echo "zstd compress $elfcompressedfile -> $outputfile"
> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${outputfile} ${elfcompressedfile}
> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${outputfile}
> +    echo "checking compressed section header" $outputfile
> +    testrun ${abs_top_builddir}/src/readelf -Sz ${outputfile} | grep "ELF ZSTD" >/dev/null
> +
> +    zstdfile="${infile}.zstd"
> +    tempfiles "$zstdfile"
> +    echo "zstd compress $uncompressedfile -> $zstdfile"
> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${zstdfile} ${elfuncompressedfile}
> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdfile}
> +    echo "checking compressed section header" $zstdfile
> +    testrun ${abs_top_builddir}/src/readelf -Sz ${zstdfile} | grep "ELF ZSTD" >/dev/null
> +
> +    zstdgnufile="${infile}.zstd.gnu"
> +    tempfiles "$zstdgnufile"
> +    echo "zstd re-compress to GNU ZLIB $zstdfile -> $zstdgnufile"
> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zlib-gnu -o ${zstdgnufile} ${zstdfile}
> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdgnufile}
> +    echo "checking .zdebug section name" $zstdgnufile
> +    testrun ${abs_top_builddir}/src/readelf -S ${zstdgnufile} | grep ".zdebug" >/dev/null
>  }
>  
>  testrun_elfcompress()


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-11-29 12:05                 ` [PATCHv2] support ZSTD compression algorithm Martin Liška
  2022-12-09 10:17                   ` Martin Liška
@ 2022-12-15 13:17                   ` Mark Wielaard
  2022-12-19 14:19                     ` Martin Liška
  2022-12-16  0:32                   ` [PATCHv2] support ZSTD compression algorithm Mark Wielaard
  2 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-12-15 13:17 UTC (permalink / raw)
  To: Martin Liška, elfutils-devel

Hi Martin,

On Tue, 2022-11-29 at 13:05 +0100, Martin Liška wrote:
> There's second version of the patch that fully support both
> compression and decompression.
> 
> Changes from the v1:
> - compression support added
> - zstd detection is fixed
> - new tests are added
> - builds fine w/ and w/o the ZSTD library
> 
> What's currently missing and where I need a help:
> 
> 1) When I build ./configure --without-zstd, I don't have
> a reasonable error message (something similar to binutils's readelf:
> readelf: Warning: section '.debug_str' has unsupported compress type:
> 2)
> even though, __libelf_decompress returns NULL and __libelf_seterrno).
> One can see a garbage in the console.
> 
> How to handle that properly?

Is there a particular way you are running eu-readelf? Is it with
generic -w or -a, or decoding a specific section type?

> 2) How should I run my newly added tests conditionally if zstd
> configuration support is enabled?

eu_ZIP will define an AM_CONDITIONAL for ZSTD (note this is different
from the HAVE_ZSTD, which is the am conditional added for having the
zstd program itself). You could use it in tests/Makefile.am as:

  if ZSTD
  TESTS += run-zstd-compress-test.sh
  endif

If you had a separate test... Otherwise add some variable to
TESTS_ENVIRONMENT (and installed_TESTS_ENVIRONMENT) based on the Make
variable that is tested inside the shell script (somewhat like
USE_VALGRIND) maybe.

>   configure.ac               |   8 +-
>   libelf/Makefile.am         |   2 +-
>   libelf/elf_compress.c      | 294 ++++++++++++++++++++++++++++++--
> -----
>   libelf/elf_compress_gnu.c  |   5 +-
>   libelf/libelfP.h           |   4 +-
>   src/elfcompress.c          | 144 ++++++++++--------
>   src/readelf.c              |  18 ++-
>   tests/run-compress-test.sh |  24 +++
>   8 files changed, 373 insertions(+), 126 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 59be27ac..07cfa54b 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -410,6 +410,11 @@ dnl Test for bzlib and xz/lzma/zstd, gives BZLIB/LZMALIB/ZSTD .am
>   dnl conditional and config.h USE_BZLIB/USE_LZMALIB/USE_ZSTD #define.
>   save_LIBS="$LIBS"
>   LIBS=
> +eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
> +AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
> +AC_SUBST([LIBZSTD])
> +zstd_LIBS="$LIBS"
> +AC_SUBST([zstd_LIBS])
>   eu_ZIPLIB(bzlib,BZLIB,bz2,BZ2_bzdopen,bzip2)
>   # We need this since bzip2 doesn't have a pkgconfig file.
>   BZ2_LIB="$LIBS"
> @@ -417,9 +422,6 @@ AC_SUBST([BZ2_LIB])
>   eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
>   AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"], [LIBLZMA=""])
>   AC_SUBST([LIBLZMA])
> -eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
> -AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
> -AC_SUBST([LIBZSTD])
>   zip_LIBS="$LIBS"
>   LIBS="$save_LIBS"
>   AC_SUBST([zip_LIBS])

Doing AC_SUBST([zstd_LIBS]) seems correct. Why is the test moved
earlier?

You are testing for ZSTD_decompress, is that enough? Asking because I
see you are using ZSTD_compressStream2, which seems to requires libzstd
v1.4.0+.

In general how stable is the libzstd api?

You'll also need to use the AC_SUBST for LIBZSTD in config/libelf.pc.in
now, as used in the libdw.pc.in:

  Requires.private: zlib @LIBZSTD@

> diff --git a/libelf/Makefile.am b/libelf/Makefile.am
> index 560ed45f..24c25cf8 100644
> --- a/libelf/Makefile.am
> +++ b/libelf/Makefile.am
> @@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
>   am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
>   
>   libelf_so_DEPS = ../lib/libeu.a
> -libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz $(zstd_LIBS)
>   if USE_LOCKS
>   libelf_so_LDLIBS += -lpthread
>   endif

OK.

Haven't read the actual code yet. I'll get back to that later today.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-11-29 12:05                 ` [PATCHv2] support ZSTD compression algorithm Martin Liška
  2022-12-09 10:17                   ` Martin Liška
  2022-12-15 13:17                   ` Mark Wielaard
@ 2022-12-16  0:32                   ` Mark Wielaard
  2022-12-19 14:21                     ` Martin Liška
  2 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-12-16  0:32 UTC (permalink / raw)
  To: Martin Liška; +Cc: elfutils-devel

Hi Martin,

On Tue, Nov 29, 2022 at 01:05:45PM +0100, Martin Liška wrote:
> diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
> index d7f53af2..7a6e37a4 100644
> --- a/libelf/elf_compress.c
> +++ b/libelf/elf_compress.c
> @@ -39,6 +39,10 @@
>  #include <string.h>
>  #include <zlib.h>
> +#ifdef USE_ZSTD
> +#include <zstd.h>
> +#endif
> +
>  /* Cleanup and return result.  Don't leak memory.  */
>  static void *
>  do_deflate_cleanup (void *result, z_stream *z, void *out_buf,

OK.

> @@ -54,53 +58,14 @@ do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
>  #define deflate_cleanup(result, cdata) \
>      do_deflate_cleanup(result, &z, out_buf, cdata)
> -/* Given a section, uses the (in-memory) Elf_Data to extract the
> -   original data size (including the given header size) and data
> -   alignment.  Returns a buffer that has at least hsize bytes (for the
> -   caller to fill in with a header) plus zlib compressed date.  Also
> -   returns the new buffer size in new_size (hsize + compressed data
> -   size).  Returns (void *) -1 when FORCE is false and the compressed
> -   data would be bigger than the original data.  */
>  void *
>  internal_function
> -__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
> -		   size_t *orig_size, size_t *orig_addralign,
> -		   size_t *new_size, bool force)
> +__libelf_compress_zlib (Elf_Scn *scn, size_t hsize, int ei_data,
> +			size_t *orig_size, size_t *orig_addralign,
> +			size_t *new_size, bool force,
> +			Elf_Data *data, Elf_Data *next_data,
> +			void *out_buf, size_t out_size, size_t block)
>  {
> -  /* The compressed data is the on-disk data.  We simplify the
> -     implementation a bit by asking for the (converted) in-memory
> -     data (which might be all there is if the user created it with
> -     elf_newdata) and then convert back to raw if needed before
> -     compressing.  Should be made a bit more clever to directly
> -     use raw if that is directly available.  */
> -  Elf_Data *data = elf_getdata (scn, NULL);
> -  if (data == NULL)
> -    return NULL;
> -
> -  /* When not forced and we immediately know we would use more data by
> -     compressing, because of the header plus zlib overhead (five bytes
> -     per 16 KB block, plus a one-time overhead of six bytes for the
> -     entire stream), don't do anything.  */
> -  Elf_Data *next_data = elf_getdata (scn, data);
> -  if (next_data == NULL && !force
> -      && data->d_size <= hsize + 5 + 6)
> -    return (void *) -1;
> -
> -  *orig_addralign = data->d_align;
> -  *orig_size = data->d_size;
> -
> -  /* Guess an output block size. 1/8th of the original Elf_Data plus
> -     hsize.  Make the first chunk twice that size (25%), then increase
> -     by a block (12.5%) when necessary.  */
> -  size_t block = (data->d_size / 8) + hsize;
> -  size_t out_size = 2 * block;
> -  void *out_buf = malloc (out_size);
> -  if (out_buf == NULL)
> -    {
> -      __libelf_seterrno (ELF_E_NOMEM);
> -      return NULL;
> -    }
> -
>    /* Caller gets to fill in the header at the start.  Just skip it here.  */
>    size_t used = hsize;

OK, all this is moved lower and then calls either
__libelf_compress_zlib or __libelf_compress_zstd.

> @@ -205,9 +170,186 @@ __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
>    return out_buf;
>  }
> +#ifdef USE_ZSTD
> +/* Cleanup and return result.  Don't leak memory.  */
> +static void *
> +do_zstd_cleanup (void *result, ZSTD_CCtx * const cctx, void *out_buf,
> +		 Elf_Data *cdatap)
> +{
> +  ZSTD_freeCCtx (cctx);
> +  free (out_buf);
> +  if (cdatap != NULL)
> +    free (cdatap->d_buf);
> +  return result;
> +}
>
> +#define zstd_cleanup(result, cdata) \
> +    do_zstd_cleanup(result, cctx, out_buf, cdata)
> +

OK, mimics do_deflate_cleanup.

>  void *
>  internal_function
> -__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
> +__libelf_compress_zstd (Elf_Scn *scn, size_t hsize, int ei_data,
> +			size_t *orig_size, size_t *orig_addralign,
> +			size_t *new_size, bool force,
> +			Elf_Data *data, Elf_Data *next_data,
> +			void *out_buf, size_t out_size, size_t block)
> +{
> +  /* Caller gets to fill in the header at the start.  Just skip it here.  */
> +  size_t used = hsize;
> +
> +  ZSTD_CCtx* const cctx = ZSTD_createCCtx();
> +  Elf_Data cdata;
> +  cdata.d_buf = NULL;
> +
> +  /* Loop over data buffers.  */
> +  ZSTD_EndDirective mode = ZSTD_e_continue;
> +
> +  do
> +    {
> +      /* Convert to raw if different endianness.  */
> +      cdata = *data;
> +      bool convert = ei_data != MY_ELFDATA && data->d_size > 0;
> +      if (convert)
> +	{
> +	  /* Don't do this conversion in place, we might want to keep
> +	     the original data around, caller decides.  */
> +	  cdata.d_buf = malloc (data->d_size);
> +	  if (cdata.d_buf == NULL)
> +	    {
> +	      __libelf_seterrno (ELF_E_NOMEM);
> +	      return zstd_cleanup (NULL, NULL);
> +	    }
> +	  if (gelf_xlatetof (scn->elf, &cdata, data, ei_data) == NULL)
> +	    return zstd_cleanup (NULL, &cdata);
> +	}
> +
> +      ZSTD_inBuffer ib = { cdata.d_buf, cdata.d_size, 0 };
> +
> +      /* Get next buffer to see if this is the last one.  */
> +      data = next_data;
> +      if (data != NULL)
> +	{
> +	  *orig_addralign = MAX (*orig_addralign, data->d_align);
> +	  *orig_size += data->d_size;
> +	  next_data = elf_getdata (scn, data);
> +	}
> +      else
> +	mode = ZSTD_e_end;
> +
> +      /* Flush one data buffer.  */
> +      for (;;)
> +	{
> +	  ZSTD_outBuffer ob = { out_buf + used, out_size - used, 0 };
> +	  size_t ret = ZSTD_compressStream2 (cctx, &ob, &ib, mode);
> +	  if (ZSTD_isError (ret))
> +	    {
> +	      __libelf_seterrno (ELF_E_COMPRESS_ERROR);
> +	      return zstd_cleanup (NULL, convert ? &cdata : NULL);
> +	    }
> +	  used += ob.pos;
> +
> +	  /* Bail out if we are sure the user doesn't want the
> +	     compression forced and we are using more compressed data
> +	     than original data.  */
> +	  if (!force && mode == ZSTD_e_end && used >= *orig_size)
> +	    return zstd_cleanup ((void *) -1, convert ? &cdata : NULL);
> +
> +	  if (ret > 0)
> +	    {
> +	      void *bigger = realloc (out_buf, out_size + block);
> +	      if (bigger == NULL)
> +		{
> +		  __libelf_seterrno (ELF_E_NOMEM);
> +		  return zstd_cleanup (NULL, convert ? &cdata : NULL);
> +		}

OK, zstd_cleanup does also free out_buf.

> +	      out_buf = bigger;
> +	      out_size += block;
> +	    }
> +	  else
> +	    break;
> +	}
> +
> +      if (convert)
> +	{
> +	  free (cdata.d_buf);
> +	  cdata.d_buf = NULL;
> +	}
> +    }
> +  while (mode != ZSTD_e_end); /* More data blocks.  */
> +
> +  ZSTD_freeCCtx (cctx);
> +  *new_size = used;
> +  return out_buf;
> +}
> +#endif

Look good.

> +/* Given a section, uses the (in-memory) Elf_Data to extract the
> +   original data size (including the given header size) and data
> +   alignment.  Returns a buffer that has at least hsize bytes (for the
> +   caller to fill in with a header) plus zlib compressed date.  Also
> +   returns the new buffer size in new_size (hsize + compressed data
> +   size).  Returns (void *) -1 when FORCE is false and the compressed
> +   data would be bigger than the original data.  */
> +void *
> +internal_function
> +__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
> +		   size_t *orig_size, size_t *orig_addralign,
> +		   size_t *new_size, bool force, bool use_zstd)
> +{
> +  /* The compressed data is the on-disk data.  We simplify the
> +     implementation a bit by asking for the (converted) in-memory
> +     data (which might be all there is if the user created it with
> +     elf_newdata) and then convert back to raw if needed before
> +     compressing.  Should be made a bit more clever to directly
> +     use raw if that is directly available.  */
> +  Elf_Data *data = elf_getdata (scn, NULL);
> +  if (data == NULL)
> +    return NULL;
> +
> +  /* When not forced and we immediately know we would use more data by
> +     compressing, because of the header plus zlib overhead (five bytes
> +     per 16 KB block, plus a one-time overhead of six bytes for the
> +     entire stream), don't do anything.
> +     Size estimation for ZSTD compression would be similar.  */
> +  Elf_Data *next_data = elf_getdata (scn, data);
> +  if (next_data == NULL && !force
> +      && data->d_size <= hsize + 5 + 6)
> +    return (void *) -1;
> +
> +  *orig_addralign = data->d_align;
> +  *orig_size = data->d_size;
> +
> +  /* Guess an output block size. 1/8th of the original Elf_Data plus
> +     hsize.  Make the first chunk twice that size (25%), then increase
> +     by a block (12.5%) when necessary.  */
> +  size_t block = (data->d_size / 8) + hsize;
> +  size_t out_size = 2 * block;
> +  void *out_buf = malloc (out_size);
> +  if (out_buf == NULL)
> +    {
> +      __libelf_seterrno (ELF_E_NOMEM);
> +      return NULL;
> +    }

OK, this is all just moved from earlier with an extra use_zstd flag.

> +  if (use_zstd)
> +#ifdef USE_ZSTD
> +    return __libelf_compress_zstd (scn, hsize, ei_data, orig_size,
> +				   orig_addralign, new_size, force,
> +				   data, next_data, out_buf, out_size,
> +				   block);
> +#else
> +    return NULL;

Should this also call __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE) ?

> +#endif
> +  else
> +    return __libelf_compress_zlib (scn, hsize, ei_data, orig_size,
> +				   orig_addralign, new_size, force,
> +				   data, next_data, out_buf, out_size,
> +				   block);
> +}
> +
> +void *
> +internal_function
> +__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
>  {
>    /* Catch highly unlikely compression ratios so we don't allocate
>       some giant amount of memory for nothing. The max compression
> @@ -218,7 +360,7 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
>        return NULL;
>      }
> -  /* Malloc might return NULL when requestion zero size.  This is highly
> +  /* Malloc might return NULL when requesting zero size.  This is highly
>       unlikely, it would only happen when the compression was forced.
>       But we do need a non-NULL buffer to return and set as result.
>       Just make sure to always allocate at least 1 byte.  */

OK, typo fix.

> @@ -260,6 +402,51 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
>    return buf_out;
>  }
> +#ifdef USE_ZSTD
> +void *
> +internal_function
> +__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
> +{
> +  /* Malloc might return NULL when requesting zero size.  This is highly
> +     unlikely, it would only happen when the compression was forced.
> +     But we do need a non-NULL buffer to return and set as result.
> +     Just make sure to always allocate at least 1 byte.  */
> +  void *buf_out = malloc (size_out ?: 1);
> +  if (unlikely (buf_out == NULL))
> +    {
> +      __libelf_seterrno (ELF_E_NOMEM);
> +      return NULL;
> +    }
> +
> +  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
> +  if (ZSTD_isError (ret))
> +    {
> +      free (buf_out);
> +      __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
> +      return NULL;
> +    }
> +  else
> +    return buf_out;
> +}
> +#endif

OK.

> +void *
> +internal_function
> +__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
> +{
> +  if (chtype == ELFCOMPRESS_ZLIB)
> +    return __libelf_decompress_zlib (buf_in, size_in, size_out);
> +  else
> +    {
> +#ifdef USE_ZSTD
> +    return __libelf_decompress_zstd (buf_in, size_in, size_out);
> +#else
> +    __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);

Maybe ELF_E_UNKNOWN_COMPRESSION_TYPE ?

> +    return NULL;
> +#endif
> +    }
> +}
> +
>  void *
>  internal_function
>  __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
> @@ -268,7 +455,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>    if (gelf_getchdr (scn, &chdr) == NULL)
>      return NULL;
> -  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
> +  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
>      {
>        __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
>        return NULL;

Should the chdr.ch_type != ELFCOMPRESS_ZSTD be guarded by #ifdef USE_ZSTD ?

> @@ -295,7 +482,9 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>  		  ? sizeof (Elf32_Chdr) : sizeof (Elf64_Chdr));
>    size_t size_in = data->d_size - hsize;
>    void *buf_in = data->d_buf + hsize;
> -  void *buf_out = __libelf_decompress (buf_in, size_in, chdr.ch_size);
> +  void *buf_out
> +    = __libelf_decompress (chdr.ch_type, buf_in, size_in, chdr.ch_size);
> +
>    *size_out = chdr.ch_size;
>    *addralign = chdr.ch_addralign;
>    return buf_out;
> @@ -394,7 +583,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>      }
>    int compressed = (sh_flags & SHF_COMPRESSED);
> -  if (type == ELFCOMPRESS_ZLIB)
> +  if (type == ELFCOMPRESS_ZLIB || type == ELFCOMPRESS_ZSTD)
>      {
>        /* Compress/Deflate.  */
>        if (compressed == 1)
> @@ -408,7 +597,8 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>        size_t orig_size, orig_addralign, new_size;
>        void *out_buf = __libelf_compress (scn, hsize, elfdata,
>  					 &orig_size, &orig_addralign,
> -					 &new_size, force);
> +					 &new_size, force,
> +					 type == ELFCOMPRESS_ZSTD);
>        /* Compression would make section larger, don't change anything.  */
>        if (out_buf == (void *) -1)
> @@ -422,7 +612,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>        if (elfclass == ELFCLASS32)
>  	{
>  	  Elf32_Chdr chdr;
> -	  chdr.ch_type = ELFCOMPRESS_ZLIB;
> +	  chdr.ch_type = type;
>  	  chdr.ch_size = orig_size;
>  	  chdr.ch_addralign = orig_addralign;
>  	  if (elfdata != MY_ELFDATA)
> @@ -436,7 +626,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
>        else
>  	{
>  	  Elf64_Chdr chdr;
> -	  chdr.ch_type = ELFCOMPRESS_ZLIB;
> +	  chdr.ch_type = type;
>  	  chdr.ch_reserved = 0;
>  	  chdr.ch_size = orig_size;
>  	  chdr.ch_addralign = sh_addralign;

OK.

> diff --git a/libelf/elf_compress_gnu.c b/libelf/elf_compress_gnu.c
> index 3d2977e7..8e20b30e 100644
> --- a/libelf/elf_compress_gnu.c
> +++ b/libelf/elf_compress_gnu.c
> @@ -103,7 +103,8 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
>        size_t orig_size, new_size, orig_addralign;
>        void *out_buf = __libelf_compress (scn, hsize, elfdata,
>  					 &orig_size, &orig_addralign,
> -					 &new_size, force);
> +					 &new_size, force,
> +					 /* use_zstd */ false);
>        /* Compression would make section larger, don't change anything.  */
>        if (out_buf == (void *) -1)

OK.

> @@ -178,7 +179,7 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
>        size_t size = gsize;
>        size_t size_in = data->d_size - hsize;
>        void *buf_in = data->d_buf + hsize;
> -      void *buf_out = __libelf_decompress (buf_in, size_in, size);
> +      void *buf_out = __libelf_decompress (ELFCOMPRESS_ZLIB, buf_in, size_in, size);
>        if (buf_out == NULL)
>  	return -1;

OK.

> diff --git a/libelf/libelfP.h b/libelf/libelfP.h
> index d88a613c..6624f38a 100644
> --- a/libelf/libelfP.h
> +++ b/libelf/libelfP.h
> @@ -574,10 +574,10 @@ extern uint32_t __libelf_crc32 (uint32_t crc, unsigned char *buf, size_t len)
>  extern void * __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
>  				 size_t *orig_size, size_t *orig_addralign,
> -				 size_t *size, bool force)
> +				 size_t *size, bool force, bool use_zstd)
>       internal_function;
> -extern void * __libelf_decompress (void *buf_in, size_t size_in,
> +extern void * __libelf_decompress (int chtype, void *buf_in, size_t size_in,
>  				   size_t size_out) internal_function;
>  extern void * __libelf_decompress_elf (Elf_Scn *scn,
>  				       size_t *size_out, size_t *addralign)

OK.

> diff --git a/src/elfcompress.c b/src/elfcompress.c
> index eff765e8..b0f1677c 100644
> --- a/src/elfcompress.c
> +++ b/src/elfcompress.c
> @@ -55,9 +55,10 @@ enum ch_type
>    UNSET = -1,
>    NONE,
>    ZLIB,
> +  ZSTD,
>    /* Maximal supported ch_type.  */
> -  MAXIMAL_CH_TYPE = ZLIB,
> +  MAXIMAL_CH_TYPE = ZSTD,
>    ZLIB_GNU = 1 << 16
>  };

OK.

> @@ -139,6 +140,12 @@ parse_opt (int key, char *arg __attribute__ ((unused)),
>  	type = ZLIB;
>        else if (strcmp ("zlib-gnu", arg) == 0 || strcmp ("gnu", arg) == 0)
>  	type = ZLIB_GNU;
> +      else if (strcmp ("zstd", arg) == 0)
> +#ifdef USE_ZSTD
> +	type = ZSTD;
> +#else
> +	argp_error (state, N_("ZSTD support is not enabled"));
> +#endif
>        else
>  	argp_error (state, N_("unknown compression type '%s'"), arg);
>        break;

Nice error handling.

> @@ -281,6 +288,44 @@ get_sections (unsigned int *sections, size_t shnum)
>    return s;
>  }
> +/* Return compression type of a given section SHDR.  */
> +
> +static enum ch_type
> +get_section_chtype (Elf_Scn *scn, GElf_Shdr *shdr, const char *sname,
> +		    size_t ndx)
> +{
> +  enum ch_type chtype = UNSET;
> +  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
> +    {
> +      GElf_Chdr chdr;
> +      if (gelf_getchdr (scn, &chdr) != NULL)
> +	{
> +	  chtype = (enum ch_type)chdr.ch_type;
> +	  if (chtype == NONE)
> +	    {
> +	      error (0, 0, "Compression type for section %zd"
> +		     " can't be zero ", ndx);
> +	      chtype = UNSET;
> +	    }
> +	  else if (chtype > MAXIMAL_CH_TYPE)
> +	    {
> +	      error (0, 0, "Compression type (%d) for section %zd"
> +		     " is unsupported ", chtype, ndx);
> +	      chtype = UNSET;
> +	    }
> +	}
> +      else
> +	error (0, 0, "Couldn't get chdr for section %zd", ndx);

Shouldn't this error be fatal?

> +    }
> +  /* Set ZLIB_GNU compression manually for .zdebug* sections.  */
> +  else if (startswith (sname, ".zdebug"))
> +    chtype = ZLIB_GNU;
> +  else
> +    chtype = NONE;
> +
> +  return chtype;
> +}
> +
>  static int
>  process_file (const char *fname)
>  {
> @@ -461,26 +506,29 @@ process_file (const char *fname)
>        if (section_name_matches (sname))
>  	{
> -	  if (!force && type == NONE
> -	      && (shdr->sh_flags & SHF_COMPRESSED) == 0
> -	      && !startswith (sname, ".zdebug"))
> -	    {
> -	      if (verbose > 0)
> -		printf ("[%zd] %s already decompressed\n", ndx, sname);
> -	    }
> -	  else if (!force && type == ZLIB
> -		   && (shdr->sh_flags & SHF_COMPRESSED) != 0)
> -	    {
> -	      if (verbose > 0)
> -		printf ("[%zd] %s already compressed\n", ndx, sname);
> -	    }
> -	  else if (!force && type == ZLIB_GNU
> -		   && startswith (sname, ".zdebug"))
> +	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
> +	  if (!force && verbose > 0)
>  	    {
> -	      if (verbose > 0)
> -		printf ("[%zd] %s already GNU compressed\n", ndx, sname);
> +	      /* The current compression matches the final one.  */
> +	      if (type == schtype)
> +		switch (type)
> +		  {
> +		  case NONE:
> +		    printf ("[%zd] %s already decompressed\n", ndx, sname);
> +		    break;
> +		  case ZLIB:
> +		  case ZSTD:
> +		    printf ("[%zd] %s already compressed\n", ndx, sname);
> +		    break;
> +		  case ZLIB_GNU:
> +		    printf ("[%zd] %s already GNU compressed\n", ndx, sname);
> +		    break;
> +		  default:
> +		    abort ();
> +		  }
>  	    }
> -	  else if (shdr->sh_type != SHT_NOBITS
> +
> +	  if (shdr->sh_type != SHT_NOBITS
>  	      && (shdr->sh_flags & SHF_ALLOC) == 0)
>  	    {
>  	      set_section (sections, ndx);
> @@ -692,37 +740,12 @@ process_file (const char *fname)
>  	     (de)compressed, invalidating the string pointers.  */
>  	  sname = xstrdup (sname);
> +
>  	  /* Detect source compression that is how is the section compressed
>  	     now.  */
> -	  GElf_Chdr chdr;
> -	  enum ch_type schtype = NONE;
> -	  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
> -	    {
> -	      if (gelf_getchdr (scn, &chdr) != NULL)
> -		{
> -		  schtype = (enum ch_type)chdr.ch_type;
> -		  if (schtype == NONE)
> -		    {
> -		      error (0, 0, "Compression type for section %zd"
> -			     " can't be zero ", ndx);
> -		      goto cleanup;
> -		    }
> -		  else if (schtype > MAXIMAL_CH_TYPE)
> -		    {
> -		      error (0, 0, "Compression type (%d) for section %zd"
> -			     " is unsupported ", schtype, ndx);
> -		      goto cleanup;
> -		    }
> -		}
> -	      else
> -		{
> -		  error (0, 0, "Couldn't get chdr for section %zd", ndx);
> -		  goto cleanup;
> -		}
> -	    }
> -	  /* Set ZLIB compression manually for .zdebug* sections.  */
> -	  else if (startswith (sname, ".zdebug"))
> -	    schtype = ZLIB_GNU;
> +	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
> +	  if (schtype == UNSET)
> +	    goto cleanup;
>  	  /* We might want to decompress (and rename), but not
>  	     compress during this pass since we might need the section
> @@ -754,7 +777,7 @@ process_file (const char *fname)
>  	    case ZLIB_GNU:
>  	      if (startswith (sname, ".debug"))
>  		{
> -		  if (schtype == ZLIB)
> +		  if (schtype == ZLIB || schtype == ZSTD)
>  		    {
>  		      /* First decompress to recompress GNU style.
>  			 Don't report even when verbose.  */

OK.

> @@ -818,19 +841,22 @@ process_file (const char *fname)
>  	      break;
>  	    case ZLIB:
> -	      if ((shdr->sh_flags & SHF_COMPRESSED) == 0)
> +	    case ZSTD:
> +	      if (schtype != type)
>  		{
> -		  if (schtype == ZLIB_GNU)
> +		  if (schtype != NONE)
>  		    {
> -		      /* First decompress to recompress zlib style.
> -			 Don't report even when verbose.  */
> +		      /* Decompress first.  */
>  		      if (compress_section (scn, size, sname, NULL, ndx,
>  					    schtype, NONE, false) < 0)
>  			goto cleanup;
> -		      snamebuf[0] = '.';
> -		      strcpy (&snamebuf[1], &sname[2]);
> -		      newname = snamebuf;
> +		      if (schtype == ZLIB_GNU)
> +			{
> +			  snamebuf[0] = '.';
> +			  strcpy (&snamebuf[1], &sname[2]);
> +			  newname = snamebuf;
> +			}
>  		    }
>  		  if (skip_compress_section)

OK.

> @@ -838,7 +864,7 @@ process_file (const char *fname)
>  		      if (ndx == shdrstrndx)
>  			{
>  			  shstrtab_size = size;
> -			  shstrtab_compressed = ZLIB;
> +			  shstrtab_compressed = type;
>  			  if (shstrtab_name != NULL
>  			      || shstrtab_newname != NULL)
>  			    {
> @@ -855,7 +881,7 @@ process_file (const char *fname)
>  		      else
>  			{
>  			  symtab_size = size;
> -			  symtab_compressed = ZLIB;
> +			  symtab_compressed = type;
>  			  symtab_name = xstrdup (sname);
>  			  symtab_newname = (newname == NULL
>  					    ? NULL : xstrdup (newname));

OK.

> @@ -1378,7 +1404,7 @@ main (int argc, char **argv)
>  	N_("Place (de)compressed output into FILE"),
>  	0 },
>        { "type", 't', "TYPE", 0,
> -	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias) or 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias)"),
> +	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias), 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias) or 'zstd'"),
>  	0 },
>        { "name", 'n', "SECTION", 0,
>  	N_("SECTION name to (de)compress, SECTION is an extended wildcard pattern (defaults to '.?(z)debug*')"),

I would say or 'zstd' (ELF ZSTD compression)" to match the 'zlib; type
description.

> diff --git a/src/readelf.c b/src/readelf.c
> index cc3e0229..451f8400 100644
> --- a/src/readelf.c
> +++ b/src/readelf.c
> @@ -1238,13 +1238,17 @@ get_visibility_type (int value)
>  static const char *
>  elf_ch_type_name (unsigned int code)
>  {
> -  if (code == 0)
> -    return "NONE";
> -
> -  if (code == ELFCOMPRESS_ZLIB)
> -    return "ZLIB";
> -
> -  return "UNKNOWN";
> +  switch (code)
> +    {
> +    case 0:
> +      return "NONE";
> +    case ELFCOMPRESS_ZLIB:
> +      return "ZLIB";
> +    case ELFCOMPRESS_ZSTD:
> +      return "ZSTD";
> +    default:
> +      return "UNKNOWN";
> +    }
>  }
>  /* Print the section headers.  */

OK.

> diff --git a/tests/run-compress-test.sh b/tests/run-compress-test.sh
> index a6a298f5..3f9c990e 100755
> --- a/tests/run-compress-test.sh
> +++ b/tests/run-compress-test.sh
> @@ -61,6 +61,30 @@ testrun_elfcompress_file()
>      echo "uncompress $elfcompressedfile -> $elfuncompressedfile"
>      testrun ${abs_top_builddir}/src/elfcompress -v -t none -o ${elfuncompressedfile} ${elfcompressedfile}
>      testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${elfuncompressedfile}
> +
> +    outputfile="${infile}.gabi.zstd"
> +    tempfiles "$outputfile"
> +    echo "zstd compress $elfcompressedfile -> $outputfile"
> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${outputfile} ${elfcompressedfile}
> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${outputfile}
> +    echo "checking compressed section header" $outputfile
> +    testrun ${abs_top_builddir}/src/readelf -Sz ${outputfile} | grep "ELF ZSTD" >/dev/null
> +
> +    zstdfile="${infile}.zstd"
> +    tempfiles "$zstdfile"
> +    echo "zstd compress $uncompressedfile -> $zstdfile"
> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${zstdfile} ${elfuncompressedfile}
> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdfile}
> +    echo "checking compressed section header" $zstdfile
> +    testrun ${abs_top_builddir}/src/readelf -Sz ${zstdfile} | grep "ELF ZSTD" >/dev/null
> +
> +    zstdgnufile="${infile}.zstd.gnu"
> +    tempfiles "$zstdgnufile"
> +    echo "zstd re-compress to GNU ZLIB $zstdfile -> $zstdgnufile"
> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zlib-gnu -o ${zstdgnufile} ${zstdfile}
> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdgnufile}
> +    echo "checking .zdebug section name" $zstdgnufile
> +    testrun ${abs_top_builddir}/src/readelf -S ${zstdgnufile} | grep ".zdebug" >/dev/null
>  }
>  testrun_elfcompress()

You might add these to a separate run test file or pass some ZSTD flag
through the test environment to conditionally run these new tests.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-15 13:17                   ` Mark Wielaard
@ 2022-12-19 14:19                     ` Martin Liška
  2022-12-19 15:09                       ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-12-19 14:19 UTC (permalink / raw)
  To: Mark Wielaard, elfutils-devel

On 12/15/22 14:17, Mark Wielaard wrote:
> Hi Martin,
> 
> On Tue, 2022-11-29 at 13:05 +0100, Martin Liška wrote:
>> There's second version of the patch that fully support both
>> compression and decompression.
>>
>> Changes from the v1:
>> - compression support added
>> - zstd detection is fixed
>> - new tests are added
>> - builds fine w/ and w/o the ZSTD library
>>
>> What's currently missing and where I need a help:
>>
>> 1) When I build ./configure --without-zstd, I don't have
>> a reasonable error message (something similar to binutils's readelf:
>> readelf: Warning: section '.debug_str' has unsupported compress type:
>> 2)
>> even though, __libelf_decompress returns NULL and __libelf_seterrno).
>> One can see a garbage in the console.
>>
>> How to handle that properly?
> 
> Is there a particular way you are running eu-readelf? Is it with
> generic -w or -a, or decoding a specific section type?

Hello.

$ LD_LIBRARY_PATH=./libelf ./src/readelf -w ~/Programming/testcases/a.out

where I get:

./src/readelf: cannot get debug context descriptor: No DWARF information found

DWARF section [37] '.debug_info' at offset 0x1ab2:
  [Offset]
./src/readelf: cannot get next unit: no error

Call frame information section [13] '.eh_frame' at offset 0x4a8:
...
                                                                                                                                                t��o5��=I�iAp@a����S^R/<�����^�qi�ַp@
                                                                                                                                                                                    E���Z
�O��Š�w��/#�!���7�6�#�I����*���굮R�v妗�/Z���O����Oի���E��z�����K��(��9���:{ѧ�zOa;̥�O޾�����+O�̰҆ˑ}��C��ۋ_�k9��
                                                                                                           Jv�>;)`F��	:!�-�ˏQ@�L,
V��6cI�Ъ^�6z��6�4�Fz���n���}~�U��R�])��zF��#�V��E�eȹ/�
                                                       Z�A!DJP%"�H�$i� Bc3�"
  *** error, missing string terminator

So basically a garbage. And I don't know how to bail out properly?

> 
>> 2) How should I run my newly added tests conditionally if zstd
>> configuration support is enabled?
> 
> eu_ZIP will define an AM_CONDITIONAL for ZSTD (note this is different
> from the HAVE_ZSTD, which is the am conditional added for having the
> zstd program itself). You could use it in tests/Makefile.am as:
> 
>    if ZSTD
>    TESTS += run-zstd-compress-test.sh
>    endif
> 
> If you had a separate test... Otherwise add some variable to
> TESTS_ENVIRONMENT (and installed_TESTS_ENVIRONMENT) based on the Make
> variable that is tested inside the shell script (somewhat like
> USE_VALGRIND) maybe.

All right, I have a working version where I utilize an env. variable.

> 
>>    configure.ac               |   8 +-
>>    libelf/Makefile.am         |   2 +-
>>    libelf/elf_compress.c      | 294 ++++++++++++++++++++++++++++++--
>> -----
>>    libelf/elf_compress_gnu.c  |   5 +-
>>    libelf/libelfP.h           |   4 +-
>>    src/elfcompress.c          | 144 ++++++++++--------
>>    src/readelf.c              |  18 ++-
>>    tests/run-compress-test.sh |  24 +++
>>    8 files changed, 373 insertions(+), 126 deletions(-)
>>
>> diff --git a/configure.ac b/configure.ac
>> index 59be27ac..07cfa54b 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -410,6 +410,11 @@ dnl Test for bzlib and xz/lzma/zstd, gives BZLIB/LZMALIB/ZSTD .am
>>    dnl conditional and config.h USE_BZLIB/USE_LZMALIB/USE_ZSTD #define.
>>    save_LIBS="$LIBS"
>>    LIBS=
>> +eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
>> +AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
>> +AC_SUBST([LIBZSTD])
>> +zstd_LIBS="$LIBS"
>> +AC_SUBST([zstd_LIBS])
>>    eu_ZIPLIB(bzlib,BZLIB,bz2,BZ2_bzdopen,bzip2)
>>    # We need this since bzip2 doesn't have a pkgconfig file.
>>    BZ2_LIB="$LIBS"
>> @@ -417,9 +422,6 @@ AC_SUBST([BZ2_LIB])
>>    eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
>>    AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"], [LIBLZMA=""])
>>    AC_SUBST([LIBLZMA])
>> -eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
>> -AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
>> -AC_SUBST([LIBZSTD])
>>    zip_LIBS="$LIBS"
>>    LIBS="$save_LIBS"
>>    AC_SUBST([zip_LIBS])
> 
> Doing AC_SUBST([zstd_LIBS]) seems correct. Why is the test moved
> earlier?
> 
> You are testing for ZSTD_decompress, is that enough? Asking because I
> see you are using ZSTD_compressStream2, which seems to requires libzstd
> v1.4.0+.
> 
> In general how stable is the libzstd api?

It's hopefully stable, but yes, I should check for ZSTD_compressStream2 in configure.ac.
I'm going to update that.

> 
> You'll also need to use the AC_SUBST for LIBZSTD in config/libelf.pc.in
> now, as used in the libdw.pc.in:
> 
>    Requires.private: zlib @LIBZSTD@

Oh, thanks.

Cheers,
Martin

> 
>> diff --git a/libelf/Makefile.am b/libelf/Makefile.am
>> index 560ed45f..24c25cf8 100644
>> --- a/libelf/Makefile.am
>> +++ b/libelf/Makefile.am
>> @@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
>>    am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
>>    
>>    libelf_so_DEPS = ../lib/libeu.a
>> -libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
>> +libelf_so_LDLIBS = $(libelf_so_DEPS) -lz $(zstd_LIBS)
>>    if USE_LOCKS
>>    libelf_so_LDLIBS += -lpthread
>>    endif
> 
> OK.
> 
> Haven't read the actual code yet. I'll get back to that later today.
> 
> Cheers,
> 
> Mark


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-16  0:32                   ` [PATCHv2] support ZSTD compression algorithm Mark Wielaard
@ 2022-12-19 14:21                     ` Martin Liška
  2022-12-19 15:16                       ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-12-19 14:21 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel

Hi.

>> +  if (use_zstd)
>> +#ifdef USE_ZSTD
>> +    return __libelf_compress_zstd (scn, hsize, ei_data, orig_size,
>> +				   orig_addralign, new_size, force,
>> +				   data, next_data, out_buf, out_size,
>> +				   block);
>> +#else
>> +    return NULL;
> 
> Should this also call __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE) ?

Yes, will fix that.

> 
>> +#endif
>> +  else
>> +    return __libelf_compress_zlib (scn, hsize, ei_data, orig_size,
>> +				   orig_addralign, new_size, force,
>> +				   data, next_data, out_buf, out_size,
>> +				   block);
>> +}
>> +
>> +void *
>> +internal_function
>> +__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
>>   {
>>     /* Catch highly unlikely compression ratios so we don't allocate
>>        some giant amount of memory for nothing. The max compression
>> @@ -218,7 +360,7 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
>>         return NULL;
>>       }
>> -  /* Malloc might return NULL when requestion zero size.  This is highly
>> +  /* Malloc might return NULL when requesting zero size.  This is highly
>>        unlikely, it would only happen when the compression was forced.
>>        But we do need a non-NULL buffer to return and set as result.
>>        Just make sure to always allocate at least 1 byte.  */
> 
> OK, typo fix.
> 
>> @@ -260,6 +402,51 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
>>     return buf_out;
>>   }
>> +#ifdef USE_ZSTD
>> +void *
>> +internal_function
>> +__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
>> +{
>> +  /* Malloc might return NULL when requesting zero size.  This is highly
>> +     unlikely, it would only happen when the compression was forced.
>> +     But we do need a non-NULL buffer to return and set as result.
>> +     Just make sure to always allocate at least 1 byte.  */
>> +  void *buf_out = malloc (size_out ?: 1);
>> +  if (unlikely (buf_out == NULL))
>> +    {
>> +      __libelf_seterrno (ELF_E_NOMEM);
>> +      return NULL;
>> +    }
>> +
>> +  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
>> +  if (ZSTD_isError (ret))
>> +    {
>> +      free (buf_out);
>> +      __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
>> +      return NULL;
>> +    }
>> +  else
>> +    return buf_out;
>> +}
>> +#endif
> 
> OK.
> 
>> +void *
>> +internal_function
>> +__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
>> +{
>> +  if (chtype == ELFCOMPRESS_ZLIB)
>> +    return __libelf_decompress_zlib (buf_in, size_in, size_out);
>> +  else
>> +    {
>> +#ifdef USE_ZSTD
>> +    return __libelf_decompress_zstd (buf_in, size_in, size_out);
>> +#else
>> +    __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
> 
> Maybe ELF_E_UNKNOWN_COMPRESSION_TYPE ?

Yes, will fix that.

> 
>> +    return NULL;
>> +#endif
>> +    }
>> +}
>> +
>>   void *
>>   internal_function
>>   __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>> @@ -268,7 +455,7 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
>>     if (gelf_getchdr (scn, &chdr) == NULL)
>>       return NULL;
>> -  if (chdr.ch_type != ELFCOMPRESS_ZLIB)
>> +  if (chdr.ch_type != ELFCOMPRESS_ZLIB && chdr.ch_type != ELFCOMPRESS_ZSTD)
>>       {
>>         __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
>>         return NULL;
> 
> Should the chdr.ch_type != ELFCOMPRESS_ZSTD be guarded by #ifdef USE_ZSTD ?

Yes, will fix that.
>> +      else
>> +	error (0, 0, "Couldn't get chdr for section %zd", ndx);
> 
> Shouldn't this error be fatal?

What do you use for fatal errors?
> 
>> @@ -1378,7 +1404,7 @@ main (int argc, char **argv)
>>   	N_("Place (de)compressed output into FILE"),
>>   	0 },
>>         { "type", 't', "TYPE", 0,
>> -	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias) or 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias)"),
>> +	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias), 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias) or 'zstd'"),
>>   	0 },
>>         { "name", 'n', "SECTION", 0,
>>   	N_("SECTION name to (de)compress, SECTION is an extended wildcard pattern (defaults to '.?(z)debug*')"),
> 
> I would say or 'zstd' (ELF ZSTD compression)" to match the 'zlib; type
> description.

Works for me.

Cheers,
Martin

> 
>> diff --git a/src/readelf.c b/src/readelf.c
>> index cc3e0229..451f8400 100644
>> --- a/src/readelf.c
>> +++ b/src/readelf.c
>> @@ -1238,13 +1238,17 @@ get_visibility_type (int value)
>>   static const char *
>>   elf_ch_type_name (unsigned int code)
>>   {
>> -  if (code == 0)
>> -    return "NONE";
>> -
>> -  if (code == ELFCOMPRESS_ZLIB)
>> -    return "ZLIB";
>> -
>> -  return "UNKNOWN";
>> +  switch (code)
>> +    {
>> +    case 0:
>> +      return "NONE";
>> +    case ELFCOMPRESS_ZLIB:
>> +      return "ZLIB";
>> +    case ELFCOMPRESS_ZSTD:
>> +      return "ZSTD";
>> +    default:
>> +      return "UNKNOWN";
>> +    }
>>   }
>>   /* Print the section headers.  */
> 
> OK.
> 
>> diff --git a/tests/run-compress-test.sh b/tests/run-compress-test.sh
>> index a6a298f5..3f9c990e 100755
>> --- a/tests/run-compress-test.sh
>> +++ b/tests/run-compress-test.sh
>> @@ -61,6 +61,30 @@ testrun_elfcompress_file()
>>       echo "uncompress $elfcompressedfile -> $elfuncompressedfile"
>>       testrun ${abs_top_builddir}/src/elfcompress -v -t none -o ${elfuncompressedfile} ${elfcompressedfile}
>>       testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${elfuncompressedfile}
>> +
>> +    outputfile="${infile}.gabi.zstd"
>> +    tempfiles "$outputfile"
>> +    echo "zstd compress $elfcompressedfile -> $outputfile"
>> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${outputfile} ${elfcompressedfile}
>> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${outputfile}
>> +    echo "checking compressed section header" $outputfile
>> +    testrun ${abs_top_builddir}/src/readelf -Sz ${outputfile} | grep "ELF ZSTD" >/dev/null
>> +
>> +    zstdfile="${infile}.zstd"
>> +    tempfiles "$zstdfile"
>> +    echo "zstd compress $uncompressedfile -> $zstdfile"
>> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${zstdfile} ${elfuncompressedfile}
>> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdfile}
>> +    echo "checking compressed section header" $zstdfile
>> +    testrun ${abs_top_builddir}/src/readelf -Sz ${zstdfile} | grep "ELF ZSTD" >/dev/null
>> +
>> +    zstdgnufile="${infile}.zstd.gnu"
>> +    tempfiles "$zstdgnufile"
>> +    echo "zstd re-compress to GNU ZLIB $zstdfile -> $zstdgnufile"
>> +    testrun ${abs_top_builddir}/src/elfcompress -v -t zlib-gnu -o ${zstdgnufile} ${zstdfile}
>> +    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdgnufile}
>> +    echo "checking .zdebug section name" $zstdgnufile
>> +    testrun ${abs_top_builddir}/src/readelf -S ${zstdgnufile} | grep ".zdebug" >/dev/null
>>   }
>>   testrun_elfcompress()
> 
> You might add these to a separate run test file or pass some ZSTD flag
> through the test environment to conditionally run these new tests.
> 
> Cheers,
> 
> Mark


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-19 14:19                     ` Martin Liška
@ 2022-12-19 15:09                       ` Mark Wielaard
  2022-12-21 11:13                         ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-12-19 15:09 UTC (permalink / raw)
  To: Martin Liška, elfutils-devel

Hi Martin,

On Mon, 2022-12-19 at 15:19 +0100, Martin Liška wrote:
> On 12/15/22 14:17, Mark Wielaard wrote:
> > Is there a particular way you are running eu-readelf? Is it with
> > generic -w or -a, or decoding a specific section type?
> 
> Hello.
> 
> $ LD_LIBRARY_PATH=./libelf ./src/readelf -w
> ~/Programming/testcases/a.out
> 
> where I get:
> 
> ./src/readelf: cannot get debug context descriptor: No DWARF
> information found
> 
> DWARF section [37] '.debug_info' at offset 0x1ab2:
>   [Offset]
> ./src/readelf: cannot get next unit: no error
> 
> Call frame information section [13] '.eh_frame' at offset 0x4a8:
> ...
>                                                                      
>                                                                      
>       t��o5��=I�iAp@a����S^R/<�����^�qi�ַp@

[...]

> So basically a garbage. And I don't know how to bail out properly?

Aha. If you have that a.out somewhere I can take a look. I suspect this
is because we expect all .debug sections to have been decompressed in
libdw/dwarf_begin_elf.c, but that isn't really true, see check_section
in that file which has:

  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
    {
      if (elf_compress (scn, 0, 0) < 0)
        {
          /* It would be nice if we could fail with a specific error.
             But we don't know if this was an essential section or not.
             So just continue for now. See also valid_p().  */
          return result;
        }
    }

We should probably adjust valid_p so it produces a more appropriate
error message and/or add additional checks in readlelf.c.

But lets do that after this patch goes in.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-19 14:21                     ` Martin Liška
@ 2022-12-19 15:16                       ` Mark Wielaard
  2022-12-21 11:09                         ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-12-19 15:16 UTC (permalink / raw)
  To: Martin Liška; +Cc: elfutils-devel

Hi Martin,

On Mon, 2022-12-19 at 15:21 +0100, Martin Liška wrote:
> > > +      else
> > > +    error (0, 0, "Couldn't get chdr for section %zd", ndx);
> > 
> > Shouldn't this error be fatal?
> 
> What do you use for fatal errors?

Depends a bit on context. It might be that this error isn't fatal, then
zero as first (status) argument is fine, just know that the program
will just continue. And it looked like not all callers were prepared
for this function to return with a bogus return value.

If it is fatal then depending on context you either call error_exit (0,
"Couldn't get chdr for section %zd", ndx); [see system.h, this really
is just error (EXIT_FAILURE, 0, ...)] if the program needs to terminate
right now.

Or you return a special value from the function (assuming all callers
check for an error here). And/Or if the program needs a cleanup you'll
goto cleanup (as is done in process_file).

Cheers,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-19 15:16                       ` Mark Wielaard
@ 2022-12-21 11:09                         ` Martin Liška
  2022-12-21 23:14                           ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-12-21 11:09 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel

On 12/19/22 16:16, Mark Wielaard wrote:
> Hi Martin,
> 
> On Mon, 2022-12-19 at 15:21 +0100, Martin Liška wrote:
>>>> +      else
>>>> +    error (0, 0, "Couldn't get chdr for section %zd", ndx);
>>>
>>> Shouldn't this error be fatal?
>>
>> What do you use for fatal errors?
> 
> Depends a bit on context. It might be that this error isn't fatal, then
> zero as first (status) argument is fine, just know that the program
> will just continue. And it looked like not all callers were prepared
> for this function to return with a bogus return value.
> 
> If it is fatal then depending on context you either call error_exit (0,
> "Couldn't get chdr for section %zd", ndx); [see system.h, this really
> is just error (EXIT_FAILURE, 0, ...)] if the program needs to terminate
> right now.
> 
> Or you return a special value from the function (assuming all callers
> check for an error here). And/Or if the program needs a cleanup you'll
> goto cleanup (as is done in process_file).

I think it's fine as we return UNSET in that case and the caller goes directly
to cleanup (or abort is called for the second call site):

	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
	  if (schtype == UNSET)
	    goto cleanup;

Martin

> 
> Cheers,
> 
> Mark


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-19 15:09                       ` Mark Wielaard
@ 2022-12-21 11:13                         ` Martin Liška
  2023-01-10 17:44                           ` [PATCH] readelf: Check compression status of .debug section data Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-12-21 11:13 UTC (permalink / raw)
  To: Mark Wielaard, elfutils-devel

On 12/19/22 16:09, Mark Wielaard wrote:
> Hi Martin,
> 
> On Mon, 2022-12-19 at 15:19 +0100, Martin Liška wrote:
>> On 12/15/22 14:17, Mark Wielaard wrote:
>>> Is there a particular way you are running eu-readelf? Is it with
>>> generic -w or -a, or decoding a specific section type?
>>
>> Hello.
>>
>> $ LD_LIBRARY_PATH=./libelf ./src/readelf -w
>> ~/Programming/testcases/a.out
>>
>> where I get:
>>
>> ./src/readelf: cannot get debug context descriptor: No DWARF
>> information found
>>
>> DWARF section [37] '.debug_info' at offset 0x1ab2:
>>   [Offset]
>> ./src/readelf: cannot get next unit: no error
>>
>> Call frame information section [13] '.eh_frame' at offset 0x4a8:
>> ...
>>                                                                      
>>                                                                      
>>       t��o5��=I�iAp@a����S^R/<�����^�qi�ַp@
> 
> [...]
> 
>> So basically a garbage. And I don't know how to bail out properly?
> 
> Aha. If you have that a.out somewhere I can take a look. I suspect this
> is because we expect all .debug sections to have been decompressed in
> libdw/dwarf_begin_elf.c, but that isn't really true, see check_section
> in that file which has:
> 
>   if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
>     {
>       if (elf_compress (scn, 0, 0) < 0)
>         {
>           /* It would be nice if we could fail with a specific error.
>              But we don't know if this was an essential section or not.
>              So just continue for now. See also valid_p().  */
>           return result;
>         }
>     }

Sure, there's a file example:
https://splichal.eu/tmp/zstd.out

$ ./src/readelf -Sz zstd.out
...
[35] .debug_abbrev        PROGBITS     0000000000000000 000018b4 00000177  0 C      0   0  1
     [ELF ZSTD (2) 00000318  1]

$ LD_LIBRARY_PATH=libelf/ ./src/readelf -w zstd.out
(prints garbage if not configured with zstd)

Please use code from my branch:
https://sourceware.org/git/?p=elfutils.git;a=shortlog;h=refs/heads/users/marxin/try-zstd-support-v2

Cheers,
Martin

> 
> We should probably adjust valid_p so it produces a more appropriate
> error message and/or add additional checks in readlelf.c.
> 
> But lets do that after this patch goes in.
> 
> Cheers,
> 
> Mark


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-21 11:09                         ` Martin Liška
@ 2022-12-21 23:14                           ` Mark Wielaard
  2022-12-22  9:17                             ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-12-21 23:14 UTC (permalink / raw)
  To: Martin Liška; +Cc: elfutils-devel

Hi Martin,

On Wed, Dec 21, 2022 at 12:09:21PM +0100, Martin Liška wrote:
> > Or you return a special value from the function (assuming all callers
> > check for an error here). And/Or if the program needs a cleanup you'll
> > goto cleanup (as is done in process_file).
> 
> I think it's fine as we return UNSET in that case and the caller goes directly
> to cleanup (or abort is called for the second call site):
> 
> 	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
> 	  if (schtype == UNSET)
> 	    goto cleanup;

O, that is good.

Is the abort () at the second call site because that cannot happen? Or
should that also goto cleanup?

Cheers,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCHv2] support ZSTD compression algorithm
  2022-12-21 23:14                           ` Mark Wielaard
@ 2022-12-22  9:17                             ` Martin Liška
  2022-12-22 18:36                               ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Martin Liška @ 2022-12-22  9:17 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Mark Wielaard

Hi.

> Is the abort () at the second call site because that cannot happen? Or
> should that also goto cleanup?

Yes, it can't happen.

There's V2 (including ChangeLog) where all issues apart from the libzstd configure.ac detection
should be addressed.

Cheers,
Martin

config/ChangeLog:

	* libelf.pc.in: Add LIBLZSTD to Requires.private.

ChangeLog:

	* configure.ac: Detect ZSTD streaming API.

libelf/ChangeLog:

	* Makefile.am: Use zstd_LIBS.
	* elf_compress.c:
	(__libelf_compress): Split into ...
	(__libelf_compress_zlib): ... this.
	(do_zstd_cleanup): New.
	(zstd_cleanup): New.
	(__libelf_compress_zstd): New.
	(__libelf_decompress): Switch in between zlib and zstd.
	(__libelf_decompress_zlib): Renamed from __libelf_decompress.
	(__libelf_decompress_zstd): New.
	(__libelf_decompress_elf): Dispatch in between compression
	algorithms.
	(elf_compress): Likewise.
	* elf_compress_gnu.c (elf_compress_gnu): Call with
	ELFCOMPRESS_ZLIB.
	* libelfP.h (__libelf_compress): Add new argument.
	(__libelf_decompress): Add chtype argument.

src/ChangeLog:

	* elfcompress.c (enum ch_type): Add ZSTD.
	(parse_opt): Parse "zstd".
	(get_section_chtype): New.
	(process_file): Support zstd compression.
	(main): Add zstd to help.
	* readelf.c (elf_ch_type_name): Rewrite with switch.

tests/ChangeLog:

	* Makefile.am: Add ELFUTILS_ZSTD if zstd is enabled.
	* run-compress-test.sh: Test zstd compression algorithm
	for debug sections.
---
 config/libelf.pc.in        |   2 +-
 configure.ac               |   4 +-
 libelf/Makefile.am         |   2 +-
 libelf/elf_compress.c      | 307 +++++++++++++++++++++++++++++++------
 libelf/elf_compress_gnu.c  |   5 +-
 libelf/libelfP.h           |   4 +-
 src/elfcompress.c          | 145 +++++++++++-------
 src/readelf.c              |  18 ++-
 tests/Makefile.am          |   1 +
 tests/run-compress-test.sh |  28 ++++
 10 files changed, 392 insertions(+), 124 deletions(-)

diff --git a/config/libelf.pc.in b/config/libelf.pc.in
index 48f3f021..0d2ce968 100644
--- a/config/libelf.pc.in
+++ b/config/libelf.pc.in
@@ -11,4 +11,4 @@ URL: http://elfutils.org/
 Libs: -L${libdir} -lelf
 Cflags: -I${includedir}
 
-Requires.private: zlib
+Requires.private: zlib @LIBZSTD@
diff --git a/configure.ac b/configure.ac
index 59be27ac..ef16f79e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -417,9 +417,11 @@ AC_SUBST([BZ2_LIB])
 eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
 AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"], [LIBLZMA=""])
 AC_SUBST([LIBLZMA])
-eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
+eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_compressStream2,[ZSTD (zst)])
 AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"], [LIBLZSTD=""])
 AC_SUBST([LIBZSTD])
+zstd_LIBS="$LIBS"
+AC_SUBST([zstd_LIBS])
 zip_LIBS="$LIBS"
 LIBS="$save_LIBS"
 AC_SUBST([zip_LIBS])
diff --git a/libelf/Makefile.am b/libelf/Makefile.am
index 560ed45f..24c25cf8 100644
--- a/libelf/Makefile.am
+++ b/libelf/Makefile.am
@@ -106,7 +106,7 @@ libelf_pic_a_SOURCES =
 am_libelf_pic_a_OBJECTS = $(libelf_a_SOURCES:.c=.os)
 
 libelf_so_DEPS = ../lib/libeu.a
-libelf_so_LDLIBS = $(libelf_so_DEPS) -lz
+libelf_so_LDLIBS = $(libelf_so_DEPS) -lz $(zstd_LIBS)
 if USE_LOCKS
 libelf_so_LDLIBS += -lpthread
 endif
diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
index d7f53af2..deb585b7 100644
--- a/libelf/elf_compress.c
+++ b/libelf/elf_compress.c
@@ -39,6 +39,10 @@
 #include <string.h>
 #include <zlib.h>
 
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
+
 /* Cleanup and return result.  Don't leak memory.  */
 static void *
 do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
@@ -54,53 +58,14 @@ do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
 #define deflate_cleanup(result, cdata) \
     do_deflate_cleanup(result, &z, out_buf, cdata)
 
-/* Given a section, uses the (in-memory) Elf_Data to extract the
-   original data size (including the given header size) and data
-   alignment.  Returns a buffer that has at least hsize bytes (for the
-   caller to fill in with a header) plus zlib compressed date.  Also
-   returns the new buffer size in new_size (hsize + compressed data
-   size).  Returns (void *) -1 when FORCE is false and the compressed
-   data would be bigger than the original data.  */
 void *
 internal_function
-__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
-		   size_t *orig_size, size_t *orig_addralign,
-		   size_t *new_size, bool force)
+__libelf_compress_zlib (Elf_Scn *scn, size_t hsize, int ei_data,
+			size_t *orig_size, size_t *orig_addralign,
+			size_t *new_size, bool force,
+			Elf_Data *data, Elf_Data *next_data,
+			void *out_buf, size_t out_size, size_t block)
 {
-  /* The compressed data is the on-disk data.  We simplify the
-     implementation a bit by asking for the (converted) in-memory
-     data (which might be all there is if the user created it with
-     elf_newdata) and then convert back to raw if needed before
-     compressing.  Should be made a bit more clever to directly
-     use raw if that is directly available.  */
-  Elf_Data *data = elf_getdata (scn, NULL);
-  if (data == NULL)
-    return NULL;
-
-  /* When not forced and we immediately know we would use more data by
-     compressing, because of the header plus zlib overhead (five bytes
-     per 16 KB block, plus a one-time overhead of six bytes for the
-     entire stream), don't do anything.  */
-  Elf_Data *next_data = elf_getdata (scn, data);
-  if (next_data == NULL && !force
-      && data->d_size <= hsize + 5 + 6)
-    return (void *) -1;
-
-  *orig_addralign = data->d_align;
-  *orig_size = data->d_size;
-
-  /* Guess an output block size. 1/8th of the original Elf_Data plus
-     hsize.  Make the first chunk twice that size (25%), then increase
-     by a block (12.5%) when necessary.  */
-  size_t block = (data->d_size / 8) + hsize;
-  size_t out_size = 2 * block;
-  void *out_buf = malloc (out_size);
-  if (out_buf == NULL)
-    {
-      __libelf_seterrno (ELF_E_NOMEM);
-      return NULL;
-    }
-
   /* Caller gets to fill in the header at the start.  Just skip it here.  */
   size_t used = hsize;
 
@@ -205,9 +170,189 @@ __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
   return out_buf;
 }
 
+#ifdef USE_ZSTD
+/* Cleanup and return result.  Don't leak memory.  */
+static void *
+do_zstd_cleanup (void *result, ZSTD_CCtx * const cctx, void *out_buf,
+		 Elf_Data *cdatap)
+{
+  ZSTD_freeCCtx (cctx);
+  free (out_buf);
+  if (cdatap != NULL)
+    free (cdatap->d_buf);
+  return result;
+}
+
+#define zstd_cleanup(result, cdata) \
+    do_zstd_cleanup(result, cctx, out_buf, cdata)
+
+void *
+internal_function
+__libelf_compress_zstd (Elf_Scn *scn, size_t hsize, int ei_data,
+			size_t *orig_size, size_t *orig_addralign,
+			size_t *new_size, bool force,
+			Elf_Data *data, Elf_Data *next_data,
+			void *out_buf, size_t out_size, size_t block)
+{
+  /* Caller gets to fill in the header at the start.  Just skip it here.  */
+  size_t used = hsize;
+
+  ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+  Elf_Data cdata;
+  cdata.d_buf = NULL;
+
+  /* Loop over data buffers.  */
+  ZSTD_EndDirective mode = ZSTD_e_continue;
+
+  do
+    {
+      /* Convert to raw if different endianness.  */
+      cdata = *data;
+      bool convert = ei_data != MY_ELFDATA && data->d_size > 0;
+      if (convert)
+	{
+	  /* Don't do this conversion in place, we might want to keep
+	     the original data around, caller decides.  */
+	  cdata.d_buf = malloc (data->d_size);
+	  if (cdata.d_buf == NULL)
+	    {
+	      __libelf_seterrno (ELF_E_NOMEM);
+	      return zstd_cleanup (NULL, NULL);
+	    }
+	  if (gelf_xlatetof (scn->elf, &cdata, data, ei_data) == NULL)
+	    return zstd_cleanup (NULL, &cdata);
+	}
+
+      ZSTD_inBuffer ib = { cdata.d_buf, cdata.d_size, 0 };
+
+      /* Get next buffer to see if this is the last one.  */
+      data = next_data;
+      if (data != NULL)
+	{
+	  *orig_addralign = MAX (*orig_addralign, data->d_align);
+	  *orig_size += data->d_size;
+	  next_data = elf_getdata (scn, data);
+	}
+      else
+	mode = ZSTD_e_end;
+
+      /* Flush one data buffer.  */
+      for (;;)
+	{
+	  ZSTD_outBuffer ob = { out_buf + used, out_size - used, 0 };
+	  size_t ret = ZSTD_compressStream2 (cctx, &ob, &ib, mode);
+	  if (ZSTD_isError (ret))
+	    {
+	      __libelf_seterrno (ELF_E_COMPRESS_ERROR);
+	      return zstd_cleanup (NULL, convert ? &cdata : NULL);
+	    }
+	  used += ob.pos;
+
+	  /* Bail out if we are sure the user doesn't want the
+	     compression forced and we are using more compressed data
+	     than original data.  */
+	  if (!force && mode == ZSTD_e_end && used >= *orig_size)
+	    return zstd_cleanup ((void *) -1, convert ? &cdata : NULL);
+
+	  if (ret > 0)
+	    {
+	      void *bigger = realloc (out_buf, out_size + block);
+	      if (bigger == NULL)
+		{
+		  __libelf_seterrno (ELF_E_NOMEM);
+		  return zstd_cleanup (NULL, convert ? &cdata : NULL);
+		}
+	      out_buf = bigger;
+	      out_size += block;
+	    }
+	  else
+	    break;
+	}
+
+      if (convert)
+	{
+	  free (cdata.d_buf);
+	  cdata.d_buf = NULL;
+	}
+    }
+  while (mode != ZSTD_e_end); /* More data blocks.  */
+
+  ZSTD_freeCCtx (cctx);
+  *new_size = used;
+  return out_buf;
+}
+#endif
+
+/* Given a section, uses the (in-memory) Elf_Data to extract the
+   original data size (including the given header size) and data
+   alignment.  Returns a buffer that has at least hsize bytes (for the
+   caller to fill in with a header) plus zlib compressed date.  Also
+   returns the new buffer size in new_size (hsize + compressed data
+   size).  Returns (void *) -1 when FORCE is false and the compressed
+   data would be bigger than the original data.  */
+void *
+internal_function
+__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
+		   size_t *orig_size, size_t *orig_addralign,
+		   size_t *new_size, bool force, bool use_zstd)
+{
+  /* The compressed data is the on-disk data.  We simplify the
+     implementation a bit by asking for the (converted) in-memory
+     data (which might be all there is if the user created it with
+     elf_newdata) and then convert back to raw if needed before
+     compressing.  Should be made a bit more clever to directly
+     use raw if that is directly available.  */
+  Elf_Data *data = elf_getdata (scn, NULL);
+  if (data == NULL)
+    return NULL;
+
+  /* When not forced and we immediately know we would use more data by
+     compressing, because of the header plus zlib overhead (five bytes
+     per 16 KB block, plus a one-time overhead of six bytes for the
+     entire stream), don't do anything.
+     Size estimation for ZSTD compression would be similar.  */
+  Elf_Data *next_data = elf_getdata (scn, data);
+  if (next_data == NULL && !force
+      && data->d_size <= hsize + 5 + 6)
+    return (void *) -1;
+
+  *orig_addralign = data->d_align;
+  *orig_size = data->d_size;
+
+  /* Guess an output block size. 1/8th of the original Elf_Data plus
+     hsize.  Make the first chunk twice that size (25%), then increase
+     by a block (12.5%) when necessary.  */
+  size_t block = (data->d_size / 8) + hsize;
+  size_t out_size = 2 * block;
+  void *out_buf = malloc (out_size);
+  if (out_buf == NULL)
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return NULL;
+    }
+
+  if (use_zstd)
+    {
+#ifdef USE_ZSTD
+      return __libelf_compress_zstd (scn, hsize, ei_data, orig_size,
+				   orig_addralign, new_size, force,
+				   data, next_data, out_buf, out_size,
+				   block);
+#else
+    __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
+    return NULL;
+#endif
+    }
+  else
+    return __libelf_compress_zlib (scn, hsize, ei_data, orig_size,
+				   orig_addralign, new_size, force,
+				   data, next_data, out_buf, out_size,
+				   block);
+}
+
 void *
 internal_function
-__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
+__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t size_out)
 {
   /* Catch highly unlikely compression ratios so we don't allocate
      some giant amount of memory for nothing. The max compression
@@ -218,7 +363,7 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
       return NULL;
     }
 
-  /* Malloc might return NULL when requestion zero size.  This is highly
+  /* Malloc might return NULL when requesting zero size.  This is highly
      unlikely, it would only happen when the compression was forced.
      But we do need a non-NULL buffer to return and set as result.
      Just make sure to always allocate at least 1 byte.  */
@@ -260,6 +405,51 @@ __libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
   return buf_out;
 }
 
+#ifdef USE_ZSTD
+void *
+internal_function
+__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t size_out)
+{
+  /* Malloc might return NULL when requesting zero size.  This is highly
+     unlikely, it would only happen when the compression was forced.
+     But we do need a non-NULL buffer to return and set as result.
+     Just make sure to always allocate at least 1 byte.  */
+  void *buf_out = malloc (size_out ?: 1);
+  if (unlikely (buf_out == NULL))
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return NULL;
+    }
+
+  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
+  if (ZSTD_isError (ret))
+    {
+      free (buf_out);
+      __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
+      return NULL;
+    }
+  else
+    return buf_out;
+}
+#endif
+
+void *
+internal_function
+__libelf_decompress (int chtype, void *buf_in, size_t size_in, size_t size_out)
+{
+  if (chtype == ELFCOMPRESS_ZLIB)
+    return __libelf_decompress_zlib (buf_in, size_in, size_out);
+  else
+    {
+#ifdef USE_ZSTD
+    return __libelf_decompress_zstd (buf_in, size_in, size_out);
+#else
+    __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);
+    return NULL;
+#endif
+    }
+}
+
 void *
 internal_function
 __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
@@ -268,7 +458,19 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
   if (gelf_getchdr (scn, &chdr) == NULL)
     return NULL;
 
+  bool unknown_compression = false;
   if (chdr.ch_type != ELFCOMPRESS_ZLIB)
+    {
+      if (chdr.ch_type != ELFCOMPRESS_ZSTD)
+	unknown_compression = true;
+
+#ifndef USE_ZSTD
+      if (chdr.ch_type == ELFCOMPRESS_ZSTD)
+	unknown_compression = true;
+#endif
+    }
+
+  if (unknown_compression)
     {
       __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);
       return NULL;
@@ -295,7 +497,9 @@ __libelf_decompress_elf (Elf_Scn *scn, size_t *size_out, size_t *addralign)
 		  ? sizeof (Elf32_Chdr) : sizeof (Elf64_Chdr));
   size_t size_in = data->d_size - hsize;
   void *buf_in = data->d_buf + hsize;
-  void *buf_out = __libelf_decompress (buf_in, size_in, chdr.ch_size);
+  void *buf_out
+    = __libelf_decompress (chdr.ch_type, buf_in, size_in, chdr.ch_size);
+
   *size_out = chdr.ch_size;
   *addralign = chdr.ch_addralign;
   return buf_out;
@@ -394,7 +598,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
     }
 
   int compressed = (sh_flags & SHF_COMPRESSED);
-  if (type == ELFCOMPRESS_ZLIB)
+  if (type == ELFCOMPRESS_ZLIB || type == ELFCOMPRESS_ZSTD)
     {
       /* Compress/Deflate.  */
       if (compressed == 1)
@@ -408,7 +612,8 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
       size_t orig_size, orig_addralign, new_size;
       void *out_buf = __libelf_compress (scn, hsize, elfdata,
 					 &orig_size, &orig_addralign,
-					 &new_size, force);
+					 &new_size, force,
+					 type == ELFCOMPRESS_ZSTD);
 
       /* Compression would make section larger, don't change anything.  */
       if (out_buf == (void *) -1)
@@ -422,7 +627,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
       if (elfclass == ELFCLASS32)
 	{
 	  Elf32_Chdr chdr;
-	  chdr.ch_type = ELFCOMPRESS_ZLIB;
+	  chdr.ch_type = type;
 	  chdr.ch_size = orig_size;
 	  chdr.ch_addralign = orig_addralign;
 	  if (elfdata != MY_ELFDATA)
@@ -436,7 +641,7 @@ elf_compress (Elf_Scn *scn, int type, unsigned int flags)
       else
 	{
 	  Elf64_Chdr chdr;
-	  chdr.ch_type = ELFCOMPRESS_ZLIB;
+	  chdr.ch_type = type;
 	  chdr.ch_reserved = 0;
 	  chdr.ch_size = orig_size;
 	  chdr.ch_addralign = sh_addralign;
diff --git a/libelf/elf_compress_gnu.c b/libelf/elf_compress_gnu.c
index 3d2977e7..8e20b30e 100644
--- a/libelf/elf_compress_gnu.c
+++ b/libelf/elf_compress_gnu.c
@@ -103,7 +103,8 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
       size_t orig_size, new_size, orig_addralign;
       void *out_buf = __libelf_compress (scn, hsize, elfdata,
 					 &orig_size, &orig_addralign,
-					 &new_size, force);
+					 &new_size, force,
+					 /* use_zstd */ false);
 
       /* Compression would make section larger, don't change anything.  */
       if (out_buf == (void *) -1)
@@ -178,7 +179,7 @@ elf_compress_gnu (Elf_Scn *scn, int inflate, unsigned int flags)
       size_t size = gsize;
       size_t size_in = data->d_size - hsize;
       void *buf_in = data->d_buf + hsize;
-      void *buf_out = __libelf_decompress (buf_in, size_in, size);
+      void *buf_out = __libelf_decompress (ELFCOMPRESS_ZLIB, buf_in, size_in, size);
       if (buf_out == NULL)
 	return -1;
 
diff --git a/libelf/libelfP.h b/libelf/libelfP.h
index d88a613c..6624f38a 100644
--- a/libelf/libelfP.h
+++ b/libelf/libelfP.h
@@ -574,10 +574,10 @@ extern uint32_t __libelf_crc32 (uint32_t crc, unsigned char *buf, size_t len)
 
 extern void * __libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
 				 size_t *orig_size, size_t *orig_addralign,
-				 size_t *size, bool force)
+				 size_t *size, bool force, bool use_zstd)
      internal_function;
 
-extern void * __libelf_decompress (void *buf_in, size_t size_in,
+extern void * __libelf_decompress (int chtype, void *buf_in, size_t size_in,
 				   size_t size_out) internal_function;
 extern void * __libelf_decompress_elf (Elf_Scn *scn,
 				       size_t *size_out, size_t *addralign)
diff --git a/src/elfcompress.c b/src/elfcompress.c
index eff765e8..bfdac2b4 100644
--- a/src/elfcompress.c
+++ b/src/elfcompress.c
@@ -55,9 +55,10 @@ enum ch_type
   UNSET = -1,
   NONE,
   ZLIB,
+  ZSTD,
 
   /* Maximal supported ch_type.  */
-  MAXIMAL_CH_TYPE = ZLIB,
+  MAXIMAL_CH_TYPE = ZSTD,
 
   ZLIB_GNU = 1 << 16
 };
@@ -139,6 +140,12 @@ parse_opt (int key, char *arg __attribute__ ((unused)),
 	type = ZLIB;
       else if (strcmp ("zlib-gnu", arg) == 0 || strcmp ("gnu", arg) == 0)
 	type = ZLIB_GNU;
+      else if (strcmp ("zstd", arg) == 0)
+#ifdef USE_ZSTD
+	type = ZSTD;
+#else
+	argp_error (state, N_("ZSTD support is not enabled"));
+#endif
       else
 	argp_error (state, N_("unknown compression type '%s'"), arg);
       break;
@@ -281,6 +288,44 @@ get_sections (unsigned int *sections, size_t shnum)
   return s;
 }
 
+/* Return compression type of a given section SHDR.  */
+
+static enum ch_type
+get_section_chtype (Elf_Scn *scn, GElf_Shdr *shdr, const char *sname,
+		    size_t ndx)
+{
+  enum ch_type chtype = UNSET;
+  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
+    {
+      GElf_Chdr chdr;
+      if (gelf_getchdr (scn, &chdr) != NULL)
+	{
+	  chtype = (enum ch_type)chdr.ch_type;
+	  if (chtype == NONE)
+	    {
+	      error (0, 0, "Compression type for section %zd"
+		     " can't be zero ", ndx);
+	      chtype = UNSET;
+	    }
+	  else if (chtype > MAXIMAL_CH_TYPE)
+	    {
+	      error (0, 0, "Compression type (%d) for section %zd"
+		     " is unsupported ", chtype, ndx);
+	      chtype = UNSET;
+	    }
+	}
+      else
+	error (0, 0, "Couldn't get chdr for section %zd", ndx);
+    }
+  /* Set ZLIB_GNU compression manually for .zdebug* sections.  */
+  else if (startswith (sname, ".zdebug"))
+    chtype = ZLIB_GNU;
+  else
+    chtype = NONE;
+
+  return chtype;
+}
+
 static int
 process_file (const char *fname)
 {
@@ -461,26 +506,29 @@ process_file (const char *fname)
 
       if (section_name_matches (sname))
 	{
-	  if (!force && type == NONE
-	      && (shdr->sh_flags & SHF_COMPRESSED) == 0
-	      && !startswith (sname, ".zdebug"))
-	    {
-	      if (verbose > 0)
-		printf ("[%zd] %s already decompressed\n", ndx, sname);
-	    }
-	  else if (!force && type == ZLIB
-		   && (shdr->sh_flags & SHF_COMPRESSED) != 0)
-	    {
-	      if (verbose > 0)
-		printf ("[%zd] %s already compressed\n", ndx, sname);
-	    }
-	  else if (!force && type == ZLIB_GNU
-		   && startswith (sname, ".zdebug"))
+	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
+	  if (!force && verbose > 0)
 	    {
-	      if (verbose > 0)
-		printf ("[%zd] %s already GNU compressed\n", ndx, sname);
+	      /* The current compression matches the final one.  */
+	      if (type == schtype)
+		switch (type)
+		  {
+		  case NONE:
+		    printf ("[%zd] %s already decompressed\n", ndx, sname);
+		    break;
+		  case ZLIB:
+		  case ZSTD:
+		    printf ("[%zd] %s already compressed\n", ndx, sname);
+		    break;
+		  case ZLIB_GNU:
+		    printf ("[%zd] %s already GNU compressed\n", ndx, sname);
+		    break;
+		  default:
+		    abort ();
+		  }
 	    }
-	  else if (shdr->sh_type != SHT_NOBITS
+
+	  if (shdr->sh_type != SHT_NOBITS
 	      && (shdr->sh_flags & SHF_ALLOC) == 0)
 	    {
 	      set_section (sections, ndx);
@@ -692,37 +740,12 @@ process_file (const char *fname)
 	     (de)compressed, invalidating the string pointers.  */
 	  sname = xstrdup (sname);
 
+
 	  /* Detect source compression that is how is the section compressed
 	     now.  */
-	  GElf_Chdr chdr;
-	  enum ch_type schtype = NONE;
-	  if ((shdr->sh_flags & SHF_COMPRESSED) != 0)
-	    {
-	      if (gelf_getchdr (scn, &chdr) != NULL)
-		{
-		  schtype = (enum ch_type)chdr.ch_type;
-		  if (schtype == NONE)
-		    {
-		      error (0, 0, "Compression type for section %zd"
-			     " can't be zero ", ndx);
-		      goto cleanup;
-		    }
-		  else if (schtype > MAXIMAL_CH_TYPE)
-		    {
-		      error (0, 0, "Compression type (%d) for section %zd"
-			     " is unsupported ", schtype, ndx);
-		      goto cleanup;
-		    }
-		}
-	      else
-		{
-		  error (0, 0, "Couldn't get chdr for section %zd", ndx);
-		  goto cleanup;
-		}
-	    }
-	  /* Set ZLIB compression manually for .zdebug* sections.  */
-	  else if (startswith (sname, ".zdebug"))
-	    schtype = ZLIB_GNU;
+	  enum ch_type schtype = get_section_chtype (scn, shdr, sname, ndx);
+	  if (schtype == UNSET)
+	    goto cleanup;
 
 	  /* We might want to decompress (and rename), but not
 	     compress during this pass since we might need the section
@@ -754,7 +777,7 @@ process_file (const char *fname)
 	    case ZLIB_GNU:
 	      if (startswith (sname, ".debug"))
 		{
-		  if (schtype == ZLIB)
+		  if (schtype == ZLIB || schtype == ZSTD)
 		    {
 		      /* First decompress to recompress GNU style.
 			 Don't report even when verbose.  */
@@ -818,19 +841,22 @@ process_file (const char *fname)
 	      break;
 
 	    case ZLIB:
-	      if ((shdr->sh_flags & SHF_COMPRESSED) == 0)
+	    case ZSTD:
+	      if (schtype != type)
 		{
-		  if (schtype == ZLIB_GNU)
+		  if (schtype != NONE)
 		    {
-		      /* First decompress to recompress zlib style.
-			 Don't report even when verbose.  */
+		      /* Decompress first.  */
 		      if (compress_section (scn, size, sname, NULL, ndx,
 					    schtype, NONE, false) < 0)
 			goto cleanup;
 
-		      snamebuf[0] = '.';
-		      strcpy (&snamebuf[1], &sname[2]);
-		      newname = snamebuf;
+		      if (schtype == ZLIB_GNU)
+			{
+			  snamebuf[0] = '.';
+			  strcpy (&snamebuf[1], &sname[2]);
+			  newname = snamebuf;
+			}
 		    }
 
 		  if (skip_compress_section)
@@ -838,7 +864,7 @@ process_file (const char *fname)
 		      if (ndx == shdrstrndx)
 			{
 			  shstrtab_size = size;
-			  shstrtab_compressed = ZLIB;
+			  shstrtab_compressed = type;
 			  if (shstrtab_name != NULL
 			      || shstrtab_newname != NULL)
 			    {
@@ -855,7 +881,7 @@ process_file (const char *fname)
 		      else
 			{
 			  symtab_size = size;
-			  symtab_compressed = ZLIB;
+			  symtab_compressed = type;
 			  symtab_name = xstrdup (sname);
 			  symtab_newname = (newname == NULL
 					    ? NULL : xstrdup (newname));
@@ -1378,7 +1404,8 @@ main (int argc, char **argv)
 	N_("Place (de)compressed output into FILE"),
 	0 },
       { "type", 't', "TYPE", 0,
-	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias) or 'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias)"),
+	N_("What type of compression to apply. TYPE can be 'none' (decompress), 'zlib' (ELF ZLIB compression, the default, 'zlib-gabi' is an alias), "
+	   "'zlib-gnu' (.zdebug GNU style compression, 'gnu' is an alias) or 'zstd' (ELF ZSTD compression)"),
 	0 },
       { "name", 'n', "SECTION", 0,
 	N_("SECTION name to (de)compress, SECTION is an extended wildcard pattern (defaults to '.?(z)debug*')"),
diff --git a/src/readelf.c b/src/readelf.c
index cc3e0229..451f8400 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -1238,13 +1238,17 @@ get_visibility_type (int value)
 static const char *
 elf_ch_type_name (unsigned int code)
 {
-  if (code == 0)
-    return "NONE";
-
-  if (code == ELFCOMPRESS_ZLIB)
-    return "ZLIB";
-
-  return "UNKNOWN";
+  switch (code)
+    {
+    case 0:
+      return "NONE";
+    case ELFCOMPRESS_ZLIB:
+      return "ZLIB";
+    case ELFCOMPRESS_ZSTD:
+      return "ZSTD";
+    default:
+      return "UNKNOWN";
+    }
 }
 
 /* Print the section headers.  */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 356b3fbf..c1868307 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -218,6 +218,7 @@ endif
 
 if HAVE_ZSTD
 TESTS += run-readelf-compressed-zstd.sh
+export ELFUTILS_ZSTD = 1
 endif
 
 if DEBUGINFOD
diff --git a/tests/run-compress-test.sh b/tests/run-compress-test.sh
index a6a298f5..2d4eebd6 100755
--- a/tests/run-compress-test.sh
+++ b/tests/run-compress-test.sh
@@ -61,6 +61,34 @@ testrun_elfcompress_file()
     echo "uncompress $elfcompressedfile -> $elfuncompressedfile"
     testrun ${abs_top_builddir}/src/elfcompress -v -t none -o ${elfuncompressedfile} ${elfcompressedfile}
     testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${elfuncompressedfile}
+
+    if test -z "$ELFUTILS_ZSTD"; then
+      return;
+    fi
+
+    outputfile="${infile}.gabi.zstd"
+    tempfiles "$outputfile"
+    echo "zstd compress $elfcompressedfile -> $outputfile"
+    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${outputfile} ${elfcompressedfile}
+    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${outputfile}
+    echo "checking compressed section header" $outputfile
+    testrun ${abs_top_builddir}/src/readelf -Sz ${outputfile} | grep "ELF ZSTD" >/dev/null
+
+    zstdfile="${infile}.zstd"
+    tempfiles "$zstdfile"
+    echo "zstd compress $uncompressedfile -> $zstdfile"
+    testrun ${abs_top_builddir}/src/elfcompress -v -t zstd -o ${zstdfile} ${elfuncompressedfile}
+    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdfile}
+    echo "checking compressed section header" $zstdfile
+    testrun ${abs_top_builddir}/src/readelf -Sz ${zstdfile} | grep "ELF ZSTD" >/dev/null
+
+    zstdgnufile="${infile}.zstd.gnu"
+    tempfiles "$zstdgnufile"
+    echo "zstd re-compress to GNU ZLIB $zstdfile -> $zstdgnufile"
+    testrun ${abs_top_builddir}/src/elfcompress -v -t zlib-gnu -o ${zstdgnufile} ${zstdfile}
+    testrun ${abs_top_builddir}/src/elfcmp ${uncompressedfile} ${zstdgnufile}
+    echo "checking .zdebug section name" $zstdgnufile
+    testrun ${abs_top_builddir}/src/readelf -S ${zstdgnufile} | grep ".zdebug" >/dev/null
 }
 
 testrun_elfcompress()
-- 
2.39.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-22  9:17                             ` Martin Liška
@ 2022-12-22 18:36                               ` Mark Wielaard
  2022-12-23  8:44                                 ` Martin Liška
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2022-12-22 18:36 UTC (permalink / raw)
  To: Martin Liška, elfutils-devel

Hi Martin,

On Thu, 2022-12-22 at 10:17 +0100, Martin Liška wrote:
> Is the abort () at the second call site because that cannot happen?
> > Or should that also goto cleanup?
> 
> Yes, it can't happen.

Aha, because it should already have seen that section before, in which
case it would have errored out.

> There's V2 (including ChangeLog) where all issues apart from the
> libzstd configure.ac detection should be addressed.

See also my other email. I think that addresses everything. Just merge
those and push please.

> diff --git a/configure.ac b/configure.ac
> index 59be27ac..ef16f79e 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -417,9 +417,11 @@ AC_SUBST([BZ2_LIB])
>  eu_ZIPLIB(lzma,LZMA,lzma,lzma_auto_decoder,[LZMA (xz)])
>  AS_IF([test "x$with_lzma" = xyes], [LIBLZMA="liblzma"],
> [LIBLZMA=""])
>  AC_SUBST([LIBLZMA])
> -eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_decompress,[ZSTD (zst)])
> +eu_ZIPLIB(zstd,ZSTD,zstd,ZSTD_compressStream2,[ZSTD (zst)])
>  AS_IF([test "x$with_zstd" = xyes], [LIBZSTD="libzstd"],
> [LIBLZSTD=""])
>  AC_SUBST([LIBZSTD])
> +zstd_LIBS="$LIBS"
> +AC_SUBST([zstd_LIBS])
>  zip_LIBS="$LIBS"
>  LIBS="$save_LIBS"
>  AC_SUBST([zip_LIBS])

See the other email for an idea how to keep most of the checks the
same, but just for decompress (in the case of zstd). Then define a
USE_ZSTD_COMPRESS only for new enough libzstd.

> diff --git a/libelf/elf_compress.c b/libelf/elf_compress.c
> index d7f53af2..deb585b7 100644
> --- a/libelf/elf_compress.c
> +++ b/libelf/elf_compress.c
> @@ -39,6 +39,10 @@
>  #include <string.h>
>  #include <zlib.h>
>  
> +#ifdef USE_ZSTD
> +#include <zstd.h>
> +#endif
> +
>  /* Cleanup and return result.  Don't leak memory.  */
>  static void *
>  do_deflate_cleanup (void *result, z_stream *z, void *out_buf,
> @@ -54,53 +58,14 @@ do_deflate_cleanup (void *result, z_stream *z,
> void *out_buf,
>  #define deflate_cleanup(result, cdata) \
>      do_deflate_cleanup(result, &z, out_buf, cdata)
>  
> -/* Given a section, uses the (in-memory) Elf_Data to extract the
> -   original data size (including the given header size) and data
> -   alignment.  Returns a buffer that has at least hsize bytes (for
> the
> -   caller to fill in with a header) plus zlib compressed date.  Also
> -   returns the new buffer size in new_size (hsize + compressed data
> -   size).  Returns (void *) -1 when FORCE is false and the
> compressed
> -   data would be bigger than the original data.  */
>  void *
>  internal_function
> -__libelf_compress (Elf_Scn *scn, size_t hsize, int ei_data,
> -		   size_t *orig_size, size_t *orig_addralign,
> -		   size_t *new_size, bool force)
> +__libelf_compress_zlib (Elf_Scn *scn, size_t hsize, int ei_data,
> +			size_t *orig_size, size_t *orig_addralign,
> +			size_t *new_size, bool force,
> +			Elf_Data *data, Elf_Data *next_data,
> +			void *out_buf, size_t out_size, size_t block)

Missed this in my other patch, but __libelf_compress_zlib also isn't an
internal_function, it can/must just be static.
 
> [...]
> +#ifdef USE_ZSTD

So this is the compression part, guard that with USE_ZSTD_COMPRESS.

> +
>  void *
>  internal_function
> -__libelf_decompress (void *buf_in, size_t size_in, size_t size_out)
> +__libelf_decompress_zlib (void *buf_in, size_t size_in, size_t
> size_out)
>  {

So this isn't an internal function, just mark it static void *.

> +#ifdef USE_ZSTD
> +void *
> +internal_function
> +__libelf_decompress_zstd (void *buf_in, size_t size_in, size_t
> size_out)
> +{
> +  /* Malloc might return NULL when requesting zero size.  This is
> highly
> +     unlikely, it would only happen when the compression was forced.
> +     But we do need a non-NULL buffer to return and set as result.
> +     Just make sure to always allocate at least 1 byte.  */
> +  void *buf_out = malloc (size_out ?: 1);
> +  if (unlikely (buf_out == NULL))
> +    {
> +      __libelf_seterrno (ELF_E_NOMEM);
> +      return NULL;
> +    }
> +
> +  size_t ret = ZSTD_decompress (buf_out, size_out, buf_in, size_in);
> +  if (ZSTD_isError (ret))
> +    {
> +      free (buf_out);
> +      __libelf_seterrno (ELF_E_UNKNOWN_COMPRESSION_TYPE);

This should be ELF_E_DECOMPRESS_ERROR.

> +      return NULL;
> +    }
> +  else
> +    return buf_out;
> +}
> +#endif
> +
> +void *
> +internal_function
> +__libelf_decompress (int chtype, void *buf_in, size_t size_in,
> size_t size_out)
> +{
> +  if (chtype == ELFCOMPRESS_ZLIB)
> +    return __libelf_decompress_zlib (buf_in, size_in, size_out);
> +  else
> +    {
> +#ifdef USE_ZSTD
> +    return __libelf_decompress_zstd (buf_in, size_in, size_out);
> +#else
> +    __libelf_seterrno (ELF_E_DECOMPRESS_ERROR);

And this should be ELF_E_UNKNOWN_COMPRESSION_TYPE.

> +    return NULL;
> +#endif
> +    }
> +}

> diff --git a/src/elfcompress.c b/src/elfcompress.c
> index eff765e8..bfdac2b4 100644
> --- a/src/elfcompress.c
> +++ b/src/elfcompress.c
> @@ -55,9 +55,10 @@ enum ch_type
>    UNSET = -1,
>    NONE,
>    ZLIB,
> +  ZSTD,
>  
>    /* Maximal supported ch_type.  */
> -  MAXIMAL_CH_TYPE = ZLIB,
> +  MAXIMAL_CH_TYPE = ZSTD,
>  
>    ZLIB_GNU = 1 << 16
>  };
> @@ -139,6 +140,12 @@ parse_opt (int key, char *arg __attribute__
> ((unused)),
>  	type = ZLIB;
>        else if (strcmp ("zlib-gnu", arg) == 0 || strcmp ("gnu", arg)
> == 0)
>  	type = ZLIB_GNU;
> +      else if (strcmp ("zstd", arg) == 0)
> +#ifdef USE_ZSTD
> +	type = ZSTD;
> +#else
> +	argp_error (state, N_("ZSTD support is not enabled"));
> +#endif

This is slightly trickier now. I think you can use USE_ZSTD_COMPRESS as
guard here, since this is always about compression. In the new setup
you can have either no zstd support, only decompression support or
both.

But you can also just let elf_compress return the right error and make
another change:

diff --git a/src/elfcompress.c b/src/elfcompress.c
index bfdac2b4..1f32331c 100644
--- a/src/elfcompress.c
+++ b/src/elfcompress.c
@@ -230,7 +230,8 @@ compress_section (Elf_Scn *scn, size_t orig_size,
const char *name,
     res = elf_compress (scn, dchtype, flags);
 
   if (res < 0)
-    error (0, 0, "Couldn't decompress section [%zd] %s: %s",
+    error (0, 0, "Couldn't %s section [%zd] %s: %s",
+          compress ? "compress" : "decompress",
           ndx, name, elf_errmsg (-1));
   else
     {

So the user knows whether it is compression or decompression that
failed.

> diff --git a/src/readelf.c b/src/readelf.c
> index cc3e0229..451f8400 100644
> --- a/src/readelf.c
> +++ b/src/readelf.c
> @@ -1238,13 +1238,17 @@ get_visibility_type (int value)
>  static const char *
>  elf_ch_type_name (unsigned int code)
>  {
> -  if (code == 0)
> -    return "NONE";
> -
> -  if (code == ELFCOMPRESS_ZLIB)
> -    return "ZLIB";
> -
> -  return "UNKNOWN";
> +  switch (code)
> +    {
> +    case 0:
> +      return "NONE";
> +    case ELFCOMPRESS_ZLIB:
> +      return "ZLIB";
> +    case ELFCOMPRESS_ZSTD:
> +      return "ZSTD";
> +    default:
> +      return "UNKNOWN";
> +    }
>  }
>  
>  /* Print the section headers.  */
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 356b3fbf..c1868307 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -218,6 +218,7 @@ endif
>  
>  if HAVE_ZSTD
>  TESTS += run-readelf-compressed-zstd.sh
> +export ELFUTILS_ZSTD = 1
>  endif

So this is the wrong guard. HAVE_ZSTD is whether we have the zstd
binary or not. Guard export ELFUTILS_ZSTD = 1 with USE_ZSTD_COMPRESS.

Thanks,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] support ZSTD compression algorithm
  2022-12-22 18:36                               ` Mark Wielaard
@ 2022-12-23  8:44                                 ` Martin Liška
  0 siblings, 0 replies; 25+ messages in thread
From: Martin Liška @ 2022-12-23  8:44 UTC (permalink / raw)
  To: Mark Wielaard, elfutils-devel

On 12/22/22 19:36, Mark Wielaard wrote:
> |See also my other email. I think that addresses everything. Just merge those and push please.|

Hi.

Thank you Mark for help! I've just finished the patch and pushed that to master.

Cheers,
Martin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH] readelf: Check compression status of .debug section data
  2022-12-21 11:13                         ` Martin Liška
@ 2023-01-10 17:44                           ` Mark Wielaard
  2023-01-16 19:39                             ` Mark Wielaard
  0 siblings, 1 reply; 25+ messages in thread
From: Mark Wielaard @ 2023-01-10 17:44 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Martin Liška, Mark Wielaard

The various print_debug_*_section functions didn't get the section
data in the same way. Add a new get_debug_elf_data function that
gets the (possibly relocated) section data and that checks (and
warns) if the data might still be compressed in a way that we
cannot decompress.

Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 src/ChangeLog |  19 +++++
 src/readelf.c | 213 ++++++++++++++++++++++++++------------------------
 2 files changed, 130 insertions(+), 102 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index db8a81ee..0490088e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,22 @@
+2023-01-10  Mark Wielaard  <mark@klomp.org>
+
+	* readelf.c (get_debug_elf_data): New function.
+	(print_debug_abbrev_section): Use get_debug_elf_data.
+	(print_debug_addr_section): Likewise.
+	(print_debug_aranges_section): Likewise.
+	(print_debug_rnglists_section): Likewise.
+	(print_debug_ranges_section): Likewise.
+	(print_debug_frame_section): Likewise.
+	(print_debug_units): Likewise.
+	(print_debug_line_section): Likewise.
+	(print_debug_loclists_section): Likewise.
+	(print_debug_loc_section): Likewise.
+	(print_debug_macinfo_section): Likewise.
+	(print_debug_macro_section): Likewise.
+	(print_debug_str_section): Likewise.
+	(print_debug_str_offsets_section): Likewise.
+	(print_debug_pubnames_section): Likewise.
+
 2022-12-21  Shahab Vahedi  <shahab@synopsys.email>
 
 	* elflint.c (valid_e_machine): Add EM_ARCV2.
diff --git a/src/readelf.c b/src/readelf.c
index 451f8400..51b0e8b9 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -3857,6 +3857,39 @@ print_attributes (Ebl *ebl, const GElf_Ehdr *ehdr)
     }
 }
 
+/* Returns either the (relocated) data from the Dwarf, or tries to get
+   the "raw" (uncompressed) data from the Elf section. Produces a
+   warning if the data cannot be found (or decompressed).  */
+static Elf_Data *
+get_debug_elf_data (Dwarf *dbg, Ebl *ebl, int idx, Elf_Scn *scn)
+{
+  /* We prefer to get the section data from the Dwarf because that
+     might have been relocated already.  Note this is subtly wrong if
+     there are multiple sections with the same .debug name.  */
+  if (dbg->sectiondata[idx] != NULL)
+    return dbg->sectiondata[idx];
+
+  GElf_Shdr shdr_mem;
+  GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
+  if (shdr != NULL && (shdr->sh_flags & SHF_COMPRESSED) != 0)
+    {
+      if (elf_compress (scn, 0, 0) < 0)
+	{
+	  error (0, 0, "%s [%zd] '%s'\n",
+		 _("Couldn't uncompress section"),
+		 elf_ndxscn (scn), section_name (ebl, shdr));
+	  return NULL;
+	}
+    }
+
+  Elf_Data *data = elf_getdata (scn, NULL);
+  if (data == NULL)
+    error (0, 0, "%s [%zd] '%s': %s\n",
+	   _("Couldn't get data from section"),
+	   elf_ndxscn (scn), section_name (ebl, shdr), elf_errmsg (-1));
+
+  return elf_getdata (scn, NULL);
+}
 
 void
 print_dwarf_addr (Dwfl_Module *dwflmod,
@@ -5271,8 +5304,11 @@ print_debug_abbrev_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			    Ebl *ebl, GElf_Ehdr *ehdr __attribute__ ((unused)),
 			    Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
-  const size_t sh_size = (dbg->sectiondata[IDX_debug_abbrev] ?
-			  dbg->sectiondata[IDX_debug_abbrev]->d_size : 0);
+  Elf_Data *elf_data = get_debug_elf_data (dbg, ebl, IDX_debug_abbrev, scn);
+  if (elf_data == NULL)
+    return;
+
+  const size_t sh_size = elf_data->d_size;
 
   printf (_("\nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"
 		   " [ Code]\n"),
@@ -5344,6 +5380,10 @@ print_debug_addr_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			  Ebl *ebl, GElf_Ehdr *ehdr,
 			  Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_addr, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
@@ -5352,16 +5392,6 @@ print_debug_addr_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
   if (shdr->sh_size == 0)
     return;
 
-  /* We like to get the section from libdw to make sure they are relocated.  */
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_addr]
-		    ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_addr section data: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
-
   size_t idx = 0;
   sort_listptr (&known_addrbases, "addr_base");
 
@@ -5643,15 +5673,9 @@ print_debug_aranges_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
       return;
     }
 
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_aranges]
-		    ?: elf_rawdata (scn, NULL));
-
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_aranges content: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_aranges, scn);
+  if (data == NULL)
+    return;
 
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
@@ -5820,20 +5844,15 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
 			      Elf_Scn *scn, GElf_Shdr *shdr,
 			      Dwarf *dbg __attribute__((unused)))
 {
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_rnglists, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
 	  (uint64_t) shdr->sh_offset);
 
-  Elf_Data *data =(dbg->sectiondata[IDX_debug_rnglists]
-		   ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_rnglists content: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
-
   /* For the listptr to get the base address/CU.  */
   sort_listptr (&known_rnglistptr, "rnglistptr");
   size_t listptr_idx = 0;
@@ -6196,14 +6215,9 @@ print_debug_ranges_section (Dwfl_Module *dwflmod,
 			    Elf_Scn *scn, GElf_Shdr *shdr,
 			    Dwarf *dbg)
 {
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_ranges]
-		    ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_ranges content: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_ranges, scn);
+  if (data == NULL)
+    return;
 
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
@@ -6804,16 +6818,22 @@ print_debug_frame_section (Dwfl_Module *dwflmod, Ebl *ebl, GElf_Ehdr *ehdr,
     }
 
   bool is_eh_frame = strcmp (scnname, ".eh_frame") == 0;
-  Elf_Data *data = (is_eh_frame
-		    ? elf_rawdata (scn, NULL)
-		    : (dbg->sectiondata[IDX_debug_frame]
-		       ?: elf_rawdata (scn, NULL)));
-
-  if (unlikely (data == NULL))
+  Elf_Data *data;
+  if (is_eh_frame)
     {
-      error (0, 0, _("cannot get %s content: %s"),
-	     scnname, elf_errmsg (-1));
-      return;
+      data = elf_rawdata (scn, NULL);
+      if (data == NULL)
+	{
+	  error (0, 0, _("cannot get %s content: %s"),
+		 scnname, elf_errmsg (-1));
+	  return;
+	}
+    }
+  else
+    {
+      data = get_debug_elf_data (dbg, ebl, IDX_debug_frame, scn);
+      if (data == NULL)
+	return;
     }
 
   if (is_eh_frame)
@@ -7912,6 +7932,13 @@ print_debug_units (Dwfl_Module *dwflmod,
   const bool silent = !(print_debug_sections & section_info) && !debug_types;
   const char *secname = section_name (ebl, shdr);
 
+  /* Check section actually exists.  */
+  if (!silent)
+    if (get_debug_elf_data (dbg, ebl,
+			    debug_types ? IDX_debug_types : IDX_debug_info,
+			    scn) == NULL)
+      return;
+
   if (!silent)
     printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n [Offset]\n"),
@@ -8576,6 +8603,10 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl *ebl, GElf_Ehdr *ehdr,
       return;
     }
 
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_line, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
@@ -8586,14 +8617,6 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl *ebl, GElf_Ehdr *ehdr,
 
   /* There is no functionality in libdw to read the information in the
      way it is represented here.  Hardcode the decoder.  */
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_line]
-		    ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get line data section data: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
 
   const unsigned char *linep = (const unsigned char *) data->d_buf;
   const unsigned char *lineendp;
@@ -9322,20 +9345,15 @@ print_debug_loclists_section (Dwfl_Module *dwflmod,
 			      Elf_Scn *scn, GElf_Shdr *shdr,
 			      Dwarf *dbg)
 {
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_loclists, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
 	  (uint64_t) shdr->sh_offset);
 
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_loclists]
-		    ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_loclists content: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
-
   /* For the listptr to get the base address/CU.  */
   sort_listptr (&known_loclistsptr, "loclistsptr");
   size_t listptr_idx = 0;
@@ -9795,15 +9813,9 @@ print_debug_loc_section (Dwfl_Module *dwflmod,
 			 Ebl *ebl, GElf_Ehdr *ehdr,
 			 Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_loc]
-		    ?: elf_rawdata (scn, NULL));
-
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_loc content: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_loc, scn);
+  if (data == NULL)
+    return;
 
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
@@ -10056,6 +10068,10 @@ print_debug_macinfo_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			     GElf_Ehdr *ehdr __attribute__ ((unused)),
 			     Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_macinfo, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
@@ -10064,14 +10080,6 @@ print_debug_macinfo_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 
   /* There is no function in libdw to iterate over the raw content of
      the section but it is easy enough to do.  */
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_macinfo]
-		    ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get macro information section data: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
 
   /* Get the source file information for all CUs.  */
   Dwarf_Off offset;
@@ -10222,20 +10230,16 @@ print_debug_macro_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			   GElf_Ehdr *ehdr __attribute__ ((unused)),
 			   Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_macro, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
 	  (uint64_t) shdr->sh_offset);
   putc_unlocked ('\n', stdout);
 
-  Elf_Data *data =  elf_getdata (scn, NULL);
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get macro information section data: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
-
   /* Get the source file information for all CUs.  Uses same
      datastructure as macinfo.  But uses offset field to directly
      match .debug_line offset.  And just stored in a list.  */
@@ -10609,6 +10613,10 @@ print_debug_pubnames_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			      GElf_Ehdr *ehdr __attribute__ ((unused)),
 			      Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
+  /* Check section actually exists.  */
+  if (get_debug_elf_data (dbg, ebl, IDX_debug_pubnames, scn) == NULL)
+      return;
+
   printf (_("\nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
 	  (uint64_t) shdr->sh_offset);
@@ -10617,7 +10625,8 @@ print_debug_pubnames_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
   (void) dwarf_getpubnames (dbg, print_pubnames, &n, 0);
 }
 
-/* Print the content of the DWARF string section '.debug_str'.  */
+/* Print the content of the DWARF string section '.debug_str'
+   or 'debug_line_str'.  */
 static void
 print_debug_str_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			 Ebl *ebl,
@@ -10625,8 +10634,14 @@ print_debug_str_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 			 Elf_Scn *scn, GElf_Shdr *shdr,
 			 Dwarf *dbg __attribute__ ((unused)))
 {
-  Elf_Data *data = elf_rawdata (scn, NULL);
-  const size_t sh_size = data ? data->d_size : 0;
+  const char *name = section_name (ebl, shdr);
+  int idx = ((name != NULL && strstr (name, "debug_line_str") != NULL)
+	     ? IDX_debug_line_str : IDX_debug_str);
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, idx, scn);
+  if (data == NULL)
+    return;
+
+  const size_t sh_size = data->d_size;
 
   /* Compute floor(log16(shdr->sh_size)).  */
   GElf_Addr tmp = sh_size;
@@ -10669,6 +10684,10 @@ print_debug_str_offsets_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
 				 GElf_Ehdr *ehdr __attribute__ ((unused)),
 				 Elf_Scn *scn, GElf_Shdr *shdr, Dwarf *dbg)
 {
+  Elf_Data *data = get_debug_elf_data (dbg, ebl, IDX_debug_str_offsets, scn);
+  if (data == NULL)
+    return;
+
   printf (_("\
 \nDWARF section [%2zu] '%s' at offset %#" PRIx64 ":\n"),
 	  elf_ndxscn (scn), section_name (ebl, shdr),
@@ -10677,16 +10696,6 @@ print_debug_str_offsets_section (Dwfl_Module *dwflmod __attribute__ ((unused)),
   if (shdr->sh_size == 0)
     return;
 
-  /* We like to get the section from libdw to make sure they are relocated.  */
-  Elf_Data *data = (dbg->sectiondata[IDX_debug_str_offsets]
-		    ?: elf_rawdata (scn, NULL));
-  if (unlikely (data == NULL))
-    {
-      error (0, 0, _("cannot get .debug_str_offsets section data: %s"),
-	     elf_errmsg (-1));
-      return;
-    }
-
   size_t idx = 0;
   sort_listptr (&known_stroffbases, "str_offsets");
 
-- 
2.39.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] readelf: Check compression status of .debug section data
  2023-01-10 17:44                           ` [PATCH] readelf: Check compression status of .debug section data Mark Wielaard
@ 2023-01-16 19:39                             ` Mark Wielaard
  0 siblings, 0 replies; 25+ messages in thread
From: Mark Wielaard @ 2023-01-16 19:39 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Martin Liška

Hi,

On Tue, 2023-01-10 at 18:44 +0100, Mark Wielaard wrote:
> The various print_debug_*_section functions didn't get the section
> data in the same way. Add a new get_debug_elf_data function that
> gets the (possibly relocated) section data and that checks (and
> warns) if the data might still be compressed in a way that we
> cannot decompress.

Pushed.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2023-01-16 19:39 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-24 11:09 [PATCH][RFC] readelf: partial support of ZSTD compression Martin Liška
2022-10-24 11:41 ` Dmitry V. Levin
2022-10-24 12:17   ` Martin Liška
2022-10-24 16:48     ` Dmitry V. Levin
2022-10-24 18:16       ` Martin Liška
2022-10-28 22:21         ` Mark Wielaard
2022-11-28 13:16           ` Martin Liška
2022-11-28 22:29             ` Mark Wielaard
2022-11-29  9:34               ` Martin Liška
2022-11-29 12:05                 ` [PATCHv2] support ZSTD compression algorithm Martin Liška
2022-12-09 10:17                   ` Martin Liška
2022-12-15 13:17                   ` Mark Wielaard
2022-12-19 14:19                     ` Martin Liška
2022-12-19 15:09                       ` Mark Wielaard
2022-12-21 11:13                         ` Martin Liška
2023-01-10 17:44                           ` [PATCH] readelf: Check compression status of .debug section data Mark Wielaard
2023-01-16 19:39                             ` Mark Wielaard
2022-12-16  0:32                   ` [PATCHv2] support ZSTD compression algorithm Mark Wielaard
2022-12-19 14:21                     ` Martin Liška
2022-12-19 15:16                       ` Mark Wielaard
2022-12-21 11:09                         ` Martin Liška
2022-12-21 23:14                           ` Mark Wielaard
2022-12-22  9:17                             ` Martin Liška
2022-12-22 18:36                               ` Mark Wielaard
2022-12-23  8:44                                 ` Martin Liška

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).