* Add zstd support to libbacktrace @ 2022-12-08 0:22 Ian Lance Taylor 2022-12-10 1:47 ` Ian Lance Taylor 0 siblings, 1 reply; 3+ messages in thread From: Ian Lance Taylor @ 2022-12-08 0:22 UTC (permalink / raw) To: gcc-patches [-- Attachment #1: Type: text/plain, Size: 2430 bytes --] This patch adds zstd support to libbacktrace, to support the new linker option --compress-debug-sections=zstd. The zstd format is fairly complicated, so it's likely that there are some bugs here. It does pass the tests, at least. Unfortunately this decompressor only runs at about 1/3 the speed to the zstd library decompressor. Still, it's smaller and simpler, and I think it uses less memory. Plus of course it uses the signal-safe libbacktrace memory allocator. Perhaps people can make a bit faster over time. Bootstrapped and ran libbacktrace and Go tests while using a linker that compressed using zstd. Committed to mainline. Ian Support decompressing --compress-debug-sections=zstd. * configure.ac: Check for zstd library and --compress-debug-sections=zstd linker option. * Makefile.am (zstdtest_*): New targets. (zstdtest_alloc_*, ctestzstd_*): New targets. (BUILDTESTS): Add zstdtest, zstdtest_alloc, ctestzstd as appropriate. * elf.c (ELFCOMPRESS_ZSTD): Define. (elf_fetch_bits): Rename from elf_zlib_fetch. Update uses. (elf_fetch_bits_backward): New static function. (ZLIB_HUFFMAN_*): Rename from HUFFMAN_*. Update uses. (ZLIB_TABLE_*): Rename from ZDEBUG_TABLE_*. Update uses. (ZSTD_TABLE_*): Define. (struct elf_zstd_fse_entry): Define. (elf_zstd_read_fse): New static function. (elf_zstd_build_fse): Likewise. (lit): Define if BACKTRACE_GENERATE_ZSTD_FSE_TABLES. (match, offset, next, print_table, main): Likewise. (elf_zstd_lit_table): New static const array. (elf_zstd_match_table, elf_zstd_offset_table): Likewise. (elf_zstd_read_huff): New static function. (struct elf_zstd_seq_decode): Define. (elf_zstd_unpack_seq_decode): New static function. (ZSTD_LIT_*): Define. 
(struct elf_zstd_literals): Define. (elf_zstd_literal_output): New static function. (ZSTD_LITERAL_LENGTH_BASELINE_OFFSET): Define. (elf_zstd_literal_length_baseline): New static const array. (elf_zstd_literal_length_bits): Likewise. (ZSTD_MATCH_LENGTH_BASELINE_OFFSET): Define. (elf_zstd_match_length_baseline): New static const array. (elf_zstd_match_length_bits): Likewise. (elf_zstd_decompress): New static function. (ZDEBUG_TABLE_SIZE): New definition. (elf_uncompress_chdr): Support ELF_COMPRESS_ZSTD. (backtrace_uncompress_zstd): New function. (elf_add): Use ZLIB_TABLE_SIZE for zlib-gnu sections. * internal.h (backtrace_uncompress_zstd): Declare. * zstdtest.c: New file. * configure, config.h.in, Makefile.in: Regenerate. [-- Attachment #2: patch.txt --] [-- Type: text/plain, Size: 98361 bytes --] diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am index 9f8516d00e2..047b573c29a 100644 --- a/libbacktrace/Makefile.am +++ b/libbacktrace/Makefile.am @@ -368,6 +368,25 @@ ztest_alloc_CFLAGS = $(ztest_CFLAGS) BUILDTESTS += ztest_alloc +zstdtest_SOURCES = zstdtest.c testlib.c +zstdtest_CFLAGS = $(libbacktrace_TEST_CFLAGS) -DSRCDIR=\"$(srcdir)\" +zstdtest_LDADD = libbacktrace.la +zstdtest_alloc_LDADD = libbacktrace_alloc.la + +if HAVE_ZSTD +zstdtest_LDADD += -lzstd +zstdtest_alloc_LDADD += -lzstd +endif +zstdtest_LDADD += $(CLOCK_GETTIME_LINK) +zstdtest_alloc_LDADD += $(CLOCK_GETTIME_LINK) + +BUILDTESTS += zstdtest + +zstdtest_alloc_SOURCES = $(zstdtest_SOURCES) +zstdtest_alloc_CFLAGS = $(zstdtest_CFLAGS) + +BUILDTESTS += zstdtest_alloc + endif HAVE_ELF edtest_SOURCES = edtest.c edtest2_build.c testlib.c @@ -450,6 +469,17 @@ ctesta_LDADD = libbacktrace.la BUILDTESTS += ctestg ctesta +if HAVE_COMPRESSED_DEBUG_ZSTD + +ctestzstd_SOURCES = btest.c testlib.c +ctestzstd_CFLAGS = $(libbacktrace_TEST_CFLAGS) +ctestzstd_LDFLAGS = -Wl,--compress-debug-sections=zstd +ctestzstd_LDADD = libbacktrace.la + +BUILDTESTS += ctestzstd + +endif + ctestg_alloc_SOURCES = 
$(ctestg_SOURCES) ctestg_alloc_CFLAGS = $(ctestg_CFLAGS) ctestg_alloc_LDFLAGS = $(ctestg_LDFLAGS) diff --git a/libbacktrace/configure.ac b/libbacktrace/configure.ac index 1daaa2f62d2..d0a0475cfa8 100644 --- a/libbacktrace/configure.ac +++ b/libbacktrace/configure.ac @@ -495,6 +495,21 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM(,)], LDFLAGS=$LDFLAGS_hold]) AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG, test "$libgo_cv_ld_compress" = yes) +AC_CHECK_LIB([zstd], [ZSTD_compress], + [AC_DEFINE(HAVE_ZSTD, 1, [Define if -lzstd is available.])]) +AM_CONDITIONAL(HAVE_ZSTD, test "$ac_cv_lib_zstd_ZSTD_compress" = yes) + +dnl Test whether the linker supports --compress-debug-sections=zstd option. +AC_CACHE_CHECK([whether --compress-debug-sections=zstd is supported], +[libgo_cv_ld_compress_zstd], +[LDFLAGS_hold=$LDFLAGS +LDFLAGS="$LDFLAGS -Wl,--compress-debug-sections=zstd" +AC_LINK_IFELSE([AC_LANG_PROGRAM(,)], +[libgo_cv_ld_compress_zstd=yes], +[libgo_cv_ld_compress_zstd=no]) +LDFLAGS=$LDFLAGS_hold]) +AM_CONDITIONAL(HAVE_COMPRESSED_DEBUG_ZSTD, test "$libgo_cv_ld_compress_zstd" = yes) + AC_ARG_VAR(OBJCOPY, [location of objcopy]) AC_CHECK_PROG(OBJCOPY, objcopy, objcopy,) AC_CHECK_PROG(READELF, readelf, readelf) diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c index 181d195fe35..15e6f284db6 100644 --- a/libbacktrace/elf.c +++ b/libbacktrace/elf.c @@ -184,6 +184,7 @@ dl_iterate_phdr (int (*callback) (struct dl_phdr_info *, #undef STT_FUNC #undef NT_GNU_BUILD_ID #undef ELFCOMPRESS_ZLIB +#undef ELFCOMPRESS_ZSTD /* Basic types. */ @@ -341,6 +342,7 @@ typedef struct #endif /* BACKTRACE_ELF_SIZE != 32 */ #define ELFCOMPRESS_ZLIB 1 +#define ELFCOMPRESS_ZSTD 2 /* Names of sections, indexed by enum dwarf_section in internal.h. */ @@ -1113,7 +1115,7 @@ elf_uncompress_failed(void) on error. 
*/ static int -elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, +elf_fetch_bits (const unsigned char **ppin, const unsigned char *pinend, uint64_t *pval, unsigned int *pbits) { unsigned int bits; @@ -1160,6 +1162,67 @@ elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, return 1; } +/* This is like elf_fetch_bits, but it fetchs the bits backward, and ensures at + least 16 bits. This is for zstd. */ + +static int +elf_fetch_bits_backward (const unsigned char **ppin, + const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + unsigned int bits; + const unsigned char *pin; + uint64_t val; + uint32_t next; + + bits = *pbits; + if (bits >= 16) + return 1; + pin = *ppin; + val = *pval; + + if (unlikely (pin <= pinend)) + { + if (bits == 0) + { + elf_uncompress_failed (); + return 0; + } + return 1; + } + + pin -= 4; + +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ + && defined(__ORDER_BIG_ENDIAN__) \ + && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ + || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + /* We've ensured that PIN is aligned. */ + next = *(const uint32_t *)pin; + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + next = __builtin_bswap32 (next); +#endif +#else + next = pin[0] | (pin[1] << 8) | (pin[2] << 16) | (pin[3] << 24); +#endif + + val <<= 32; + val |= next; + bits += 32; + + if (unlikely (pin < pinend)) + { + val >>= (pinend - pin) * 8; + bits -= (pinend - pin) * 8; + } + + *ppin = pin; + *pval = val; + *pbits = bits; + return 1; +} + /* Huffman code tables, like the rest of the zlib format, are defined by RFC 1951. We store a Huffman code table as a series of tables stored sequentially in memory. Each entry in a table is 16 bits. @@ -1194,14 +1257,14 @@ elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, /* Number of entries we allocate to for one code table. We get a page for the two code tables we need. 
*/ -#define HUFFMAN_TABLE_SIZE (1024) +#define ZLIB_HUFFMAN_TABLE_SIZE (1024) /* Bit masks and shifts for the values in the table. */ -#define HUFFMAN_VALUE_MASK 0x01ff -#define HUFFMAN_BITS_SHIFT 9 -#define HUFFMAN_BITS_MASK 0x7 -#define HUFFMAN_SECONDARY_SHIFT 12 +#define ZLIB_HUFFMAN_VALUE_MASK 0x01ff +#define ZLIB_HUFFMAN_BITS_SHIFT 9 +#define ZLIB_HUFFMAN_BITS_MASK 0x7 +#define ZLIB_HUFFMAN_SECONDARY_SHIFT 12 /* For working memory while inflating we need two code tables, we need an array of code lengths (max value 15, so we use unsigned char), @@ -1209,17 +1272,17 @@ elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, latter two arrays must be large enough to hold the maximum number of code lengths, which RFC 1951 defines as 286 + 30. */ -#define ZDEBUG_TABLE_SIZE \ - (2 * HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ +#define ZLIB_TABLE_SIZE \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ + (286 + 30) * sizeof (uint16_t) \ + (286 + 30) * sizeof (unsigned char)) -#define ZDEBUG_TABLE_CODELEN_OFFSET \ - (2 * HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ +#define ZLIB_TABLE_CODELEN_OFFSET \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ + (286 + 30) * sizeof (uint16_t)) -#define ZDEBUG_TABLE_WORK_OFFSET \ - (2 * HUFFMAN_TABLE_SIZE * sizeof (uint16_t)) +#define ZLIB_TABLE_WORK_OFFSET \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)) #ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE @@ -1252,7 +1315,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, next value after VAL with the same bit length. */ next = (uint16_t *) (((unsigned char *) zdebug_table) - + ZDEBUG_TABLE_WORK_OFFSET); + + ZLIB_TABLE_WORK_OFFSET); memset (&count[0], 0, 16 * sizeof (uint16_t)); for (i = 0; i < codes_len; ++i) @@ -1280,7 +1343,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, /* For each length, fill in the table for the codes of that length. 
*/ - memset (table, 0, HUFFMAN_TABLE_SIZE * sizeof (uint16_t)); + memset (table, 0, ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)); /* Handle the values that do not require a secondary table. */ @@ -1314,13 +1377,13 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, /* In the compressed bit stream, the value VAL is encoded as J bits with the value C. */ - if (unlikely ((val & ~HUFFMAN_VALUE_MASK) != 0)) + if (unlikely ((val & ~ZLIB_HUFFMAN_VALUE_MASK) != 0)) { elf_uncompress_failed (); return 0; } - tval = val | ((j - 1) << HUFFMAN_BITS_SHIFT); + tval = val | ((j - 1) << ZLIB_HUFFMAN_BITS_SHIFT); /* The table lookup uses 8 bits. If J is less than 8, we don't know what the other bits will be. We need to fill @@ -1470,7 +1533,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, { /* Start a new secondary table. */ - if (unlikely ((next_secondary & HUFFMAN_VALUE_MASK) + if (unlikely ((next_secondary & ZLIB_HUFFMAN_VALUE_MASK) != next_secondary)) { elf_uncompress_failed (); @@ -1481,22 +1544,23 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, secondary_bits = j - 8; next_secondary += 1 << secondary_bits; table[primary] = (secondary - + ((j - 8) << HUFFMAN_BITS_SHIFT) - + (1U << HUFFMAN_SECONDARY_SHIFT)); + + ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT) + + (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)); } else { /* There is an existing entry. It had better be a secondary table with enough bits. 
*/ - if (unlikely ((tprimary & (1U << HUFFMAN_SECONDARY_SHIFT)) + if (unlikely ((tprimary + & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0)) { elf_uncompress_failed (); return 0; } - secondary = tprimary & HUFFMAN_VALUE_MASK; - secondary_bits = ((tprimary >> HUFFMAN_BITS_SHIFT) - & HUFFMAN_BITS_MASK); + secondary = tprimary & ZLIB_HUFFMAN_VALUE_MASK; + secondary_bits = ((tprimary >> ZLIB_HUFFMAN_BITS_SHIFT) + & ZLIB_HUFFMAN_BITS_MASK); if (unlikely (secondary_bits < j - 8)) { elf_uncompress_failed (); @@ -1507,7 +1571,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, /* Fill in secondary table entries. */ - tval = val | ((j - 8) << HUFFMAN_BITS_SHIFT); + tval = val | ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT); for (ind = code >> 8; ind < (1U << secondary_bits); @@ -1550,7 +1614,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, #include <stdio.h> -static uint16_t table[ZDEBUG_TABLE_SIZE]; +static uint16_t table[ZLIB_TABLE_SIZE]; static unsigned char codes[288]; int @@ -1778,7 +1842,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, const uint16_t *tlit; const uint16_t *tdist; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; last = val & 1; @@ -1866,7 +1930,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* Read a Huffman encoding table. The various magic numbers here are from RFC 1951. */ - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; nlit = (val & 0x1f) + 257; @@ -1891,7 +1955,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* There are always at least 4 elements in the table. 
*/ - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[16] = val & 7; @@ -1911,7 +1975,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, if (nclen == 5) goto codebitsdone; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[7] = val & 7; @@ -1949,7 +2013,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, if (nclen == 10) goto codebitsdone; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[11] = val & 7; @@ -1987,7 +2051,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, if (nclen == 15) goto codebitsdone; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[2] = val & 7; @@ -2026,7 +2090,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, at the end of zdebug_table to hold them. */ plenbase = (((unsigned char *) zdebug_table) - + ZDEBUG_TABLE_CODELEN_OFFSET); + + ZLIB_TABLE_CODELEN_OFFSET); plen = plenbase; plenend = plen + nlit + ndist; while (plen < plenend) @@ -2035,24 +2099,25 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, unsigned int b; uint16_t v; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; t = zdebug_table[val & 0xff]; /* The compression here uses bit lengths up to 7, so a secondary table is never necessary. 
*/ - if (unlikely ((t & (1U << HUFFMAN_SECONDARY_SHIFT)) != 0)) + if (unlikely ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) + != 0)) { elf_uncompress_failed (); return 0; } - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; val >>= b + 1; bits -= b + 1; - v = t & HUFFMAN_VALUE_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; if (v < 16) *plen++ = v; else if (v == 16) @@ -2069,7 +2134,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, } /* We used up to 7 bits since the last - elf_zlib_fetch, so we have at least 8 bits + elf_fetch_bits, so we have at least 8 bits available here. */ c = 3 + (val & 0x3); @@ -2104,7 +2169,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* Store zero 3 to 10 times. */ /* We used up to 7 bits since the last - elf_zlib_fetch, so we have at least 8 bits + elf_fetch_bits, so we have at least 8 bits available here. */ c = 3 + (val & 0x7); @@ -2150,7 +2215,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* Store zero 11 to 138 times. */ /* We used up to 7 bits since the last - elf_zlib_fetch, so we have at least 8 bits + elf_fetch_bits, so we have at least 8 bits available here. */ c = 11 + (val & 0x7f); @@ -2187,10 +2252,11 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, zdebug_table)) return 0; if (!elf_zlib_inflate_table (plen + nlit, ndist, zdebug_table, - zdebug_table + HUFFMAN_TABLE_SIZE)) + (zdebug_table + + ZLIB_HUFFMAN_TABLE_SIZE))) return 0; tlit = zdebug_table; - tdist = zdebug_table + HUFFMAN_TABLE_SIZE; + tdist = zdebug_table + ZLIB_HUFFMAN_TABLE_SIZE; } /* Inflate values until the end of the block. 
This is the @@ -2203,14 +2269,14 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, uint16_t v; unsigned int lit; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; t = tlit[val & 0xff]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - v = t & HUFFMAN_VALUE_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; - if ((t & (1U << HUFFMAN_SECONDARY_SHIFT)) == 0) + if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) { lit = v; val >>= b + 1; @@ -2219,8 +2285,8 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, else { t = tlit[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - lit = t & HUFFMAN_VALUE_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + lit = t & ZLIB_HUFFMAN_VALUE_MASK; val >>= b + 8; bits -= b + 8; } @@ -2265,7 +2331,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, { unsigned int extra; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; /* This is an expression for the table of length @@ -2280,14 +2346,14 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, bits -= extra; } - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; t = tdist[val & 0xff]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - v = t & HUFFMAN_VALUE_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; - if ((t & (1U << HUFFMAN_SECONDARY_SHIFT)) == 0) + if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) { dist = v; val >>= b + 1; @@ -2296,8 +2362,9 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, else { t = tdist[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; - b = (t 
>> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - dist = t & HUFFMAN_VALUE_MASK; + b = ((t >> ZLIB_HUFFMAN_BITS_SHIFT) + & ZLIB_HUFFMAN_BITS_MASK); + dist = t & ZLIB_HUFFMAN_VALUE_MASK; val >>= b + 8; bits -= b + 8; } @@ -2321,204 +2388,2348 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, return 0; } - memset (pout, pout[-1], len); - pout += len; - } - else if (unlikely (dist > 29)) - { - elf_uncompress_failed (); + memset (pout, pout[-1], len); + pout += len; + } + else if (unlikely (dist > 29)) + { + elf_uncompress_failed (); + return 0; + } + else + { + if (dist < 4) + dist = dist + 1; + else + { + unsigned int extra; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + /* This is an expression for the table of + distance codes in RFC 1951 3.2.5. */ + dist -= 4; + extra = (dist >> 1) + 1; + dist = (dist & 1) << extra; + dist += 5; + dist += ((1U << (extra - 1)) - 1) << 2; + dist += val & ((1U << extra) - 1); + val >>= extra; + bits -= extra; + } + + /* Go back dist bytes, and copy len bytes from + there. */ + + if (unlikely ((unsigned int) (pout - porigout) < dist)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((unsigned int) (poutend - pout) < len)) + { + elf_uncompress_failed (); + return 0; + } + + if (dist >= len) + { + memcpy (pout, pout - dist, len); + pout += len; + } + else + { + while (len > 0) + { + unsigned int copy; + + copy = len < dist ? len : dist; + memcpy (pout, pout - dist, copy); + len -= copy; + pout += copy; + } + } + } + } + } + } + } + + /* We should have filled the output buffer. */ + if (unlikely (pout != poutend)) + { + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +/* Verify the zlib checksum. The checksum is in the 4 bytes at + CHECKBYTES, and the uncompressed data is at UNCOMPRESSED / + UNCOMPRESSED_SIZE. Returns 1 on success, 0 on failure. 
*/ + +static int +elf_zlib_verify_checksum (const unsigned char *checkbytes, + const unsigned char *uncompressed, + size_t uncompressed_size) +{ + unsigned int i; + unsigned int cksum; + const unsigned char *p; + uint32_t s1; + uint32_t s2; + size_t hsz; + + cksum = 0; + for (i = 0; i < 4; i++) + cksum = (cksum << 8) | checkbytes[i]; + + s1 = 1; + s2 = 0; + + /* Minimize modulo operations. */ + + p = uncompressed; + hsz = uncompressed_size; + while (hsz >= 5552) + { + for (i = 0; i < 5552; i += 16) + { + /* Manually unroll loop 16 times. */ + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + } + hsz -= 5552; + s1 %= 65521; + s2 %= 65521; + } + + while (hsz >= 16) + { + /* Manually unroll loop 16 times. 
*/ + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + + hsz -= 16; + } + + for (i = 0; i < hsz; ++i) + { + s1 = s1 + *p++; + s2 = s2 + s1; + } + + s1 %= 65521; + s2 %= 65521; + + if (unlikely ((s2 << 16) + s1 != cksum)) + { + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +/* Inflate a zlib stream from PIN/SIN to POUT/SOUT, and verify the + checksum. Return 1 on success, 0 on error. */ + +static int +elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, + uint16_t *zdebug_table, unsigned char *pout, + size_t sout) +{ + if (!elf_zlib_inflate (pin, sin, zdebug_table, pout, sout)) + return 0; + if (!elf_zlib_verify_checksum (pin + sin - 4, pout, sout)) + return 0; + return 1; +} + +/* For working memory during zstd compression, we need + - a literal length FSE table: 512 32-bit values == 2048 bytes + - a match length FSE table: 512 32-bit values == 2048 bytes + - a offset FSE table: 256 32-bit values == 1024 bytes + - a Huffman tree: 2048 uint16_t values == 4096 bytes + - scratch space, one of + - to build an FSE table: 512 uint16_t values == 1024 bytes + - to build a Huffman tree: 512 uint16_t + 256 uint32_t == 2048 bytes + - buffer for literal values == 2048 bytes +*/ + +#define ZSTD_TABLE_SIZE \ + (2 * 512 * sizeof (struct elf_zstd_fse_entry) \ + + 256 * sizeof (struct elf_zstd_fse_entry) \ + + 2048 * sizeof (uint16_t) \ + + 2048) + +#define ZSTD_TABLE_LITERAL_FSE_OFFSET (0) + +#define ZSTD_TABLE_MATCH_FSE_OFFSET (512 * sizeof (struct 
elf_zstd_fse_entry)) + +#define ZSTD_TABLE_OFFSET_FSE_OFFSET \ + (ZSTD_TABLE_MATCH_FSE_OFFSET + 512 * sizeof (struct elf_zstd_fse_entry)) + +#define ZSTD_TABLE_HUFFMAN_OFFSET \ + (ZSTD_TABLE_OFFSET_FSE_OFFSET + 256 * sizeof (struct elf_zstd_fse_entry)) + +#define ZSTD_TABLE_WORK_OFFSET \ + (ZSTD_TABLE_HUFFMAN_OFFSET + 2048 * sizeof (uint16_t)) + +#define ZSTD_TABLE_WORK_LIT_SIZE 2048 + +/* An entry in a zstd FSE table. */ + +struct elf_zstd_fse_entry +{ + unsigned char symbol; + unsigned char bits; + uint16_t base; +}; + +static int +elf_zstd_build_fse (const int16_t *, int, uint16_t *, int, + struct elf_zstd_fse_entry *); + +/* Read a zstd FSE table and build the decoding table in *TABLE, updating *PPIN + as it reads. ZDEBUG_TABLE is scratch space; it must be enough for 512 + uint16_t values (1024 bytes). MAXIDX is the maximum number of symbols + permitted. *TABLE_BITS is the maximum number of bits for symbols in the + table: the size of *TABLE is at least 1 << *TABLE_BITS. This updates + *TABLE_BITS to the actual number of bits. Returns 1 on success, 0 on + error. */ + +static int +elf_zstd_read_fse (const unsigned char **ppin, const unsigned char *pinend, + uint16_t *zdebug_table, int maxidx, + struct elf_zstd_fse_entry *table, int *table_bits) +{ + const unsigned char *pin; + int16_t *norm; + uint16_t *next; + uint64_t val; + unsigned int bits; + int accuracy_log; + uint32_t remaining; + uint32_t threshold; + int bits_needed; + int idx; + int prev0; + + pin = *ppin; + + norm = (int16_t *) zdebug_table; + next = zdebug_table + 256; + + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* Align PIN to a 32-bit boundary. 
*/ + + val = 0; + bits = 0; + while ((((uintptr_t) pin) & 3) != 0) + { + val |= (uint64_t)*pin << bits; + bits += 8; + ++pin; + } + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + accuracy_log = (val & 0xf) + 5; + if (accuracy_log > *table_bits) + { + elf_uncompress_failed (); + return 0; + } + *table_bits = accuracy_log; + val >>= 4; + bits -= 4; + + /* This code is mostly copied from the reference implementation. */ + + /* The number of remaining probabilities, plus 1. This sets the number of + bits that need to be read for the next value. */ + remaining = (1 << accuracy_log) + 1; + + /* The current difference between small and large values, which depends on + the number of remaining values. Small values use one less bit. */ + threshold = 1 << accuracy_log; + + /* The number of bits used to compute threshold. */ + bits_needed = accuracy_log + 1; + + /* The next character value. */ + idx = 0; + + /* Whether the last count was 0. */ + prev0 = 0; + + while (remaining > 1 && idx <= maxidx) + { + uint32_t max; + int32_t count; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + if (prev0) + { + int zidx; + + /* Previous count was 0, so there is a 2-bit repeat flag. If the + 2-bit flag is 0b11, it adds 3 and then there is another repeat + flag. */ + zidx = idx; + while ((val & 0xfff) == 0xfff) + { + zidx += 3 * 6; + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + val >>= 12; + bits -= 12; + } + while ((val & 3) == 3) + { + zidx += 3; + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + val >>= 2; + bits -= 2; + } + /* We have at least 13 bits here, don't need to fetch. */ + zidx += val & 3; + val >>= 2; + bits -= 2; + + if (unlikely (zidx > maxidx)) + { + elf_uncompress_failed (); + return 0; + } + + for (; idx < zidx; idx++) + norm[idx] = 0; + + prev0 = 0; + continue; + } + + max = (2 * threshold - 1) - remaining; + if ((val & (threshold - 1)) < max) + { + /* A small value. 
*/ + count = (int32_t) ((uint32_t) val & (threshold - 1)); + val >>= bits_needed - 1; + bits -= bits_needed - 1; + } + else + { + /* A large value. */ + count = (int32_t) ((uint32_t) val & (2 * threshold - 1)); + if (count >= (int32_t) threshold) + count -= (int32_t) max; + val >>= bits_needed; + bits -= bits_needed; + } + + count--; + if (count >= 0) + remaining -= count; + else + remaining--; + if (unlikely (idx >= 256)) + { + elf_uncompress_failed (); + return 0; + } + norm[idx] = (int16_t) count; + ++idx; + + prev0 = count == 0; + + while (remaining < threshold) + { + bits_needed--; + threshold >>= 1; + } + } + + if (unlikely (remaining != 1)) + { + elf_uncompress_failed (); + return 0; + } + + /* If we've read ahead more than a byte, back up. */ + while (bits >= 8) + { + --pin; + bits -= 8; + } + + *ppin = pin; + + for (; idx <= maxidx; idx++) + norm[idx] = 0; + + return elf_zstd_build_fse (norm, idx, next, *table_bits, table); +} + +/* Build the FSE decoding table from a list of probabilities. This reads from + NORM of length IDX, uses NEXT as scratch space, and writes to *TABLE, whose + size is TABLE_BITS. 
*/ + +static int +elf_zstd_build_fse (const int16_t *norm, int idx, uint16_t *next, + int table_bits, struct elf_zstd_fse_entry *table) +{ + int table_size; + int high_threshold; + int i; + int pos; + int step; + int mask; + + table_size = 1 << table_bits; + high_threshold = table_size - 1; + for (i = 0; i < idx; i++) + { + int16_t n; + + n = norm[i]; + if (n >= 0) + next[i] = (uint16_t) n; + else + { + table[high_threshold].symbol = (unsigned char) i; + high_threshold--; + next[i] = 1; + } + } + + pos = 0; + step = (table_size >> 1) + (table_size >> 3) + 3; + mask = table_size - 1; + for (i = 0; i < idx; i++) + { + int n; + int j; + + n = (int) norm[i]; + for (j = 0; j < n; j++) + { + table[pos].symbol = (unsigned char) i; + pos = (pos + step) & mask; + while (unlikely (pos > high_threshold)) + pos = (pos + step) & mask; + } + } + if (pos != 0) + { + elf_uncompress_failed (); + return 0; + } + + for (i = 0; i < table_size; i++) + { + unsigned char sym; + uint16_t next_state; + int high_bit; + int bits; + + sym = table[i].symbol; + next_state = next[sym]; + ++next[sym]; + + if (next_state == 0) + { + elf_uncompress_failed (); + return 0; + } + high_bit = 31 - __builtin_clz (next_state); + + bits = table_bits - high_bit; + table[i].bits = (unsigned char) bits; + table[i].base = (uint16_t) ((next_state << bits) - table_size); + } + + return 1; +} + +#ifdef BACKTRACE_GENERATE_ZSTD_FSE_TABLES + +/* Used to generate the predefined FSE decoding tables for zstd. */ + +#include <stdio.h> + +/* These values are straight from RFC 8878. 
*/ + +static int16_t lit[36] = +{ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 +}; + +static int16_t match[53] = +{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 +}; + +static int16_t offset[29] = +{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 +}; + +static uint16_t next[256]; + +static void +print_table (const struct elf_zstd_fse_entry *table, size_t size) +{ + size_t i; + + printf ("{\n"); + for (i = 0; i < size; i += 4) + { + int j; + + printf (" "); + for (j = 0; j < 4 && i + j < size; ++j) + printf (" { %d, %d, %d },", table[i + j].symbol, table[i + j].bits, + table[i + j].base); + printf ("\n"); + } + printf ("};\n"); +} + +int +main () +{ + struct elf_zstd_fse_entry lit_table[64]; + struct elf_zstd_fse_entry match_table[64]; + struct elf_zstd_fse_entry offset_table[32]; + + if (!elf_zstd_build_fse (lit, sizeof lit / sizeof lit[0], next, + 6, lit_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_entry " + "elf_zstd_lit_table[64] =\n"); + print_table (lit_table, sizeof lit_table / sizeof lit_table[0]); + printf ("\n"); + + if (!elf_zstd_build_fse (match, sizeof match / sizeof match[0], next, + 6, match_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_entry " + "elf_zstd_match_table[64] =\n"); + print_table (match_table, sizeof match_table / sizeof match_table[0]); + printf ("\n"); + + if (!elf_zstd_build_fse (offset, sizeof offset / sizeof offset[0], next, + 5, offset_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_entry " + "elf_zstd_offset_table[32] =\n"); + print_table 
(offset_table, sizeof offset_table / sizeof offset_table[0]); + printf ("\n"); + + return 0; +} + +#endif + +/* The fixed tables generated by the #ifdef'ed out main function + above. */ + +static const struct elf_zstd_fse_entry elf_zstd_lit_table[64] = +{ + { 0, 4, 0 }, { 0, 4, 16 }, { 1, 5, 32 }, { 3, 5, 0 }, + { 4, 5, 0 }, { 6, 5, 0 }, { 7, 5, 0 }, { 9, 5, 0 }, + { 10, 5, 0 }, { 12, 5, 0 }, { 14, 6, 0 }, { 16, 5, 0 }, + { 18, 5, 0 }, { 19, 5, 0 }, { 21, 5, 0 }, { 22, 5, 0 }, + { 24, 5, 0 }, { 25, 5, 32 }, { 26, 5, 0 }, { 27, 6, 0 }, + { 29, 6, 0 }, { 31, 6, 0 }, { 0, 4, 32 }, { 1, 4, 0 }, + { 2, 5, 0 }, { 4, 5, 32 }, { 5, 5, 0 }, { 7, 5, 32 }, + { 8, 5, 0 }, { 10, 5, 32 }, { 11, 5, 0 }, { 13, 6, 0 }, + { 16, 5, 32 }, { 17, 5, 0 }, { 19, 5, 32 }, { 20, 5, 0 }, + { 22, 5, 32 }, { 23, 5, 0 }, { 25, 4, 0 }, { 25, 4, 16 }, + { 26, 5, 32 }, { 28, 6, 0 }, { 30, 6, 0 }, { 0, 4, 48 }, + { 1, 4, 16 }, { 2, 5, 32 }, { 3, 5, 32 }, { 5, 5, 32 }, + { 6, 5, 32 }, { 8, 5, 32 }, { 9, 5, 32 }, { 11, 5, 32 }, + { 12, 5, 32 }, { 15, 6, 0 }, { 17, 5, 32 }, { 18, 5, 32 }, + { 20, 5, 32 }, { 21, 5, 32 }, { 23, 5, 32 }, { 24, 5, 32 }, + { 35, 6, 0 }, { 34, 6, 0 }, { 33, 6, 0 }, { 32, 6, 0 }, +}; + +static const struct elf_zstd_fse_entry elf_zstd_match_table[64] = +{ + { 0, 6, 0 }, { 1, 4, 0 }, { 2, 5, 32 }, { 3, 5, 0 }, + { 5, 5, 0 }, { 6, 5, 0 }, { 8, 5, 0 }, { 10, 6, 0 }, + { 13, 6, 0 }, { 16, 6, 0 }, { 19, 6, 0 }, { 22, 6, 0 }, + { 25, 6, 0 }, { 28, 6, 0 }, { 31, 6, 0 }, { 33, 6, 0 }, + { 35, 6, 0 }, { 37, 6, 0 }, { 39, 6, 0 }, { 41, 6, 0 }, + { 43, 6, 0 }, { 45, 6, 0 }, { 1, 4, 16 }, { 2, 4, 0 }, + { 3, 5, 32 }, { 4, 5, 0 }, { 6, 5, 32 }, { 7, 5, 0 }, + { 9, 6, 0 }, { 12, 6, 0 }, { 15, 6, 0 }, { 18, 6, 0 }, + { 21, 6, 0 }, { 24, 6, 0 }, { 27, 6, 0 }, { 30, 6, 0 }, + { 32, 6, 0 }, { 34, 6, 0 }, { 36, 6, 0 }, { 38, 6, 0 }, + { 40, 6, 0 }, { 42, 6, 0 }, { 44, 6, 0 }, { 1, 4, 32 }, + { 1, 4, 48 }, { 2, 4, 16 }, { 4, 5, 32 }, { 5, 5, 32 }, + { 7, 5, 32 }, { 8, 5, 32 }, { 11, 6, 0 }, { 
14, 6, 0 }, + { 17, 6, 0 }, { 20, 6, 0 }, { 23, 6, 0 }, { 26, 6, 0 }, + { 29, 6, 0 }, { 52, 6, 0 }, { 51, 6, 0 }, { 50, 6, 0 }, + { 49, 6, 0 }, { 48, 6, 0 }, { 47, 6, 0 }, { 46, 6, 0 }, +}; + +static const struct elf_zstd_fse_entry elf_zstd_offset_table[32] = +{ + { 0, 5, 0 }, { 6, 4, 0 }, { 9, 5, 0 }, { 15, 5, 0 }, + { 21, 5, 0 }, { 3, 5, 0 }, { 7, 4, 0 }, { 12, 5, 0 }, + { 18, 5, 0 }, { 23, 5, 0 }, { 5, 5, 0 }, { 8, 4, 0 }, + { 14, 5, 0 }, { 20, 5, 0 }, { 2, 5, 0 }, { 7, 4, 16 }, + { 11, 5, 0 }, { 17, 5, 0 }, { 22, 5, 0 }, { 4, 5, 0 }, + { 8, 4, 16 }, { 13, 5, 0 }, { 19, 5, 0 }, { 1, 5, 0 }, + { 6, 4, 16 }, { 10, 5, 0 }, { 16, 5, 0 }, { 28, 5, 0 }, + { 27, 5, 0 }, { 26, 5, 0 }, { 25, 5, 0 }, { 24, 5, 0 }, +}; + +/* Read a zstd Huffman table and build the decoding table in *TABLE, reading + and updating *PPIN. This sets *PTABLE_BITS to the number of bits of the + table, such that the table length is 1 << *TABLE_BITS. ZDEBUG_TABLE is + scratch space; it must be enough for 512 uint16_t values + 256 32-bit values + (2048 bytes). Returns 1 on success, 0 on error. */ + +static int +elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, + uint16_t *zdebug_table, uint16_t *table, int *ptable_bits) +{ + const unsigned char *pin; + unsigned char hdr; + unsigned char *weights; + size_t count; + uint32_t *weight_mark; + size_t i; + uint32_t weight_mask; + size_t table_bits; + + pin = *ppin; + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + hdr = *pin; + ++pin; + + weights = (unsigned char *) zdebug_table; + + if (hdr < 128) + { + /* Table is compressed using FSE. */ + + struct elf_zstd_fse_entry *fse_table; + int fse_table_bits; + uint16_t *scratch; + const unsigned char *pfse; + const unsigned char *pback; + unsigned char stream_start; + uint64_t val; + unsigned int bits; + unsigned int state1, state2; + + /* SCRATCH is used temporarily by elf_zstd_read_fse. It overlaps + WEIGHTS. 
*/ + scratch = zdebug_table; + fse_table = (struct elf_zstd_fse_entry *) (scratch + 512); + fse_table_bits = 6; + + pfse = pin; + if (!elf_zstd_read_fse (&pfse, pinend, scratch, 255, fse_table, + &fse_table_bits)) + return 0; + + if (unlikely (pin + hdr > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* We no longer need SCRATCH. Start recording weights. We need up to + 256 bytes of weights and 64 bytes of rank counts, so it won't overlap + FSE_TABLE. */ + + pback = pin + hdr - 1; + stream_start = *pback; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + val = 0; + bits = 0; + while ((((uintptr_t)pback) & 3) != 0) + { + val <<= 8; + val |= (uint64_t)*pback; + bits += 8; + --pback; + } + val <<= 8; + val |= (uint64_t)*pback; + bits += 8; + + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= __builtin_clz (stream_start) - 24 + 1; + + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= fse_table_bits; + state1 = (val >> bits) & ((1U << fse_table_bits) - 1); + bits -= fse_table_bits; + state2 = (val >> bits) & ((1U << fse_table_bits) - 1); + + /* There are two independent FSE streams, tracked by STATE1 and STATE2. + We decode them alternately. 
*/ + + count = 0; + while (1) + { + struct elf_zstd_fse_entry *pt; + uint64_t v; + + pt = &fse_table[state1]; + + if (unlikely (pin < pinend) && bits < pt->bits) + { + if (unlikely (count >= 254)) + { + elf_uncompress_failed (); + return 0; + } + weights[count] = (unsigned char) pt->symbol; + weights[count + 1] = (unsigned char) fse_table[state2].symbol; + count += 2; + break; + } + + if (unlikely (pt->bits == 0)) + v = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + } + + state1 = pt->base + v; + + if (unlikely (count >= 255)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = pt->symbol; + ++count; + + pt = &fse_table[state2]; + + if (unlikely (pin < pinend && bits < pt->bits)) + { + if (unlikely (count >= 254)) + { + elf_uncompress_failed (); + return 0; + } + weights[count] = (unsigned char) pt->symbol; + weights[count + 1] = (unsigned char) fse_table[state1].symbol; + count += 2; + break; + } + + if (unlikely (pt->bits == 0)) + v = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + } + + state2 = pt->base + v; + + if (unlikely (count >= 255)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = pt->symbol; + ++count; + } + + pin += hdr; + } + else + { + /* Table is not compressed. Each weight is 4 bits. 
*/ + + count = hdr - 127; + if (unlikely (pin + ((count + 1) / 2) >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + for (i = 0; i < count; i += 2) + { + unsigned char b; + + b = *pin; + ++pin; + weights[i] = b >> 4; + weights[i + 1] = b & 0xf; + } + } + + weight_mark = (uint32_t *) (weights + 256); + memset (weight_mark, 0, 12 * sizeof (uint32_t)); + weight_mask = 0; + for (i = 0; i < count; ++i) + { + unsigned char w; + + w = weights[i]; + if (unlikely (w > 12)) + { + elf_uncompress_failed (); + return 0; + } + ++weight_mark[w]; + if (w > 0) + weight_mask += 1U << (w - 1); + } + if (unlikely (weight_mask == 0)) + { + elf_uncompress_failed (); + return 0; + } + + table_bits = 32 - __builtin_clz (weight_mask); + if (unlikely (table_bits > 11)) + { + elf_uncompress_failed (); + return 0; + } + + /* Work out the last weight value, which is omitted because the weights must + sum to a power of two. */ + { + uint32_t left; + uint32_t high_bit; + + left = ((uint32_t)1 << table_bits) - weight_mask; + if (left == 0) + { + elf_uncompress_failed (); + return 0; + } + high_bit = 31 - __builtin_clz (left); + if (((uint32_t)1 << high_bit) != left) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely (count >= 256)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = high_bit + 1; + ++count; + ++weight_mark[high_bit + 1]; + } + + if (weight_mark[1] < 2 || (weight_mark[1] & 1) != 0) + { + elf_uncompress_failed (); + return 0; + } + + /* Change WEIGHT_MARK from a count of weights to the index of the first + symbol for that weight. We shift the indexes to also store how many we + hae seen so far, below. 
*/ + { + uint32_t next; + + next = 0; + for (i = 0; i < table_bits; ++i) + { + uint32_t cur; + + cur = next; + next += weight_mark[i + 1] << i; + weight_mark[i + 1] = cur; + } + } + + for (i = 0; i < count; ++i) + { + unsigned char weight; + uint32_t length; + uint16_t tval; + size_t start; + uint32_t j; + + weight = weights[i]; + if (weight == 0) + continue; + + length = 1U << (weight - 1); + tval = (i << 8) | (table_bits + 1 - weight); + start = weight_mark[weight]; + for (j = 0; j < length; ++j) + table[start + j] = tval; + weight_mark[weight] += length; + } + + *ppin = pin; + *ptable_bits = (int)table_bits; + + return 1; +} + +/* The information used to decompress a sequence code, which can be a literal + length, an offset, or a match length. */ + +struct elf_zstd_seq_decode +{ + const struct elf_zstd_fse_entry *table; + int table_bits; + int use_rle; + unsigned char rle; +}; + +/* Unpack a sequence code compression mode. */ + +static int +elf_zstd_unpack_seq_decode (int mode, + const unsigned char **ppin, + const unsigned char *pinend, + const struct elf_zstd_fse_entry *predefined, + int predefined_bits, uint16_t *scratch, + int maxidx, struct elf_zstd_fse_entry *fse_table, + int fse_table_bits, + struct elf_zstd_seq_decode *decode) +{ + switch (mode) + { + case 0: + decode->table = predefined; + decode->table_bits = predefined_bits; + decode->use_rle = 0; + break; + + case 1: + if (unlikely (*ppin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + decode->use_rle = 1; + decode->rle = **ppin; + decode->table_bits = 0; + ++*ppin; + break; + + case 2: + decode->table_bits = fse_table_bits; + if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, + &decode->table_bits)) + return 0; + decode->table = fse_table; + decode->use_rle = 0; + break; + + case 3: + if (unlikely (decode->table_bits == 0 && !decode->use_rle)) + { + elf_uncompress_failed (); + return 0; + } + break; + + default: + elf_uncompress_failed (); + return 0; + } + + return 1; 
+} + +/* The different ways that the literals are encoded. */ + +#define ZSTD_LIT_RAW (0) +#define ZSTD_LIT_RLE (1) +#define ZSTD_LIT_HUFF (2) + +/* A struct used to decompress the literals. The order of these fields is + chosen for packing, not for comprehensibility. */ + +struct elf_zstd_literals +{ + /* Current bits in Huffman encoded stream. */ + uint64_t val; + + /* For RAW, the current position in the byte stream. + For RLE, a pointer to the byte being repeated. + For HUFF, start of encoded streams. + */ + const unsigned char *plit; + + /* Current position of current Huffman encoded stream. */ + const unsigned char *pback; + + /* End (reading backward) of current Huffman encoded stream. */ + const unsigned char *pbackend; + + /* The Huffman table. */ + const uint16_t *huffman_table; + + /* Remaining number of uncompressed bytes. */ + uint32_t regenerated_size; + + /* Current number of available bits in Huffman encoded stream. */ + unsigned int bits; + + /* Number of bits in the Huffman table. */ + int huffman_table_bits; + + /* Offsets from PLIT to next Huffman encoded streams, 0 if none. */ + uint32_t stream_off[3]; + + /* Sizes of next Huffman encoded streams, 0 if none. */ + uint32_t stream_size[3]; + + /* A ZSTD_LIT_* code. */ + unsigned char type; +}; + +/* Output COUNT bytes from the literal byte stream in LITERALS to POUT. 
*/ + +static int +elf_zstd_literal_output (struct elf_zstd_literals *literals, + size_t count, + unsigned char *pout) +{ + size_t i; + const unsigned char *pback; + const unsigned char *pbackend; + uint64_t val; + unsigned int bits; + const uint16_t *huffman_table; + unsigned int huffman_table_bits; + uint64_t huffman_mask; + + if (literals->regenerated_size < count) + { + elf_uncompress_failed (); + return 0; + } + literals->regenerated_size -= count; + + switch (literals->type) + { + case ZSTD_LIT_RAW: + memcpy (pout, literals->plit, count); + literals->plit += count; + return 1; + + case ZSTD_LIT_RLE: + memset (pout, *literals->plit, count); + return 1; + + case ZSTD_LIT_HUFF: + break; + + default: + elf_uncompress_failed (); + return 0; + } + + /* The literal string is Huffman encoded. */ + + pback = literals->pback; + pbackend = literals->pbackend; + val = literals->val; + bits = literals->bits; + + huffman_table = literals->huffman_table; + huffman_table_bits = literals->huffman_table_bits; + huffman_mask = ((uint64_t)1 << huffman_table_bits) - 1; + + /* This is one of the inner loops of the decompression algorithm, so we put + some effort into optimization. We can't get more than 64 bytes from a + single call to elf_fetch_bits_backward, and we can't subtract more than 11 + bits at a time. 
*/ + + if (count >= 64) + { + unsigned char *poutstart; + unsigned char *poutstop; + + poutstart = pout; + poutstop = pout + count - 64; + while (pout <= poutstop) + { + uint16_t t; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + if (bits < 16) + break; + + while (bits >= 33) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *pout = t >> 8; + ++pout; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *pout = t >> 8; + ++pout; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *pout = t >> 8; + ++pout; + bits -= t & 0xff; + } + + while (bits > 11) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *pout = t >> 8; + ++pout; + bits -= t & 0xff; + } + } + + count -= pout - poutstart; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + } + + for (i = 0; i < count; ++i) + { + uint16_t t; + + if (unlikely (bits == 0)) + { + unsigned char stream_start; + + /* Advance to next stream. */ + if (unlikely (literals->stream_off[0] == 0)) + { + elf_uncompress_failed (); + return 0; + } + + pback = literals->plit + literals->stream_off[0]; + pbackend = pback; + pback += literals->stream_size[0]; + + /* Align to a 32-bit boundary. 
*/ + val = 0; + bits = 0; + --pback; + stream_start = *pback; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + while ((((uintptr_t) pback) & 3) != 0) + { + val <<= 8; + val |= (uint64_t)*pback; + bits += 8; + --pback; + } + val <<= 8; + val |= (uint64_t)*pback; + bits += 8; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + bits -= __builtin_clz (stream_start) - 24 + 1; + + literals->stream_off[0] = literals->stream_off[1]; + literals->stream_off[1] = literals->stream_off[2]; + literals->stream_off[2] = 0; + literals->stream_size[0] = literals->stream_size[1]; + literals->stream_size[1] = literals->stream_size[2]; + literals->stream_size[2] = 0; + } + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + if (unlikely (bits < huffman_table_bits)) + { + t = huffman_table[(val << (huffman_table_bits - bits)) + & huffman_mask]; + if (unlikely (bits < (t & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t = huffman_table[(val >> (bits - huffman_table_bits)) & huffman_mask]; + + *pout = t >> 8; + ++pout; + + bits -= t & 0xff; + } + + literals->pback = pback; + literals->pbackend = pbackend; + literals->val = val; + literals->bits = bits; + + return 1; +} + +/* Given a literal length code, we need to read a number of bits and add that + to a baseline. For states 0 to 15 the baseline is the state and the number + of bits is zero. */ + +#define ZSTD_LITERAL_LENGTH_BASELINE_OFFSET (16) + +static const uint32_t elf_zstd_literal_length_baseline[] = +{ + 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 128, 256, 512, + 1024, 2048, 4096, 8192, 16384, 32768, 65536 +}; + +static const unsigned char elf_zstd_literal_length_bits[] = +{ + 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +}; + +/* The same applies to match length codes. For states 0 to 31 the baseline is + the state + 3 and the number of bits is zero. 
*/ + +#define ZSTD_MATCH_LENGTH_BASELINE_OFFSET (32) + +static const uint32_t elf_zstd_match_length_baseline[] = +{ + 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 131, 259, 515, + 1027, 2051, 4099, 8195, 16387, 32771, 65539 +}; + +static const unsigned char elf_zstd_match_length_bits[] = +{ + 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +}; + +/* Decompress a zstd stream from PIN/SIN to POUT/SOUT. Code based on RFC 8878. + Return 1 on success, 0 on error. */ + +static int +elf_zstd_decompress (const unsigned char *pin, size_t sin, + unsigned char *zdebug_table, unsigned char *pout, + size_t sout) +{ + const unsigned char *pinend; + unsigned char *poutstart; + unsigned char *poutend; + struct elf_zstd_seq_decode literal_decode; + struct elf_zstd_fse_entry *literal_fse_table; + struct elf_zstd_seq_decode match_decode; + struct elf_zstd_fse_entry *match_fse_table; + struct elf_zstd_seq_decode offset_decode; + struct elf_zstd_fse_entry *offset_fse_table; + uint16_t *huffman_table; + int huffman_table_bits; + uint32_t repeated_offset1; + uint32_t repeated_offset2; + uint32_t repeated_offset3; + uint16_t *scratch; + unsigned char hdr; + int has_checksum; + uint64_t content_size; + int last_block; + + pinend = pin + sin; + poutstart = pout; + poutend = pout + sout; + + literal_decode.table = NULL; + literal_decode.table_bits = 0; + literal_decode.use_rle = 0; + literal_fse_table = ((struct elf_zstd_fse_entry *) + (zdebug_table + ZSTD_TABLE_LITERAL_FSE_OFFSET)); + + match_decode.table = NULL; + match_decode.table_bits = 0; + match_decode.use_rle = 0; + match_fse_table = ((struct elf_zstd_fse_entry *) + (zdebug_table + ZSTD_TABLE_MATCH_FSE_OFFSET)); + + offset_decode.table = NULL; + offset_decode.table_bits = 0; + offset_decode.use_rle = 0; + offset_fse_table = ((struct elf_zstd_fse_entry *) + (zdebug_table + ZSTD_TABLE_OFFSET_FSE_OFFSET)); + huffman_table = ((uint16_t *) + (zdebug_table + ZSTD_TABLE_HUFFMAN_OFFSET)); + huffman_table_bits = 0; + 
scratch = ((uint16_t *) + (zdebug_table + ZSTD_TABLE_WORK_OFFSET)); + + repeated_offset1 = 1; + repeated_offset2 = 4; + repeated_offset3 = 8; + + if (unlikely (sin < 4)) + { + elf_uncompress_failed (); + return 0; + } + + /* These values are the zstd magic number. */ + if (unlikely (pin[0] != 0x28 + || pin[1] != 0xb5 + || pin[2] != 0x2f + || pin[3] != 0xfd)) + { + elf_uncompress_failed (); + return 0; + } + + pin += 4; + + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + + hdr = *pin++; + + /* We expect a single frame. */ + if (unlikely ((hdr & (1 << 5)) == 0)) + { + elf_uncompress_failed (); + return 0; + } + /* Reserved bit must be zero. */ + if (unlikely ((hdr & (1 << 3)) != 0)) + { + elf_uncompress_failed (); + return 0; + } + /* We do not expect a dictionary. */ + if (unlikely ((hdr & 3) != 0)) + { + elf_uncompress_failed (); + return 0; + } + has_checksum = (hdr & (1 << 2)) != 0; + switch (hdr >> 6) + { + case 0: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = (uint64_t) *pin++; + break; + case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = (((uint64_t) pin[0]) | (((uint64_t) pin[1]) << 8)) + 256; + pin += 2; + break; + case 2: + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = ((uint64_t) pin[0] + | (((uint64_t) pin[1]) << 8) + | (((uint64_t) pin[2]) << 16) + | (((uint64_t) pin[3]) << 24)); + pin += 4; + break; + case 3: + if (unlikely (pin + 7 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = ((uint64_t) pin[0] + | (((uint64_t) pin[1]) << 8) + | (((uint64_t) pin[2]) << 16) + | (((uint64_t) pin[3]) << 24) + | (((uint64_t) pin[4]) << 32) + | (((uint64_t) pin[5]) << 40) + | (((uint64_t) pin[6]) << 48) + | (((uint64_t) pin[7]) << 56)); + pin += 8; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely (content_size != 
(size_t) content_size + || (size_t) content_size != sout)) + { + elf_uncompress_failed (); + return 0; + } + + last_block = 0; + while (!last_block) + { + uint32_t block_hdr; + int block_type; + uint32_t block_size; + + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + block_hdr = ((uint32_t) pin[0] + | (((uint32_t) pin[1]) << 8) + | (((uint32_t) pin[2]) << 16)); + pin += 3; + + last_block = block_hdr & 1; + block_type = (block_hdr >> 1) & 3; + block_size = block_hdr >> 3; + + switch (block_type) + { + case 0: + /* Raw_Block */ + if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) + { + elf_uncompress_failed (); + return 0; + } + memcpy (pout, pin, block_size); + pout += block_size; + pin += block_size; + break; + + case 1: + /* RLE_Block */ + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) + { + elf_uncompress_failed (); + return 0; + } + memset (pout, *pin, block_size); + pout += block_size; + pin++; + break; + + case 2: + { + const unsigned char *pblockend; + struct elf_zstd_literals literals; + unsigned char lit_hdr; + uint32_t lit_section_content; + uint32_t lit_compressed_size; + uint32_t lit_total_streams_size; + const unsigned char *plitend; + unsigned char *plitexp; + size_t litexp_count; + int lit_streams; + uint32_t stream_size_1; + unsigned char seq_hdr; + size_t seq_count; + size_t seq; + const unsigned char *pback; + uint64_t val; + unsigned int bits; + unsigned int literal_state; + unsigned int offset_state; + unsigned int match_state; + unsigned char stream_start; + + /* Compressed_Block */ + if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + + pblockend = pin + block_size; + + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + 
return 0; + } + lit_hdr = *pin; + ++pin; + + if ((lit_hdr & 3) == 0 || (lit_hdr & 3) == 1) + { + if ((lit_hdr & 3) == 0) + literals.type = ZSTD_LIT_RAW; + else + literals.type = ZSTD_LIT_RLE; + + /* Raw_literals_Block or RLE_Literals_Block */ + switch ((lit_hdr >> 2) & 3) + { + case 0: case 2: + literals.regenerated_size = lit_hdr >> 3; + break; + case 1: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + literals.regenerated_size = (lit_hdr >> 4) + ((*pin) << 4); + pin++; + break; + case 3: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + literals.regenerated_size = ((lit_hdr >> 4) + + (*pin << 4) + + (pin[1] << 12)); + pin += 2; + break; + default: + elf_uncompress_failed (); + return 0; + } + if (literals.type == ZSTD_LIT_RAW) + lit_section_content = literals.regenerated_size; + else + lit_section_content = 1; + lit_compressed_size = 0; + lit_streams = 1; + } + else + { + /* Compressed_Literals_Block or Treeless_Literals_Block */ + literals.type = ZSTD_LIT_HUFF; + switch ((lit_hdr >> 2) & 3) + { + case 0: case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + literals.regenerated_size = ((lit_hdr >> 4) + | ((*pin & 0x3f) << 4)); + lit_compressed_size = ((*pin >> 6) + | (pin[1] << 2)); + pin += 2; + lit_streams = ((lit_hdr >> 2) & 3) == 0 ? 
1 : 4; + break; + case 2: + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + literals.regenerated_size = ((lit_hdr >> 4) + | (*pin << 4) + | ((pin[1] & 3) << 12)); + lit_compressed_size = ((pin[1] >> 2) + | (pin[2] << 6)); + pin += 3; + lit_streams = 4; + break; + case 3: + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + literals.regenerated_size = ((lit_hdr >> 4) + | (*pin << 4) + | ((pin[1] & 0x3f) << 12)); + lit_compressed_size = ((pin[1] >> 6) + | (pin[2] << 2) + | (pin[3] << 10)); + pin += 4; + lit_streams = 4; + break; + default: + elf_uncompress_failed (); + return 0; + } + + lit_section_content = lit_compressed_size; + } + + if (unlikely ((size_t)lit_section_content > (size_t)(pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + plitend = pin + lit_section_content; + + lit_total_streams_size = lit_compressed_size; + if ((lit_hdr & 3) == 2) + { + /* Compressed_Literals_Block. Read Huffman tree. */ + + const unsigned char *ptable; + + ptable = pin; + if (!elf_zstd_read_huff (&ptable, pinend, scratch, + huffman_table, &huffman_table_bits)) + return 0; + literals.huffman_table = huffman_table; + literals.huffman_table_bits = huffman_table_bits; + + lit_total_streams_size -= ptable - pin; + pin = ptable; + } + else if ((lit_hdr & 3) == 3) + { + /* Treeless_Literals_Block. Reuse previous Huffman tree. */ + if (huffman_table_bits == 0) + { + elf_uncompress_failed (); + return 0; + } + literals.huffman_table = huffman_table; + literals.huffman_table_bits = huffman_table_bits; + } + else + { + literals.huffman_table = NULL; + literals.huffman_table_bits = 0; + } + + if (lit_streams == 1) + { + stream_size_1 = block_size; + literals.stream_off[0] = 0; + literals.stream_off[1] = 0; + literals.stream_off[2] = 0; + literals.stream_size[0] = 0; + literals.stream_size[1] = 0; + literals.stream_size[2] = 0; + } + else + { + uint32_t tot; + + /* Read jump table. 
*/ + if (unlikely (pin + 5 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + stream_size_1 = *pin | (pin[1] << 8); + pin += 2; + literals.stream_size[0] = *pin | (pin[1] << 8); + pin += 2; + literals.stream_size[1] = *pin | (pin[1] << 8); + pin += 2; + tot = (stream_size_1 + + literals.stream_size[0] + + literals.stream_size[1]); + if (unlikely (tot > lit_total_streams_size - 6)) + { + elf_uncompress_failed (); + return 0; + } + literals.stream_size[2] = lit_total_streams_size - 6 - tot; + + literals.stream_off[0] = stream_size_1; + literals.stream_off[1] = (literals.stream_off[0] + + literals.stream_size[0]); + literals.stream_off[2] = (literals.stream_off[1] + + literals.stream_size[1]); + } + + literals.plit = pin; + + if (literals.type == ZSTD_LIT_HUFF) + { + const unsigned char *plback; + + /* Set up the first huffman stream. */ + + literals.pbackend = literals.plit; + plback = literals.plit + stream_size_1; + literals.val = 0; + literals.bits = 0; + --plback; + stream_start = *plback; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + while ((((uintptr_t) plback) & 3) != 0) + { + literals.val <<= 8; + literals.val |= (uint64_t)*plback; + literals.bits += 8; + --plback; + } + literals.val <<= 8; + literals.val |= (uint64_t)*plback; + literals.bits += 8; + + if (!elf_fetch_bits_backward (&plback, literals.pbackend, + &literals.val, &literals.bits)) + return 0; + + literals.bits -= __builtin_clz (stream_start) - 24 + 1; + + literals.pback = plback; + } + else + { + literals.val = 0; + literals.bits = 0; + literals.pback = NULL; + literals.pbackend = NULL; + } + + /* We have read all the literal header information. The literal + data starts at LITERALS.PLIT. Skip ahead to the sequences. 
*/ + + pin = plitend; + + seq_hdr = *pin; + pin++; + if (seq_hdr < 128) + seq_count = seq_hdr; + else if (seq_hdr < 255) + { + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_count = ((seq_hdr - 128) << 8) + *pin; + pin++; + } + else + { + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_count = *pin + (pin[1] << 8) + 0x7f00; + pin += 2; + } + + if (seq_count > 0) + { + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_hdr = *pin; + ++pin; + + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 6) & 3, + &pin, pinend, + &elf_zstd_lit_table[0], 6, + scratch, 35, + literal_fse_table, 9, + &literal_decode)) + return 0; + + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 4) & 3, + &pin, pinend, + &elf_zstd_offset_table[0], 5, + scratch, 31, + offset_fse_table, 8, + &offset_decode)) + return 0; + + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 2) & 3, + &pin, pinend, + &elf_zstd_match_table[0], 6, + scratch, 52, + match_fse_table, 9, + &match_decode)) + return 0; + } + + /* Expand 2048 bytes of literals. The expanded literals are + recorded in PLITEXP and LITEXP_COUNT. 
*/ + + if (literals.type != ZSTD_LIT_HUFF + || literals.regenerated_size == 0) + { + plitexp = NULL; + litexp_count = 0; + } + else + { + plitexp = (unsigned char *)scratch; + litexp_count = ZSTD_TABLE_WORK_LIT_SIZE; + if (litexp_count > literals.regenerated_size) + litexp_count = literals.regenerated_size; + if (!elf_zstd_literal_output (&literals, litexp_count, + plitexp)) + return 0; + } + + pback = pblockend - 1; + val = 0; + bits = 0; + stream_start = *pback; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + while ((((uintptr_t)pback) & 3) != 0) + { + val <<= 8; + val |= (uint64_t)*pback; + bits += 8; + --pback; + } + val <<= 8; + val |= (uint64_t)*pback; + bits += 8; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + bits -= __builtin_clz (stream_start) - 24 + 1; + + if (unlikely (literal_decode.use_rle)) + literal_state = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= literal_decode.table_bits; + literal_state = ((val >> bits) + & ((1U << literal_decode.table_bits) - 1)); + } + + if (unlikely (offset_decode.use_rle)) + offset_state = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= offset_decode.table_bits; + offset_state = ((val >> bits) + & ((1U << offset_decode.table_bits) - 1)); + } + + if (unlikely (match_decode.use_rle)) + match_state = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= match_decode.table_bits; + match_state = ((val >> bits) + & ((1U << match_decode.table_bits) - 1)); + } + + seq = 0; + while (1) + { + uint32_t offset_base; + uint32_t need; + uint32_t add; + uint32_t offset; + uint32_t use_offset; + uint32_t match_base; + uint32_t match; + uint32_t literal_base; + uint32_t literal; + const struct elf_zstd_fse_entry *pt; + uint64_t v; + + if (unlikely (offset_decode.use_rle)) + offset_base = offset_decode.rle; + else + offset_base = 
offset_decode.table[offset_state].symbol; + + if (unlikely (match_decode.use_rle)) + match_base = match_decode.rle; + else + match_base = match_decode.table[match_state].symbol; + + if (unlikely (literal_decode.use_rle)) + literal_base = literal_decode.rle; + else + literal_base = literal_decode.table[literal_state].symbol; + + need = offset_base; + if (unlikely (need > 31)) + { + elf_uncompress_failed (); + return 0; + } + + /* elf_fetch_bits_backward only promises us 16 bits. */ + add = 0; + if (unlikely (need > 16)) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) return 0; - } - else - { - if (dist < 4) - dist = dist + 1; - else - { - unsigned int extra; + bits -= 16; + add = (val >> bits) & ((1U << 16) - 1); + need -= 16; + add <<= need; + } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add += (val >> bits) & ((1U << need) - 1); - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) - return 0; + offset = (1U << offset_base) + add; - /* This is an expression for the table of - distance codes in RFC 1951 3.2.5. */ - dist -= 4; - extra = (dist >> 1) + 1; - dist = (dist & 1) << extra; - dist += 5; - dist += ((1U << (extra - 1)) - 1) << 2; - dist += val & ((1U << extra) - 1); - val >>= extra; - bits -= extra; - } + if (match_base < ZSTD_MATCH_LENGTH_BASELINE_OFFSET) + match = match_base + 3; + else + { + unsigned int idx; + unsigned int baseline; - /* Go back dist bytes, and copy len bytes from - there. 
*/ + if (unlikely (match_base > 52)) + { + elf_uncompress_failed (); + return 0; + } - if (unlikely ((unsigned int) (pout - porigout) < dist)) - { - elf_uncompress_failed (); - return 0; - } + idx = match_base - ZSTD_MATCH_LENGTH_BASELINE_OFFSET; + baseline = elf_zstd_match_length_baseline[idx]; + need = elf_zstd_match_length_bits[idx]; - if (unlikely ((unsigned int) (poutend - pout) < len)) - { - elf_uncompress_failed (); - return 0; - } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add = (val >> bits) & ((1U << need) - 1); + + match = baseline + add; + } + + if (literal_base < ZSTD_LITERAL_LENGTH_BASELINE_OFFSET) + literal = literal_base; + else + { + unsigned int idx; + unsigned int baseline; + + if (unlikely (literal_base > 35)) + { + elf_uncompress_failed (); + return 0; + } - if (dist >= len) - { - memcpy (pout, pout - dist, len); - pout += len; - } - else - { - while (len > 0) - { - unsigned int copy; + idx = literal_base - ZSTD_LITERAL_LENGTH_BASELINE_OFFSET; + baseline = elf_zstd_literal_length_baseline[idx]; + need = elf_zstd_literal_length_bits[idx]; - copy = len < dist ? len : dist; - memcpy (pout, pout - dist, copy); - len -= copy; - pout += copy; - } - } - } - } - } - } - } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add = (val >> bits) & ((1U << need) - 1); - /* We should have filled the output buffer. 
*/ - if (unlikely (pout != poutend)) - { - elf_uncompress_failed (); - return 0; - } + literal = baseline + add; + } - return 1; -} + switch (offset) + { + case 0: + elf_uncompress_failed (); + return 0; + case 1: + if (literal == 0) + { + use_offset = repeated_offset2; + repeated_offset2 = repeated_offset1; + } + else + use_offset = repeated_offset1; + break; + case 2: + if (literal == 0) + { + use_offset = repeated_offset3; + repeated_offset3 = repeated_offset2; + } + else + use_offset = repeated_offset2; + repeated_offset2 = repeated_offset1; + break; + case 3: + if (literal == 0) + use_offset = repeated_offset1 - 1; + else + use_offset = repeated_offset3; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + break; + default: + use_offset = offset - 3; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + break; + } + + repeated_offset1 = use_offset; + + ++seq; + if (seq < seq_count) + { + /* Update the three states. */ + + if (unlikely (literal_decode.use_rle)) + ; + else + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + pt = &literal_decode.table[literal_state]; + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + literal_state = pt->base + v; + } + + if (unlikely (match_decode.use_rle)) + ; + else + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + pt = &match_decode.table[match_state]; + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + match_state = pt->base + v; + } + + if (unlikely (offset_decode.use_rle)) + ; + else + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + pt = &offset_decode.table[offset_state]; + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + offset_state = pt->base + v; + } + } + + /* The next sequence is now in LITERAL, USE_OFFSET, MATCH. */ + + if (literal > 0) + { + /* Copy LITERAL bytes from the literals. 
*/ + + if (unlikely ((size_t)(poutend - pout) < literal)) + { + elf_uncompress_failed (); + return 0; + } + + if (literal <= litexp_count) + { + memcpy (pout, plitexp, literal); + plitexp += literal; + litexp_count -= literal; + pout += literal; + } + else + { + if (litexp_count > 0) + { + memcpy (pout, plitexp, litexp_count); + pout += litexp_count; + literal -= litexp_count; + plitexp = NULL; + litexp_count = 0; + } + + if (literals.type != ZSTD_LIT_HUFF + || literal >= ZSTD_TABLE_WORK_LIT_SIZE) + { + if (!elf_zstd_literal_output (&literals, literal, + pout)) + return 0; + pout += literal; + literal = 0; + } + + if (literals.type != ZSTD_LIT_HUFF + || literals.regenerated_size == 0) + { + plitexp = NULL; + litexp_count = 0; + if (unlikely (literal > 0)) + { + elf_uncompress_failed (); + return 0; + } + } + else + { + plitexp = (unsigned char *)scratch; + litexp_count = ZSTD_TABLE_WORK_LIT_SIZE; + if (litexp_count > literals.regenerated_size) + litexp_count = literals.regenerated_size; + if (!elf_zstd_literal_output (&literals, + litexp_count, + plitexp)) + return 0; + + if (unlikely (literal > litexp_count)) + { + elf_uncompress_failed (); + return 0; + } + + memcpy (pout, plitexp, literal); + plitexp += literal; + litexp_count -= literal; + pout += literal; + } + } + } + + /* Copy MATCH bytes from the decoded output at USE_OFFSET. */ + + if (unlikely ((size_t)(poutend - pout) < match)) + { + elf_uncompress_failed (); + return 0; + } -/* Verify the zlib checksum. The checksum is in the 4 bytes at - CHECKBYTES, and the uncompressed data is at UNCOMPRESSED / - UNCOMPRESSED_SIZE. Returns 1 on success, 0 on failure. */ + if (match > 0) + { + if (unlikely ((size_t)(pout - poutstart) < use_offset)) + { + elf_uncompress_failed (); + return 0; + } + + if (use_offset >= match) + { + memcpy (pout, pout - use_offset, match); + pout += match; + } + else + { + while (match > 0) + { + uint32_t copy; + + copy = match < use_offset ? 
match : use_offset; + memcpy (pout, pout - use_offset, copy); + match -= copy; + pout += copy; + } + } + } + + if (unlikely (seq >= seq_count)) + { + size_t copy; + + /* Copy remaining literals. */ + if (litexp_count > 0) + { + if (unlikely ((size_t)(poutend - pout) < litexp_count)) + { + elf_uncompress_failed (); + return 0; + } + memcpy (pout, plitexp, litexp_count); + pout += litexp_count; + } + copy = literals.regenerated_size; + if (copy > 0) + { + if (unlikely ((size_t)(poutend - pout) < copy)) + { + elf_uncompress_failed (); + return 0; + } -static int -elf_zlib_verify_checksum (const unsigned char *checkbytes, - const unsigned char *uncompressed, - size_t uncompressed_size) -{ - unsigned int i; - unsigned int cksum; - const unsigned char *p; - uint32_t s1; - uint32_t s2; - size_t hsz; + if (!elf_zstd_literal_output (&literals, copy, pout)) + return 0; - cksum = 0; - for (i = 0; i < 4; i++) - cksum = (cksum << 8) | checkbytes[i]; + pout += copy; + } - s1 = 1; - s2 = 0; + break; + } + } - /* Minimize modulo operations. */ + pin = pblockend; + } + break; - p = uncompressed; - hsz = uncompressed_size; - while (hsz >= 5552) - { - for (i = 0; i < 5552; i += 16) - { - /* Manually unroll loop 16 times. */ - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; + case 3: + default: + elf_uncompress_failed (); + return 0; } - hsz -= 5552; - s1 %= 65521; - s2 %= 65521; } - while (hsz >= 16) + if (has_checksum) { - /* Manually unroll loop 16 times. 
*/ - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; - s1 = s1 + *p++; - s2 = s2 + s1; + if (unlikely (pin + 4 > pinend)) + { + elf_uncompress_failed (); + return 0; + } - hsz -= 16; - } + /* We don't currently verify the checksum. Currently running GNU ld with + --compress-debug-sections=zstd does not seem to generate a + checksum. */ - for (i = 0; i < hsz; ++i) - { - s1 = s1 + *p++; - s2 = s2 + s1; + pin += 4; } - s1 %= 65521; - s2 %= 65521; - - if (unlikely ((s2 << 16) + s1 != cksum)) + if (pin != pinend) { elf_uncompress_failed (); return 0; @@ -2527,20 +4738,8 @@ elf_zlib_verify_checksum (const unsigned char *checkbytes, return 1; } -/* Inflate a zlib stream from PIN/SIN to POUT/SOUT, and verify the - checksum. Return 1 on success, 0 on error. */ - -static int -elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, - uint16_t *zdebug_table, unsigned char *pout, - size_t sout) -{ - if (!elf_zlib_inflate (pin, sin, zdebug_table, pout, sout)) - return 0; - if (!elf_zlib_verify_checksum (pin + sin - 4, pout, sout)) - return 0; - return 1; -} +#define ZDEBUG_TABLE_SIZE \ + (ZLIB_TABLE_SIZE > ZSTD_TABLE_SIZE ? ZLIB_TABLE_SIZE : ZSTD_TABLE_SIZE) /* Uncompress the old compressed debug format, the one emitted by --compress-debug-sections=zlib-gnu. 
The compressed data is in @@ -2611,6 +4810,8 @@ elf_uncompress_chdr (struct backtrace_state *state, unsigned char **uncompressed, size_t *uncompressed_size) { const b_elf_chdr *chdr; + char *alc; + size_t alc_len; unsigned char *po; *uncompressed = NULL; @@ -2622,31 +4823,50 @@ elf_uncompress_chdr (struct backtrace_state *state, chdr = (const b_elf_chdr *) compressed; - if (chdr->ch_type != ELFCOMPRESS_ZLIB) - { - /* Unsupported compression algorithm. */ - return 1; - } - + alc = NULL; + alc_len = 0; if (*uncompressed != NULL && *uncompressed_size >= chdr->ch_size) po = *uncompressed; else { - po = (unsigned char *) backtrace_alloc (state, chdr->ch_size, - error_callback, data); - if (po == NULL) + alc_len = chdr->ch_size; + alc = backtrace_alloc (state, alc_len, error_callback, data); + if (alc == NULL) return 0; + po = (unsigned char *) alc; } - if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), - compressed_size - sizeof (b_elf_chdr), - zdebug_table, po, chdr->ch_size)) - return 1; + switch (chdr->ch_type) + { + case ELFCOMPRESS_ZLIB: + if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), + compressed_size - sizeof (b_elf_chdr), + zdebug_table, po, chdr->ch_size)) + goto skip; + break; + + case ELFCOMPRESS_ZSTD: + if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), + compressed_size - sizeof (b_elf_chdr), + (unsigned char *)zdebug_table, po, + chdr->ch_size)) + goto skip; + break; + + default: + /* Unsupported compression algorithm. */ + goto skip; + } *uncompressed = po; *uncompressed_size = chdr->ch_size; return 1; + + skip: + if (alc != NULL && alc_len > 0) + backtrace_free (state, alc, alc_len, error_callback, data); + return 1; } /* This function is a hook for testing the zlib support. It is only @@ -2675,6 +4895,31 @@ backtrace_uncompress_zdebug (struct backtrace_state *state, return ret; } +/* This function is a hook for testing the zstd support. It is only used by + tests. 
*/ + +int +backtrace_uncompress_zstd (struct backtrace_state *state, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback error_callback, + void *data, unsigned char *uncompressed, + size_t uncompressed_size) +{ + unsigned char *zdebug_table; + int ret; + + zdebug_table = ((unsigned char *) backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + error_callback, data)); + if (zdebug_table == NULL) + return 0; + ret = elf_zstd_decompress (compressed, compressed_size, + zdebug_table, uncompressed, uncompressed_size); + backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, + error_callback, data); + return ret; +} + /* Number of LZMA states. */ #define LZMA_STATES (12) @@ -4671,7 +6916,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (zdebug_table == NULL) { zdebug_table = ((uint16_t *) - backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + backtrace_alloc (state, ZLIB_TABLE_SIZE, error_callback, data)); if (zdebug_table == NULL) goto fail; @@ -4697,8 +6942,15 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } + if (zdebug_table != NULL) + { + backtrace_free (state, zdebug_table, ZLIB_TABLE_SIZE, + error_callback, data); + zdebug_table = NULL; + } + /* Uncompress the official ELF format - (--compress-debug-sections=zlib-gabi). */ + (--compress-debug-sections=zlib-gabi, --compress-debug-sections=zstd). */ for (i = 0; i < (int) DEBUG_MAX; ++i) { unsigned char *uncompressed_data; diff --git a/libbacktrace/internal.h b/libbacktrace/internal.h index 1e5b269fb3b..95e884558e4 100644 --- a/libbacktrace/internal.h +++ b/libbacktrace/internal.h @@ -368,6 +368,15 @@ extern int backtrace_uncompress_zdebug (struct backtrace_state *, unsigned char **uncompressed, size_t *uncompressed_size); +/* A test-only hook for elf_zstd_decompress. 
*/ + +extern int backtrace_uncompress_zstd (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char *uncompressed, + size_t uncompressed_size); + /* A test-only hook for elf_uncompress_lzma. */ extern int backtrace_uncompress_lzma (struct backtrace_state *, diff --git a/libbacktrace/zstdtest.c b/libbacktrace/zstdtest.c new file mode 100644 index 00000000000..fe31b157a41 --- /dev/null +++ b/libbacktrace/zstdtest.c @@ -0,0 +1,523 @@ +/* ztest.c -- Test for libbacktrace zstd code. + Copyright (C) 2022 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> + +#ifdef HAVE_ZSTD +#include <zstd.h> +#endif + +#include "backtrace.h" +#include "backtrace-supported.h" + +#include "internal.h" +#include "testlib.h" + +#ifndef HAVE_CLOCK_GETTIME + +typedef int xclockid_t; + +static int +xclock_gettime (xclockid_t id ATTRIBUTE_UNUSED, + struct timespec *ts ATTRIBUTE_UNUSED) +{ + errno = EINVAL; + return -1; +} + +#define clockid_t xclockid_t +#define clock_gettime xclock_gettime +#undef CLOCK_REALTIME +#define CLOCK_REALTIME 0 + +#endif /* !defined(HAVE_CLOCK_GETTIME) */ + +#ifdef CLOCK_PROCESS_CPUTIME_ID +#define ZSTD_CLOCK_GETTIME_ARG CLOCK_PROCESS_CPUTIME_ID +#else +#define ZSTD_CLOCK_GETTIME_ARG CLOCK_REALTIME +#endif + +/* Some tests for the local zstd inflation code. */ + +struct zstd_test +{ + const char *name; + const char *uncompressed; + size_t uncompressed_len; + const char *compressed; + size_t compressed_len; +}; + +/* Error callback. 
*/ + +static void +error_callback_compress (void *vdata ATTRIBUTE_UNUSED, const char *msg, + int errnum) +{ + fprintf (stderr, "%s", msg); + if (errnum > 0) + fprintf (stderr, ": %s", strerror (errnum)); + fprintf (stderr, "\n"); + exit (EXIT_FAILURE); +} + +static const struct zstd_test tests[] = +{ + { + "empty", + "", + 0, + "\x28\xb5\x2f\xfd\x24\x00\x01\x00\x00\x99\xe9\xd8\x51", + 13, + }, + { + "hello", + "hello, world\n", + 0, + ("\x28\xb5\x2f\xfd\x24\x0d\x69\x00\x00\x68\x65\x6c\x6c\x6f\x2c\x20" + "\x77\x6f\x72\x6c\x64\x0a\x4c\x1f\xf9\xf1"), + 26, + }, + { + "goodbye", + "goodbye, world", + 0, + ("\x28\xb5\x2f\xfd\x24\x0e\x71\x00\x00\x67\x6f\x6f\x64\x62\x79\x65" + "\x2c\x20\x77\x6f\x72\x6c\x64\x61\x7b\x4b\x83"), + 27, + }, + { + "ranges", + ("\xcc\x11\x00\x00\x00\x00\x00\x00\xd5\x13\x00\x00\x00\x00\x00\x00" + "\x1c\x14\x00\x00\x00\x00\x00\x00\x72\x14\x00\x00\x00\x00\x00\x00" + "\x9d\x14\x00\x00\x00\x00\x00\x00\xd5\x14\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\xfb\x12\x00\x00\x00\x00\x00\x00\x09\x13\x00\x00\x00\x00\x00\x00" + "\x0c\x13\x00\x00\x00\x00\x00\x00\xcb\x13\x00\x00\x00\x00\x00\x00" + "\x29\x14\x00\x00\x00\x00\x00\x00\x4e\x14\x00\x00\x00\x00\x00\x00" + "\x9d\x14\x00\x00\x00\x00\x00\x00\xd5\x14\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\xfb\x12\x00\x00\x00\x00\x00\x00\x09\x13\x00\x00\x00\x00\x00\x00" + "\x67\x13\x00\x00\x00\x00\x00\x00\xcb\x13\x00\x00\x00\x00\x00\x00" + "\x9d\x14\x00\x00\x00\x00\x00\x00\xd5\x14\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x5f\x0b\x00\x00\x00\x00\x00\x00\x6c\x0b\x00\x00\x00\x00\x00\x00" + "\x7d\x0b\x00\x00\x00\x00\x00\x00\x7e\x0c\x00\x00\x00\x00\x00\x00" + "\x38\x0f\x00\x00\x00\x00\x00\x00\x5c\x0f\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x83\x0c\x00\x00\x00\x00\x00\x00\xfa\x0c\x00\x00\x00\x00\x00\x00" + 
"\xfd\x0d\x00\x00\x00\x00\x00\x00\xef\x0e\x00\x00\x00\x00\x00\x00" + "\x14\x0f\x00\x00\x00\x00\x00\x00\x38\x0f\x00\x00\x00\x00\x00\x00" + "\x9f\x0f\x00\x00\x00\x00\x00\x00\xac\x0f\x00\x00\x00\x00\x00\x00" + "\xdb\x0f\x00\x00\x00\x00\x00\x00\xff\x0f\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\xfd\x0d\x00\x00\x00\x00\x00\x00\xd8\x0e\x00\x00\x00\x00\x00\x00" + "\x9f\x0f\x00\x00\x00\x00\x00\x00\xac\x0f\x00\x00\x00\x00\x00\x00" + "\xdb\x0f\x00\x00\x00\x00\x00\x00\xff\x0f\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\xfa\x0c\x00\x00\x00\x00\x00\x00\xea\x0d\x00\x00\x00\x00\x00\x00" + "\xef\x0e\x00\x00\x00\x00\x00\x00\x14\x0f\x00\x00\x00\x00\x00\x00" + "\x5c\x0f\x00\x00\x00\x00\x00\x00\x9f\x0f\x00\x00\x00\x00\x00\x00" + "\xac\x0f\x00\x00\x00\x00\x00\x00\xdb\x0f\x00\x00\x00\x00\x00\x00" + "\xff\x0f\x00\x00\x00\x00\x00\x00\x2c\x10\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x60\x11\x00\x00\x00\x00\x00\x00\xd1\x16\x00\x00\x00\x00\x00\x00" + "\x40\x0b\x00\x00\x00\x00\x00\x00\x2c\x10\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x7a\x00\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00" + "\x9f\x01\x00\x00\x00\x00\x00\x00\xa7\x01\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x7a\x00\x00\x00\x00\x00\x00\x00\xa9\x00\x00\x00\x00\x00\x00\x00" + "\x9f\x01\x00\x00\x00\x00\x00\x00\xa7\x01\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), + 672, + ("\x28\xb5\x2f\xfd\x64\xa0\x01\x2d\x05\x00\xc4\x04\xcc\x11\x00\xd5" + "\x13\x00\x1c\x14\x00\x72\x9d\xd5\xfb\x12\x00\x09\x0c\x13\xcb\x13" + "\x29\x4e\x67\x5f\x0b\x6c\x0b\x7d\x0b\x7e\x0c\x38\x0f\x5c\x0f\x83" + "\x0c\xfa\x0c\xfd\x0d\xef\x0e\x14\x38\x9f\x0f\xac\x0f\xdb\x0f\xff" + 
"\x0f\xd8\x9f\xac\xdb\xff\xea\x5c\x2c\x10\x60\xd1\x16\x40\x0b\x7a" + "\x00\xb6\x00\x9f\x01\xa7\x01\xa9\x36\x20\xa0\x83\x14\x34\x63\x4a" + "\x21\x70\x8c\x07\x46\x03\x4e\x10\x62\x3c\x06\x4e\xc8\x8c\xb0\x32" + "\x2a\x59\xad\xb2\xf1\x02\x82\x7c\x33\xcb\x92\x6f\x32\x4f\x9b\xb0" + "\xa2\x30\xf0\xc0\x06\x1e\x98\x99\x2c\x06\x1e\xd8\xc0\x03\x56\xd8" + "\xc0\x03\x0f\x6c\xe0\x01\xf1\xf0\xee\x9a\xc6\xc8\x97\x99\xd1\x6c" + "\xb4\x21\x45\x3b\x10\xe4\x7b\x99\x4d\x8a\x36\x64\x5c\x77\x08\x02" + "\xcb\xe0\xce"), + 179, + } +}; + +/* Test the hand coded samples. */ + +static void +test_samples (struct backtrace_state *state) +{ + size_t i; + + for (i = 0; i < sizeof tests / sizeof tests[0]; ++i) + { + unsigned char *uncompressed; + size_t uncompressed_len; + + uncompressed = (unsigned char *) malloc (tests[i].uncompressed_len); + if (uncompressed == NULL) + { + perror ("malloc"); + fprintf (stderr, "test %s: uncompress failed\n", tests[i].name); + ++failures; + continue; + } + + uncompressed_len = tests[i].uncompressed_len; + if (uncompressed_len == 0) + uncompressed_len = strlen (tests[i].uncompressed); + + if (!backtrace_uncompress_zstd (state, + ((const unsigned char *) + tests[i].compressed), + tests[i].compressed_len, + error_callback_compress, NULL, + uncompressed, uncompressed_len)) + { + fprintf (stderr, "test %s: uncompress failed\n", tests[i].name); + ++failures; + } + else + { + if (memcmp (tests[i].uncompressed, uncompressed, uncompressed_len) + != 0) + { + size_t j; + + fprintf (stderr, "test %s: uncompressed data mismatch\n", + tests[i].name); + for (j = 0; j < uncompressed_len; ++j) + if (tests[i].uncompressed[j] != uncompressed[j]) + fprintf (stderr, " %zu: got %#x want %#x\n", j, + uncompressed[j], tests[i].uncompressed[j]); + ++failures; + } + else + printf ("PASS: uncompress %s\n", tests[i].name); + } + + free (uncompressed); + } +} + +#ifdef HAVE_ZSTD + +/* Given a set of TRIALS timings, discard the lowest and highest + values and return the mean average of the 
rest. */ + +static size_t +average_time (const size_t *times, size_t trials) +{ + size_t imax; + size_t max; + size_t imin; + size_t min; + size_t i; + size_t sum; + + imin = 0; + imax = 0; + min = times[0]; + max = times[0]; + for (i = 1; i < trials; ++i) + { + if (times[i] < min) + { + imin = i; + min = times[i]; + } + if (times[i] > max) + { + imax = i; + max = times[i]; + } + } + + sum = 0; + for (i = 0; i < trials; ++i) + { + if (i != imax && i != imin) + sum += times[i]; + } + return sum / (trials - 2); +} + +#endif + +/* Test a larger text, if available. */ + +static void +test_large (struct backtrace_state *state ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_ZSTD + unsigned char *orig_buf; + size_t orig_bufsize; + size_t i; + char *compressed_buf; + size_t compressed_bufsize; + size_t compressed_size; + unsigned char *uncompressed_buf; + size_t r; + clockid_t cid; + struct timespec ts1; + struct timespec ts2; + size_t ctime; + size_t ztime; + const size_t trials = 16; + size_t ctimes[16]; + size_t ztimes[16]; + static const char * const names[] = { + "Isaac.Newton-Opticks.txt", + "../libgo/go/testdata/Isaac.Newton-Opticks.txt", + }; + + orig_buf = NULL; + orig_bufsize = 0; + uncompressed_buf = NULL; + compressed_buf = NULL; + + for (i = 0; i < sizeof names / sizeof names[0]; ++i) + { + size_t len; + char *namebuf; + FILE *e; + struct stat st; + char *rbuf; + size_t got; + + len = strlen (SRCDIR) + strlen (names[i]) + 2; + namebuf = malloc (len); + if (namebuf == NULL) + { + perror ("malloc"); + goto fail; + } + snprintf (namebuf, len, "%s/%s", SRCDIR, names[i]); + e = fopen (namebuf, "r"); + free (namebuf); + if (e == NULL) + continue; + if (fstat (fileno (e), &st) < 0) + { + perror ("fstat"); + fclose (e); + continue; + } + rbuf = malloc (st.st_size); + if (rbuf == NULL) + { + perror ("malloc"); + goto fail; + } + got = fread (rbuf, 1, st.st_size, e); + fclose (e); + if (got > 0) + { + orig_buf = (unsigned char *) rbuf; + orig_bufsize = got; + break; + } + free 
(rbuf); + } + + if (orig_buf == NULL) + { + /* We couldn't find an input file. */ + printf ("UNSUPPORTED: zstd large\n"); + return; + } + + compressed_bufsize = ZSTD_compressBound (orig_bufsize); + compressed_buf = malloc (compressed_bufsize); + if (compressed_buf == NULL) + { + perror ("malloc"); + goto fail; + } + + r = ZSTD_compress (compressed_buf, compressed_bufsize, + orig_buf, orig_bufsize, + ZSTD_CLEVEL_DEFAULT); + if (ZSTD_isError (r)) + { + fprintf (stderr, "zstd compress failed: %s\n", ZSTD_getErrorName (r)); + goto fail; + } + compressed_size = r; + + uncompressed_buf = malloc (orig_bufsize); + if (uncompressed_buf == NULL) + { + perror ("malloc"); + goto fail; + } + + if (!backtrace_uncompress_zstd (state, (unsigned char *) compressed_buf, + compressed_size, + error_callback_compress, NULL, + uncompressed_buf, orig_bufsize)) + { + fprintf (stderr, "zstd large: backtrace_uncompress_zstd failed\n"); + goto fail; + } + + if (memcmp (uncompressed_buf, orig_buf, orig_bufsize) != 0) + { + size_t j; + + fprintf (stderr, "zstd large: uncompressed data mismatch\n"); + for (j = 0; j < orig_bufsize; ++j) + if (orig_buf[j] != uncompressed_buf[j]) + fprintf (stderr, " %zu: got %#x want %#x\n", j, + uncompressed_buf[j], orig_buf[j]); + goto fail; + } + + printf ("PASS: zstd large\n"); + + for (i = 0; i < trials; ++i) + { + cid = ZSTD_CLOCK_GETTIME_ARG; + if (clock_gettime (cid, &ts1) < 0) + { + if (errno == EINVAL) + return; + perror ("clock_gettime"); + return; + } + + if (!backtrace_uncompress_zstd (state, + (unsigned char *) compressed_buf, + compressed_size, + error_callback_compress, NULL, + uncompressed_buf, + orig_bufsize)) + { + fprintf (stderr, + ("zstd large: " + "benchmark backtrace_uncompress_zstd failed\n")); + return; + } + + if (clock_gettime (cid, &ts2) < 0) + { + perror ("clock_gettime"); + return; + } + + ctime = (ts2.tv_sec - ts1.tv_sec) * 1000000000; + ctime += ts2.tv_nsec - ts1.tv_nsec; + ctimes[i] = ctime; + + if (clock_gettime (cid, &ts1) < 0) 
+ { + perror("clock_gettime"); + return; + } + + r = ZSTD_decompress (uncompressed_buf, orig_bufsize, + compressed_buf, compressed_size); + + if (clock_gettime (cid, &ts2) < 0) + { + perror ("clock_gettime"); + return; + } + + if (ZSTD_isError (r)) + { + fprintf (stderr, + "zstd large: benchmark zlib uncompress failed: %s\n", + ZSTD_getErrorName (r)); + return; + } + + ztime = (ts2.tv_sec - ts1.tv_sec) * 1000000000; + ztime += ts2.tv_nsec - ts1.tv_nsec; + ztimes[i] = ztime; + } + + /* Toss the highest and lowest times and average the rest. */ + ctime = average_time (ctimes, trials); + ztime = average_time (ztimes, trials); + + printf ("backtrace: %zu ns\n", ctime); + printf ("zstd : %zu ns\n", ztime); + printf ("ratio : %g\n", (double) ztime / (double) ctime); + + return; + + fail: + printf ("FAIL: zstd large\n"); + ++failures; + + if (orig_buf != NULL) + free (orig_buf); + if (compressed_buf != NULL) + free (compressed_buf); + if (uncompressed_buf != NULL) + free (uncompressed_buf); + +#else /* !HAVE_ZSTD */ + + printf ("UNSUPPORTED: zstd large\n"); + +#endif /* !HAVE_ZSTD */ +} + +int +main (int argc ATTRIBUTE_UNUSED, char **argv) +{ + struct backtrace_state *state; + + state = backtrace_create_state (argv[0], BACKTRACE_SUPPORTS_THREADS, + error_callback_create, NULL); + + test_samples (state); + test_large (state); + + exit (failures != 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: Add zstd support to libbacktrace 2022-12-08 0:22 Add zstd support to libbacktrace Ian Lance Taylor @ 2022-12-10 1:47 ` Ian Lance Taylor 2022-12-17 2:48 ` Ian Lance Taylor 0 siblings, 1 reply; 3+ messages in thread From: Ian Lance Taylor @ 2022-12-10 1:47 UTC (permalink / raw) To: gcc-patches [-- Attachment #1: Type: text/plain, Size: 1538 bytes --] On Wed, Dec 7, 2022 at 4:22 PM Ian Lance Taylor <iant@golang.org> wrote: > > This patch adds zstd support to libbacktrace, to support the new > linker option --compress-debug-sections=zstd. This patch rewrites and simplifies the main zstd decompression loop using some ideas from the reference implementation. This speeds it up a bit, although it still runs at about 35% of the speed of the reference implementation. Bootstrapped and ran libbacktrace tests on x86_64-pc-linux-gnu. Committed to mainline. Ian * elf.c (ZSTD_TABLE_*): Use elf_zstd_fse_baseline_entry. (ZSTD_ENCODE_BASELINE_BITS): Define. (ZSTD_DECODE_BASELINE, ZSTD_DECODE_BASEBITS): Define. (elf_zstd_literal_length_base): New static const array. (elf_zstd_match_length_base): Likewise. (struct elf_zstd_fse_baseline_entry): Define. (elf_zstd_make_literal_baseline_fse): New static function. (elf_zstd_make_offset_baseline_fse): Likewise. (elf_zstd_make_match_baseline_fse): Likewise. (print_table, main): Use elf_zstd_fse_baseline_entry. (elf_zstd_lit_table, elf_zstd_match_table): Likewise. (elf_zstd_offset_table): Likewise. (struct elf_zstd_seq_decode): Likewise. Remove use_rle and rle fields. (elf_zstd_unpack_seq_decode): Use elf_zstd_fse_baseline_entry, taking a conversion function. Convert RLE to FSE. (elf_zstd_literal_length_baseline): Remove. (elf_zstd_literal_length_bits): Remove. (elf_zstd_match_length_baseline): Remove. (elf_zstd_match_length_bits): Remove. (elf_zstd_decompress): Use elf_zstd_fse_baseline_entry. Rewrite and simplify main loop. 
[-- Attachment #2: patch.txt --] [-- Type: text/plain, Size: 39677 bytes --] diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c index 15e6f284db6..ece02db27f1 100644 --- a/libbacktrace/elf.c +++ b/libbacktrace/elf.c @@ -2610,9 +2610,9 @@ elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, } /* For working memory during zstd compression, we need - - a literal length FSE table: 512 32-bit values == 2048 bytes - - a match length FSE table: 512 32-bit values == 2048 bytes - - a offset FSE table: 256 32-bit values == 1024 bytes + - a literal length FSE table: 512 64-bit values == 4096 bytes + - a match length FSE table: 512 64-bit values == 4096 bytes + - a offset FSE table: 256 64-bit values == 2048 bytes - a Huffman tree: 2048 uint16_t values == 4096 bytes - scratch space, one of - to build an FSE table: 512 uint16_t values == 1024 bytes @@ -2620,21 +2620,24 @@ elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, - buffer for literal values == 2048 bytes */ -#define ZSTD_TABLE_SIZE \ - (2 * 512 * sizeof (struct elf_zstd_fse_entry) \ - + 256 * sizeof (struct elf_zstd_fse_entry) \ - + 2048 * sizeof (uint16_t) \ +#define ZSTD_TABLE_SIZE \ + (2 * 512 * sizeof (struct elf_zstd_fse_baseline_entry) \ + + 256 * sizeof (struct elf_zstd_fse_baseline_entry) \ + + 2048 * sizeof (uint16_t) \ + 2048) #define ZSTD_TABLE_LITERAL_FSE_OFFSET (0) -#define ZSTD_TABLE_MATCH_FSE_OFFSET (512 * sizeof (struct elf_zstd_fse_entry)) +#define ZSTD_TABLE_MATCH_FSE_OFFSET \ + (512 * sizeof (struct elf_zstd_fse_baseline_entry)) -#define ZSTD_TABLE_OFFSET_FSE_OFFSET \ - (ZSTD_TABLE_MATCH_FSE_OFFSET + 512 * sizeof (struct elf_zstd_fse_entry)) +#define ZSTD_TABLE_OFFSET_FSE_OFFSET \ + (ZSTD_TABLE_MATCH_FSE_OFFSET \ + + 512 * sizeof (struct elf_zstd_fse_baseline_entry)) -#define ZSTD_TABLE_HUFFMAN_OFFSET \ - (ZSTD_TABLE_OFFSET_FSE_OFFSET + 256 * sizeof (struct elf_zstd_fse_entry)) +#define ZSTD_TABLE_HUFFMAN_OFFSET \ + (ZSTD_TABLE_OFFSET_FSE_OFFSET \ + + 256 * sizeof 
(struct elf_zstd_fse_baseline_entry)) #define ZSTD_TABLE_WORK_OFFSET \ (ZSTD_TABLE_HUFFMAN_OFFSET + 2048 * sizeof (uint16_t)) @@ -2645,8 +2648,11 @@ elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, struct elf_zstd_fse_entry { + /* The value that this FSE entry represents. */ unsigned char symbol; + /* The number of bits to read to determine the next state. */ unsigned char bits; + /* Add the bits to this base to get the next state. */ uint16_t base; }; @@ -2925,6 +2931,270 @@ elf_zstd_build_fse (const int16_t *norm, int idx, uint16_t *next, return 1; } +/* Encode the baseline and bits into a single 32-bit value. */ + +#define ZSTD_ENCODE_BASELINE_BITS(baseline, basebits) \ + ((uint32_t)(baseline) | ((uint32_t)(basebits) << 24)) + +#define ZSTD_DECODE_BASELINE(baseline_basebits) \ + ((uint32_t)(baseline_basebits) & 0xffffff) + +#define ZSTD_DECODE_BASEBITS(baseline_basebits) \ + ((uint32_t)(baseline_basebits) >> 24) + +/* Given a literal length code, we need to read a number of bits and add that + to a baseline. For states 0 to 15 the baseline is the state and the number + of bits is zero. 
*/ + +#define ZSTD_LITERAL_LENGTH_BASELINE_OFFSET (16) + +static const uint32_t elf_zstd_literal_length_base[] = +{ + ZSTD_ENCODE_BASELINE_BITS(16, 1), + ZSTD_ENCODE_BASELINE_BITS(18, 1), + ZSTD_ENCODE_BASELINE_BITS(20, 1), + ZSTD_ENCODE_BASELINE_BITS(22, 1), + ZSTD_ENCODE_BASELINE_BITS(24, 2), + ZSTD_ENCODE_BASELINE_BITS(28, 2), + ZSTD_ENCODE_BASELINE_BITS(32, 3), + ZSTD_ENCODE_BASELINE_BITS(40, 3), + ZSTD_ENCODE_BASELINE_BITS(48, 4), + ZSTD_ENCODE_BASELINE_BITS(64, 6), + ZSTD_ENCODE_BASELINE_BITS(128, 7), + ZSTD_ENCODE_BASELINE_BITS(256, 8), + ZSTD_ENCODE_BASELINE_BITS(512, 9), + ZSTD_ENCODE_BASELINE_BITS(1024, 10), + ZSTD_ENCODE_BASELINE_BITS(2048, 11), + ZSTD_ENCODE_BASELINE_BITS(4096, 12), + ZSTD_ENCODE_BASELINE_BITS(8192, 13), + ZSTD_ENCODE_BASELINE_BITS(16384, 14), + ZSTD_ENCODE_BASELINE_BITS(32768, 15), + ZSTD_ENCODE_BASELINE_BITS(65536, 16) +}; + +/* The same applies to match length codes. For states 0 to 31 the baseline is + the state + 3 and the number of bits is zero. */ + +#define ZSTD_MATCH_LENGTH_BASELINE_OFFSET (32) + +static const uint32_t elf_zstd_match_length_base[] = +{ + ZSTD_ENCODE_BASELINE_BITS(35, 1), + ZSTD_ENCODE_BASELINE_BITS(37, 1), + ZSTD_ENCODE_BASELINE_BITS(39, 1), + ZSTD_ENCODE_BASELINE_BITS(41, 1), + ZSTD_ENCODE_BASELINE_BITS(43, 2), + ZSTD_ENCODE_BASELINE_BITS(47, 2), + ZSTD_ENCODE_BASELINE_BITS(51, 3), + ZSTD_ENCODE_BASELINE_BITS(59, 3), + ZSTD_ENCODE_BASELINE_BITS(67, 4), + ZSTD_ENCODE_BASELINE_BITS(83, 4), + ZSTD_ENCODE_BASELINE_BITS(99, 5), + ZSTD_ENCODE_BASELINE_BITS(131, 7), + ZSTD_ENCODE_BASELINE_BITS(259, 8), + ZSTD_ENCODE_BASELINE_BITS(515, 9), + ZSTD_ENCODE_BASELINE_BITS(1027, 10), + ZSTD_ENCODE_BASELINE_BITS(2051, 11), + ZSTD_ENCODE_BASELINE_BITS(4099, 12), + ZSTD_ENCODE_BASELINE_BITS(8195, 13), + ZSTD_ENCODE_BASELINE_BITS(16387, 14), + ZSTD_ENCODE_BASELINE_BITS(32771, 15), + ZSTD_ENCODE_BASELINE_BITS(65539, 16) +}; + +/* An entry in an FSE table used for literal/match/length values. 
For these we + have to map the symbol to a baseline value, and we have to read zero or more + bits and add that value to the baseline value. Rather than look the values + up in a separate table, we grow the FSE table so that we get better memory + caching. */ + +struct elf_zstd_fse_baseline_entry +{ + /* The baseline for the value that this FSE entry represents.. */ + uint32_t baseline; + /* The number of bits to read to add to the baseline. */ + unsigned char basebits; + /* The number of bits to read to determine the next state. */ + unsigned char bits; + /* Add the bits to this base to get the next state. */ + uint16_t base; +}; + +/* Convert the literal length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_literal_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. 
*/ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (symbol < ZSTD_LITERAL_LENGTH_BASELINE_OFFSET) + { + pbaseline->baseline = (uint32_t)symbol; + pbaseline->basebits = 0; + } + else + { + unsigned int idx; + uint32_t basebits; + + if (unlikely (symbol > 35)) + { + elf_uncompress_failed (); + return 0; + } + idx = symbol - ZSTD_LITERAL_LENGTH_BASELINE_OFFSET; + basebits = elf_zstd_literal_length_base[idx]; + pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); + pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); + } + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +/* Convert the offset length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_offset_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. */ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (unlikely (symbol > 31)) + { + elf_uncompress_failed (); + return 0; + } + + /* The simple way to write this is + + pbaseline->baseline = (uint32_t)1 << symbol; + pbaseline->basebits = symbol; + + That will give us an offset value that corresponds to the one + described in the RFC. However, for offset values > 3, we have to + subtract 3. 
And for offset values 1, 2, 3 we use a repeated offset. + The baseline is always a power of 2, and is never 0, so for these low + values we will see one entry that is baseline 1, basebits 0, and one + entry that is baseline 2, basebits 1. All other entries will have + baseline >= 4 and basebits >= 2. + + So we can check for RFC offset <= 3 by checking for basebits <= 1. + And that means that we can subtract 3 here and not worry about doing + it in the hot loop. */ + + pbaseline->baseline = (uint32_t)1 << symbol; + if (symbol >= 2) + pbaseline->baseline -= 3; + pbaseline->basebits = symbol; + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +/* Convert the match length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_match_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. 
*/ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (symbol < ZSTD_MATCH_LENGTH_BASELINE_OFFSET) + { + pbaseline->baseline = (uint32_t)symbol + 3; + pbaseline->basebits = 0; + } + else + { + unsigned int idx; + uint32_t basebits; + + if (unlikely (symbol > 52)) + { + elf_uncompress_failed (); + return 0; + } + idx = symbol - ZSTD_MATCH_LENGTH_BASELINE_OFFSET; + basebits = elf_zstd_match_length_base[idx]; + pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); + pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); + } + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + #ifdef BACKTRACE_GENERATE_ZSTD_FSE_TABLES /* Used to generate the predefined FSE decoding tables for zstd. */ @@ -2957,18 +3227,19 @@ static int16_t offset[29] = static uint16_t next[256]; static void -print_table (const struct elf_zstd_fse_entry *table, size_t size) +print_table (const struct elf_zstd_fse_baseline_entry *table, size_t size) { size_t i; printf ("{\n"); - for (i = 0; i < size; i += 4) + for (i = 0; i < size; i += 3) { int j; printf (" "); - for (j = 0; j < 4 && i + j < size; ++j) - printf (" { %d, %d, %d },", table[i + j].symbol, table[i + j].bits, + for (j = 0; j < 3 && i + j < size; ++j) + printf (" { %u, %d, %d, %d },", table[i + j].baseline, + table[i + j].basebits, table[i + j].bits, table[i + j].base); printf ("\n"); } @@ -2979,8 +3250,11 @@ int main () { struct elf_zstd_fse_entry lit_table[64]; + struct elf_zstd_fse_baseline_entry lit_baseline[64]; struct elf_zstd_fse_entry match_table[64]; + struct elf_zstd_fse_baseline_entry match_baseline[64]; struct elf_zstd_fse_entry offset_table[32]; + struct elf_zstd_fse_baseline_entry offset_baseline[32]; if (!elf_zstd_build_fse (lit, sizeof lit / sizeof lit[0], next, 6, 
lit_table)) @@ -2989,9 +3263,16 @@ main () exit (EXIT_FAILURE); } - printf ("static const struct elf_zstd_fse_entry " + if (!elf_zstd_make_literal_baseline_fse (lit_table, 6, lit_baseline)) + { + fprintf (stderr, "elf_zstd_make_literal_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " "elf_zstd_lit_table[64] =\n"); - print_table (lit_table, sizeof lit_table / sizeof lit_table[0]); + print_table (lit_baseline, + sizeof lit_baseline / sizeof lit_baseline[0]); printf ("\n"); if (!elf_zstd_build_fse (match, sizeof match / sizeof match[0], next, @@ -3001,9 +3282,16 @@ main () exit (EXIT_FAILURE); } - printf ("static const struct elf_zstd_fse_entry " + if (!elf_zstd_make_match_baseline_fse (match_table, 6, match_baseline)) + { + fprintf (stderr, "elf_zstd_make_match_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " "elf_zstd_match_table[64] =\n"); - print_table (match_table, sizeof match_table / sizeof match_table[0]); + print_table (match_baseline, + sizeof match_baseline / sizeof match_baseline[0]); printf ("\n"); if (!elf_zstd_build_fse (offset, sizeof offset / sizeof offset[0], next, @@ -3013,9 +3301,16 @@ main () exit (EXIT_FAILURE); } - printf ("static const struct elf_zstd_fse_entry " + if (!elf_zstd_make_offset_baseline_fse (offset_table, 5, offset_baseline)) + { + fprintf (stderr, "elf_zstd_make_offset_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " "elf_zstd_offset_table[32] =\n"); - print_table (offset_table, sizeof offset_table / sizeof offset_table[0]); + print_table (offset_baseline, + sizeof offset_baseline / sizeof offset_baseline[0]); printf ("\n"); return 0; @@ -3026,56 +3321,71 @@ main () /* The fixed tables generated by the #ifdef'ed out main function above. 
*/ -static const struct elf_zstd_fse_entry elf_zstd_lit_table[64] = +static const struct elf_zstd_fse_baseline_entry elf_zstd_lit_table[64] = { - { 0, 4, 0 }, { 0, 4, 16 }, { 1, 5, 32 }, { 3, 5, 0 }, - { 4, 5, 0 }, { 6, 5, 0 }, { 7, 5, 0 }, { 9, 5, 0 }, - { 10, 5, 0 }, { 12, 5, 0 }, { 14, 6, 0 }, { 16, 5, 0 }, - { 18, 5, 0 }, { 19, 5, 0 }, { 21, 5, 0 }, { 22, 5, 0 }, - { 24, 5, 0 }, { 25, 5, 32 }, { 26, 5, 0 }, { 27, 6, 0 }, - { 29, 6, 0 }, { 31, 6, 0 }, { 0, 4, 32 }, { 1, 4, 0 }, - { 2, 5, 0 }, { 4, 5, 32 }, { 5, 5, 0 }, { 7, 5, 32 }, - { 8, 5, 0 }, { 10, 5, 32 }, { 11, 5, 0 }, { 13, 6, 0 }, - { 16, 5, 32 }, { 17, 5, 0 }, { 19, 5, 32 }, { 20, 5, 0 }, - { 22, 5, 32 }, { 23, 5, 0 }, { 25, 4, 0 }, { 25, 4, 16 }, - { 26, 5, 32 }, { 28, 6, 0 }, { 30, 6, 0 }, { 0, 4, 48 }, - { 1, 4, 16 }, { 2, 5, 32 }, { 3, 5, 32 }, { 5, 5, 32 }, - { 6, 5, 32 }, { 8, 5, 32 }, { 9, 5, 32 }, { 11, 5, 32 }, - { 12, 5, 32 }, { 15, 6, 0 }, { 17, 5, 32 }, { 18, 5, 32 }, - { 20, 5, 32 }, { 21, 5, 32 }, { 23, 5, 32 }, { 24, 5, 32 }, - { 35, 6, 0 }, { 34, 6, 0 }, { 33, 6, 0 }, { 32, 6, 0 }, + { 0, 0, 4, 0 }, { 0, 0, 4, 16 }, { 1, 0, 5, 32 }, + { 3, 0, 5, 0 }, { 4, 0, 5, 0 }, { 6, 0, 5, 0 }, + { 7, 0, 5, 0 }, { 9, 0, 5, 0 }, { 10, 0, 5, 0 }, + { 12, 0, 5, 0 }, { 14, 0, 6, 0 }, { 16, 1, 5, 0 }, + { 20, 1, 5, 0 }, { 22, 1, 5, 0 }, { 28, 2, 5, 0 }, + { 32, 3, 5, 0 }, { 48, 4, 5, 0 }, { 64, 6, 5, 32 }, + { 128, 7, 5, 0 }, { 256, 8, 6, 0 }, { 1024, 10, 6, 0 }, + { 4096, 12, 6, 0 }, { 0, 0, 4, 32 }, { 1, 0, 4, 0 }, + { 2, 0, 5, 0 }, { 4, 0, 5, 32 }, { 5, 0, 5, 0 }, + { 7, 0, 5, 32 }, { 8, 0, 5, 0 }, { 10, 0, 5, 32 }, + { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 1, 5, 32 }, + { 18, 1, 5, 0 }, { 22, 1, 5, 32 }, { 24, 2, 5, 0 }, + { 32, 3, 5, 32 }, { 40, 3, 5, 0 }, { 64, 6, 4, 0 }, + { 64, 6, 4, 16 }, { 128, 7, 5, 32 }, { 512, 9, 6, 0 }, + { 2048, 11, 6, 0 }, { 0, 0, 4, 48 }, { 1, 0, 4, 16 }, + { 2, 0, 5, 32 }, { 3, 0, 5, 32 }, { 5, 0, 5, 32 }, + { 6, 0, 5, 32 }, { 8, 0, 5, 32 }, { 9, 0, 5, 32 }, + { 11, 0, 
5, 32 }, { 12, 0, 5, 32 }, { 15, 0, 6, 0 }, + { 18, 1, 5, 32 }, { 20, 1, 5, 32 }, { 24, 2, 5, 32 }, + { 28, 2, 5, 32 }, { 40, 3, 5, 32 }, { 48, 4, 5, 32 }, + { 65536, 16, 6, 0 }, { 32768, 15, 6, 0 }, { 16384, 14, 6, 0 }, + { 8192, 13, 6, 0 }, }; -static const struct elf_zstd_fse_entry elf_zstd_match_table[64] = +static const struct elf_zstd_fse_baseline_entry elf_zstd_match_table[64] = { - { 0, 6, 0 }, { 1, 4, 0 }, { 2, 5, 32 }, { 3, 5, 0 }, - { 5, 5, 0 }, { 6, 5, 0 }, { 8, 5, 0 }, { 10, 6, 0 }, - { 13, 6, 0 }, { 16, 6, 0 }, { 19, 6, 0 }, { 22, 6, 0 }, - { 25, 6, 0 }, { 28, 6, 0 }, { 31, 6, 0 }, { 33, 6, 0 }, - { 35, 6, 0 }, { 37, 6, 0 }, { 39, 6, 0 }, { 41, 6, 0 }, - { 43, 6, 0 }, { 45, 6, 0 }, { 1, 4, 16 }, { 2, 4, 0 }, - { 3, 5, 32 }, { 4, 5, 0 }, { 6, 5, 32 }, { 7, 5, 0 }, - { 9, 6, 0 }, { 12, 6, 0 }, { 15, 6, 0 }, { 18, 6, 0 }, - { 21, 6, 0 }, { 24, 6, 0 }, { 27, 6, 0 }, { 30, 6, 0 }, - { 32, 6, 0 }, { 34, 6, 0 }, { 36, 6, 0 }, { 38, 6, 0 }, - { 40, 6, 0 }, { 42, 6, 0 }, { 44, 6, 0 }, { 1, 4, 32 }, - { 1, 4, 48 }, { 2, 4, 16 }, { 4, 5, 32 }, { 5, 5, 32 }, - { 7, 5, 32 }, { 8, 5, 32 }, { 11, 6, 0 }, { 14, 6, 0 }, - { 17, 6, 0 }, { 20, 6, 0 }, { 23, 6, 0 }, { 26, 6, 0 }, - { 29, 6, 0 }, { 52, 6, 0 }, { 51, 6, 0 }, { 50, 6, 0 }, - { 49, 6, 0 }, { 48, 6, 0 }, { 47, 6, 0 }, { 46, 6, 0 }, + { 3, 0, 6, 0 }, { 4, 0, 4, 0 }, { 5, 0, 5, 32 }, + { 6, 0, 5, 0 }, { 8, 0, 5, 0 }, { 9, 0, 5, 0 }, + { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 0, 6, 0 }, + { 19, 0, 6, 0 }, { 22, 0, 6, 0 }, { 25, 0, 6, 0 }, + { 28, 0, 6, 0 }, { 31, 0, 6, 0 }, { 34, 0, 6, 0 }, + { 37, 1, 6, 0 }, { 41, 1, 6, 0 }, { 47, 2, 6, 0 }, + { 59, 3, 6, 0 }, { 83, 4, 6, 0 }, { 131, 7, 6, 0 }, + { 515, 9, 6, 0 }, { 4, 0, 4, 16 }, { 5, 0, 4, 0 }, + { 6, 0, 5, 32 }, { 7, 0, 5, 0 }, { 9, 0, 5, 32 }, + { 10, 0, 5, 0 }, { 12, 0, 6, 0 }, { 15, 0, 6, 0 }, + { 18, 0, 6, 0 }, { 21, 0, 6, 0 }, { 24, 0, 6, 0 }, + { 27, 0, 6, 0 }, { 30, 0, 6, 0 }, { 33, 0, 6, 0 }, + { 35, 1, 6, 0 }, { 39, 1, 6, 0 }, { 43, 2, 6, 0 }, + { 
51, 3, 6, 0 }, { 67, 4, 6, 0 }, { 99, 5, 6, 0 }, + { 259, 8, 6, 0 }, { 4, 0, 4, 32 }, { 4, 0, 4, 48 }, + { 5, 0, 4, 16 }, { 7, 0, 5, 32 }, { 8, 0, 5, 32 }, + { 10, 0, 5, 32 }, { 11, 0, 5, 32 }, { 14, 0, 6, 0 }, + { 17, 0, 6, 0 }, { 20, 0, 6, 0 }, { 23, 0, 6, 0 }, + { 26, 0, 6, 0 }, { 29, 0, 6, 0 }, { 32, 0, 6, 0 }, + { 65539, 16, 6, 0 }, { 32771, 15, 6, 0 }, { 16387, 14, 6, 0 }, + { 8195, 13, 6, 0 }, { 4099, 12, 6, 0 }, { 2051, 11, 6, 0 }, + { 1027, 10, 6, 0 }, }; -static const struct elf_zstd_fse_entry elf_zstd_offset_table[32] = +static const struct elf_zstd_fse_baseline_entry elf_zstd_offset_table[32] = { - { 0, 5, 0 }, { 6, 4, 0 }, { 9, 5, 0 }, { 15, 5, 0 }, - { 21, 5, 0 }, { 3, 5, 0 }, { 7, 4, 0 }, { 12, 5, 0 }, - { 18, 5, 0 }, { 23, 5, 0 }, { 5, 5, 0 }, { 8, 4, 0 }, - { 14, 5, 0 }, { 20, 5, 0 }, { 2, 5, 0 }, { 7, 4, 16 }, - { 11, 5, 0 }, { 17, 5, 0 }, { 22, 5, 0 }, { 4, 5, 0 }, - { 8, 4, 16 }, { 13, 5, 0 }, { 19, 5, 0 }, { 1, 5, 0 }, - { 6, 4, 16 }, { 10, 5, 0 }, { 16, 5, 0 }, { 28, 5, 0 }, - { 27, 5, 0 }, { 26, 5, 0 }, { 25, 5, 0 }, { 24, 5, 0 }, + { 1, 0, 5, 0 }, { 64, 6, 4, 0 }, { 512, 9, 5, 0 }, + { 32768, 15, 5, 0 }, { 2097152, 21, 5, 0 }, { 8, 3, 5, 0 }, + { 128, 7, 4, 0 }, { 4096, 12, 5, 0 }, { 262144, 18, 5, 0 }, + { 8388608, 23, 5, 0 }, { 32, 5, 5, 0 }, { 256, 8, 4, 0 }, + { 16384, 14, 5, 0 }, { 1048576, 20, 5, 0 }, { 4, 2, 5, 0 }, + { 128, 7, 4, 16 }, { 2048, 11, 5, 0 }, { 131072, 17, 5, 0 }, + { 4194304, 22, 5, 0 }, { 16, 4, 5, 0 }, { 256, 8, 4, 16 }, + { 8192, 13, 5, 0 }, { 524288, 19, 5, 0 }, { 2, 1, 5, 0 }, + { 64, 6, 4, 16 }, { 1024, 10, 5, 0 }, { 65536, 16, 5, 0 }, + { 268435456, 28, 5, 0 }, { 134217728, 27, 5, 0 }, { 67108864, 26, 5, 0 }, + { 33554432, 25, 5, 0 }, { 16777216, 24, 5, 0 }, }; /* Read a zstd Huffman table and build the decoding table in *TABLE, reading @@ -3397,10 +3707,8 @@ elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, struct elf_zstd_seq_decode { - const struct elf_zstd_fse_entry *table; + const 
struct elf_zstd_fse_baseline_entry *table; int table_bits; - int use_rle; - unsigned char rle; }; /* Unpack a sequence code compression mode. */ @@ -3409,43 +3717,62 @@ static int elf_zstd_unpack_seq_decode (int mode, const unsigned char **ppin, const unsigned char *pinend, - const struct elf_zstd_fse_entry *predefined, - int predefined_bits, uint16_t *scratch, - int maxidx, struct elf_zstd_fse_entry *fse_table, - int fse_table_bits, + const struct elf_zstd_fse_baseline_entry *predef, + int predef_bits, + uint16_t *scratch, + int maxidx, + struct elf_zstd_fse_baseline_entry *table, + int table_bits, + int (*conv)(const struct elf_zstd_fse_entry *, + int, + struct elf_zstd_fse_baseline_entry *), struct elf_zstd_seq_decode *decode) { switch (mode) { case 0: - decode->table = predefined; - decode->table_bits = predefined_bits; - decode->use_rle = 0; + decode->table = predef; + decode->table_bits = predef_bits; break; case 1: - if (unlikely (*ppin >= pinend)) - { - elf_uncompress_failed (); + { + struct elf_zstd_fse_entry entry; + + if (unlikely (*ppin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + entry.symbol = **ppin; + ++*ppin; + entry.bits = 0; + entry.base = 0; + decode->table_bits = 0; + if (!conv (&entry, 0, table)) return 0; - } - decode->use_rle = 1; - decode->rle = **ppin; - decode->table_bits = 0; - ++*ppin; + } break; case 2: - decode->table_bits = fse_table_bits; - if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, - &decode->table_bits)) - return 0; - decode->table = fse_table; - decode->use_rle = 0; + { + struct elf_zstd_fse_entry *fse_table; + + /* We use the same space for the simple FSE table and the baseline + table. 
*/ + fse_table = (struct elf_zstd_fse_entry *)table; + decode->table_bits = table_bits; + if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, + &decode->table_bits)) + return 0; + if (!conv (fse_table, decode->table_bits, table)) + return 0; + decode->table = table; + } break; case 3: - if (unlikely (decode->table_bits == 0 && !decode->use_rle)) + if (unlikely (decode->table_bits == -1)) { elf_uncompress_failed (); return 0; @@ -3703,39 +4030,6 @@ elf_zstd_literal_output (struct elf_zstd_literals *literals, return 1; } -/* Given a literal length code, we need to read a number of bits and add that - to a baseline. For states 0 to 15 the baseline is the state and the number - of bits is zero. */ - -#define ZSTD_LITERAL_LENGTH_BASELINE_OFFSET (16) - -static const uint32_t elf_zstd_literal_length_baseline[] = -{ - 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 128, 256, 512, - 1024, 2048, 4096, 8192, 16384, 32768, 65536 -}; - -static const unsigned char elf_zstd_literal_length_bits[] = -{ - 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -}; - -/* The same applies to match length codes. For states 0 to 31 the baseline is - the state + 3 and the number of bits is zero. */ - -#define ZSTD_MATCH_LENGTH_BASELINE_OFFSET (32) - -static const uint32_t elf_zstd_match_length_baseline[] = -{ - 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 131, 259, 515, - 1027, 2051, 4099, 8195, 16387, 32771, 65539 -}; - -static const unsigned char elf_zstd_match_length_bits[] = -{ - 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -}; - /* Decompress a zstd stream from PIN/SIN to POUT/SOUT. Code based on RFC 8878. Return 1 on success, 0 on error. 
*/ @@ -3748,11 +4042,11 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, unsigned char *poutstart; unsigned char *poutend; struct elf_zstd_seq_decode literal_decode; - struct elf_zstd_fse_entry *literal_fse_table; + struct elf_zstd_fse_baseline_entry *literal_fse_table; struct elf_zstd_seq_decode match_decode; - struct elf_zstd_fse_entry *match_fse_table; + struct elf_zstd_fse_baseline_entry *match_fse_table; struct elf_zstd_seq_decode offset_decode; - struct elf_zstd_fse_entry *offset_fse_table; + struct elf_zstd_fse_baseline_entry *offset_fse_table; uint16_t *huffman_table; int huffman_table_bits; uint32_t repeated_offset1; @@ -3769,21 +4063,18 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, poutend = pout + sout; literal_decode.table = NULL; - literal_decode.table_bits = 0; - literal_decode.use_rle = 0; - literal_fse_table = ((struct elf_zstd_fse_entry *) + literal_decode.table_bits = -1; + literal_fse_table = ((struct elf_zstd_fse_baseline_entry *) (zdebug_table + ZSTD_TABLE_LITERAL_FSE_OFFSET)); match_decode.table = NULL; - match_decode.table_bits = 0; - match_decode.use_rle = 0; - match_fse_table = ((struct elf_zstd_fse_entry *) + match_decode.table_bits = -1; + match_fse_table = ((struct elf_zstd_fse_baseline_entry *) (zdebug_table + ZSTD_TABLE_MATCH_FSE_OFFSET)); offset_decode.table = NULL; - offset_decode.table_bits = 0; - offset_decode.use_rle = 0; - offset_fse_table = ((struct elf_zstd_fse_entry *) + offset_decode.table_bits = -1; + offset_fse_table = ((struct elf_zstd_fse_baseline_entry *) (zdebug_table + ZSTD_TABLE_OFFSET_FSE_OFFSET)); huffman_table = ((uint16_t *) (zdebug_table + ZSTD_TABLE_HUFFMAN_OFFSET)); @@ -4259,6 +4550,9 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, if (seq_count > 0) { + int (*pfn)(const struct elf_zstd_fse_entry *, + int, struct elf_zstd_fse_baseline_entry *); + if (unlikely (pin >= pinend)) { elf_uncompress_failed (); @@ -4267,27 +4561,30 @@ elf_zstd_decompress (const unsigned char 
*pin, size_t sin, seq_hdr = *pin; ++pin; + pfn = elf_zstd_make_literal_baseline_fse; if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 6) & 3, &pin, pinend, &elf_zstd_lit_table[0], 6, scratch, 35, - literal_fse_table, 9, + literal_fse_table, 9, pfn, &literal_decode)) return 0; + pfn = elf_zstd_make_offset_baseline_fse; if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 4) & 3, &pin, pinend, &elf_zstd_offset_table[0], 5, scratch, 31, - offset_fse_table, 8, + offset_fse_table, 8, pfn, &offset_decode)) return 0; + pfn = elf_zstd_make_match_baseline_fse; if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 2) & 3, &pin, pinend, &elf_zstd_match_table[0], 6, scratch, 52, - match_fse_table, 9, + match_fse_table, 9, pfn, &match_decode)) return 0; } @@ -4337,77 +4634,53 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, bits -= __builtin_clz (stream_start) - 24 + 1; - if (unlikely (literal_decode.use_rle)) - literal_state = 0; - else - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= literal_decode.table_bits; - literal_state = ((val >> bits) - & ((1U << literal_decode.table_bits) - 1)); - } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= literal_decode.table_bits; + literal_state = ((val >> bits) + & ((1U << literal_decode.table_bits) - 1)); - if (unlikely (offset_decode.use_rle)) - offset_state = 0; - else - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= offset_decode.table_bits; - offset_state = ((val >> bits) - & ((1U << offset_decode.table_bits) - 1)); - } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= offset_decode.table_bits; + offset_state = ((val >> bits) + & ((1U << offset_decode.table_bits) - 1)); - if (unlikely (match_decode.use_rle)) - match_state = 0; - else - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= match_decode.table_bits; - match_state = ((val >> bits) - & ((1U << match_decode.table_bits) 
- 1)); - } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= match_decode.table_bits; + match_state = ((val >> bits) + & ((1U << match_decode.table_bits) - 1)); seq = 0; while (1) { + const struct elf_zstd_fse_baseline_entry *pt; + uint32_t offset_basebits; + uint32_t offset_baseline; + uint32_t offset_bits; uint32_t offset_base; - uint32_t need; - uint32_t add; uint32_t offset; - uint32_t use_offset; + uint32_t match_baseline; + uint32_t match_bits; uint32_t match_base; uint32_t match; + uint32_t literal_baseline; + uint32_t literal_bits; uint32_t literal_base; uint32_t literal; - const struct elf_zstd_fse_entry *pt; - uint64_t v; - - if (unlikely (offset_decode.use_rle)) - offset_base = offset_decode.rle; - else - offset_base = offset_decode.table[offset_state].symbol; - - if (unlikely (match_decode.use_rle)) - match_base = match_decode.rle; - else - match_base = match_decode.table[match_state].symbol; - - if (unlikely (literal_decode.use_rle)) - literal_base = literal_decode.rle; - else - literal_base = literal_decode.table[literal_state].symbol; + uint32_t need; + uint32_t add; - need = offset_base; - if (unlikely (need > 31)) - { - elf_uncompress_failed (); - return 0; - } + pt = &offset_decode.table[offset_state]; + offset_basebits = pt->basebits; + offset_baseline = pt->baseline; + offset_bits = pt->bits; + offset_base = pt->base; - /* elf_fetch_bits_backward only promises us 16 bits. */ + /* This case can be more than 16 bits, which is all that + elf_fetch_bits_backward promises. 
*/ + need = offset_basebits; add = 0; if (unlikely (need > 16)) { @@ -4418,147 +4691,122 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, need -= 16; add <<= need; } - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - bits -= need; - add += (val >> bits) & ((1U << need) - 1); - - offset = (1U << offset_base) + add; - - if (match_base < ZSTD_MATCH_LENGTH_BASELINE_OFFSET) - match = match_base + 3; - else + if (need > 0) { - unsigned int idx; - unsigned int baseline; + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add += (val >> bits) & ((1U << need) - 1); + } - if (unlikely (match_base > 52)) - { - elf_uncompress_failed (); - return 0; - } + offset = offset_baseline + add; - idx = match_base - ZSTD_MATCH_LENGTH_BASELINE_OFFSET; - baseline = elf_zstd_match_length_baseline[idx]; - need = elf_zstd_match_length_bits[idx]; + pt = &match_decode.table[match_state]; + need = pt->basebits; + match_baseline = pt->baseline; + match_bits = pt->bits; + match_base = pt->base; + add = 0; + if (need > 0) + { if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) return 0; bits -= need; add = (val >> bits) & ((1U << need) - 1); - - match = baseline + add; } - if (literal_base < ZSTD_LITERAL_LENGTH_BASELINE_OFFSET) - literal = literal_base; - else - { - unsigned int idx; - unsigned int baseline; - - if (unlikely (literal_base > 35)) - { - elf_uncompress_failed (); - return 0; - } + match = match_baseline + add; - idx = literal_base - ZSTD_LITERAL_LENGTH_BASELINE_OFFSET; - baseline = elf_zstd_literal_length_baseline[idx]; - need = elf_zstd_literal_length_bits[idx]; + pt = &literal_decode.table[literal_state]; + need = pt->basebits; + literal_baseline = pt->baseline; + literal_bits = pt->bits; + literal_base = pt->base; + add = 0; + if (need > 0) + { if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) return 0; bits -= need; add = (val >> bits) & ((1U << need) - 1); - - literal = baseline + add; } - switch 
(offset) + literal = literal_baseline + add; + + /* See the comment in elf_zstd_make_offset_baseline_fse. */ + if (offset_basebits > 1) { - case 0: - elf_uncompress_failed (); - return 0; - case 1: - if (literal == 0) + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + } + else + { + if (unlikely (literal == 0)) + ++offset; + switch (offset) { - use_offset = repeated_offset2; + case 1: + offset = repeated_offset1; + break; + case 2: + offset = repeated_offset2; repeated_offset2 = repeated_offset1; - } - else - use_offset = repeated_offset1; - break; - case 2: - if (literal == 0) - { - use_offset = repeated_offset3; + repeated_offset1 = offset; + break; + case 3: + offset = repeated_offset3; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + case 4: + offset = repeated_offset1 - 1; repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; } - else - use_offset = repeated_offset2; - repeated_offset2 = repeated_offset1; - break; - case 3: - if (literal == 0) - use_offset = repeated_offset1 - 1; - else - use_offset = repeated_offset3; - repeated_offset3 = repeated_offset2; - repeated_offset2 = repeated_offset1; - break; - default: - use_offset = offset - 3; - repeated_offset3 = repeated_offset2; - repeated_offset2 = repeated_offset1; - break; } - repeated_offset1 = use_offset; - ++seq; if (seq < seq_count) { + uint32_t v; + /* Update the three states. 
*/ - if (unlikely (literal_decode.use_rle)) - ; - else - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - pt = &literal_decode.table[literal_state]; - bits -= pt->bits; - v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); - literal_state = pt->base + v; - } + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; - if (unlikely (match_decode.use_rle)) - ; - else - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - pt = &match_decode.table[match_state]; - bits -= pt->bits; - v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); - match_state = pt->base + v; - } + need = literal_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); - if (unlikely (offset_decode.use_rle)) - ; - else - { - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; - pt = &offset_decode.table[offset_state]; - bits -= pt->bits; - v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); - offset_state = pt->base + v; - } + literal_state = literal_base + v; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = match_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + match_state = match_base + v; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = offset_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + offset_state = offset_base + v; } - /* The next sequence is now in LITERAL, USE_OFFSET, MATCH. */ + /* The next sequence is now in LITERAL, OFFSET, MATCH. 
*/ if (literal > 0) { @@ -4570,7 +4818,14 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, return 0; } - if (literal <= litexp_count) + if (literals.type != ZSTD_LIT_HUFF) + { + if (!elf_zstd_literal_output (&literals, literal, + pout)) + return 0; + pout += literal; + } + else if (literal <= litexp_count) { memcpy (pout, plitexp, literal); plitexp += literal; @@ -4579,17 +4834,12 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, } else { - if (litexp_count > 0) - { - memcpy (pout, plitexp, litexp_count); - pout += litexp_count; - literal -= litexp_count; - plitexp = NULL; - litexp_count = 0; - } + memcpy (pout, plitexp, litexp_count); + pout += litexp_count; + literal -= litexp_count; + litexp_count = 0; - if (literals.type != ZSTD_LIT_HUFF - || literal >= ZSTD_TABLE_WORK_LIT_SIZE) + if (unlikely (literal >= ZSTD_TABLE_WORK_LIT_SIZE)) { if (!elf_zstd_literal_output (&literals, literal, pout)) @@ -4598,61 +4848,47 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, literal = 0; } - if (literals.type != ZSTD_LIT_HUFF - || literals.regenerated_size == 0) + plitexp = (unsigned char *)scratch; + litexp_count = ZSTD_TABLE_WORK_LIT_SIZE; + if (litexp_count > literals.regenerated_size) + litexp_count = literals.regenerated_size; + if (!elf_zstd_literal_output (&literals, + litexp_count, + plitexp)) + return 0; + + if (unlikely (literal > litexp_count)) { - plitexp = NULL; - litexp_count = 0; - if (unlikely (literal > 0)) - { - elf_uncompress_failed (); - return 0; - } + elf_uncompress_failed (); + return 0; } - else - { - plitexp = (unsigned char *)scratch; - litexp_count = ZSTD_TABLE_WORK_LIT_SIZE; - if (litexp_count > literals.regenerated_size) - litexp_count = literals.regenerated_size; - if (!elf_zstd_literal_output (&literals, - litexp_count, - plitexp)) - return 0; - if (unlikely (literal > litexp_count)) - { - elf_uncompress_failed (); - return 0; - } - - memcpy (pout, plitexp, literal); - plitexp += literal; - litexp_count -= 
literal; - pout += literal; - } + memcpy (pout, plitexp, literal); + plitexp += literal; + litexp_count -= literal; + pout += literal; } } - /* Copy MATCH bytes from the decoded output at USE_OFFSET. */ - - if (unlikely ((size_t)(poutend - pout) < match)) - { - elf_uncompress_failed (); - return 0; - } - if (match > 0) { - if (unlikely ((size_t)(pout - poutstart) < use_offset)) + /* Copy MATCH bytes from the decoded output at OFFSET. */ + + if (unlikely ((size_t)(poutend - pout) < match)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((size_t)(pout - poutstart) < offset)) { elf_uncompress_failed (); return 0; } - if (use_offset >= match) + if (offset >= match) { - memcpy (pout, pout - use_offset, match); + memcpy (pout, pout - offset, match); pout += match; } else @@ -4661,8 +4897,8 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, { uint32_t copy; - copy = match < use_offset ? match : use_offset; - memcpy (pout, pout - use_offset, copy); + copy = match < offset ? match : offset; + memcpy (pout, pout - offset, copy); match -= copy; pout += copy; } ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: Add zstd support to libbacktrace 2022-12-10 1:47 ` Ian Lance Taylor @ 2022-12-17 2:48 ` Ian Lance Taylor 0 siblings, 0 replies; 3+ messages in thread From: Ian Lance Taylor @ 2022-12-17 2:48 UTC (permalink / raw) To: gcc-patches [-- Attachment #1: Type: text/plain, Size: 804 bytes --] Some more tweaks of the libbacktrace zstd decompressor to make decompressing slightly faster: unpack all the literal data into the output buffer, rather than using scratch space. Bootstrapped and ran libbacktrace tests on x86_64-pc-linux-gnu. Committed to mainline. Ian * elf.c (elf_fetch_backward_init): New static function. (ZSTD_TABLE_SIZE): Use huffman scratch space size rather than literal size. (ZSTD_TABLE_WORK_LIT_SIZE): Don't define. (elf_zstd_read_huff): Use elf_fetch_backward_init. (elf_zstd_read_literals): New static function. (ZSTD_LIT_RAW, ZSTD_LIT_RLE, ZSTD_LIT_HUFF): Don't define. (struct elf_zstd_literals): Don't define. (elf_zstd_literal_output): Remove static function. (elf_zstd_decompress): Use elf_fetch_backward_init and elf_zstd_read_literals. Rewrite literal copying.< [-- Attachment #2: patch.txt --] [-- Type: text/plain, Size: 38807 bytes --] diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c index ece02db27f1..135a94245a4 100644 --- a/libbacktrace/elf.c +++ b/libbacktrace/elf.c @@ -1223,6 +1223,57 @@ elf_fetch_bits_backward (const unsigned char **ppin, return 1; } +/* Initialize backward fetching when the bitstream starts with a 1 bit in the + last byte in memory (which is the first one that we read). This is used by + zstd decompression. Returns 1 on success, 0 on error. 
*/ + +static int +elf_fetch_backward_init (const unsigned char **ppin, + const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + const unsigned char *pin; + unsigned int stream_start; + uint64_t val; + unsigned int bits; + + pin = *ppin; + stream_start = (unsigned int)*pin; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + val = 0; + bits = 0; + + /* Align to a 32-bit boundary. */ + while ((((uintptr_t)pin) & 3) != 0) + { + val <<= 8; + val |= (uint64_t)*pin; + bits += 8; + --pin; + } + + val <<= 8; + val |= (uint64_t)*pin; + bits += 8; + + *ppin = pin; + *pval = val; + *pbits = bits; + if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) + return 0; + + *pbits -= __builtin_clz (stream_start) - (sizeof (unsigned int) - 1) * 8 + 1; + + if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) + return 0; + + return 1; +} + /* Huffman code tables, like the rest of the zlib format, are defined by RFC 1951. We store a Huffman code table as a series of tables stored sequentially in memory. Each entry in a table is 16 bits. @@ -2617,14 +2668,13 @@ elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, - scratch space, one of - to build an FSE table: 512 uint16_t values == 1024 bytes - to build a Huffman tree: 512 uint16_t + 256 uint32_t == 2048 bytes - - buffer for literal values == 2048 bytes */ #define ZSTD_TABLE_SIZE \ (2 * 512 * sizeof (struct elf_zstd_fse_baseline_entry) \ + 256 * sizeof (struct elf_zstd_fse_baseline_entry) \ + 2048 * sizeof (uint16_t) \ - + 2048) + + 512 * sizeof (uint16_t) + 256 * sizeof (uint32_t)) #define ZSTD_TABLE_LITERAL_FSE_OFFSET (0) @@ -2642,8 +2692,6 @@ elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, #define ZSTD_TABLE_WORK_OFFSET \ (ZSTD_TABLE_HUFFMAN_OFFSET + 2048 * sizeof (uint16_t)) -#define ZSTD_TABLE_WORK_LIT_SIZE 2048 - /* An entry in a zstd FSE table. 
*/ struct elf_zstd_fse_entry @@ -3427,7 +3475,6 @@ elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, uint16_t *scratch; const unsigned char *pfse; const unsigned char *pback; - unsigned char stream_start; uint64_t val; unsigned int bits; unsigned int state1, state2; @@ -3454,31 +3501,8 @@ elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, FSE_TABLE. */ pback = pin + hdr - 1; - stream_start = *pback; - if (unlikely (stream_start == 0)) - { - elf_uncompress_failed (); - return 0; - } - val = 0; - bits = 0; - while ((((uintptr_t)pback) & 3) != 0) - { - val <<= 8; - val |= (uint64_t)*pback; - bits += 8; - --pback; - } - val <<= 8; - val |= (uint64_t)*pback; - bits += 8; - - if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) - return 0; - - bits -= __builtin_clz (stream_start) - 24 + 1; - if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + if (!elf_fetch_backward_init (&pback, pfse, &val, &bits)) return 0; bits -= fse_table_bits; @@ -3702,331 +3726,615 @@ elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, return 1; } -/* The information used to decompress a sequence code, which can be a literal - length, an offset, or a match length. */ +/* Read and decompress the literals and store them ending at POUTEND. This + works because we are going to use all the literals in the output, so they + must fit into the output buffer. HUFFMAN_TABLE, and PHUFFMAN_TABLE_BITS + store the Huffman table across calls. SCRATCH is used to read a Huffman + table. Store the start of the decompressed literals in *PPLIT. Update + *PPIN. Return 1 on success, 0 on error. 
*/ -struct elf_zstd_seq_decode +static int +elf_zstd_read_literals (const unsigned char **ppin, + const unsigned char *pinend, + unsigned char *pout, + unsigned char *poutend, + uint16_t *scratch, + uint16_t *huffman_table, + int *phuffman_table_bits, + unsigned char **pplit) { - const struct elf_zstd_fse_baseline_entry *table; - int table_bits; -}; + const unsigned char *pin; + unsigned char *plit; + unsigned char hdr; + uint32_t regenerated_size; + uint32_t compressed_size; + int streams; + uint32_t total_streams_size; + unsigned int huffman_table_bits; + uint64_t huffman_mask; -/* Unpack a sequence code compression mode. */ + pin = *ppin; + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + hdr = *pin; + ++pin; -static int -elf_zstd_unpack_seq_decode (int mode, - const unsigned char **ppin, - const unsigned char *pinend, - const struct elf_zstd_fse_baseline_entry *predef, - int predef_bits, - uint16_t *scratch, - int maxidx, - struct elf_zstd_fse_baseline_entry *table, - int table_bits, - int (*conv)(const struct elf_zstd_fse_entry *, - int, - struct elf_zstd_fse_baseline_entry *), - struct elf_zstd_seq_decode *decode) -{ - switch (mode) + if ((hdr & 3) == 0 || (hdr & 3) == 1) { - case 0: - decode->table = predef; - decode->table_bits = predef_bits; - break; + int raw; - case 1: - { - struct elf_zstd_fse_entry entry; + /* Raw_literals_Block or RLE_Literals_Block */ - if (unlikely (*ppin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - entry.symbol = **ppin; - ++*ppin; - entry.bits = 0; - entry.base = 0; - decode->table_bits = 0; - if (!conv (&entry, 0, table)) + raw = (hdr & 3) == 0; + + switch ((hdr >> 2) & 3) + { + case 0: case 2: + regenerated_size = hdr >> 3; + break; + case 1: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (hdr >> 4) + ((uint32_t)(*pin) << 4); + ++pin; + break; + case 3: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 
0; + } + regenerated_size = ((hdr >> 4) + + ((uint32_t)*pin << 4) + + ((uint32_t)pin[1] << 12)); + pin += 2; + break; + default: + elf_uncompress_failed (); return 0; - } - break; + } - case 2: - { - struct elf_zstd_fse_entry *fse_table; + if (unlikely ((size_t)(poutend - pout) < regenerated_size)) + { + elf_uncompress_failed (); + return 0; + } - /* We use the same space for the simple FSE table and the baseline - table. */ - fse_table = (struct elf_zstd_fse_entry *)table; - decode->table_bits = table_bits; - if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, - &decode->table_bits)) + plit = poutend - regenerated_size; + + if (raw) + { + if (unlikely (pin + regenerated_size >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + memcpy (plit, pin, regenerated_size); + pin += regenerated_size; + } + else + { + if (pin >= pinend) + { + elf_uncompress_failed (); + return 0; + } + memset (plit, *pin, regenerated_size); + ++pin; + } + + *ppin = pin; + *pplit = plit; + + return 1; + } + + /* Compressed_Literals_Block or Treeless_Literals_Block */ + + switch ((hdr >> 2) & 3) + { + case 0: case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); return 0; - if (!conv (fse_table, decode->table_bits, table)) + } + regenerated_size = (hdr >> 4) | ((uint32_t)(*pin & 0x3f) << 4); + compressed_size = (uint32_t)*pin >> 6 | ((uint32_t)pin[1] << 2); + pin += 2; + streams = ((hdr >> 2) & 3) == 0 ? 
1 : 4; + break; + case 2: + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); return 0; - decode->table = table; - } + } + regenerated_size = (((uint32_t)hdr >> 4) + | ((uint32_t)*pin << 4) + | (((uint32_t)pin[1] & 3) << 12)); + compressed_size = (((uint32_t)pin[1] >> 2) + | ((uint32_t)pin[2] << 6)); + pin += 3; + streams = 4; break; - case 3: - if (unlikely (decode->table_bits == -1)) + if (unlikely (pin + 3 >= pinend)) { elf_uncompress_failed (); return 0; } + regenerated_size = (((uint32_t)hdr >> 4) + | ((uint32_t)*pin << 4) + | (((uint32_t)pin[1] & 0x3f) << 12)); + compressed_size = (((uint32_t)pin[1] >> 6) + | ((uint32_t)pin[2] << 2) + | ((uint32_t)pin[3] << 10)); + pin += 4; + streams = 4; break; - default: elf_uncompress_failed (); return 0; } - return 1; -} - -/* The different ways that the literals are encoded. */ - -#define ZSTD_LIT_RAW (0) -#define ZSTD_LIT_RLE (1) -#define ZSTD_LIT_HUFF (2) - -/* A struct used to decompress the literals. The order of these fields is - chosen for packing, not for comprehensibility. */ - -struct elf_zstd_literals -{ - /* Current bits in Huffman encoded stream. */ - uint64_t val; - - /* For RAW, the current position in the byte stream. - For RLE, a pointer to the byte being repeated. - For HUFF, start of encoded streams. - */ - const unsigned char *plit; + if (unlikely (pin + compressed_size > pinend)) + { + elf_uncompress_failed (); + return 0; + } - /* Current position of current Huffman encoded stream. */ - const unsigned char *pback; + pinend = pin + compressed_size; + *ppin = pinend; - /* End (reading backward) of current Huffman encoded stream. */ - const unsigned char *pbackend; + if (unlikely ((size_t)(poutend - pout) < regenerated_size)) + { + elf_uncompress_failed (); + return 0; + } - /* The Huffman table. */ - const uint16_t *huffman_table; + plit = poutend - regenerated_size; - /* Remaining number of uncompressed bytes. 
*/ - uint32_t regenerated_size; + *pplit = plit; - /* Current number of available bits in Huffman encoded stream. */ - unsigned int bits; + total_streams_size = compressed_size; + if ((hdr & 3) == 2) + { + const unsigned char *ptable; - /* Number of bits in the Huffman table. */ - int huffman_table_bits; + /* Compressed_Literals_Block. Read Huffman tree. */ - /* Offsets from PLIT to next Huffman encoded streams, 0 if none. */ - uint32_t stream_off[3]; + ptable = pin; + if (!elf_zstd_read_huff (&ptable, pinend, scratch, huffman_table, + phuffman_table_bits)) + return 0; - /* Sizes of next Huffman encoded streams, 0 if none. */ - uint32_t stream_size[3]; + if (unlikely (total_streams_size < (size_t)(ptable - pin))) + { + elf_uncompress_failed (); + return 0; + } - /* A ZSTD_LIT_* code. */ - unsigned char type; -}; + total_streams_size -= ptable - pin; + pin = ptable; + } + else + { + /* Treeless_Literals_Block. Reuse previous Huffman tree. */ + if (unlikely (*phuffman_table_bits == 0)) + { + elf_uncompress_failed (); + return 0; + } + } -/* Output COUNT bytes from the literal byte stream in LITERALS to POUT. */ + /* Decompress COMPRESSED_SIZE bytes of data at PIN using the huffman table, + storing REGENERATED_SIZE bytes of decompressed data at PLIT. 
*/ -static int -elf_zstd_literal_output (struct elf_zstd_literals *literals, - size_t count, - unsigned char *pout) -{ - size_t i; - const unsigned char *pback; - const unsigned char *pbackend; - uint64_t val; - unsigned int bits; - const uint16_t *huffman_table; - unsigned int huffman_table_bits; - uint64_t huffman_mask; + huffman_table_bits = (unsigned int)*phuffman_table_bits; + huffman_mask = ((uint64_t)1 << huffman_table_bits) - 1; - if (literals->regenerated_size < count) + if (streams == 1) { - elf_uncompress_failed (); - return 0; - } - literals->regenerated_size -= count; + const unsigned char *pback; + const unsigned char *pbackend; + uint64_t val; + unsigned int bits; + uint32_t i; - switch (literals->type) - { - case ZSTD_LIT_RAW: - memcpy (pout, literals->plit, count); - literals->plit += count; - return 1; + pback = pin + compressed_size - 1; + pbackend = pin; + if (!elf_fetch_backward_init (&pback, pbackend, &val, &bits)) + return 0; - case ZSTD_LIT_RLE: - memset (pout, *literals->plit, count); - return 1; + /* This is one of the inner loops of the decompression algorithm, so we + put some effort into optimization. We can't get more than 64 bytes + from a single call to elf_fetch_bits_backward, and we can't subtract + more than 11 bits at a time. */ - case ZSTD_LIT_HUFF: - break; + if (regenerated_size >= 64) + { + unsigned char *plitstart; + unsigned char *plitstop; - default: - elf_uncompress_failed (); - return 0; - } + plitstart = plit; + plitstop = plit + regenerated_size - 64; + while (plit < plitstop) + { + uint16_t t; - /* The literal string is Huffman encoded. 
*/ + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; - pback = literals->pback; - pbackend = literals->pbackend; - val = literals->val; - bits = literals->bits; + if (bits < 16) + break; - huffman_table = literals->huffman_table; - huffman_table_bits = literals->huffman_table_bits; - huffman_mask = ((uint64_t)1 << huffman_table_bits) - 1; + while (bits >= 33) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } - /* This is one of the inner loops of the decompression algorithm, so we put - some effort into optimization. We can't get more than 64 bytes from a - single call to elf_fetch_bits_backward, and we can't subtract more than 11 - bits at a time. 
*/ + while (bits > 11) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + } - if (count >= 64) - { - unsigned char *poutstart; - unsigned char *poutstop; + regenerated_size -= plit - plitstart; + } - poutstart = pout; - poutstop = pout + count - 64; - while (pout <= poutstop) + for (i = 0; i < regenerated_size; ++i) { uint16_t t; if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) return 0; - if (bits < 16) - break; - - while (bits >= 33) + if (unlikely (bits < huffman_table_bits)) { - t = huffman_table[(val >> (bits - huffman_table_bits)) + t = huffman_table[(val << (huffman_table_bits - bits)) & huffman_mask]; - *pout = t >> 8; - ++pout; - bits -= t & 0xff; - - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *pout = t >> 8; - ++pout; - bits -= t & 0xff; - - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *pout = t >> 8; - ++pout; - bits -= t & 0xff; + if (unlikely (bits < (t & 0xff))) + { + elf_uncompress_failed (); + return 0; + } } + else + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; - while (bits > 11) - { - t = huffman_table[(val >> (bits - huffman_table_bits)) - & huffman_mask]; - *pout = t >> 8; - ++pout; - bits -= t & 0xff; - } + *plit = t >> 8; + ++plit; + bits -= t & 0xff; } - count -= pout - poutstart; + return 1; + } - if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + { + uint32_t stream_size1, stream_size2, stream_size3, stream_size4; + uint32_t tot; + const unsigned char *pback1, *pback2, *pback3, *pback4; + const unsigned char *pbackend1, *pbackend2, *pbackend3, *pbackend4; + uint64_t val1, val2, val3, val4; + unsigned int bits1, bits2, bits3, bits4; + unsigned char *plit1, *plit2, *plit3, *plit4; + uint32_t regenerated_stream_size; + uint32_t regenerated_stream_size4; + uint16_t t1, t2, t3, t4; + uint32_t i; + uint32_t limit; + + /* Read jump table. 
*/ + if (unlikely (pin + 5 >= pinend)) + { + elf_uncompress_failed (); return 0; - } + } + stream_size1 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + stream_size2 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + stream_size3 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + tot = stream_size1 + stream_size2 + stream_size3; + if (unlikely (tot > total_streams_size - 6)) + { + elf_uncompress_failed (); + return 0; + } + stream_size4 = total_streams_size - 6 - tot; - for (i = 0; i < count; ++i) - { - uint16_t t; + pback1 = pin + stream_size1 - 1; + pbackend1 = pin; - if (unlikely (bits == 0)) - { - unsigned char stream_start; + pback2 = pback1 + stream_size2; + pbackend2 = pback1 + 1; - /* Advance to next stream. */ - if (unlikely (literals->stream_off[0] == 0)) - { - elf_uncompress_failed (); - return 0; - } + pback3 = pback2 + stream_size3; + pbackend3 = pback2 + 1; - pback = literals->plit + literals->stream_off[0]; - pbackend = pback; - pback += literals->stream_size[0]; + pback4 = pback3 + stream_size4; + pbackend4 = pback3 + 1; - /* Align to a 32-bit boundary. */ - val = 0; - bits = 0; - --pback; - stream_start = *pback; - if (unlikely (stream_start == 0)) - { - elf_uncompress_failed (); + if (!elf_fetch_backward_init (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_backward_init (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_backward_init (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (!elf_fetch_backward_init (&pback4, pbackend4, &val4, &bits4)) + return 0; + + regenerated_stream_size = (regenerated_size + 3) / 4; + + plit1 = plit; + plit2 = plit1 + regenerated_stream_size; + plit3 = plit2 + regenerated_stream_size; + plit4 = plit3 + regenerated_stream_size; + + regenerated_stream_size4 = regenerated_size - regenerated_stream_size * 3; + + /* We can't get more than 64 literal bytes from a single call to + elf_fetch_bits_backward. 
The fourth stream can be up to 3 bytes less, + so use as the limit. */ + + limit = regenerated_stream_size4 <= 64 ? 0 : regenerated_stream_size4 - 64; + i = 0; + while (i < limit) + { + if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) + return 0; + + /* We can't subtract more than 11 bits at a time. */ + + do + { + t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) + & huffman_mask]; + t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) + & huffman_mask]; + t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) + & huffman_mask]; + t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) + & huffman_mask]; + + *plit1 = t1 >> 8; + ++plit1; + bits1 -= t1 & 0xff; + + *plit2 = t2 >> 8; + ++plit2; + bits2 -= t2 & 0xff; + + *plit3 = t3 >> 8; + ++plit3; + bits3 -= t3 & 0xff; + + *plit4 = t4 >> 8; + ++plit4; + bits4 -= t4 & 0xff; + + ++i; + } + while (bits1 > 11 && bits2 > 11 && bits3 > 11 && bits4 > 11); + } + + while (i < regenerated_stream_size) + { + int use4; + + use4 = i < regenerated_stream_size4; + + if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (use4) + { + if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) return 0; - } - while ((((uintptr_t) pback) & 3) != 0) - { - val <<= 8; - val |= (uint64_t)*pback; - bits += 8; - --pback; - } - val <<= 8; - val |= (uint64_t)*pback; - bits += 8; + } - if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) - return 0; + if (unlikely (bits1 < huffman_table_bits)) + { + t1 = huffman_table[(val1 << (huffman_table_bits - bits1)) + & huffman_mask]; 
+ if (unlikely (bits1 < (t1 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) + & huffman_mask]; - bits -= __builtin_clz (stream_start) - 24 + 1; + if (unlikely (bits2 < huffman_table_bits)) + { + t2 = huffman_table[(val2 << (huffman_table_bits - bits2)) + & huffman_mask]; + if (unlikely (bits2 < (t2 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) + & huffman_mask]; - literals->stream_off[0] = literals->stream_off[1]; - literals->stream_off[1] = literals->stream_off[2]; - literals->stream_off[2] = 0; - literals->stream_size[0] = literals->stream_size[1]; - literals->stream_size[1] = literals->stream_size[2]; - literals->stream_size[2] = 0; - } + if (unlikely (bits3 < huffman_table_bits)) + { + t3 = huffman_table[(val3 << (huffman_table_bits - bits3)) + & huffman_mask]; + if (unlikely (bits3 < (t3 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) + & huffman_mask]; - if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) - return 0; + if (use4) + { + if (unlikely (bits4 < huffman_table_bits)) + { + t4 = huffman_table[(val4 << (huffman_table_bits - bits4)) + & huffman_mask]; + if (unlikely (bits4 < (t4 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) + & huffman_mask]; + + *plit4 = t4 >> 8; + ++plit4; + bits4 -= t4 & 0xff; + } + + *plit1 = t1 >> 8; + ++plit1; + bits1 -= t1 & 0xff; + + *plit2 = t2 >> 8; + ++plit2; + bits2 -= t2 & 0xff; + + *plit3 = t3 >> 8; + ++plit3; + bits3 -= t3 & 0xff; + + ++i; + } + } + + return 1; +} + +/* The information used to decompress a sequence code, which can be a literal + length, an offset, or a match length. 
*/ + +struct elf_zstd_seq_decode +{ + const struct elf_zstd_fse_baseline_entry *table; + int table_bits; +}; + +/* Unpack a sequence code compression mode. */ - if (unlikely (bits < huffman_table_bits)) +static int +elf_zstd_unpack_seq_decode (int mode, + const unsigned char **ppin, + const unsigned char *pinend, + const struct elf_zstd_fse_baseline_entry *predef, + int predef_bits, + uint16_t *scratch, + int maxidx, + struct elf_zstd_fse_baseline_entry *table, + int table_bits, + int (*conv)(const struct elf_zstd_fse_entry *, + int, + struct elf_zstd_fse_baseline_entry *), + struct elf_zstd_seq_decode *decode) +{ + switch (mode) + { + case 0: + decode->table = predef; + decode->table_bits = predef_bits; + break; + + case 1: + { + struct elf_zstd_fse_entry entry; + + if (unlikely (*ppin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + entry.symbol = **ppin; + ++*ppin; + entry.bits = 0; + entry.base = 0; + decode->table_bits = 0; + if (!conv (&entry, 0, table)) + return 0; + } + break; + + case 2: + { + struct elf_zstd_fse_entry *fse_table; + + /* We use the same space for the simple FSE table and the baseline + table. 
*/ + fse_table = (struct elf_zstd_fse_entry *)table; + decode->table_bits = table_bits; + if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, + &decode->table_bits)) + return 0; + if (!conv (fse_table, decode->table_bits, table)) + return 0; + decode->table = table; + } + break; + + case 3: + if (unlikely (decode->table_bits == -1)) { - t = huffman_table[(val << (huffman_table_bits - bits)) - & huffman_mask]; - if (unlikely (bits < (t & 0xff))) - { - elf_uncompress_failed (); - return 0; - } + elf_uncompress_failed (); + return 0; } - else - t = huffman_table[(val >> (bits - huffman_table_bits)) & huffman_mask]; - - *pout = t >> 8; - ++pout; + break; - bits -= t & 0xff; + default: + elf_uncompress_failed (); + return 0; } - literals->pback = pback; - literals->pbackend = pbackend; - literals->val = val; - literals->bits = bits; - return 1; } @@ -4250,16 +4558,9 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, case 2: { const unsigned char *pblockend; - struct elf_zstd_literals literals; - unsigned char lit_hdr; - uint32_t lit_section_content; - uint32_t lit_compressed_size; - uint32_t lit_total_streams_size; - const unsigned char *plitend; - unsigned char *plitexp; - size_t litexp_count; - int lit_streams; - uint32_t stream_size_1; + unsigned char *plitstack; + unsigned char *plit; + uint32_t literal_count; unsigned char seq_hdr; size_t seq_count; size_t seq; @@ -4269,7 +4570,6 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, unsigned int literal_state; unsigned int offset_state; unsigned int match_state; - unsigned char stream_start; /* Compressed_Block */ if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) @@ -4280,248 +4580,16 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, pblockend = pin + block_size; - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - lit_hdr = *pin; - ++pin; - - if ((lit_hdr & 3) == 0 || (lit_hdr & 3) == 1) - { - if ((lit_hdr & 3) == 0) - 
literals.type = ZSTD_LIT_RAW; - else - literals.type = ZSTD_LIT_RLE; - - /* Raw_literals_Block or RLE_Literals_Block */ - switch ((lit_hdr >> 2) & 3) - { - case 0: case 2: - literals.regenerated_size = lit_hdr >> 3; - break; - case 1: - if (unlikely (pin >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - literals.regenerated_size = (lit_hdr >> 4) + ((*pin) << 4); - pin++; - break; - case 3: - if (unlikely (pin + 1 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - literals.regenerated_size = ((lit_hdr >> 4) - + (*pin << 4) - + (pin[1] << 12)); - pin += 2; - break; - default: - elf_uncompress_failed (); - return 0; - } - if (literals.type == ZSTD_LIT_RAW) - lit_section_content = literals.regenerated_size; - else - lit_section_content = 1; - lit_compressed_size = 0; - lit_streams = 1; - } - else - { - /* Compressed_Literals_Block or Treeless_Literals_Block */ - literals.type = ZSTD_LIT_HUFF; - switch ((lit_hdr >> 2) & 3) - { - case 0: case 1: - if (unlikely (pin + 1 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - literals.regenerated_size = ((lit_hdr >> 4) - | ((*pin & 0x3f) << 4)); - lit_compressed_size = ((*pin >> 6) - | (pin[1] << 2)); - pin += 2; - lit_streams = ((lit_hdr >> 2) & 3) == 0 ? 
1 : 4; - break; - case 2: - if (unlikely (pin + 2 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - literals.regenerated_size = ((lit_hdr >> 4) - | (*pin << 4) - | ((pin[1] & 3) << 12)); - lit_compressed_size = ((pin[1] >> 2) - | (pin[2] << 6)); - pin += 3; - lit_streams = 4; - break; - case 3: - if (unlikely (pin + 3 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - literals.regenerated_size = ((lit_hdr >> 4) - | (*pin << 4) - | ((pin[1] & 0x3f) << 12)); - lit_compressed_size = ((pin[1] >> 6) - | (pin[2] << 2) - | (pin[3] << 10)); - pin += 4; - lit_streams = 4; - break; - default: - elf_uncompress_failed (); - return 0; - } - - lit_section_content = lit_compressed_size; - } - - if (unlikely ((size_t)lit_section_content > (size_t)(pinend - pin))) - { - elf_uncompress_failed (); - return 0; - } - plitend = pin + lit_section_content; - - lit_total_streams_size = lit_compressed_size; - if ((lit_hdr & 3) == 2) - { - /* Compressed_Literals_Block. Read Huffman tree. */ - - const unsigned char *ptable; - - ptable = pin; - if (!elf_zstd_read_huff (&ptable, pinend, scratch, - huffman_table, &huffman_table_bits)) - return 0; - literals.huffman_table = huffman_table; - literals.huffman_table_bits = huffman_table_bits; - - lit_total_streams_size -= ptable - pin; - pin = ptable; - } - else if ((lit_hdr & 3) == 3) - { - /* Treeless_Literals_Block. Reuse previous Huffman tree. */ - if (huffman_table_bits == 0) - { - elf_uncompress_failed (); - return 0; - } - literals.huffman_table = huffman_table; - literals.huffman_table_bits = huffman_table_bits; - } - else - { - literals.huffman_table = NULL; - literals.huffman_table_bits = 0; - } + /* Read the literals into the end of the output space, and leave + PLIT pointing at them. 
*/ - if (lit_streams == 1) - { - stream_size_1 = block_size; - literals.stream_off[0] = 0; - literals.stream_off[1] = 0; - literals.stream_off[2] = 0; - literals.stream_size[0] = 0; - literals.stream_size[1] = 0; - literals.stream_size[2] = 0; - } - else - { - uint32_t tot; - - /* Read jump table. */ - if (unlikely (pin + 5 >= pinend)) - { - elf_uncompress_failed (); - return 0; - } - stream_size_1 = *pin | (pin[1] << 8); - pin += 2; - literals.stream_size[0] = *pin | (pin[1] << 8); - pin += 2; - literals.stream_size[1] = *pin | (pin[1] << 8); - pin += 2; - tot = (stream_size_1 - + literals.stream_size[0] - + literals.stream_size[1]); - if (unlikely (tot > lit_total_streams_size - 6)) - { - elf_uncompress_failed (); - return 0; - } - literals.stream_size[2] = lit_total_streams_size - 6 - tot; - - literals.stream_off[0] = stream_size_1; - literals.stream_off[1] = (literals.stream_off[0] - + literals.stream_size[0]); - literals.stream_off[2] = (literals.stream_off[1] - + literals.stream_size[1]); - } - - literals.plit = pin; - - if (literals.type == ZSTD_LIT_HUFF) - { - const unsigned char *plback; - - /* Set up the first huffman stream. */ - - literals.pbackend = literals.plit; - plback = literals.plit + stream_size_1; - literals.val = 0; - literals.bits = 0; - --plback; - stream_start = *plback; - if (unlikely (stream_start == 0)) - { - elf_uncompress_failed (); - return 0; - } - while ((((uintptr_t) plback) & 3) != 0) - { - literals.val <<= 8; - literals.val |= (uint64_t)*plback; - literals.bits += 8; - --plback; - } - literals.val <<= 8; - literals.val |= (uint64_t)*plback; - literals.bits += 8; - - if (!elf_fetch_bits_backward (&plback, literals.pbackend, - &literals.val, &literals.bits)) - return 0; - - literals.bits -= __builtin_clz (stream_start) - 24 + 1; - - literals.pback = plback; - } - else - { - literals.val = 0; - literals.bits = 0; - literals.pback = NULL; - literals.pbackend = NULL; - } - - /* We have read all the literal header information. 
The literal - data starts at LITERALS.PLIT. Skip ahead to the sequences. */ - - pin = plitend; + if (!elf_zstd_read_literals (&pin, pblockend, pout, poutend, + scratch, huffman_table, + &huffman_table_bits, + &plitstack)) + return 0; + plit = plitstack; + literal_count = poutend - plit; seq_hdr = *pin; pin++; @@ -4589,53 +4657,10 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, return 0; } - /* Expand 2048 bytes of literals. The expanded literals are - recorded in PLITEXP and LITEXP_COUNT. */ - - if (literals.type != ZSTD_LIT_HUFF - || literals.regenerated_size == 0) - { - plitexp = NULL; - litexp_count = 0; - } - else - { - plitexp = (unsigned char *)scratch; - litexp_count = ZSTD_TABLE_WORK_LIT_SIZE; - if (litexp_count > literals.regenerated_size) - litexp_count = literals.regenerated_size; - if (!elf_zstd_literal_output (&literals, litexp_count, - plitexp)) - return 0; - } - pback = pblockend - 1; - val = 0; - bits = 0; - stream_start = *pback; - if (unlikely (stream_start == 0)) - { - elf_uncompress_failed (); - return 0; - } - while ((((uintptr_t)pback) & 3) != 0) - { - val <<= 8; - val |= (uint64_t)*pback; - bits += 8; - --pback; - } - val <<= 8; - val |= (uint64_t)*pback; - bits += 8; - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + if (!elf_fetch_backward_init (&pback, pin, &val, &bits)) return 0; - bits -= __builtin_clz (stream_start) - 24 + 1; - - if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) - return 0; bits -= literal_decode.table_bits; literal_state = ((val >> bits) & ((1U << literal_decode.table_bits) - 1)); @@ -4808,66 +4833,71 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, /* The next sequence is now in LITERAL, OFFSET, MATCH. */ - if (literal > 0) + /* Copy LITERAL bytes from the literals. */ + + if (unlikely ((size_t)(poutend - pout) < literal)) { - /* Copy LITERAL bytes from the literals. 
*/ + elf_uncompress_failed (); + return 0; + } - if (unlikely ((size_t)(poutend - pout) < literal)) - { - elf_uncompress_failed (); - return 0; - } + if (unlikely (literal_count < literal)) + { + elf_uncompress_failed (); + return 0; + } - if (literals.type != ZSTD_LIT_HUFF) - { - if (!elf_zstd_literal_output (&literals, literal, - pout)) - return 0; - pout += literal; - } - else if (literal <= litexp_count) - { - memcpy (pout, plitexp, literal); - plitexp += literal; - litexp_count -= literal; - pout += literal; - } - else - { - memcpy (pout, plitexp, litexp_count); - pout += litexp_count; - literal -= litexp_count; - litexp_count = 0; + literal_count -= literal; - if (unlikely (literal >= ZSTD_TABLE_WORK_LIT_SIZE)) - { - if (!elf_zstd_literal_output (&literals, literal, - pout)) - return 0; - pout += literal; - literal = 0; - } + /* Often LITERAL is small, so handle small cases quickly. */ + switch (literal) + { + case 8: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 7: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 6: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 5: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 4: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 3: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 2: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 1: + *pout++ = *plit++; + break; - plitexp = (unsigned char *)scratch; - litexp_count = ZSTD_TABLE_WORK_LIT_SIZE; - if (litexp_count > literals.regenerated_size) - litexp_count = literals.regenerated_size; - if (!elf_zstd_literal_output (&literals, - litexp_count, - plitexp)) - return 0; + case 0: + break; - if (unlikely (literal > litexp_count)) + default: + if (unlikely ((size_t)(plit - pout) < literal)) + { + uint32_t move; + + move = plit - pout; + while (literal > move) { - elf_uncompress_failed (); - return 0; + memcpy (pout, plit, move); + pout += move; + plit += move; + literal -= move; } - - memcpy (pout, plitexp, literal); - plitexp += literal; - litexp_count -= literal; - pout += 
literal; } + + memcpy (pout, plit, literal); + pout += literal; + plit += literal; } if (match > 0) @@ -4907,34 +4937,35 @@ elf_zstd_decompress (const unsigned char *pin, size_t sin, if (unlikely (seq >= seq_count)) { - size_t copy; - /* Copy remaining literals. */ - if (litexp_count > 0) + if (literal_count > 0 && plit != pout) { - if (unlikely ((size_t)(poutend - pout) < litexp_count)) + if (unlikely ((size_t)(poutend - pout) + < literal_count)) { elf_uncompress_failed (); return 0; } - memcpy (pout, plitexp, litexp_count); - pout += litexp_count; - } - copy = literals.regenerated_size; - if (copy > 0) - { - if (unlikely ((size_t)(poutend - pout) < copy)) + + if ((size_t)(plit - pout) < literal_count) { - elf_uncompress_failed (); - return 0; + uint32_t move; + + move = plit - pout; + while (literal_count > move) + { + memcpy (pout, plit, move); + pout += move; + plit += move; + literal_count -= move; + } } - if (!elf_zstd_literal_output (&literals, copy, pout)) - return 0; - - pout += copy; + memcpy (pout, plit, literal_count); } + pout += literal_count; + break; } } ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-12-17 2:49 UTC | newest] Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-12-08 0:22 Add zstd support to libbacktrace Ian Lance Taylor 2022-12-10 1:47 ` Ian Lance Taylor 2022-12-17 2:48 ` Ian Lance Taylor
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).