* Re: [PATCH v2] Fix leb128 reading
2020-10-28 23:26 [PATCH v2] Fix leb128 reading Tom Tromey
@ 2020-10-29 23:06 ` Mark Wielaard
2020-10-30 14:31 ` Tom Tromey
0 siblings, 1 reply; 3+ messages in thread
From: Mark Wielaard @ 2020-10-29 23:06 UTC (permalink / raw)
To: Tom Tromey, elfutils-devel
[-- Attachment #1: Type: text/plain, Size: 12624 bytes --]
Hi Tom,
On Wed, 2020-10-28 at 17:26 -0600, Tom Tromey wrote:
> PR 26773 points out that some sleb128 values are decoded incorrectly.
>
> This version of the fix only examines the sleb128 conversion.
> Overlong encodings are not handled, and the uleb128 decoders are not
> touched. The approach taken here is to do the work in an unsigned
> type, and then rely on an implementation-defined cast to convert to
> signed.
I like this variant, it still handles longer than necessary encodings,
just not those that use more than 10 bytes. It also removes all the
tricky cases I had questions about. And keeps the parts I already acked
last time.
> Signed-off-by: Tom Tromey <tom@tromey.com>
> diff --git a/.gitignore b/.gitignore
> index c9790941..d737b14d 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -151,6 +151,7 @@ Makefile.in
> /tests/get-units-invalid
> /tests/get-units-split
> /tests/hash
> +/tests/leb128
> /tests/line2addr
> /tests/low_high_pc
> /tests/msg_tst
> diff --git a/ChangeLog b/ChangeLog
> index 72e8397c..4c8699f8 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-10-28 Tom Tromey <tom@tromey.com>
> +
> + * .gitignore: Add /tests/leb128.
> +
> 2020-10-01 Frank Ch. Eigler <fche@redhat.com>
Ack.
> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index 731c7e79..289bb4c9 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,13 @@
> +2020-10-28 Tom Tromey <tom@tromey.com>
> +
> + PR26773
> + * dwarf_getlocation.c (store_implicit_value): Use
> + __libdw_get_uleb128_unchecked.
> + * memory-access.h (get_sleb128_step): Assume unsigned type for
> + 'var'.
> + (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Work in
> + unsigned type. Handle final byte.
> +
> 2020-10-19 Mark Wielaard <mark@klomp.org>
>
> * dwarf_frame_register.c (dwarf_frame_register): Declare ops_mem
> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index 4617f9e9..f2bad5a9 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -130,9 +130,8 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op *op)
> struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
> sizeof (struct loc_block_s), 1);
> const unsigned char *data = (const unsigned char *) (uintptr_t) op->number2;
> - uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
> - if (unlikely (len != op->number))
> - return -1;
> + /* Skip the block length. */
> + __libdw_get_uleb128_unchecked (&data);
> block->addr = op;
> block->data = (unsigned char *) data;
> block->length = op->number;
Ack.
> diff --git a/libdw/memory-access.h b/libdw/memory-access.h
> index 14436a71..8b2386ee 100644
> --- a/libdw/memory-access.h
> +++ b/libdw/memory-access.h
> @@ -113,19 +113,22 @@ __libdw_get_uleb128_unchecked (const unsigned char **addrp)
> #define get_sleb128_step(var, addr, nth) \
> do { \
> unsigned char __b = *(addr)++; \
> + (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \
> if (likely ((__b & 0x80) == 0)) \
> { \
> - struct { signed int i:7; } __s = { .i = __b }; \
> - (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7)); \
> + if ((__b & 0x40) != 0) \
> + (var) |= - ((typeof (var)) 1 << (((nth) + 1) * 7)); \
> return (var); \
> } \
> - (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \
> } while (0)
Here it should be noted that var is already the unsigned acc from
__libdw_get_sleb, so the left shift is always fine. The value
extraction is now the same for both the continuation path and the last
byte path.
> static inline int64_t
> __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
> {
> - int64_t acc = 0;
> + /* Do the work in an unsigned type, but use implementation-defined
> + behavior to cast to signed on return. This avoids some undefined
> + behavior when shifting. */
> + uint64_t acc = 0;
>
> /* Unroll the first step to help the compiler optimize
> for the common single-byte case. */
> @@ -134,6 +137,20 @@ __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
> const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end);
> for (size_t i = 1; i < max; ++i)
> get_sleb128_step (acc, *addrp, i);
> + if (*addrp == end)
> + return INT64_MAX;
OK, we keep the __libdw_max_len_sleb128 as is, so it is one smaller
than the actual max number of bytes, but reaching the end of data is
still overflow.
> + /* There might be one extra byte. */
> + unsigned char b = **addrp;
> + ++*addrp;
> + if (likely ((b & 0x80) == 0))
> + {
> + /* We only need the low bit of the final byte, and as it is the
> + sign bit, we don't need to do anything else here. */
> + acc |= ((typeof (acc)) b) << 7 * max;
> + return acc;
> + }
> +
> /* Other implementations set VALUE to INT_MAX in this
> case. So we better do this as well. */
> return INT64_MAX;
Nice and simple.
> @@ -142,7 +159,10 @@ __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
> static inline int64_t
> __libdw_get_sleb128_unchecked (const unsigned char **addrp)
> {
> - int64_t acc = 0;
> + /* Do the work in an unsigned type, but use implementation-defined
> + behavior to cast to signed on return. This avoids some undefined
> + behavior when shifting. */
> + uint64_t acc = 0;
>
> /* Unroll the first step to help the compiler optimize
> for the common single-byte case. */
> @@ -152,6 +172,18 @@ __libdw_get_sleb128_unchecked (const unsigned char **addrp)
> const size_t max = len_leb128 (int64_t) - 1;
> for (size_t i = 1; i < max; ++i)
> get_sleb128_step (acc, *addrp, i);
Same as above, we just substract one here when using len_leb128 instead
of using __libdw_max_len_sleb128.
> + /* There might be one extra byte. */
> + unsigned char b = **addrp;
> + ++*addrp;
> + if (likely ((b & 0x80) == 0))
> + {
> + /* We only need the low bit of the final byte, and as it is the
> + sign bit, we don't need to do anything else here. */
> + acc |= ((typeof (acc)) b) << 7 * max;
> + return acc;
> + }
OK, as above.
> /* Other implementations set VALUE to INT_MAX in this
> case. So we better do this as well. */
> return INT64_MAX;
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index e9d1e260..91aeadaf 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,10 @@
> +2020-10-28 Tom Tromey <tom@tromey.com>
> +
> + PR26773
> + * Makefile.am (check_PROGRAMS, TESTS): Add leb128.
> + (leb128_LDADD): New variable.
> + * leb128.c: New file.
> +
> 2020-10-19 Mark Wielaard <mark@klomp.org>
>
> * addrcfi.c (print_register): Make ops_mem 3 elements.
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index bc5d034f..1b51ab8d 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \
> all-dwarf-ranges unit-info next_cfi \
> elfcopy addsections xlate_notes elfrdwrnop \
> dwelf_elf_e_machine_string \
> - getphdrnum
> + getphdrnum leb128
>
> asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
> asm-tst6 asm-tst7 asm-tst8 asm-tst9
> @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \
> run-elfclassify.sh run-elfclassify-self.sh \
> run-disasm-riscv64.sh \
> run-pt_gnu_prop-tests.sh \
> - run-getphdrnum.sh run-test-includes.sh
> + run-getphdrnum.sh run-test-includes.sh \
> + leb128
>
> if !BIARCH
> export ELFUTILS_DISABLE_BIARCH = 1
> @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf)
> elfrdwrnop_LDADD = $(libelf)
> dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw)
> getphdrnum_LDADD = $(libelf) $(libdw)
> +leb128_LDADD = $(libelf) $(libdw)
ack
> # We want to test the libelf header against the system elf.h header.
> # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
> diff --git a/tests/leb128.c b/tests/leb128.c
> new file mode 100644
> index 00000000..6994a436
> --- /dev/null
> +++ b/tests/leb128.c
> @@ -0,0 +1,140 @@
> +/* Test program for leb128
> + Copyright (C) 2020 Tom Tromey
> + This file is part of elfutils.
> +
> + This file is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3 of the License, or
> + (at your option) any later version.
> +
> + elfutils is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program. If not, see <http://www.gnu.org/licenses/>. */
> +
> +#include <config.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +#include <libdw.h>
> +#include "../libdw/libdwP.h"
> +#include "../libdw/memory-access.h"
> +
> +#define OK 0
> +#define FAIL 1
> +
> +static const unsigned char v0[] = { 0x0 };
> +static const unsigned char v23[] = { 23 };
> +static const unsigned char vm_2[] = { 0x7e };
> +static const unsigned char v128[] = { 0x80, 0x01 };
> +static const unsigned char vm_128[] = { 0x80, 0x7f };
> +static const unsigned char vhuge[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0xff, 0x0
> + };
> +static const unsigned char most_positive[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0x3f
> + };
> +static const unsigned char most_negative[] =
> + {
> + 0x80, 0x80, 0x80, 0x80, 0x80,
> + 0x80, 0x80, 0x80, 0x40
> + };
> +static const unsigned char minus_one[] =
> + {
> + 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0xff, 0xff, 0xff, 0x7f
> + };
> +
> +static int
> +test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
> +{
> + int64_t value;
> + const unsigned char *p;
> +
> + p = data;
> + get_sleb128 (value, p, p + len);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + p = data;
> + get_sleb128_unchecked (value, p);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + return OK;
> +}
> +
> +static int
> +test_sleb (void)
> +{
> +#define TEST(ARRAY, V) \
> + if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \
> + return FAIL;
> +
> + TEST (v0, 0);
> + TEST (v23, 23);
> + TEST (vm_2, -2);
> + TEST (v128, 128);
> + TEST (vm_128, -128);
> + TEST (vhuge, 9223372036854775807ll);
> + TEST (most_positive, 4611686018427387903ll);
> + TEST (most_negative, -4611686018427387904ll);
> + TEST (minus_one, -1);
> +
> +#undef TEST
> +
> + return OK;
> +}
> +
> +static int
> +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect)
> +{
> + uint64_t value;
> + const unsigned char *p;
> +
> + p = data;
> + get_uleb128 (value, p, p + len);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + p = data;
> + get_uleb128_unchecked (value, p);
> + if (value != expect || p != data + len)
> + return FAIL;
> +
> + return OK;
> +}
> +
> +static int
> +test_uleb (void)
> +{
> +#define TEST(ARRAY, V) \
> + if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \
> + return FAIL;
> +
> + TEST (v0, 0);
> + TEST (v23, 23);
> + TEST (vm_2, 126);
> + TEST (v128, 128);
> + TEST (vm_128, 16256);
> + TEST (vhuge, 9223372036854775807ull);
> + TEST (most_positive, 4611686018427387903ull);
> + TEST (most_negative, 4611686018427387904ull);
> + TEST (minus_one, 9223372036854775807ull);
> +
> +#undef TEST
> +
> + return OK;
> +}
> +
> +int
> +main (void)
> +{
> + return test_sleb () || test_uleb ();
> +}
Looks good. While reviewing I did try out some extra corner cases that
I added (see attached). It also shows that some values can have
multiple representations. I added them to your commit before I pushed
it. Hope you don't mind.
Thanks,
Mark
[-- Attachment #2: extra-tests.diff --]
[-- Type: text/x-patch, Size: 2552 bytes --]
diff --git a/tests/leb128.c b/tests/leb128.c
index 6994a436..47b57c0d 100644
--- a/tests/leb128.c
+++ b/tests/leb128.c
@@ -18,6 +18,7 @@
#include <config.h>
#include <stddef.h>
#include <stdbool.h>
+#include <stdint.h>
#include <libdw.h>
#include "../libdw/libdwP.h"
#include "../libdw/memory-access.h"
@@ -26,10 +27,16 @@
#define FAIL 1
static const unsigned char v0[] = { 0x0 };
+static const unsigned char v1[] = { 0x1 };
static const unsigned char v23[] = { 23 };
+static const unsigned char vm_1[] = { 0x7f };
static const unsigned char vm_2[] = { 0x7e };
+static const unsigned char s127[] = { 0xff, 0x00 };
static const unsigned char v128[] = { 0x80, 0x01 };
+static const unsigned char v129[] = { 0x81, 0x01 };
+static const unsigned char vm_127[] = { 0x81, 0x7f };
static const unsigned char vm_128[] = { 0x80, 0x7f };
+static const unsigned char vm_129[] = { 0xff, 0x7e };
static const unsigned char vhuge[] =
{
0xff, 0xff, 0xff, 0xff, 0xff,
@@ -50,6 +57,16 @@ static const unsigned char minus_one[] =
0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0x7f
};
+static const unsigned char int64_max_m1[] =
+ {
+ 0xfe, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x00
+ };
+static const unsigned char int64_min_p1[] =
+ {
+ 0x81, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x7f
+ };
static int
test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
@@ -78,14 +95,22 @@ test_sleb (void)
return FAIL;
TEST (v0, 0);
+ TEST (v1, 1);
TEST (v23, 23);
+ TEST (vm_1, -1);
TEST (vm_2, -2);
+ TEST (s127, 127);
TEST (v128, 128);
+ TEST (v129, 129);
+ TEST (vm_127, -127);
TEST (vm_128, -128);
+ TEST (vm_129, -129);
TEST (vhuge, 9223372036854775807ll);
TEST (most_positive, 4611686018427387903ll);
TEST (most_negative, -4611686018427387904ll);
TEST (minus_one, -1);
+ TEST (int64_max_m1, INT64_MAX - 1);
+ TEST (int64_min_p1, INT64_MIN + 1);
#undef TEST
@@ -119,14 +144,22 @@ test_uleb (void)
return FAIL;
TEST (v0, 0);
+ TEST (v1, 1);
TEST (v23, 23);
+ TEST (vm_1, 127);
TEST (vm_2, 126);
+ TEST (s127, 127);
TEST (v128, 128);
+ TEST (v129, 129);
+ TEST (vm_127, 16257);
TEST (vm_128, 16256);
+ TEST (vm_129, 16255);
TEST (vhuge, 9223372036854775807ull);
TEST (most_positive, 4611686018427387903ull);
TEST (most_negative, 4611686018427387904ull);
TEST (minus_one, 9223372036854775807ull);
+ TEST (int64_max_m1, INT64_MAX - 1);
+ TEST (int64_min_p1, INT64_MIN + 1);
#undef TEST
^ permalink raw reply [flat|nested] 3+ messages in thread