public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
From: Mark Wielaard <mark@klomp.org>
To: Aleksei Vetrov <vvvvvv@google.com>
Cc: elfutils-devel@sourceware.org, kernel-team@android.com,
	maennich@google.com
Subject: Re: [PATCH] libdw: check memory access in get_(u|s)leb128
Date: Tue, 07 Feb 2023 18:17:40 +0100	[thread overview]
Message-ID: <7485818501031d589569f23e014b98df4e68cf74.camel@klomp.org> (raw)
In-Reply-To: <CAH2MdgrbaGBYuWf+-FAN4odx2n_1rndoKm8-wBVUeQN84RcTYA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1667 bytes --]

Hi Aleksei,

On Tue, 2023-02-07 at 16:17 +0000, Aleksei Vetrov wrote:
> > Did you actually find situations where these functions were called
> > with addrp
> > > = endp?
> 
> Yes, for example libdw/libdw_form.c:91:7.
> 

Urgh. There are actually 3 places in that function that need a guard.
Then I started reviewing all the library code and came up with more
than 10 other places that had a guard missing (see attached). I'll
clean this up and also audit elflint.c and readelf.c and submit a
proper patch for that.

> > It turns out that get_[su]leb128 dominates some operations and really does
> > have to be as fast as possible. So I do like to know what the impact is of
> > this change.
> 
> This patch just moves __libdw_max_len_uleb128 to the beginning of the function
> and adds only one new if. So hopefully it shouldn't affect performance at all.

Yeah. At first I thought it might be unnecessary because we already
have this check in the calling code. But given that I quickly found 10+
places were that wasn't true I don't feel very confident about that
now...

Then I thought maybe we can just remove the calling code checks and
simply always check like you are proposing. But the checks are slightly
different. The caller guards signal bad data errors, while yours are
hardening checks that make sure no invalid data is read. It is better
to have both. Even if it might slow down the code.

I'll finish the audit of readelf.c and elflint.c and see if adding both
those checks and your hardening checks don't pessimize the code too
much. Hopefully it won't and we can just add all the extra checks.

Cheers,

Mark

[-- Attachment #2: get-leb128-guard.patch --]
[-- Type: text/x-patch, Size: 5655 bytes --]

diff --git a/libdw/cfi.c b/libdw/cfi.c
index 6d08ca90..a7174405 100644
--- a/libdw/cfi.c
+++ b/libdw/cfi.c
@@ -239,6 +239,7 @@ execute_cfi (Dwarf_CFI *cache,
 
 	case DW_CFA_offset_extended_sf:
 	  get_uleb128 (operand, program, end);
+	  cfi_assert (program < end);
 	  get_sleb128 (sf_offset, program, end);
 	offset_extended_sf:
 	  offset = sf_offset * cie->data_alignment_factor;
@@ -294,6 +295,7 @@ execute_cfi (Dwarf_CFI *cache,
 	  get_uleb128 (regno, program, end);
 	  /* DW_FORM_block is a ULEB128 length followed by that many bytes.  */
 	  offset = program - (const uint8_t *) cache->data->d.d_buf;
+	  cfi_assert (program < end);
 	  get_uleb128 (operand, program, end);
 	  cfi_assert (operand <= (Dwarf_Word) (end - program));
 	  program += operand;
diff --git a/libdw/dwarf_child.c b/libdw/dwarf_child.c
index c8c8bb61..96a0d608 100644
--- a/libdw/dwarf_child.c
+++ b/libdw/dwarf_child.c
@@ -73,10 +73,13 @@ __libdw_find_attr (Dwarf_Die *die, unsigned int search_name,
 
       if (attr_form == DW_FORM_indirect)
 	{
+	  if (readp >= endp)
+	    goto invalid;
 	  get_uleb128 (attr_form, readp, endp);
 	  if (attr_form == DW_FORM_indirect ||
 	      attr_form == DW_FORM_implicit_const)
 	    {
+	    invalid:
 	      __libdw_seterrno (DWARF_E_INVALID_DWARF);
 	      return NULL;
 	    }
diff --git a/libdw/dwarf_frame_register.c b/libdw/dwarf_frame_register.c
index bcf3fa03..a6b7c4c1 100644
--- a/libdw/dwarf_frame_register.c
+++ b/libdw/dwarf_frame_register.c
@@ -100,6 +100,11 @@ dwarf_frame_register (Dwarf_Frame *fs, int regno, Dwarf_Op ops_mem[3],
 	const uint8_t *p = fs->cache->data->d.d_buf + reg->value;
 	const uint8_t *end = (fs->cache->data->d.d_buf
 			      + fs->cache->data->d.d_size);
+	if (p >= end)
+	  {
+	    __libdw_seterrno (DWARF_E_INVALID_DWARF);
+	    return -1;
+	  }
 	get_uleb128 (block.length, p, end);
 	block.data = (void *) p;
 
diff --git a/libdw/dwarf_getabbrev.c b/libdw/dwarf_getabbrev.c
index 13bee493..5b02333f 100644
--- a/libdw/dwarf_getabbrev.c
+++ b/libdw/dwarf_getabbrev.c
@@ -77,6 +77,7 @@ __libdw_getabbrev (Dwarf *dbg, struct Dwarf_CU *cu, Dwarf_Off offset,
 			      + dbg->sectiondata[IDX_debug_abbrev]->d_size);
   const unsigned char *start_abbrevp = abbrevp;
   unsigned int code;
+  // We start off with abbrevp at offset, which is checked above.
   get_uleb128 (code, abbrevp, end);
 
   /* Check whether this code is already in the hash table.  */
diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
index d0d78163..4e8c047b 100644
--- a/libdw/dwarf_getlocation.c
+++ b/libdw/dwarf_getlocation.c
@@ -545,7 +545,7 @@ __libdw_intern_expression (Dwarf *dbg, bool other_byte_order,
 	case DW_OP_deref_type:
 	case DW_OP_GNU_deref_type:
 	case DW_OP_xderef_type:
-	  if (unlikely (data + 1 >= end_data))
+	  if (unlikely (data + 2 >= end_data))
 	    goto invalid;
 	  newloc->number = *data++;
 	  get_uleb128 (newloc->number2, data, end_data);
diff --git a/libdw/dwarf_getsrclines.c b/libdw/dwarf_getsrclines.c
index 2c1d7a40..df003c5f 100644
--- a/libdw/dwarf_getsrclines.c
+++ b/libdw/dwarf_getsrclines.c
@@ -572,6 +572,8 @@ read_srclines (Dwarf *dbg,
 	goto invalid_data;
 
       size_t nfiles;
+      if ((size_t) (lineendp - linep) < 1)
+	goto invalid_data;
       get_uleb128 (nfiles, linep, lineendp);
 
       if (nforms == 0 && nfiles != 0)
diff --git a/libdw/encoded-value.h b/libdw/encoded-value.h
index d4e01924..4566ef96 100644
--- a/libdw/encoded-value.h
+++ b/libdw/encoded-value.h
@@ -196,10 +196,14 @@ read_encoded_value (const Dwarf_CFI *cache, uint8_t encoding,
       break;
 
     case DW_EH_PE_uleb128:
+      if (*p >= endp)
+	goto invalid_data;
       get_uleb128 (value, *p, endp);
       break;
 
     case DW_EH_PE_sleb128:
+      if (*p >= endp)
+	goto invalid_data;
       get_sleb128 (value, *p, endp);
       break;
 
diff --git a/libdw/fde.c b/libdw/fde.c
index f5f6fbe1..73d551b6 100644
--- a/libdw/fde.c
+++ b/libdw/fde.c
@@ -104,9 +104,12 @@ intern_fde (Dwarf_CFI *cache, const Dwarf_FDE *entry)
       /* The CIE augmentation says the FDE has a DW_FORM_block
 	 before its actual instruction stream.  */
       Dwarf_Word len;
+      if (fde->instructions >= fde->instructions_end)
+	goto invalid;
       get_uleb128 (len, fde->instructions, fde->instructions_end);
       if ((Dwarf_Word) (fde->instructions_end - fde->instructions) < len)
 	{
+	invalid:
 	  free (fde);
 	  __libdw_seterrno (DWARF_E_INVALID_DWARF);
 	  return NULL;
diff --git a/libdw/libdw_form.c b/libdw/libdw_form.c
index c83dfb39..40045440 100644
--- a/libdw/libdw_form.c
+++ b/libdw/libdw_form.c
@@ -88,6 +88,8 @@ __libdw_form_val_compute_len (struct Dwarf_CU *cu, unsigned int form,
 
     case DW_FORM_block:
     case DW_FORM_exprloc:
+      if (valp >= endp)
+       goto invalid;
       get_uleb128 (u128, valp, endp);
       result = u128 + (valp - startp);
       break;
@@ -111,6 +113,8 @@ __libdw_form_val_compute_len (struct Dwarf_CU *cu, unsigned int form,
     case DW_FORM_strx:
     case DW_FORM_GNU_addr_index:
     case DW_FORM_GNU_str_index:
+      if (valp >= endp)
+       goto invalid;
       get_uleb128 (u128, valp, endp);
       result = valp - startp;
       break;
@@ -119,6 +123,8 @@ __libdw_form_val_compute_len (struct Dwarf_CU *cu, unsigned int form,
       /* The amount of data to skip in the DIE is the size of the actual
 	 FORM data (which is __libdw_form_val_len) plus the size of the
 	 uleb128 encoding that FORM (which is valp - startp).  */
+      if (valp >= endp)
+	goto invalid;
       get_uleb128 (u128, valp, endp);
       if (*valp == DW_FORM_indirect || *valp == DW_FORM_implicit_const)
 	return (size_t) -1;

  reply	other threads:[~2023-02-07 17:17 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-25 16:05 vvvvvv
2023-01-26 21:05 ` Mark Wielaard
2023-02-07 16:17   ` Aleksei Vetrov
2023-02-07 17:17     ` Mark Wielaard [this message]
2023-02-11 23:42 ` Mark Wielaard
2023-02-13 20:03   ` Aleksei Vetrov
2023-02-13 20:10     ` [PATCH v2] " vvvvvv
2023-02-14 15:44       ` Mark Wielaard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7485818501031d589569f23e014b98df4e68cf74.camel@klomp.org \
    --to=mark@klomp.org \
    --cc=elfutils-devel@sourceware.org \
    --cc=kernel-team@android.com \
    --cc=maennich@google.com \
    --cc=vvvvvv@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).