public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* Protect mips_hi16_list from fuzzed debug info
@ 2023-02-06 12:33 Alan Modra
  2023-02-07 14:11 ` Maciej W. Rozycki
  0 siblings, 1 reply; 11+ messages in thread
From: Alan Modra @ 2023-02-06 12:33 UTC (permalink / raw)
  To: binutils; +Cc: Chenghua Xu, Maciej W. Rozycki

This is another fix for the testcase mentioned in
https://sourceware.org/pipermail/binutils/2023-February/125915.html
either of which will stop the addr2line segfault.  This one also fixes
a potential problem when linking corrupted debug info.

OK to apply?

	* elfxx-mips.c (struct mips_elf_obj_tdata): Add freeze_mips_hi16_list.
	(_bfd_mips_elf_hi16_reloc): Heed freeze_mips_hi16_list.
	(_bfd_mips_elf_lo16_reloc): Likewise.
	(find_nearest_line): Rename from _bfd_mips_elf_find_nearest_line
	and make static.
	(_bfd_mips_elf_find_nearest_line): New wrapper function setting
	freeze_mips_hi16_list around find_nearest_line.

diff --git a/bfd/elfxx-mips.c b/bfd/elfxx-mips.c
index e9fb61ff9e7..20934c477e2 100644
--- a/bfd/elfxx-mips.c
+++ b/bfd/elfxx-mips.c
@@ -599,6 +599,7 @@ struct mips_elf_obj_tdata
   asection *elf_text_section;
 
   struct mips_hi16 *mips_hi16_list;
+  bool freeze_mips_hi16_list;
 };
 
 /* Get MIPS ELF private object data from BFD's tdata.  */
@@ -2534,11 +2535,14 @@ _bfd_mips_elf_hi16_reloc (bfd *abfd, arelent *reloc_entry,
   if (reloc_entry->address > bfd_get_section_limit (abfd, input_section))
     return bfd_reloc_outofrange;
 
+  tdata = mips_elf_tdata (abfd);
+  if (tdata->freeze_mips_hi16_list)
+    return bfd_reloc_outofrange;
+
   n = bfd_malloc (sizeof *n);
   if (n == NULL)
     return bfd_reloc_outofrange;
 
-  tdata = mips_elf_tdata (abfd);
   n->next = tdata->mips_hi16_list;
   n->data = data;
   n->input_section = input_section;
@@ -2596,38 +2600,41 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
 			       location);
 
   tdata = mips_elf_tdata (abfd);
-  while (tdata->mips_hi16_list != NULL)
-    {
-      bfd_reloc_status_type ret;
-      struct mips_hi16 *hi;
-
-      hi = tdata->mips_hi16_list;
-
-      /* R_MIPS*_GOT16 relocations are something of a special case.  We
-	 want to install the addend in the same way as for a R_MIPS*_HI16
-	 relocation (with a rightshift of 16).  However, since GOT16
-	 relocations can also be used with global symbols, their howto
-	 has a rightshift of 0.  */
-      if (hi->rel.howto->type == R_MIPS_GOT16)
-	hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MIPS_HI16, false);
-      else if (hi->rel.howto->type == R_MIPS16_GOT16)
-	hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MIPS16_HI16, false);
-      else if (hi->rel.howto->type == R_MICROMIPS_GOT16)
-	hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MICROMIPS_HI16, false);
-
-      /* VALLO is a signed 16-bit number.  Bias it by 0x8000 so that any
-	 carry or borrow will induce a change of +1 or -1 in the high part.  */
-      hi->rel.addend += (vallo + 0x8000) & 0xffff;
-
-      ret = _bfd_mips_elf_generic_reloc (abfd, &hi->rel, symbol, hi->data,
-					 hi->input_section, output_bfd,
-					 error_message);
-      if (ret != bfd_reloc_ok)
-	return ret;
-
-      tdata->mips_hi16_list = hi->next;
-      free (hi);
-    }
+  if (!tdata->freeze_mips_hi16_list)
+    while (tdata->mips_hi16_list != NULL)
+      {
+	bfd_reloc_status_type ret;
+	struct mips_hi16 *hi;
+
+	hi = tdata->mips_hi16_list;
+
+	/* R_MIPS*_GOT16 relocations are something of a special case.
+	   We want to install the addend in the same way as for a
+	   R_MIPS*_HI16 relocation (with a rightshift of 16).
+	   However, since GOT16 relocations can also be used with
+	   global symbols, their howto has a rightshift of 0.  */
+	if (hi->rel.howto->type == R_MIPS_GOT16)
+	  hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MIPS_HI16, false);
+	else if (hi->rel.howto->type == R_MIPS16_GOT16)
+	  hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MIPS16_HI16, false);
+	else if (hi->rel.howto->type == R_MICROMIPS_GOT16)
+	  hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MICROMIPS_HI16,
+						   false);
+
+	/* VALLO is a signed 16-bit number.  Bias it by 0x8000 so that
+	   any carry or borrow will induce a change of +1 or -1 in the
+	   high part.  */
+	hi->rel.addend += (vallo + 0x8000) & 0xffff;
+
+	ret = _bfd_mips_elf_generic_reloc (abfd, &hi->rel, symbol, hi->data,
+					   hi->input_section, output_bfd,
+					   error_message);
+	if (ret != bfd_reloc_ok)
+	  return ret;
+
+	tdata->mips_hi16_list = hi->next;
+	free (hi);
+      }
 
   return _bfd_mips_elf_generic_reloc (abfd, reloc_entry, symbol, data,
 				      input_section, output_bfd,
@@ -13118,13 +13125,11 @@ struct mips_elf_find_line
   struct ecoff_find_line i;
 };
 
-bool
-_bfd_mips_elf_find_nearest_line (bfd *abfd, asymbol **symbols,
-				 asection *section, bfd_vma offset,
-				 const char **filename_ptr,
-				 const char **functionname_ptr,
-				 unsigned int *line_ptr,
-				 unsigned int *discriminator_ptr)
+static bool
+find_nearest_line (bfd *abfd, asymbol **symbols,
+		   asection *section, bfd_vma offset,
+		   const char **filename_ptr, const char **functionname_ptr,
+		   unsigned int *line_ptr, unsigned int *discriminator_ptr)
 {
   asection *msec;
 
@@ -13228,6 +13233,25 @@ _bfd_mips_elf_find_nearest_line (bfd *abfd, asymbol **symbols,
 				     line_ptr, discriminator_ptr);
 }
 
+bool
+_bfd_mips_elf_find_nearest_line (bfd *abfd, asymbol **symbols,
+				 asection *section, bfd_vma offset,
+				 const char **filename_ptr,
+				 const char **functionname_ptr,
+				 unsigned int *line_ptr,
+				 unsigned int *discriminator_ptr)
+{
+  /* Debug info should not contain hi16 or lo16 relocs.  If it does
+     then someone is playing fuzzing games.  Altering the hi16 list
+     during linking when printing an error message is bad.  */
+  mips_elf_tdata (abfd)->freeze_mips_hi16_list = true;
+  bool ret = find_nearest_line (abfd, symbols, section, offset,
+				filename_ptr, functionname_ptr,
+				line_ptr, discriminator_ptr);
+  mips_elf_tdata (abfd)->freeze_mips_hi16_list = false;
+  return ret;
+}
+
 bool
 _bfd_mips_elf_find_inliner_info (bfd *abfd,
 				 const char **filename_ptr,
@@ -13321,16 +13345,19 @@ _bfd_elf_mips_get_relocated_section_contents
 	 mips_hi16_list that point into this section's data.  Data
 	 will typically be freed on return from this function.  */
       tdata = mips_elf_tdata (abfd);
-      hip = &tdata->mips_hi16_list;
-      while ((hi = *hip) != NULL)
+      if (!tdata->freeze_mips_hi16_list)
 	{
-	  if (hi->input_section == input_section)
+	  hip = &tdata->mips_hi16_list;
+	  while ((hi = *hip) != NULL)
 	    {
-	      *hip = hi->next;
-	      free (hi);
+	      if (hi->input_section == input_section)
+		{
+		  *hip = hi->next;
+		  free (hi);
+		}
+	      else
+		hip = &hi->next;
 	    }
-	  else
-	    hip = &hi->next;
 	}
       if (orig_data == NULL)
 	free (data);

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-06 12:33 Protect mips_hi16_list from fuzzed debug info Alan Modra
@ 2023-02-07 14:11 ` Maciej W. Rozycki
  2023-02-08  0:32   ` Alan Modra
  0 siblings, 1 reply; 11+ messages in thread
From: Maciej W. Rozycki @ 2023-02-07 14:11 UTC (permalink / raw)
  To: Alan Modra; +Cc: binutils, Chenghua Xu

On Mon, 6 Feb 2023, Alan Modra wrote:

> This is another fix for the testcase mentioned in
> https://sourceware.org/pipermail/binutils/2023-February/125915.html
> either of which will stop the addr2line segfault.  This one also fixes
> a potential problem when linking corrupted debug info.

 Hmm, from the other message I gather DWARF info is not going to be 
processed twice anymore, so why is this change to the MIPS backend also 
required?

 Also would it be possible to have a MIPS test case for your change?  
Orchestrating HI16/LO16 relocations in a debug section should be pretty 
straightforward with the use of the `.reloc' pseudo-op.  This might help 
me understand what is really going on here.

 Also one concern about code proposed itself, see below.

> @@ -2596,38 +2600,41 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
[...]
> +  if (!tdata->freeze_mips_hi16_list)

 This conditional ought to wrap all the preceding code in the function as 
well (including the declaration block), because it's sole purpose is to 
retrieve `vallo', which is only used within the `while' loop now placed 
under the conditional...

> +    while (tdata->mips_hi16_list != NULL)
> +      {
> +	bfd_reloc_status_type ret;
> +	struct mips_hi16 *hi;
> +
> +	hi = tdata->mips_hi16_list;
> +
> +	/* R_MIPS*_GOT16 relocations are something of a special case.
> +	   We want to install the addend in the same way as for a
> +	   R_MIPS*_HI16 relocation (with a rightshift of 16).
> +	   However, since GOT16 relocations can also be used with
> +	   global symbols, their howto has a rightshift of 0.  */
> +	if (hi->rel.howto->type == R_MIPS_GOT16)
> +	  hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MIPS_HI16, false);
> +	else if (hi->rel.howto->type == R_MIPS16_GOT16)
> +	  hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MIPS16_HI16, false);
> +	else if (hi->rel.howto->type == R_MICROMIPS_GOT16)
> +	  hi->rel.howto = MIPS_ELF_RTYPE_TO_HOWTO (abfd, R_MICROMIPS_HI16,
> +						   false);
> +
> +	/* VALLO is a signed 16-bit number.  Bias it by 0x8000 so that
> +	   any carry or borrow will induce a change of +1 or -1 in the
> +	   high part.  */
> +	hi->rel.addend += (vallo + 0x8000) & 0xffff;

 ... here.

> +  /* Debug info should not contain hi16 or lo16 relocs.  If it does
> +     then someone is playing fuzzing games.  Altering the hi16 list
> +     during linking when printing an error message is bad.  */

 And I really cannot extract the meaning of the second sentence here.  I 
mean I know what it literally means, but that does not really tell me 
anything.  Why would altering the list be a problem given that we're 
bailing out anyway?  I'm confused.

  Maciej

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-07 14:11 ` Maciej W. Rozycki
@ 2023-02-08  0:32   ` Alan Modra
  2023-02-08 23:28     ` Maciej W. Rozycki
  0 siblings, 1 reply; 11+ messages in thread
From: Alan Modra @ 2023-02-08  0:32 UTC (permalink / raw)
  To: Maciej W. Rozycki; +Cc: binutils, Chenghua Xu

On Tue, Feb 07, 2023 at 02:11:23PM +0000, Maciej W. Rozycki wrote:
> On Mon, 6 Feb 2023, Alan Modra wrote:
> 
> > This is another fix for the testcase mentioned in
> > https://sourceware.org/pipermail/binutils/2023-February/125915.html
> > either of which will stop the addr2line segfault.  This one also fixes
> > a potential problem when linking corrupted debug info.
> 
>  Hmm, from the other message I gather DWARF info is not going to be 
> processed twice anymore, so why is this change to the MIPS backend also 
> required?

See below.

>  Also would it be possible to have a MIPS test case for your change?  
> Orchestrating HI16/LO16 relocations in a debug section should be pretty 
> straightforward with the use of the `.reloc' pseudo-op.  This might help 
> me understand what is really going on here.

I could do that, but my time is limited for mips problems.  I'll
understand if you say the patch is not worth committing just to cover
a potential fuzzed object file segfault.

>  Also one concern about code proposed itself, see below.
> 
> > @@ -2596,38 +2600,41 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
> [...]
> > +  if (!tdata->freeze_mips_hi16_list)
> 
>  This conditional ought to wrap all the preceding code in the function as 
> well (including the declaration block), because it's sole purpose is to 
> retrieve `vallo', which is only used within the `while' loop now placed 
> under the conditional...

OK, done.  I'm presuming I don't need to repost the patch.

> > +  /* Debug info should not contain hi16 or lo16 relocs.  If it does
> > +     then someone is playing fuzzing games.  Altering the hi16 list
> > +     during linking when printing an error message is bad.  */

s/an error/a warning/

>  And I really cannot extract the meaning of the second sentence here.  I 
> mean I know what it literally means, but that does not really tell me 
> anything.  Why would altering the list be a problem given that we're 
> bailing out anyway?  I'm confused.

A number of the error/warning handlers in ldmain.c use %C.  This can
cause debug info to be parsed for the first time in order to print
file/function/line.  If one of those warnings is triggered after some
hi16 relocs have been processed but before the matching lo16 reloc is
handled, *and* the debug info is corrupted with a lo16 reloc, then the
mips_hi16_list will be flushed with the result that printing a warning
changes linker output.  It is also possible that corrupted debug info
adds to the hi16 list, with the result that when the linker handles a
later lo16 reloc in a text section, ld will segfault accessing
mips_hi16.data after the debug buffers have be freed.  Is this likely
to happen in the real world?  No, of course not, but fuzzers keep
finding this sort of thing, and the occasional real problem found by
the fuzzers is enough that I haven't yet decided to ignore all fuzzing
reports.

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-08  0:32   ` Alan Modra
@ 2023-02-08 23:28     ` Maciej W. Rozycki
  2023-02-09  0:29       ` Alan Modra
  0 siblings, 1 reply; 11+ messages in thread
From: Maciej W. Rozycki @ 2023-02-08 23:28 UTC (permalink / raw)
  To: Alan Modra; +Cc: binutils, Chenghua Xu

On Wed, 8 Feb 2023, Alan Modra wrote:

> >  Also would it be possible to have a MIPS test case for your change?  
> > Orchestrating HI16/LO16 relocations in a debug section should be pretty 
> > straightforward with the use of the `.reloc' pseudo-op.  This might help 
> > me understand what is really going on here.
> 
> I could do that, but my time is limited for mips problems.

 Completely understood (and mine BTW too).

>  I'll
> understand if you say the patch is not worth committing just to cover
> a potential fuzzed object file segfault.

 No, not at all, I agree we do need to maintain at least basic quality,
and preventing crashes from happening even for garbage input is about the 
minimum required.

> >  Also one concern about code proposed itself, see below.
> > 
> > > @@ -2596,38 +2600,41 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
> > [...]
> > > +  if (!tdata->freeze_mips_hi16_list)
> > 
> >  This conditional ought to wrap all the preceding code in the function as 
> > well (including the declaration block), because it's sole purpose is to 
> > retrieve `vallo', which is only used within the `while' loop now placed 
> > under the conditional...
> 
> OK, done.  I'm presuming I don't need to repost the patch.

 Sure, no need to repost.

> > > +  /* Debug info should not contain hi16 or lo16 relocs.  If it does
> > > +     then someone is playing fuzzing games.  Altering the hi16 list
> > > +     during linking when printing an error message is bad.  */
> 
> s/an error/a warning/

 Thanks, that changes things a bit.

> >  And I really cannot extract the meaning of the second sentence here.  I 
> > mean I know what it literally means, but that does not really tell me 
> > anything.  Why would altering the list be a problem given that we're 
> > bailing out anyway?  I'm confused.
> 
> A number of the error/warning handlers in ldmain.c use %C.  This can
> cause debug info to be parsed for the first time in order to print
> file/function/line.

 Hmm, I find it an interesting general phenomenon.  What it means the 
order sections are processed in can change depending on whether a warning 
has been issued in the course or not.  Is it not a problem in the first 
place?  Shouldn't we give priority to debugs sections and parse them first 
then before moving on to the other sections?

>  If one of those warnings is triggered after some
> hi16 relocs have been processed but before the matching lo16 reloc is
> handled, *and* the debug info is corrupted with a lo16 reloc, then the
> mips_hi16_list will be flushed with the result that printing a warning
> changes linker output.

 OK, but wouldn't these relocs be resolved in the same problematic way 
anyway when the turn came to processing debug sections?

>  It is also possible that corrupted debug info
> adds to the hi16 list, with the result that when the linker handles a
> later lo16 reloc in a text section, ld will segfault accessing
> mips_hi16.data after the debug buffers have be freed.

 This smells a HI16/LO16 pair processing bug to me by itself.  Such pairs 
must come from the same relocation section, so any HI16/LO16 relocations 
in a relocation section associated with a debug section are not supposed 
to influence any such relocations referring to the text section.  I think 
I need to look into it (though see above as to my availability).

>  Is this likely
> to happen in the real world?  No, of course not, but fuzzers keep
> finding this sort of thing, and the occasional real problem found by
> the fuzzers is enough that I haven't yet decided to ignore all fuzzing
> reports.

 Well, we must not crash, period!

 I hope there's no hurry with this change, so please let me chew it over 
yet for a couple days.

 Long-term I think the MIPS target would benefit from proper day-to-day 
maintenance (and I can't just clone myself no matter how much I might 
desire).

  Maciej

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-08 23:28     ` Maciej W. Rozycki
@ 2023-02-09  0:29       ` Alan Modra
  2023-02-09  1:26         ` Maciej W. Rozycki
  0 siblings, 1 reply; 11+ messages in thread
From: Alan Modra @ 2023-02-09  0:29 UTC (permalink / raw)
  To: Maciej W. Rozycki; +Cc: binutils, Chenghua Xu

On Wed, Feb 08, 2023 at 11:28:08PM +0000, Maciej W. Rozycki wrote:
> On Wed, 8 Feb 2023, Alan Modra wrote:
> > A number of the error/warning handlers in ldmain.c use %C.  This can
> > cause debug info to be parsed for the first time in order to print
> > file/function/line.
> 
>  Hmm, I find it an interesting general phenomenon.  What it means the 
> order sections are processed in can change depending on whether a warning 
> has been issued in the course or not.  Is it not a problem in the first 
> place?  Shouldn't we give priority to debugs sections and parse them first 
> then before moving on to the other sections?

The normal linker processing of sections occurs as usual.  Parsing of
the debug info is separate to this, relocations being applied to
.debug_info by bfd_simple_get_relocated_section_contents for the
error/warning message.  That relocated copy of .debug_info is not used
by the linker to produce the output file .debug_info.

> >  If one of those warnings is triggered after some
> > hi16 relocs have been processed but before the matching lo16 reloc is
> > handled, *and* the debug info is corrupted with a lo16 reloc, then the
> > mips_hi16_list will be flushed with the result that printing a warning
> > changes linker output.
> 
>  OK, but wouldn't these relocs be resolved in the same problematic way 
> anyway when the turn came to processing debug sections?
> 
> >  It is also possible that corrupted debug info
> > adds to the hi16 list, with the result that when the linker handles a
> > later lo16 reloc in a text section, ld will segfault accessing
> > mips_hi16.data after the debug buffers have be freed.
> 
>  This smells a HI16/LO16 pair processing bug to me by itself.  Such pairs 
> must come from the same relocation section, so any HI16/LO16 relocations 
> in a relocation section associated with a debug section are not supposed 
> to influence any such relocations referring to the text section.  I think 
> I need to look into it (though see above as to my availability).

If it is really true that hi16/lo16 pairs are always in the same
section, then we wouldn't need freeze_mips_hi16_list.  Instead it
would be much better to attach the list to mips_elf_section_data.  I
wasn't sure enough to do that, given things like gcc's hot/cold
section partitioning, when I moved the mip_hi16_list to be per-bfd.

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-09  0:29       ` Alan Modra
@ 2023-02-09  1:26         ` Maciej W. Rozycki
  2023-02-09 10:14           ` Alan Modra
  0 siblings, 1 reply; 11+ messages in thread
From: Maciej W. Rozycki @ 2023-02-09  1:26 UTC (permalink / raw)
  To: Alan Modra; +Cc: binutils, Chenghua Xu

On Thu, 9 Feb 2023, Alan Modra wrote:

> >  Hmm, I find it an interesting general phenomenon.  What it means the 
> > order sections are processed in can change depending on whether a warning 
> > has been issued in the course or not.  Is it not a problem in the first 
> > place?  Shouldn't we give priority to debugs sections and parse them first 
> > then before moving on to the other sections?
> 
> The normal linker processing of sections occurs as usual.  Parsing of
> the debug info is separate to this, relocations being applied to
> .debug_info by bfd_simple_get_relocated_section_contents for the
> error/warning message.  That relocated copy of .debug_info is not used
> by the linker to produce the output file .debug_info.

 Ack, makes sense to me.

> >  This smells a HI16/LO16 pair processing bug to me by itself.  Such pairs 
> > must come from the same relocation section, so any HI16/LO16 relocations 
> > in a relocation section associated with a debug section are not supposed 
> > to influence any such relocations referring to the text section.  I think 
> > I need to look into it (though see above as to my availability).
> 
> If it is really true that hi16/lo16 pairs are always in the same
> section, then we wouldn't need freeze_mips_hi16_list.

 It is, by definition[1]:

"The AHL addend is a composite computed from the addends of two 
consecutive relocation entries.  Each relocation type of R_MIPS_HI16 must 
have an associated R_MIPS_LO16 entry immediately following it in the list 
of relocations.

"These relocation entries are always processed as a pair and both addend 
fields contribute to the AHL addend.  If AHI and ALO are the addends from 
the paired R_MIPS_HI16 and R_MIPS_LO16 entries, then the addend AHL is com 
puted as (AHI << 16) + (short)ALO.  R_MIPS_LO16 entries without an 
R_MIPS_HI16 entry immediately preceding are orphaned and the previously 
defined R_MIPS_HI16 is used for computing the addend."

Two consecutive entries must come from a single .rel.* section or they 
wouldn't be consecutive (there's no relationship between different .rel.* 
sections).

 While not explicitly stated I don't think anyone considered reusing an 
R_MIPS_HI16 reloc defined in another relocation section for an orphaned 
R_MIPS_LO16 reloc.  That would IMO make no semantic sense (for instance 
you can shuffle sections via a linker script in a relocatable link) and I 
think we ought not strive for doing so (i.e. there is not supposed to be 
any "previously defined R_MIPS_HI16" at the beginning of any given reloc 
section).

 Of course this consideration applies to the REL format only (i.e. o32); 
there's no need to track HI16/LO16 pairing with RELA objects as the addend 
is readily available.  AFAICT we don't get this right either: for 
`!partial_inplace' howtos we need not use `_bfd_mips_elf_hi16_reloc' or 
`_bfd_mips_elf_lo16_reloc' and just `_bfd_mips_elf_generic_reloc' will do.

>  Instead it
> would be much better to attach the list to mips_elf_section_data.  I
> wasn't sure enough to do that, given things like gcc's hot/cold
> section partitioning, when I moved the mip_hi16_list to be per-bfd.

 That sounds like a plan to me, but it's unrealistic for me to commit to 
it in the next two days (and then I head out to California for a week).

References:

[1] "SYSTEM V APPLICATION BINARY INTERFACE, MIPS RISC Processor
    Supplement, 3rd Edition", Section "Relocation", pp. 4-17, 4-18

  Maciej

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-09  1:26         ` Maciej W. Rozycki
@ 2023-02-09 10:14           ` Alan Modra
  2023-02-10 18:13             ` Maciej W. Rozycki
  2023-05-20 11:41             ` Alan Modra
  0 siblings, 2 replies; 11+ messages in thread
From: Alan Modra @ 2023-02-09 10:14 UTC (permalink / raw)
  To: Maciej W. Rozycki; +Cc: binutils, Chenghua Xu

[-- Attachment #1: Type: text/plain, Size: 2875 bytes --]

On Thu, Feb 09, 2023 at 01:26:11AM +0000, Maciej W. Rozycki wrote:
> On Thu, 9 Feb 2023, Alan Modra wrote:
> > If it is really true that hi16/lo16 pairs are always in the same
> > section, then we wouldn't need freeze_mips_hi16_list.
> 
>  It is, by definition[1]:
> 
> "The AHL addend is a composite computed from the addends of two 
> consecutive relocation entries.  Each relocation type of R_MIPS_HI16 must 
> have an associated R_MIPS_LO16 entry immediately following it in the list 
> of relocations.
> 
> "These relocation entries are always processed as a pair and both addend 
> fields contribute to the AHL addend.  If AHI and ALO are the addends from 
> the paired R_MIPS_HI16 and R_MIPS_LO16 entries, then the addend AHL is com 
> puted as (AHI << 16) + (short)ALO.  R_MIPS_LO16 entries without an 
> R_MIPS_HI16 entry immediately preceding are orphaned and the previously 
> defined R_MIPS_HI16 is used for computing the addend."
> 
> Two consecutive entries must come from a single .rel.* section or they 
> wouldn't be consecutive (there's no relationship between different .rel.* 
> sections).

Yes, but see the comment before _bfd_mips_elf_hi16_reloc

   The ABI requires that the *LO16 immediately follow the *HI16.
   However, as a GNU extension, we permit an arbitrary number of
   *HI16s to be associated with a single *LO16.  This significantly
   simplies the relocation handling in gcc.

>  While not explicitly stated I don't think anyone considered reusing an 
> R_MIPS_HI16 reloc defined in another relocation section for an orphaned 
> R_MIPS_LO16 reloc.  That would IMO make no semantic sense (for instance 
> you can shuffle sections via a linker script in a relocatable link) and I 
> think we ought not strive for doing so (i.e. there is not supposed to be 
> any "previously defined R_MIPS_HI16" at the beginning of any given reloc 
> section).
> 
>  Of course this consideration applies to the REL format only (i.e. o32); 
> there's no need to track HI16/LO16 pairing with RELA objects as the addend 
> is readily available.  AFAICT we don't get this right either: for 
> `!partial_inplace' howtos we need not use `_bfd_mips_elf_hi16_reloc' or 
> `_bfd_mips_elf_lo16_reloc' and just `_bfd_mips_elf_generic_reloc' will do.
> 
> >  Instead it
> > would be much better to attach the list to mips_elf_section_data.  I
> > wasn't sure enough to do that, given things like gcc's hot/cold
> > section partitioning, when I moved the mip_hi16_list to be per-bfd.
> 
>  That sounds like a plan to me, but it's unrealistic for me to commit to 
> it in the next two days (and then I head out to California for a week).

Patch attached.  Note the FIXME.

> References:
> 
> [1] "SYSTEM V APPLICATION BINARY INTERFACE, MIPS RISC Processor
>     Supplement, 3rd Edition", Section "Relocation", pp. 4-17, 4-18

-- 
Alan Modra
Australia Development Lab, IBM

[-- Attachment #2: 0002-Move-mips_hi16_list-to-mips_elf_section_data.patch --]
[-- Type: text/x-diff, Size: 6946 bytes --]

From 4febdc8426f3b834bc5bb5a9e6cc67f2315018c4 Mon Sep 17 00:00:00 2001
From: Alan Modra <amodra@gmail.com>
Date: Thu, 9 Feb 2023 14:24:49 +1030
Subject: Move mips_hi16_list to mips_elf_section_data

	* elfxx-mips.c (struct mips_hi16): Move earlier, deleting
	input_section and data fields.
	(struct _mips_elf_section_data): Add mips_hi16_list.
	(struct mips_elf_obj_tdata): Delete mips_hi16_list.
	(free_mips_hi16_list): New function.
	(_bfd_mips_elf_close_and_cleanup): Adjust to suit new location
	of mips_hi16_list.
	(_bfd_mips_elf_hi16_reloc, _bfd_mips_elf_lo16_reloc): Likewise.
	(_bfd_elf_mips_get_relocated_section_contents): Likewise.

diff --git a/bfd/elfxx-mips.c b/bfd/elfxx-mips.c
index e9fb61ff9e7..8dcfba3c27e 100644
--- a/bfd/elfxx-mips.c
+++ b/bfd/elfxx-mips.c
@@ -222,6 +222,15 @@ struct mips_elf_traverse_got_arg
   int value;
 };
 
+/* Used to store a REL high-part relocation such as R_MIPS_HI16 or
+   R_MIPS_GOT16.  */
+
+struct mips_hi16
+{
+  struct mips_hi16 *next;
+  arelent rel;
+};
+
 struct _mips_elf_section_data
 {
   struct bfd_elf_section_data elf;
@@ -229,6 +238,8 @@ struct _mips_elf_section_data
   {
     bfd_byte *tdata;
   } u;
+
+  struct mips_hi16 *mips_hi16_list;
 };
 
 #define mips_elf_section_data(sec) \
@@ -549,19 +560,6 @@ struct mips_htab_traverse_info
   bool error;
 };
 
-/* Used to store a REL high-part relocation such as R_MIPS_HI16 or
-   R_MIPS_GOT16.  REL is the relocation, INPUT_SECTION is the section
-   that contains the relocation field and DATA points to the start of
-   INPUT_SECTION.  */
-
-struct mips_hi16
-{
-  struct mips_hi16 *next;
-  bfd_byte *data;
-  asection *input_section;
-  arelent rel;
-};
-
 /* MIPS ELF private object data.  */
 
 struct mips_elf_obj_tdata
@@ -597,8 +595,6 @@ struct mips_elf_obj_tdata
   asymbol *elf_text_symbol;
   asection *elf_data_section;
   asection *elf_text_section;
-
-  struct mips_hi16 *mips_hi16_list;
 };
 
 /* Get MIPS ELF private object data from BFD's tdata.  */
@@ -1380,19 +1376,37 @@ _bfd_mips_elf_mkobject (bfd *abfd)
 				  MIPS_ELF_DATA);
 }
 
+/* Free the mips_hi16_list attached to S.  Return true if there were
+   unmatched hi16 relocs.  */
+
+static bool
+free_mips_hi16_list (asection *s)
+{
+  struct mips_hi16 *hi;
+  struct mips_hi16 **hip = &mips_elf_section_data (s)->mips_hi16_list;
+  bool ret = *hip != NULL;
+
+  while ((hi = *hip) != NULL)
+    {
+      *hip = hi->next;
+      free (hi);
+    }
+  return ret;
+}
+
 bool
 _bfd_mips_elf_close_and_cleanup (bfd *abfd)
 {
-  struct mips_elf_obj_tdata *tdata = mips_elf_tdata (abfd);
-  if (tdata != NULL && bfd_get_format (abfd) == bfd_object)
+  if (bfd_get_format (abfd) == bfd_object)
     {
-      BFD_ASSERT (tdata->root.object_id == MIPS_ELF_DATA);
-      while (tdata->mips_hi16_list != NULL)
-	{
-	  struct mips_hi16 *hi = tdata->mips_hi16_list;
-	  tdata->mips_hi16_list = hi->next;
-	  free (hi);
-	}
+      for (asection *s = abfd->sections; s; s = s->next)
+	if (free_mips_hi16_list (s) && 0)
+	   /* FIXME: Disabled until _bfd_mips_elf_got16_reloc is
+	      taught that vxworks does not pair the got16 relocs with
+	      lo16 relocs, and thus should not put them on this list.
+	      See gas/config/tc-mips.c reloc_needs_lo_p.  */
+	  _bfd_error_handler
+	    (_("%pB(%pA): unmatched hi16 reloc"), abfd, s);
     }
   return _bfd_elf_close_and_cleanup (abfd);
 }
@@ -2524,26 +2538,22 @@ _bfd_mips_elf_gprel16_with_gp (bfd *abfd, asymbol *symbol,
 
 bfd_reloc_status_type
 _bfd_mips_elf_hi16_reloc (bfd *abfd, arelent *reloc_entry,
-			  asymbol *symbol ATTRIBUTE_UNUSED, void *data,
+			  asymbol *symbol ATTRIBUTE_UNUSED,
+			  void *data ATTRIBUTE_UNUSED,
 			  asection *input_section, bfd *output_bfd,
 			  char **error_message ATTRIBUTE_UNUSED)
 {
-  struct mips_hi16 *n;
-  struct mips_elf_obj_tdata *tdata;
-
   if (reloc_entry->address > bfd_get_section_limit (abfd, input_section))
     return bfd_reloc_outofrange;
 
-  n = bfd_malloc (sizeof *n);
+  struct mips_hi16 *n = bfd_malloc (sizeof *n);
   if (n == NULL)
     return bfd_reloc_outofrange;
 
-  tdata = mips_elf_tdata (abfd);
-  n->next = tdata->mips_hi16_list;
-  n->data = data;
-  n->input_section = input_section;
+  struct _mips_elf_section_data *sdata = mips_elf_section_data (input_section);
+  n->next = sdata->mips_hi16_list;
   n->rel = *reloc_entry;
-  tdata->mips_hi16_list = n;
+  sdata->mips_hi16_list = n;
 
   if (output_bfd != NULL)
     reloc_entry->address += input_section->output_offset;
@@ -2583,7 +2593,6 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
 {
   bfd_vma vallo;
   bfd_byte *location = (bfd_byte *) data + reloc_entry->address;
-  struct mips_elf_obj_tdata *tdata;
 
   if (!bfd_reloc_offset_in_range (reloc_entry->howto, abfd, input_section,
 				  reloc_entry->address))
@@ -2595,13 +2604,11 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
   _bfd_mips_elf_reloc_shuffle (abfd, reloc_entry->howto->type, false,
 			       location);
 
-  tdata = mips_elf_tdata (abfd);
-  while (tdata->mips_hi16_list != NULL)
+  struct _mips_elf_section_data *sdata = mips_elf_section_data (input_section);
+  while (sdata->mips_hi16_list != NULL)
     {
       bfd_reloc_status_type ret;
-      struct mips_hi16 *hi;
-
-      hi = tdata->mips_hi16_list;
+      struct mips_hi16 *hi = sdata->mips_hi16_list;
 
       /* R_MIPS*_GOT16 relocations are something of a special case.  We
 	 want to install the addend in the same way as for a R_MIPS*_HI16
@@ -2619,13 +2626,13 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
 	 carry or borrow will induce a change of +1 or -1 in the high part.  */
       hi->rel.addend += (vallo + 0x8000) & 0xffff;
 
-      ret = _bfd_mips_elf_generic_reloc (abfd, &hi->rel, symbol, hi->data,
-					 hi->input_section, output_bfd,
+      ret = _bfd_mips_elf_generic_reloc (abfd, &hi->rel, symbol, data,
+					 input_section, output_bfd,
 					 error_message);
       if (ret != bfd_reloc_ok)
 	return ret;
 
-      tdata->mips_hi16_list = hi->next;
+      sdata->mips_hi16_list = hi->next;
       free (hi);
     }
 
@@ -13314,24 +13321,8 @@ _bfd_elf_mips_get_relocated_section_contents
   reloc_vector = (arelent **) bfd_malloc (reloc_size);
   if (reloc_vector == NULL)
     {
-      struct mips_elf_obj_tdata *tdata;
-      struct mips_hi16 **hip, *hi;
     error_return:
-      /* If we are going to return an error, remove entries on
-	 mips_hi16_list that point into this section's data.  Data
-	 will typically be freed on return from this function.  */
-      tdata = mips_elf_tdata (abfd);
-      hip = &tdata->mips_hi16_list;
-      while ((hi = *hip) != NULL)
-	{
-	  if (hi->input_section == input_section)
-	    {
-	      *hip = hi->next;
-	      free (hi);
-	    }
-	  else
-	    hip = &hi->next;
-	}
+      free_mips_hi16_list (input_section);
       if (orig_data == NULL)
 	free (data);
       data = NULL;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-09 10:14           ` Alan Modra
@ 2023-02-10 18:13             ` Maciej W. Rozycki
  2023-05-20 11:41             ` Alan Modra
  1 sibling, 0 replies; 11+ messages in thread
From: Maciej W. Rozycki @ 2023-02-10 18:13 UTC (permalink / raw)
  To: Alan Modra; +Cc: binutils, Chenghua Xu

On Thu, 9 Feb 2023, Alan Modra wrote:

> > Two consecutive entries must come from a single .rel.* section or they 
> > wouldn't be consecutive (there's no relationship between different .rel.* 
> > sections).
> 
> Yes, but see the comment before _bfd_mips_elf_hi16_reloc
> 
>    The ABI requires that the *LO16 immediately follow the *HI16.
>    However, as a GNU extension, we permit an arbitrary number of
>    *HI16s to be associated with a single *LO16.  This significantly
>    simplies the relocation handling in gcc.

 Yes, I am aware of this GNU extension, but it does not change anything 
for this consideration.  There's just meant not to be any predefined HI16 
value at the beginning of any relocation section (IOW any initial orphan 
LO16 reloc has to resolve with the high part of the addend implied zero).

> >  That sounds like a plan to me, but it's unrealistic for me to commit to 
> > it in the next two days (and then I head out to California for a week).
> 
> Patch attached.  Note the FIXME.

 I think there's no need to bail out on unmatched HI16 relocs.  These 
relocs are used with the LUI instruction as the first part of an address 
load sequence.  If the low part is never referred, then the address will 
not have been fully loaded anyway, so such code won't do anything harmful.  

 I think such orphan HI16 relocs used to appear in harmless leftovers from 
optimised code with certain versions of GCC, so adding such an error would 
cause a regression as such code wouldn't link anymore.  I wouldn't add any 
extra error cases here beyond what we might already have.

 I'll have a look into this issue and dive into code to better understand 
it once I'm back from California.  I'll try to address the RELA side too.

 Thank you for your patches and your involvement with this issue, really 
appreciated.  And as I say, more proper support is required with the MIPS 
target given it's still commercially active.  It's not like say VAX, which 
only has a bunch of enthusiasts to fiddle with.

  Maciej

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-02-09 10:14           ` Alan Modra
  2023-02-10 18:13             ` Maciej W. Rozycki
@ 2023-05-20 11:41             ` Alan Modra
  2023-05-20 11:44               ` coff-mips refhi list Alan Modra
  2023-05-23 21:19               ` Protect mips_hi16_list from fuzzed debug info Maciej W. Rozycki
  1 sibling, 2 replies; 11+ messages in thread
From: Alan Modra @ 2023-05-20 11:41 UTC (permalink / raw)
  To: binutils; +Cc: Maciej W. Rozycki, Chenghua Xu

This is a slightly modified version of the patch posted at
https://sourceware.org/pipermail/binutils/2023-February/125916.html
with the logic for detecting orphan hi16 relocs in free_mips_hi16_list
improved so that the warning can be enabled.

OK to apply?

===
This patch is in response to fuzzing testcases that manage to cause
segfaults due to stale references to freed memory via mips_hi16.data.

A number of the error/warning handlers in ldmain.c use %C.  This can
cause debug info to be parsed for the first time in order to print
file/function/line.  If one of those warnings is triggered after some
hi16 relocs have been processed but before the matching lo16 reloc is
handled, *and* the debug info is corrupted with a lo16 reloc, then the
mips_hi16_list will be flushed with the result that printing a warning
changes linker output.  It is also possible that corrupted debug info
adds to the hi16 list, with the result that when the linker handles a
later lo16 reloc in a text section, ld will segfault accessing
mips_hi16.data after the debug buffers have be freed.  Both of these
problems are fixed by keeping a per-section mips_hi16_list rather than
a per-file list.

	* elfxx-mips.c (struct mips_hi16): Move earlier, deleting
	input_section and data fields.
	(struct _mips_elf_section_data): Add mips_hi16_list.
	(struct mips_elf_obj_tdata): Delete mips_hi16_list.
	(free_mips_hi16_list): New function.
	(_bfd_mips_elf_close_and_cleanup): Adjust to suit new location
	of mips_hi16_list.
	(_bfd_mips_elf_hi16_reloc, _bfd_mips_elf_lo16_reloc): Likewise.
	(_bfd_elf_mips_get_relocated_section_contents): Likewise.

diff --git a/bfd/elfxx-mips.c b/bfd/elfxx-mips.c
index 49355a42f7d..18b04a1abb5 100644
--- a/bfd/elfxx-mips.c
+++ b/bfd/elfxx-mips.c
@@ -222,6 +222,15 @@ struct mips_elf_traverse_got_arg
   int value;
 };
 
+/* Used to store a REL high-part relocation such as R_MIPS_HI16 or
+   R_MIPS_GOT16.  */
+
+struct mips_hi16
+{
+  struct mips_hi16 *next;
+  arelent rel;
+};
+
 struct _mips_elf_section_data
 {
   struct bfd_elf_section_data elf;
@@ -229,6 +238,8 @@ struct _mips_elf_section_data
   {
     bfd_byte *tdata;
   } u;
+
+  struct mips_hi16 *mips_hi16_list;
 };
 
 #define mips_elf_section_data(sec) \
@@ -549,19 +560,6 @@ struct mips_htab_traverse_info
   bool error;
 };
 
-/* Used to store a REL high-part relocation such as R_MIPS_HI16 or
-   R_MIPS_GOT16.  REL is the relocation, INPUT_SECTION is the section
-   that contains the relocation field and DATA points to the start of
-   INPUT_SECTION.  */
-
-struct mips_hi16
-{
-  struct mips_hi16 *next;
-  bfd_byte *data;
-  asection *input_section;
-  arelent rel;
-};
-
 /* MIPS ELF private object data.  */
 
 struct mips_elf_obj_tdata
@@ -597,8 +595,6 @@ struct mips_elf_obj_tdata
   asymbol *elf_text_symbol;
   asection *elf_data_section;
   asection *elf_text_section;
-
-  struct mips_hi16 *mips_hi16_list;
 };
 
 /* Get MIPS ELF private object data from BFD's tdata.  */
@@ -1418,6 +1414,30 @@ free_ecoff_debug (struct ecoff_debug_info *debug)
   debug->external_ext = NULL;
 }
 
+/* Free the mips_hi16_list attached to S.  Return true if there were
+   unmatched hi16 relocs.  */
+
+static bool
+free_mips_hi16_list (asection *s)
+{
+  struct mips_hi16 *hi;
+  struct mips_hi16 **hip = &mips_elf_section_data (s)->mips_hi16_list;
+  bool ret = false;
+
+  while ((hi = *hip) != NULL)
+    {
+      *hip = hi->next;
+      /* See gas/config/tc-mips.c reloc_needs_lo_p.  Not all hi16
+	 relocs need lo16 relocs.  */
+      if (hi->rel.howto->type == R_MIPS_HI16
+	  || hi->rel.howto->type == R_MIPS16_HI16
+	  || hi->rel.howto->type == R_MICROMIPS_HI16)
+	ret = true;
+      free (hi);
+    }
+  return ret;
+}
+
 bool
 _bfd_mips_elf_close_and_cleanup (bfd *abfd)
 {
@@ -1427,15 +1447,13 @@ _bfd_mips_elf_close_and_cleanup (bfd *abfd)
       if (tdata != NULL)
 	{
 	  BFD_ASSERT (tdata->root.object_id == MIPS_ELF_DATA);
-	  while (tdata->mips_hi16_list != NULL)
-	    {
-	      struct mips_hi16 *hi = tdata->mips_hi16_list;
-	      tdata->mips_hi16_list = hi->next;
-	      free (hi);
-	    }
 	  if (tdata->find_line_info != NULL)
 	    free_ecoff_debug (&tdata->find_line_info->d);
 	}
+      for (asection *s = abfd->sections; s; s = s->next)
+	if (free_mips_hi16_list (s))
+	  _bfd_error_handler
+	    (_("%pB(%pA): unmatched hi16 reloc"), abfd, s);
     }
   return _bfd_elf_close_and_cleanup (abfd);
 }
@@ -2557,26 +2575,22 @@ _bfd_mips_elf_gprel16_with_gp (bfd *abfd, asymbol *symbol,
 
 bfd_reloc_status_type
 _bfd_mips_elf_hi16_reloc (bfd *abfd, arelent *reloc_entry,
-			  asymbol *symbol ATTRIBUTE_UNUSED, void *data,
+			  asymbol *symbol ATTRIBUTE_UNUSED,
+			  void *data ATTRIBUTE_UNUSED,
 			  asection *input_section, bfd *output_bfd,
 			  char **error_message ATTRIBUTE_UNUSED)
 {
-  struct mips_hi16 *n;
-  struct mips_elf_obj_tdata *tdata;
-
   if (reloc_entry->address > bfd_get_section_limit (abfd, input_section))
     return bfd_reloc_outofrange;
 
-  n = bfd_malloc (sizeof *n);
+  struct mips_hi16 *n = bfd_malloc (sizeof *n);
   if (n == NULL)
     return bfd_reloc_outofrange;
 
-  tdata = mips_elf_tdata (abfd);
-  n->next = tdata->mips_hi16_list;
-  n->data = data;
-  n->input_section = input_section;
+  struct _mips_elf_section_data *sdata = mips_elf_section_data (input_section);
+  n->next = sdata->mips_hi16_list;
   n->rel = *reloc_entry;
-  tdata->mips_hi16_list = n;
+  sdata->mips_hi16_list = n;
 
   if (output_bfd != NULL)
     reloc_entry->address += input_section->output_offset;
@@ -2615,40 +2629,40 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
 			  bfd *output_bfd, char **error_message)
 {
   bfd_vma vallo;
-  bfd_byte *location = (bfd_byte *) data + reloc_entry->address;
-  struct mips_elf_obj_tdata *tdata;
+  struct _mips_elf_section_data *sdata = mips_elf_section_data (input_section);
 
-  if (!bfd_reloc_offset_in_range (reloc_entry->howto, abfd, input_section,
-				  reloc_entry->address))
-    return bfd_reloc_outofrange;
+  if (sdata->mips_hi16_list != NULL)
+    {
+      if (!bfd_reloc_offset_in_range (reloc_entry->howto, abfd, input_section,
+				      reloc_entry->address))
+	return bfd_reloc_outofrange;
 
-  _bfd_mips_elf_reloc_unshuffle (abfd, reloc_entry->howto->type, false,
-				 location);
-  /* The high 16 bits of the addend are stored in the high insn, the
-     low 16 bits in the low insn, but there is a catch:  You can't
-     just concatenate the high and low parts.  The high part of the
-     addend is adjusted for the fact that the low part is sign
-     extended.  For example, an addend of 0x38000 would have 0x0004 in
-     the high part and 0x8000 (=0xff..f8000) in the low part.
-     To extract the actual addend, calculate (a)
-     ((hi & 0xffff) << 16) + ((lo & 0xffff) ^ 0x8000) - 0x8000.
-     We will be applying (symbol + addend) & 0xffff to the low insn,
-     and we want to apply (b) (symbol + addend + 0x8000) >> 16 to the
-     high insn (the +0x8000 adjusting for when the applied low part is
-     negative).  Substituting (a) into (b) and recognising that
-     (hi & 0xffff) is already in the high insn gives a high part
-     addend adjustment of (lo & 0xffff) ^ 0x8000.  */
-  vallo = (bfd_get_32 (abfd, location) & 0xffff) ^ 0x8000;
-  _bfd_mips_elf_reloc_shuffle (abfd, reloc_entry->howto->type, false,
-			       location);
+      bfd_byte *location = (bfd_byte *) data + reloc_entry->address;
+      _bfd_mips_elf_reloc_unshuffle (abfd, reloc_entry->howto->type, false,
+				     location);
+      /* The high 16 bits of the addend are stored in the high insn, the
+	 low 16 bits in the low insn, but there is a catch:  You can't
+	 just concatenate the high and low parts.  The high part of the
+	 addend is adjusted for the fact that the low part is sign
+	 extended.  For example, an addend of 0x38000 would have 0x0004 in
+	 the high part and 0x8000 (=0xff..f8000) in the low part.
+	 To extract the actual addend, calculate (a)
+	 ((hi & 0xffff) << 16) + ((lo & 0xffff) ^ 0x8000) - 0x8000.
+	 We will be applying (symbol + addend) & 0xffff to the low insn,
+	 and we want to apply (b) (symbol + addend + 0x8000) >> 16 to the
+	 high insn (the +0x8000 adjusting for when the applied low part is
+	 negative).  Substituting (a) into (b) and recognising that
+	 (hi & 0xffff) is already in the high insn gives a high part
+	 addend adjustment of (lo & 0xffff) ^ 0x8000.  */
+      vallo = (bfd_get_32 (abfd, location) & 0xffff) ^ 0x8000;
+      _bfd_mips_elf_reloc_shuffle (abfd, reloc_entry->howto->type, false,
+				   location);
+    }
 
-  tdata = mips_elf_tdata (abfd);
-  while (tdata->mips_hi16_list != NULL)
+  while (sdata->mips_hi16_list != NULL)
     {
       bfd_reloc_status_type ret;
-      struct mips_hi16 *hi;
-
-      hi = tdata->mips_hi16_list;
+      struct mips_hi16 *hi = sdata->mips_hi16_list;
 
       /* R_MIPS*_GOT16 relocations are something of a special case.  We
 	 want to install the addend in the same way as for a R_MIPS*_HI16
@@ -2664,13 +2678,13 @@ _bfd_mips_elf_lo16_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
 
       hi->rel.addend += vallo;
 
-      ret = _bfd_mips_elf_generic_reloc (abfd, &hi->rel, symbol, hi->data,
-					 hi->input_section, output_bfd,
+      ret = _bfd_mips_elf_generic_reloc (abfd, &hi->rel, symbol, data,
+					 input_section, output_bfd,
 					 error_message);
       if (ret != bfd_reloc_ok)
 	return ret;
 
-      tdata->mips_hi16_list = hi->next;
+      sdata->mips_hi16_list = hi->next;
       free (hi);
     }
 
@@ -13344,24 +13358,8 @@ _bfd_elf_mips_get_relocated_section_contents
   reloc_vector = (arelent **) bfd_malloc (reloc_size);
   if (reloc_vector == NULL)
     {
-      struct mips_elf_obj_tdata *tdata;
-      struct mips_hi16 **hip, *hi;
     error_return:
-      /* If we are going to return an error, remove entries on
-	 mips_hi16_list that point into this section's data.  Data
-	 will typically be freed on return from this function.  */
-      tdata = mips_elf_tdata (abfd);
-      hip = &tdata->mips_hi16_list;
-      while ((hi = *hip) != NULL)
-	{
-	  if (hi->input_section == input_section)
-	    {
-	      *hip = hi->next;
-	      free (hi);
-	    }
-	  else
-	    hip = &hi->next;
-	}
+      free_mips_hi16_list (input_section);
       if (orig_data == NULL)
 	free (data);
       data = NULL;


-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 11+ messages in thread

* coff-mips refhi list
  2023-05-20 11:41             ` Alan Modra
@ 2023-05-20 11:44               ` Alan Modra
  2023-05-23 21:19               ` Protect mips_hi16_list from fuzzed debug info Maciej W. Rozycki
  1 sibling, 0 replies; 11+ messages in thread
From: Alan Modra @ 2023-05-20 11:44 UTC (permalink / raw)
  To: binutils; +Cc: Maciej W. Rozycki, Chenghua Xu

Like "Move mips_hi16_list to mips_elf_section_data" but for coff.
Also makes mips_refhi_reloc and mips_reflo_reloc a little more elegant
in that they now make use of the generic reloc machinery in order to
apply the relocations.

OK?

	* libecoff.h (struct mips_hi): Delete
	(struct mips_h16): New.
	(struct ecoff_tdata): Delete mips_refhi_list.
	(struct ecoff_section_tdata): Put existing gp into a union.
	Add mips_hi16_list.
	* coff-alpha.c (alpha_relocate_section): Adjust section data access.
	* coff-mips.c (mips_refhi_reloc): Delete dead code.  Stash hi reloc
	on ecoff_section_tdata.
	(mips_reflo_reloc): Use new hi16 list.  Apply hi reloc using
	bfd_perform_relocation.
	* ecoff.c (free_mips_hi16_list): New function.
	(_bfd_ecoff_close_and_cleanup): Clear new hi16 list attached to
	sections rather than bfd tdata.

diff --git a/bfd/coff-alpha.c b/bfd/coff-alpha.c
index 45b3f760f55..79bae7a24d4 100644
--- a/bfd/coff-alpha.c
+++ b/bfd/coff-alpha.c
@@ -1422,11 +1422,11 @@ alpha_relocate_section (bfd *output_bfd,
 	  lita_sec->used_by_bfd = lita_sec_data;
 	}
 
-      if (lita_sec_data->gp != 0)
+      if (lita_sec_data->u.gp != 0)
 	{
 	  /* If we already assigned a gp to this section, we better
 	     stick with that value.  */
-	  gp = lita_sec_data->gp;
+	  gp = lita_sec_data->u.gp;
 	}
       else
 	{
@@ -1459,7 +1459,7 @@ alpha_relocate_section (bfd *output_bfd,
 
 	    }
 
-	  lita_sec_data->gp = gp;
+	  lita_sec_data->u.gp = gp;
 	}
 
       _bfd_set_gp_value (output_bfd, gp);
diff --git a/bfd/coff-mips.c b/bfd/coff-mips.c
index fdc0771979d..11b6a064248 100644
--- a/bfd/coff-mips.c
+++ b/bfd/coff-mips.c
@@ -427,17 +427,11 @@ static bfd_reloc_status_type
 mips_refhi_reloc (bfd *abfd,
 		  arelent *reloc_entry,
 		  asymbol *symbol,
-		  void * data,
+		  void *data ATTRIBUTE_UNUSED,
 		  asection *input_section,
 		  bfd *output_bfd,
 		  char **error_message ATTRIBUTE_UNUSED)
 {
-  bfd_reloc_status_type ret;
-  bfd_vma relocation;
-  struct mips_hi *n;
-
-  /* If we're relocating, and this an external symbol, we don't want
-     to change anything.  */
   if (output_bfd != (bfd *) NULL
       && (symbol->flags & BSF_SECTION_SYM) == 0
       && reloc_entry->addend == 0)
@@ -446,36 +440,33 @@ mips_refhi_reloc (bfd *abfd,
       return bfd_reloc_ok;
     }
 
-  ret = bfd_reloc_ok;
-  if (bfd_is_und_section (symbol->section)
-      && output_bfd == (bfd *) NULL)
-    ret = bfd_reloc_undefined;
-
-  if (bfd_is_com_section (symbol->section))
-    relocation = 0;
-  else
-    relocation = symbol->value;
-
-  relocation += symbol->section->output_section->vma;
-  relocation += symbol->section->output_offset;
-  relocation += reloc_entry->addend;
-
-  if (reloc_entry->address > bfd_get_section_limit (abfd, input_section))
-    return bfd_reloc_outofrange;
+  /* Is this the call via bfd_perform_relocation in mips_reflo_reloc?
+     If so, continue and apply the reloc.  */
+  struct ecoff_section_tdata *sdata = input_section->used_by_bfd;
+  if (sdata != NULL
+      && sdata->u.mips_hi16_list != NULL
+      && reloc_entry == &sdata->u.mips_hi16_list->rel)
+    return bfd_reloc_continue;
 
+  if (sdata == NULL)
+    {
+      sdata = bfd_zalloc (abfd, sizeof (*sdata));
+      input_section->used_by_bfd = sdata;
+      if (sdata == NULL)
+	return bfd_reloc_outofrange;
+    }
   /* Save the information, and let REFLO do the actual relocation.  */
-  n = (struct mips_hi *) bfd_malloc ((bfd_size_type) sizeof *n);
+  struct mips_hi16 *n = bfd_malloc (sizeof (*n));
   if (n == NULL)
     return bfd_reloc_outofrange;
-  n->addr = (bfd_byte *) data + reloc_entry->address;
-  n->addend = relocation;
-  n->next = ecoff_data (abfd)->mips_refhi_list;
-  ecoff_data (abfd)->mips_refhi_list = n;
+  n->rel = *reloc_entry;
+  n->next = sdata->u.mips_hi16_list;
+  sdata->u.mips_hi16_list = n;
 
   if (output_bfd != (bfd *) NULL)
     reloc_entry->address += input_section->output_offset;
 
-  return ret;
+  return bfd_reloc_ok;
 }
 
 /* Do a REFLO relocation.  This is a straightforward 16 bit inplace
@@ -491,54 +482,36 @@ mips_reflo_reloc (bfd *abfd,
 		  bfd *output_bfd,
 		  char **error_message)
 {
-  if (ecoff_data (abfd)->mips_refhi_list != NULL)
+  bfd_size_type octets = (reloc_entry->address
+			  * OCTETS_PER_BYTE (abfd, input_section));
+
+  if (!bfd_reloc_offset_in_range (reloc_entry->howto, abfd,
+				  input_section, octets))
+    return bfd_reloc_outofrange;
+
+  struct ecoff_section_tdata* sdata = input_section->used_by_bfd;
+  if (sdata != NULL && sdata->u.mips_hi16_list != NULL)
     {
-      struct mips_hi *l;
+      struct mips_hi16 *hi;
+      bfd_byte *loc = (bfd_byte *) data + octets;
+      /* Adjustment for the high part addend.  See longer explanation
+	 in elfxx-mips.c _bfd_mips_elf_lo16_reloc.  */
+      bfd_vma vallo = (bfd_get_32 (abfd, loc) & 0x8000) ^ 0x8000;
 
-      l = ecoff_data (abfd)->mips_refhi_list;
-      while (l != NULL)
+      while ((hi = sdata->u.mips_hi16_list) != NULL)
 	{
-	  unsigned long insn;
-	  unsigned long val;
-	  unsigned long vallo;
-	  struct mips_hi *next;
-	  bfd_size_type octets = (reloc_entry->address
-				  * OCTETS_PER_BYTE (abfd, input_section));
-	  bfd_byte *loc = (bfd_byte *) data + octets;
-
-	  if (!bfd_reloc_offset_in_range (reloc_entry->howto, abfd,
-					  input_section, octets))
-	    return bfd_reloc_outofrange;
-
-	  /* Do the REFHI relocation.  Note that we actually don't
-	     need to know anything about the REFLO itself, except
-	     where to find the low 16 bits of the addend needed by the
-	     REFHI.  */
-	  insn = bfd_get_32 (abfd, l->addr);
-	  vallo = bfd_get_32 (abfd, loc) & 0xffff;
-	  val = ((insn & 0xffff) << 16) + vallo;
-	  val += l->addend;
-
-	  /* The low order 16 bits are always treated as a signed
-	     value.  Therefore, a negative value in the low order bits
-	     requires an adjustment in the high order bits.  We need
-	     to make this adjustment in two ways: once for the bits we
-	     took from the data, and once for the bits we are putting
-	     back in to the data.  */
-	  if ((vallo & 0x8000) != 0)
-	    val -= 0x10000;
-	  if ((val & 0x8000) != 0)
-	    val += 0x10000;
-
-	  insn = (insn &~ (unsigned) 0xffff) | ((val >> 16) & 0xffff);
-	  bfd_put_32 (abfd, (bfd_vma) insn, l->addr);
-
-	  next = l->next;
-	  free (l);
-	  l = next;
-	}
+	  bfd_reloc_status_type ret;
 
-      ecoff_data (abfd)->mips_refhi_list = NULL;
+	  /* Apply the REFHI relocation.  */
+	  hi->rel.addend += vallo;
+	  ret = bfd_perform_relocation (abfd, &hi->rel, data, input_section,
+					output_bfd, error_message);
+	  if (ret != bfd_reloc_ok)
+	    return ret;
+
+	  sdata->u.mips_hi16_list = hi->next;
+	  free (hi);
+	}
     }
 
   /* Now do the REFLO reloc in the usual way.  */
diff --git a/bfd/ecoff.c b/bfd/ecoff.c
index 676b8d84017..8e3b4585555 100644
--- a/bfd/ecoff.c
+++ b/bfd/ecoff.c
@@ -109,18 +109,40 @@ _bfd_ecoff_mkobject_hook (bfd *abfd, void * filehdr, void * aouthdr)
   return (void *) ecoff;
 }
 
+/* Free the mips_hi16_list attached to S.  Return true if there were
+   unmatched hi16 relocs.  */
+
+static bool
+free_mips_hi16_list (asection *s)
+{
+  struct ecoff_section_tdata* sdata = s->used_by_bfd;
+  if (sdata != NULL)
+    {
+      struct mips_hi16 *hi;
+      struct mips_hi16 **hip = &sdata->u.mips_hi16_list;
+      bool ret = *hip != NULL;
+
+      while ((hi = *hip) != NULL)
+	{
+	  *hip = hi->next;
+	  free (hi);
+	}
+      return ret;
+    }
+  return false;
+}
+
 bool
 _bfd_ecoff_close_and_cleanup (bfd *abfd)
 {
-  struct ecoff_tdata *tdata = ecoff_data (abfd);
-
-  if (tdata != NULL && bfd_get_format (abfd) == bfd_object)
-    while (tdata->mips_refhi_list != NULL)
-      {
-	struct mips_hi *ref = tdata->mips_refhi_list;
-	tdata->mips_refhi_list = ref->next;
-	free (ref);
-      }
+  if (bfd_get_format (abfd) == bfd_object
+      && ecoff_backend (abfd)->arch == bfd_arch_mips)
+    {
+      for (asection *s = abfd->sections; s; s = s->next)
+	if (free_mips_hi16_list (s))
+	  _bfd_error_handler
+	    (_("%pB(%pA): unmatched hi16 reloc"), abfd, s);
+    }
   return _bfd_generic_close_and_cleanup (abfd);
 }
 
diff --git a/bfd/libecoff.h b/bfd/libecoff.h
index 12664b890c4..96649c39e3d 100644
--- a/bfd/libecoff.h
+++ b/bfd/libecoff.h
@@ -80,11 +80,10 @@ struct ecoff_backend_data
   members of the embedded bfd_coff_backend_data struct.  */
 #define ECOFF_NO_LONG_SECTION_NAMES (false), _bfd_ecoff_no_long_sections
 
-struct mips_hi
+struct mips_hi16
 {
-  struct mips_hi *next;
-  bfd_byte *addr;
-  bfd_vma addend;
+  struct mips_hi16 *next;
+  arelent rel;
 };
 
 /* This is the target specific information kept for ECOFF files.  */
@@ -154,9 +153,6 @@ typedef struct ecoff_tdata
      particular ECOFF file.  This is not valid until
      ecoff_compute_section_file_positions is called.  */
   bool rdata_in_text;
-
-  /* Used by coff-mips.c to track REFHI relocs for pairing with REFLO.  */
-  struct mips_hi *mips_refhi_list;
 } ecoff_data_type;
 
 /* Each canonical asymbol really looks like this.  */
@@ -195,13 +191,18 @@ typedef struct ecoff_symbol_struct
 
 struct ecoff_section_tdata
 {
-  /* When producing an executable (i.e., final, non-relocatable link)
-     on the Alpha, we may need to use multiple global pointer values
-     to span the entire .lita section.  In essence, we allow each
-     input .lita section to have its own gp value.  To support this,
-     we need to keep track of the gp values that we picked for each
-     input .lita section . */
-  bfd_vma gp;
+  union
+  {
+    /* When producing an executable (i.e., final, non-relocatable link)
+       on the Alpha, we may need to use multiple global pointer values
+       to span the entire .lita section.  In essence, we allow each
+       input .lita section to have its own gp value.  To support this,
+       we need to keep track of the gp values that we picked for each
+       input .lita section . */
+    bfd_vma gp;
+    /* Used by coff-mips.c to track hi16 relocs.  */
+    struct mips_hi16 *mips_hi16_list;
+  } u;
 };
 
 /* An accessor macro for the ecoff_section_tdata structure.  */

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Protect mips_hi16_list from fuzzed debug info
  2023-05-20 11:41             ` Alan Modra
  2023-05-20 11:44               ` coff-mips refhi list Alan Modra
@ 2023-05-23 21:19               ` Maciej W. Rozycki
  1 sibling, 0 replies; 11+ messages in thread
From: Maciej W. Rozycki @ 2023-05-23 21:19 UTC (permalink / raw)
  To: Alan Modra; +Cc: binutils, Chenghua Xu

On Sat, 20 May 2023, Alan Modra wrote:

> This is a slightly modified version of the patch posted at
> https://sourceware.org/pipermail/binutils/2023-February/125916.html
> with the logic for detecting orphan hi16 relocs in free_mips_hi16_list
> improved so that the warning can be enabled.

 I haven't lost your orignal fix from my radar and meant to come back to 
it around this time.  I still need a couple days to get through all this 
again.  I'll appreciate a bit of patience.

  Maciej

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-05-23 21:19 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-06 12:33 Protect mips_hi16_list from fuzzed debug info Alan Modra
2023-02-07 14:11 ` Maciej W. Rozycki
2023-02-08  0:32   ` Alan Modra
2023-02-08 23:28     ` Maciej W. Rozycki
2023-02-09  0:29       ` Alan Modra
2023-02-09  1:26         ` Maciej W. Rozycki
2023-02-09 10:14           ` Alan Modra
2023-02-10 18:13             ` Maciej W. Rozycki
2023-05-20 11:41             ` Alan Modra
2023-05-20 11:44               ` coff-mips refhi list Alan Modra
2023-05-23 21:19               ` Protect mips_hi16_list from fuzzed debug info Maciej W. Rozycki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).