public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
From: Aaron Merey <amerey@redhat.com>
To: elfutils-devel@sourceware.org
Cc: Aaron Merey <amerey@redhat.com>
Subject: [PATCH] unstrip: Call adjust_relocs no more than once per section.
Date: Mon,  5 Feb 2024 18:11:04 -0500	[thread overview]
Message-ID: <20240205231104.125617-1-amerey@redhat.com> (raw)

During symtab merging, adjust_relocs might be called multiple times on
some SHT_REL/SHT_RELA sections.  In these cases it is possible for a
relocation's symbol index to be correctly mapped from X to Y during the
first call to adjust_relocs but then wrongly remapped from Y to Z during
the second call.

Fix this by adjusting relocation symbol indices just once per section.

Also add stable sorting for symbols during symtab merging so that the
symbol order in the output file's symtab does not depend on undefined
behaviour in qsort.

Note that adjust_relocs still might be called a second time on a section
during add_new_section_symbols.  However since add_new_section_symbols
generates its own distinct symbol index map, this should not trigger the
bug described above.

https://sourceware.org/bugzilla/show_bug.cgi?id=31097

Signed-off-by: Aaron Merey <amerey@redhat.com>
---

I added some tests to run-unstrip-test.sh to verify that strip+unstrip
does not alter the symbols referred to by relocations.  I tried to use
eu-elfcmp for this purpose.  However for a pair of i386 ET_REL binaries
I did some testing with, eu-elfcmp skips checking the SHT_REL/SHT_RELA
sections because the sh_flags do not contain SHF_ALLOC (the flags only
contain SHF_INFO_LINK in this case).

I'm not sure if this eu-elfcmp behaviour is intentional or if eu-elfcmp
should start comparing REL/RELA sections even if they aren't allocated.

 src/unstrip.c                |  81 ++++++++++++++++----
 tests/.gitignore             |   1 +
 tests/Makefile.am            |   3 +-
 tests/elf-print-reloc-syms.c | 144 +++++++++++++++++++++++++++++++++++
 tests/run-unstrip-test.sh    |   8 ++
 5 files changed, 221 insertions(+), 16 deletions(-)
 create mode 100644 tests/elf-print-reloc-syms.c

diff --git a/src/unstrip.c b/src/unstrip.c
index d5bd1821..f37d6c58 100644
--- a/src/unstrip.c
+++ b/src/unstrip.c
@@ -598,21 +598,30 @@ adjust_relocs (Elf_Scn *outscn, Elf_Scn *inscn, const GElf_Shdr *shdr,
 /* Adjust all the relocation sections in the file.  */
 static void
 adjust_all_relocs (Elf *elf, Elf_Scn *symtab, const GElf_Shdr *symshdr,
-		   size_t map[], size_t map_size)
+		   size_t map[], size_t map_size, bool *scn_filter)
 {
   size_t new_sh_link = elf_ndxscn (symtab);
   Elf_Scn *scn = NULL;
   while ((scn = elf_nextscn (elf, scn)) != NULL)
     if (scn != symtab)
       {
+	if (scn_filter != NULL)
+	  {
+	    size_t ndx = elf_ndxscn (scn);
+
+	    /* Skip relocations that were already mapped during adjust_relocs
+	       for the stripped symtab.  This is to avoid mapping a relocation's
+	       symbol index from X to Y during the first adjust_relocs and then
+	       wrongly remapping it from Y to Z during the second call.  */
+	    if (scn_filter[ndx])
+	      continue;
+	  }
+
 	GElf_Shdr shdr_mem;
 	GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
 	ELF_CHECK (shdr != NULL, _("cannot get section header: %s"));
-	/* Don't redo SHT_GROUP, groups are in both the stripped and debug,
-	   it will already have been done by adjust_relocs for the
-	   stripped_symtab.  */
-	if (shdr->sh_type != SHT_NOBITS && shdr->sh_type != SHT_GROUP
-	    && shdr->sh_link == new_sh_link)
+
+	if (shdr->sh_type != SHT_NOBITS && shdr->sh_link == new_sh_link)
 	  adjust_relocs (scn, scn, shdr, map, map_size, symshdr);
       }
 }
@@ -697,7 +706,7 @@ add_new_section_symbols (Elf_Scn *old_symscn, size_t old_shnum,
     }
 
   /* Adjust any relocations referring to the old symbol table.  */
-  adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1);
+  adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1, NULL);
 
   return symdata;
 }
@@ -874,6 +883,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, Elf_Scn *strscn,
       s->shndx = shndx;
       s->info.info = sym->st_info;
       s->info.other = sym->st_other;
+      s->duplicate = NULL;
 
       if (scnmap != NULL && shndx != SHN_UNDEF && shndx < SHN_LORESERVE)
 	s->shndx = scnmap[shndx - 1];
@@ -903,8 +913,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, Elf_Scn *strscn,
   if (s1->value > s2->value)						      \
     return 1
 
-/* Compare symbols with a consistent ordering,
-   but one only meaningful for equality.  */
+/* Symbol comparison used to sort symbols in preparation for deduplication.  */
 static int
 compare_symbols (const void *a, const void *b)
 {
@@ -915,6 +924,38 @@ compare_symbols (const void *a, const void *b)
   CMP (size);
   CMP (shndx);
 
+  int res = s1->compare - s2->compare;
+  if (res != 0)
+    return res;
+
+  res = strcmp (s1->name, s2->name);
+  if (res != 0)
+    return res;
+
+  /* Duplicates still have distinct positions in the symbol index map.
+     Compare map positions to ensure that duplicate symbols are ordered
+     consistently even if the sort function is unstable.  */
+  CMP (map);
+  error_exit (0, _("found two identical index map positions."));
+}
+
+/* Symbol comparison used to deduplicate symbols found in both the stripped
+   and unstripped input files.
+
+   Similar to compare_symbols, but does not differentiate symbols based
+   on their position in the symbol index map.  Duplicates can't be found
+   by comparing index map postions because they always have distinct
+   positions in the map.  */
+static int
+compare_symbols_duplicate (const void *a, const void *b)
+{
+  const struct symbol *s1 = a;
+  const struct symbol *s2 = b;
+
+  CMP (value);
+  CMP (size);
+  CMP (shndx);
+
   return (s1->compare - s2->compare) ?: strcmp (s1->name, s2->name);
 }
 
@@ -946,13 +987,13 @@ compare_symbols_output (const void *a, const void *b)
 	  /* binutils always puts section symbols in section index order.  */
 	  CMP (shndx);
 	  else if (s1 != s2)
-	    error_exit (0, "section symbols in unexpected order");
+	    error_exit (0, _("section symbols in unexpected order"));
 	}
 
       /* Nothing really matters, so preserve the original order.  */
       CMP (map);
       else if (s1 != s2)
-	error_exit (0, "found two identical symbols");
+	error_exit (0, _("found two identical symbols"));
     }
 
   return cmp;
@@ -1855,7 +1896,8 @@ more sections in stripped file than debug file -- arguments reversed?"));
 	    }
 
 	  struct symbol *n = s;
-	  while (n + 1 < &symbols[total_syms] && !compare_symbols (s, n + 1))
+	  while (n + 1 < &symbols[total_syms]
+		 && !compare_symbols_duplicate (s, n + 1))
 	    ++n;
 
 	  while (s < n)
@@ -1992,6 +2034,11 @@ more sections in stripped file than debug file -- arguments reversed?"));
       elf_flagdata (symdata, ELF_C_SET, ELF_F_DIRTY);
       update_shdr (unstripped_symtab, shdr);
 
+      /* Track which sections are adjusted during the first round
+	 of calls to adjust_relocs.  */
+      bool scn_adjusted[unstripped_shnum];
+      memset (scn_adjusted, 0, sizeof scn_adjusted);
+
       if (stripped_symtab != NULL)
 	{
 	  /* Adjust any relocations referring to the old symbol table.  */
@@ -2000,14 +2047,18 @@ more sections in stripped file than debug file -- arguments reversed?"));
 	       sec < &sections[stripped_shnum - 1];
 	       ++sec)
 	    if (sec->outscn != NULL && sec->shdr.sh_link == old_sh_link)
-	      adjust_relocs (sec->outscn, sec->scn, &sec->shdr,
-			     symndx_map, total_syms, shdr);
+	      {
+		adjust_relocs (sec->outscn, sec->scn, &sec->shdr,
+			       symndx_map, total_syms, shdr);
+		scn_adjusted[elf_ndxscn (sec->outscn)] = true;
+	      }
 	}
 
       /* Also adjust references to the other old symbol table.  */
       adjust_all_relocs (unstripped, unstripped_symtab, shdr,
 			 &symndx_map[stripped_nsym - 1],
-			 total_syms - (stripped_nsym - 1));
+			 total_syms - (stripped_nsym - 1),
+			 scn_adjusted);
 
       free (symbols);
       free (symndx_map);
diff --git a/tests/.gitignore b/tests/.gitignore
index 5bebb2c4..d00a883e 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -63,6 +63,7 @@
 /elfshphehdr
 /elfstrmerge
 /elfstrtab
+/elf-print-reloc-syms
 /emptyfile
 /fillfile
 /find-prologues
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2373c980..13bd9d56 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -62,7 +62,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \
 		  dwelf_elf_e_machine_string \
 		  getphdrnum leb128 read_unaligned \
 		  msg_tst system-elf-libelf-test system-elf-gelf-test \
-		  nvidia_extended_linemap_libdw \
+		  nvidia_extended_linemap_libdw elf-print-reloc-syms \
 		  $(asm_TESTS)
 
 asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
@@ -810,6 +810,7 @@ getphdrnum_LDADD = $(libelf) $(libdw)
 leb128_LDADD = $(libelf) $(libdw)
 read_unaligned_LDADD = $(libelf) $(libdw)
 nvidia_extended_linemap_libdw_LDADD = $(libelf) $(libdw)
+elf_print_reloc_syms_LDADD = $(libelf)
 
 # We want to test the libelf headers against the system elf.h header.
 # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
diff --git a/tests/elf-print-reloc-syms.c b/tests/elf-print-reloc-syms.c
new file mode 100644
index 00000000..d6b7867d
--- /dev/null
+++ b/tests/elf-print-reloc-syms.c
@@ -0,0 +1,144 @@
+/* Copyright (C) 2024 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+
+static void
+print_reloc_symnames (Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, size_t sh_entsize)
+{
+  int nentries = shdr->sh_size / sh_entsize;
+
+  /* Get the data of the section.  */
+  Elf_Data *data = elf_getdata (scn, NULL);
+  assert (data != NULL);
+
+  /* Get the symbol table information.  */
+  Elf_Scn *symscn = elf_getscn (elf, shdr->sh_link);
+  GElf_Shdr symshdr_mem;
+  GElf_Shdr *symshdr = gelf_getshdr (symscn, &symshdr_mem);
+  Elf_Data *symdata = elf_getdata (symscn, NULL);
+  assert (symshdr != NULL);
+  assert (symdata != NULL);
+
+  /* Search for the optional extended section index table.  */
+  Elf_Data *xndxdata = NULL;
+  int xndxscnidx = elf_scnshndx (scn);
+  if (xndxscnidx)
+    xndxdata = elf_getdata (elf_getscn (elf, xndxscnidx), NULL);
+
+  /* Get the section header string table index.  */
+  size_t shstrndx;
+  assert (elf_getshdrstrndx (elf, &shstrndx) >= 0);
+
+  printf("Section: %s\n", elf_strptr (elf, shstrndx, shdr->sh_name));
+  for (int cnt = 0; cnt < nentries; ++cnt)
+    {
+      GElf_Rel relmem;
+      GElf_Rel *rel = gelf_getrel (data, cnt, &relmem);
+
+
+      if (likely (rel != NULL))
+        {
+          GElf_Sym symmem;
+          Elf32_Word xndx;
+          GElf_Sym *sym = gelf_getsymshndx (symdata, xndxdata,
+                                            GELF_R_SYM (rel->r_info),
+                                            &symmem, &xndx);
+
+	  if (sym == NULL)
+	    {
+	      printf ("<SYM NOT FOUND>\n");
+	      continue;
+	    }
+
+          if (GELF_ST_TYPE (sym->st_info) != STT_SECTION)
+            printf ("%s\n", elf_strptr (elf, symshdr->sh_link, sym->st_name));
+          else
+            {
+              /* This is a relocation against a STT_SECTION symbol.  */
+              GElf_Shdr secshdr_mem;
+              GElf_Shdr *secshdr;
+              secshdr = gelf_getshdr (elf_getscn (elf,
+                                                  sym->st_shndx == SHN_XINDEX
+                                                  ? xndx : sym->st_shndx),
+                                      &secshdr_mem);
+
+	      if (secshdr == NULL)
+		printf("<SECTION NOT FOUND>\n");
+              else
+                printf ("%s\n",
+			elf_strptr (elf, shstrndx, secshdr->sh_name));
+            }
+        }
+    }
+}
+
+int
+main (int argc, char *argv[])
+{
+  if (argc != 2)
+    {
+      printf ("Usage: elf_print_reloc_syms FILE\n");
+      return -1;
+    }
+
+  elf_version (EV_CURRENT);
+
+  int fd = open(argv[1], O_RDONLY);
+  assert (fd != -1);
+
+  Elf *elf = elf_begin (fd, ELF_C_READ, NULL);
+  assert (elf != NULL);
+
+  size_t shnums;
+  assert (elf_getshdrnum (elf, &shnums) >= 0);
+
+  Elf_Scn *scn = NULL;
+  while ((scn = elf_nextscn (elf, scn)) != NULL)
+    {
+      GElf_Shdr shdr_mem;
+      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
+
+      if (shdr != NULL)
+	{
+	  /* Print the names of symbols referred to by relocations.  */
+	  if (shdr->sh_type == SHT_REL)
+	    {
+	      size_t sh_entsize = gelf_fsize (elf, ELF_T_REL, 1, EV_CURRENT);
+	      print_reloc_symnames (elf, scn, shdr, sh_entsize);
+	    }
+	  else if (shdr->sh_type == SHT_RELA)
+	    {
+	      size_t sh_entsize = gelf_fsize (elf, ELF_T_RELA, 1, EV_CURRENT);
+	      print_reloc_symnames (elf, scn, shdr, sh_entsize);
+	    }
+	}
+    }
+}
diff --git a/tests/run-unstrip-test.sh b/tests/run-unstrip-test.sh
index dc7d3a42..03373b3f 100755
--- a/tests/run-unstrip-test.sh
+++ b/tests/run-unstrip-test.sh
@@ -33,6 +33,14 @@ testrun ${abs_top_builddir}/src/unstrip -o testfile.unstrip $stripped $debugfile
 
 testrun ${abs_top_builddir}/src/elfcmp --hash-inexact $original testfile.unstrip
 
+tempfiles syms-orig syms-testfile
+
+# Check whether relocated symbols changed.
+${abs_top_builddir}/tests/elf-print-reloc-syms $original > syms-orig
+${abs_top_builddir}/tests/elf-print-reloc-syms testfile.unstrip > syms-testfile
+
+testrun diff syms-orig syms-testfile
+
 # Also test modifying the file in place.
 
 rm -f testfile.inplace
-- 
2.43.0


             reply	other threads:[~2024-02-05 23:11 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-05 23:11 Aaron Merey [this message]
2024-02-06 15:36 ` Mark Wielaard
2024-02-06 20:20   ` Aaron Merey

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240205231104.125617-1-amerey@redhat.com \
    --to=amerey@redhat.com \
    --cc=elfutils-devel@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).