public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/fw/glibcelf] Use custom linker script by default (bug 25812)
@ 2022-04-08 18:09 Florian Weimer
  0 siblings, 0 replies; only message in thread
From: Florian Weimer @ 2022-04-08 18:09 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=73460ea779f69b559f5abccd738c5d905cb44746

commit 73460ea779f69b559f5abccd738c5d905cb44746
Author: Florian Weimer <fweimer@redhat.com>
Date:   Fri Apr 8 20:09:16 2022 +0200

    Use custom linker script by default (bug 25812)

Diff:
---
 INSTALL                    |   6 ++
 configure                  |  65 +----------
 configure.ac               |  55 +---------
 elf/Makefile               |  37 +++++++
 elf/check-relro-symbols.py | 124 +++++++++++++++++++++
 manual/install.texi        |   6 ++
 scripts/glibcelf.py        | 263 ++++++++++++++++++++++++++++++++++++++++++---
 7 files changed, 426 insertions(+), 130 deletions(-)

diff --git a/INSTALL b/INSTALL
index 63c022d6b9..4a10f06d3d 100644
--- a/INSTALL
+++ b/INSTALL
@@ -90,6 +90,12 @@ if 'CFLAGS' is specified it must enable optimization.  For example:
      library will still be usable, but functionality may be lost--for
      example, you can't build a shared libc with old binutils.
 
+'--with-default-link=FLAG'
+     '--with-default-link=yes' does not use a custom linker script for
+     linking the individual parts of the GNU C Library.  The default for
+     FLAG is 'no' because the custom linker script is needed for full
+     RELRO protection.
+
 '--with-nonshared-cflags=CFLAGS'
      Use additional compiler flags CFLAGS to build the parts of the
      library which are always statically linked into applications and
diff --git a/configure b/configure
index d2f413d05d..650bfd982c 100755
--- a/configure
+++ b/configure
@@ -3375,7 +3375,7 @@ fi
 if test "${with_default_link+set}" = set; then :
   withval=$with_default_link; use_default_link=$withval
 else
-  use_default_link=default
+  use_default_link=no
 fi
 
 
@@ -6184,69 +6184,6 @@ fi
 $as_echo "$libc_cv_hashstyle" >&6; }
 
 
-# The linker's default -shared behavior is good enough if it
-# does these things that our custom linker scripts ensure that
-# all allocated NOTE sections come first.
-if test "$use_default_link" = default; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sufficient default -shared layout" >&5
-$as_echo_n "checking for sufficient default -shared layout... " >&6; }
-if ${libc_cv_use_default_link+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-    libc_cv_use_default_link=no
-  cat > conftest.s <<\EOF
-	  .section .note.a,"a",%note
-	  .balign 4
-	  .long 4,4,9
-	  .string "GNU"
-	  .string "foo"
-	  .section .note.b,"a",%note
-	  .balign 4
-	  .long 4,4,9
-	  .string "GNU"
-	  .string "bar"
-EOF
-  if { ac_try='  ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&5'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } &&
-       ac_try=`$READELF -S conftest.so | sed -n \
-	 '${x;p;}
-	  s/^ *\[ *[1-9][0-9]*\]  *\([^ ][^ ]*\)  *\([^ ][^ ]*\) .*$/\2 \1/
-	  t a
-	  b
-	  : a
-	  H'`
-  then
-    libc_seen_a=no libc_seen_b=no
-    set -- $ac_try
-    while test $# -ge 2 -a "$1" = NOTE; do
-      case "$2" in
-      .note.a) libc_seen_a=yes ;;
-      .note.b) libc_seen_b=yes ;;
-      esac
-      shift 2
-    done
-    case "$libc_seen_a$libc_seen_b" in
-    yesyes)
-      libc_cv_use_default_link=yes
-      ;;
-    *)
-      echo >&5 "\
-$libc_seen_a$libc_seen_b from:
-$ac_try"
-      ;;
-    esac
-  fi
-  rm -f conftest*
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_use_default_link" >&5
-$as_echo "$libc_cv_use_default_link" >&6; }
-  use_default_link=$libc_cv_use_default_link
-fi
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GLOB_DAT reloc" >&5
 $as_echo_n "checking for GLOB_DAT reloc... " >&6; }
 if ${libc_cv_has_glob_dat+:} false; then :
diff --git a/configure.ac b/configure.ac
index b6a747dece..605efd549d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -153,7 +153,7 @@ AC_ARG_WITH([default-link],
 	    AS_HELP_STRING([--with-default-link],
 			   [do not use explicit linker scripts]),
 	    [use_default_link=$withval],
-	    [use_default_link=default])
+	    [use_default_link=no])
 
 dnl Additional build flags injection.
 AC_ARG_WITH([nonshared-cflags],
@@ -1371,59 +1371,6 @@ fi
 rm -f conftest*])
 AC_SUBST(libc_cv_hashstyle)
 
-# The linker's default -shared behavior is good enough if it
-# does these things that our custom linker scripts ensure that
-# all allocated NOTE sections come first.
-if test "$use_default_link" = default; then
-  AC_CACHE_CHECK([for sufficient default -shared layout],
-		  libc_cv_use_default_link, [dnl
-  libc_cv_use_default_link=no
-  cat > conftest.s <<\EOF
-	  .section .note.a,"a",%note
-	  .balign 4
-	  .long 4,4,9
-	  .string "GNU"
-	  .string "foo"
-	  .section .note.b,"a",%note
-	  .balign 4
-	  .long 4,4,9
-	  .string "GNU"
-	  .string "bar"
-EOF
-  if AC_TRY_COMMAND([dnl
-  ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD]) &&
-       ac_try=`$READELF -S conftest.so | sed -n \
-	 ['${x;p;}
-	  s/^ *\[ *[1-9][0-9]*\]  *\([^ ][^ ]*\)  *\([^ ][^ ]*\) .*$/\2 \1/
-	  t a
-	  b
-	  : a
-	  H']`
-  then
-    libc_seen_a=no libc_seen_b=no
-    set -- $ac_try
-    while test $# -ge 2 -a "$1" = NOTE; do
-      case "$2" in
-      .note.a) libc_seen_a=yes ;;
-      .note.b) libc_seen_b=yes ;;
-      esac
-      shift 2
-    done
-    case "$libc_seen_a$libc_seen_b" in
-    yesyes)
-      libc_cv_use_default_link=yes
-      ;;
-    *)
-      echo >&AS_MESSAGE_LOG_FD "\
-$libc_seen_a$libc_seen_b from:
-$ac_try"
-      ;;
-    esac
-  fi
-  rm -f conftest*])
-  use_default_link=$libc_cv_use_default_link
-fi
-
 AC_CACHE_CHECK(for GLOB_DAT reloc,
 	       libc_cv_has_glob_dat, [dnl
 cat > conftest.c <<EOF
diff --git a/elf/Makefile b/elf/Makefile
index c96924e9c2..a8ba22ff11 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -543,6 +543,43 @@ endif
 endif
 endif
 
+tests-special += $(objpfx)tst-relro-ldso.out $(objpfx)tst-relro-libc.out
+$(objpfx)tst-relro-ldso.out: check-relro-symbols.py $(..)/scripts/glibcelf.py \
+  $(objpfx)ld.so
+	$(PYTHON) check-relro-symbols.py $(objpfx)ld.so \
+	  _rtld_global_ro \
+	  > $@ 2>&1; $(evaluate-test)
+ifdef have-GLIBC_2.0
+# Symbols for the old, pre-2.1 vtables.
+tst-relro-libc-old-jumps = \
+  _IO_old_cookie_jumps \
+  _IO_old_file_jumps \
+  _IO_old_proc_jumps \
+  # tst-relro-libc-old-jumps
+endif
+$(objpfx)tst-relro-libc.out: check-relro-symbols.py $(..)/scripts/glibcelf.py \
+  $(common-objpfx)libc.so
+	$(PYTHON) check-relro-symbols.py $(common-objpfx)libc.so \
+	    _IO_cookie_jumps \
+	    _IO_file_jumps \
+	    _IO_file_jumps_maybe_mmap \
+	    _IO_file_jumps_mmap \
+	    _IO_helper_jumps \
+	    _IO_mem_jumps \
+	    _IO_obstack_jumps \
+	    _IO_proc_jumps \
+	    _IO_str_chk_jumps \
+	    _IO_str_jumps \
+	    _IO_strn_jumps \
+	    _IO_wfile_jumps \
+	    _IO_wfile_jumps_maybe_mmap \
+	    _IO_wfile_jumps_mmap \
+	    _IO_wmem_jumps \
+	    _IO_wstr_jumps \
+	    _IO_wstrn_jumps \
+	    $(tst-relro-libc-old-jumps) \
+	  > $@ 2>&1; $(evaluate-test)
+
 ifeq ($(run-built-tests),yes)
 tests-special += $(objpfx)tst-valgrind-smoke.out
 endif
diff --git a/elf/check-relro-symbols.py b/elf/check-relro-symbols.py
new file mode 100644
index 0000000000..bbcbe52b74
--- /dev/null
+++ b/elf/check-relro-symbols.py
@@ -0,0 +1,124 @@
+#!/usr/bin/python3
+# Verify that certain symbols are covered by RELRO.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+"""Analyze a (shared) object to verify that certain symbols are
+present and covered by the PT_GNU_RELRO segment.
+
+"""
+
+import argparse
+import os.path
+import sys
+
+# Make available glibc Python modules.
+sys.path.append(os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), os.path.pardir, 'scripts'))
+
+import glibcelf
+
+def find_relro(path: str, img: glibcelf.ElfImage) -> (int, int):
+    """Return (begin, end) of PT_GNU_RELRO; report and exit 1 if absent."""
+    for phdr in img.phdrs():
+        if phdr.p_type == glibcelf.ElfPt.PT_GNU_RELRO:
+            return phdr.p_vaddr, phdr.p_vaddr + phdr.p_memsz
+    sys.stdout.write('{}: error: no PT_GNU_RELRO segment\n'.format(path))
+    sys.exit(1)
+
+def get_parser():
+    """Return an argument parser for this script."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('object', help='path to object file to check')
+    parser.add_argument('symbols', help='names of the symbols to check',
+                        nargs='+')
+    return parser
+
+def main(argv):
+    """Check that the symbols named in argv lie within the RELRO segment."""
+    parser = get_parser()
+    opts = parser.parse_args(argv)
+    with open(opts.object, 'rb') as inp:
+        img = glibcelf.ElfImage(memoryview(inp.read()))
+
+    check_symbols = frozenset([sym.encode('UTF-8') for sym in opts.symbols])
+
+    # Tracks the symbols in check_symbols that have been found.
+    symbols_found = set()
+
+    # Discover the extent of the RELRO segment.
+    relro_begin, relro_end = find_relro(opts.object, img)
+    symbol_table_found = False
+
+    errors = False
+    def error(msg: str) -> None:
+        """Record an error condition and write a message to standard output."""
+        nonlocal errors
+        errors = True
+        sys.stdout.write('{}: error: {}\n'.format(opts.object, msg))
+
+    # Iterate over section headers to find the symbol table.
+    for shdr in img.shdrs():
+        if shdr.sh_type == glibcelf.ElfSht.SHT_SYMTAB:
+            symbol_table_found = True
+            for sym in img.syms(shdr):
+                symbol_name = img.symbol_name(shdr, sym)
+                if symbol_name in check_symbols:
+                    symbols_found.add(symbol_name)
+
+                    # Validate symbol type, section, and size.
+                    if sym.st_info.type != glibcelf.ElfStt.STT_OBJECT:
+                        error('symbol {!r} has wrong type {}'.format(
+                            symbol_name.decode('UTF-8'), sym.st_info.type))
+                    if sym.st_shndx in glibcelf.ElfShn:
+                        error('symbol {!r} has reserved section {}'.format(
+                            symbol_name.decode('UTF-8'), sym.st_shndx))
+                        continue
+                    if sym.st_size == 0:
+                        error('symbol {!r} has size zero'.format(
+                            symbol_name.decode('UTF-8')))
+                        continue
+
+                    # Compute the extent of the symbol (inclusive end).
+                    symbol_start = sym.st_value
+                    symbol_end = symbol_start + sym.st_size - 1
+
+                    # Check if the symbol completely falls within the
+                    # RELRO segment.  One-byte symbols have start == end.
+                    if not (relro_begin <= symbol_start <= symbol_end
+                            < relro_end):
+                        error(
+                            'symbol {!r} of size {} at 0x{:x} is not in RELRO range [0x{:x}, 0x{:x})'.format(
+                                symbol_name.decode('UTF-8'),
+                                sym.st_size, sym.st_value,
+                                relro_begin, relro_end))
+
+    # Report a missing symbol table before flagging symbols as not found.
+    if not symbol_table_found:
+        sys.stdout.write(
+            '{}: warning: no symbol table found (stripped object)\n'.format(
+                opts.object))
+        sys.exit(77)
+
+    for sym in sorted(check_symbols - symbols_found):
+        error('symbol {!r} not found'.format(sym.decode('UTF-8')))
+
+    if errors:
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/manual/install.texi b/manual/install.texi
index 29c52f2927..a77ce09fa8 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -117,6 +117,12 @@ problem and suppress these constructs, so that the library will still be
 usable, but functionality may be lost---for example, you can't build a
 shared libc with old binutils.
 
+@item --with-default-link=@var{FLAG}
+@code{--with-default-link=yes} does not use a custom linker script for
+linking the individual parts of @theglibc{}.  The default for @var{FLAG}
+is @samp{no} because the custom linker script is needed for full RELRO
+protection.
+
 @item --with-nonshared-cflags=@var{cflags}
 Use additional compiler flags @var{cflags} to build the parts of the
 library which are always statically linked into applications and
diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
index 1bb94b51a9..3fd180bfe9 100644
--- a/scripts/glibcelf.py
+++ b/scripts/glibcelf.py
@@ -324,6 +324,87 @@ class ElfStt(OpenIntEnum):
     STT_TLS = 6
     STT_GNU_IFUNC = 10
 
+class ElfPt(OpenIntEnum):
+    "ELF program header types.  Type of ElfPhdr.p_type."
+    PT_NULL = 0
+    PT_LOAD = 1
+    PT_DYNAMIC = 2
+    PT_INTERP = 3
+    PT_NOTE = 4
+    PT_SHLIB = 5
+    PT_PHDR = 6
+    PT_TLS = 7
+    PT_NUM = 8
+    PT_GNU_EH_FRAME = 0x6474e550
+    PT_GNU_STACK = 0x6474e551
+    PT_GNU_RELRO = 0x6474e552
+    PT_GNU_PROPERTY = 0x6474e553
+    PT_SUNWBSS = 0x6ffffffa
+    PT_SUNWSTACK = 0x6ffffffb
+
+class ElfDt(OpenIntEnum):
+    "ELF dynamic segment tags.  Type of ElfDyn.d_tag."
+    DT_NULL = 0
+    DT_NEEDED = 1
+    DT_PLTRELSZ = 2
+    DT_PLTGOT = 3
+    DT_HASH = 4
+    DT_STRTAB = 5
+    DT_SYMTAB = 6
+    DT_RELA = 7
+    DT_RELASZ = 8
+    DT_RELAENT = 9
+    DT_STRSZ = 10
+    DT_SYMENT = 11
+    DT_INIT = 12
+    DT_FINI = 13
+    DT_SONAME = 14
+    DT_RPATH = 15
+    DT_SYMBOLIC = 16
+    DT_REL = 17
+    DT_RELSZ = 18
+    DT_RELENT = 19
+    DT_PLTREL = 20
+    DT_DEBUG = 21
+    DT_TEXTREL = 22
+    DT_JMPREL = 23
+    DT_RUNPATH = 29
+    DT_FLAGS = 30
+    DT_ENCODING = 32
+    DT_PREINIT_ARRAY = 32  # NOTE(review): same value as DT_ENCODING above
+    DT_PREINIT_ARRAYSZ = 33
+    DT_SYMTAB_SHNDX = 34
+    DT_GNU_PRELINKED = 0x6ffffdf5
+    DT_GNU_CONFLICTSZ = 0x6ffffdf6
+    DT_GNU_LIBLISTSZ = 0x6ffffdf7
+    DT_CHECKSUM = 0x6ffffdf8
+    DT_PLTPADSZ = 0x6ffffdf9
+    DT_MOVEENT = 0x6ffffdfa
+    DT_MOVESZ = 0x6ffffdfb
+    DT_FEATURE_1 = 0x6ffffdfc
+    DT_POSFLAG_1 = 0x6ffffdfd
+    DT_SYMINSZ = 0x6ffffdfe
+    DT_SYMINENT = 0x6ffffdff
+    DT_GNU_HASH = 0x6ffffef5
+    DT_TLSDESC_PLT = 0x6ffffef6
+    DT_TLSDESC_GOT = 0x6ffffef7
+    DT_GNU_CONFLICT = 0x6ffffef8
+    DT_GNU_LIBLIST = 0x6ffffef9
+    DT_CONFIG = 0x6ffffefa
+    DT_DEPAUDIT = 0x6ffffefb
+    DT_AUDIT = 0x6ffffefc
+    DT_SYMINFO = 0x6ffffeff
+    DT_VERSYM = 0x6ffffff0
+    DT_RELACOUNT = 0x6ffffff9
+    DT_RELCOUNT = 0x6ffffffa
+    DT_FLAGS_1 = 0x6ffffffb
+    DT_VERDEF = 0x6ffffffc
+    DT_VERDEFNUM = 0x6ffffffd
+    DT_VERNEED = 0x6ffffffe
+    DT_VERNEEDNUM = 0x6fffffff
+    DT_AUXILIARY = 0x7ffffffd
+    DT_FILTER = 0x7fffffff
+
 class ElfStInfo:
     "ELF symbol binding and type.  Type of the ElfSym.st_info field."
     def __init__(self, arg0, arg1=None):
@@ -433,10 +514,8 @@ class {classname}({baseclass}):
                 code += '{}return {}({})\n'.format(
                     indent, baseclass.__name__, field_names)
 
-            print(code)
             exec(code, env)
             cls = env[classname]
-            print(cls)
             cls.size = struct.calcsize(layout)
             classes[(elfclass, elfdata)] = cls
     baseclass.variants = classes
@@ -478,7 +557,7 @@ _define_variants(ElfPhdr,
                  fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                            'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                  layout64='2I6Q',
-                 types=dict(p_flags=ElfPf))
+                 types=dict(p_type=ElfPt, p_flags=ElfPf))
 
 
 # Corresponds to Elf32_Shdr and Elf64_Shdr.
@@ -488,14 +567,25 @@ ElfShdr = collections.namedtuple('ElfShdr',
 _define_variants(ElfShdr,
                  layout32='10I',
                  layout64='2I4Q2I2Q',
-                 types=dict(sh_flags=ElfShf))
+                 types=dict(sh_type=ElfSht,
+                            sh_flags=ElfShf,
+                            sh_link=ElfShn))
+
+# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
+# d_un union is skipped, and d_ptr is missing (its representation in
+# Python would be identical to d_val).
+ElfDyn = collections.namedtuple('ElfDyn', 'd_tag d_val')
+_define_variants(ElfDyn,
+                 layout32='2i',
+                 layout64='2q',
+                 types=dict(d_tag=ElfDt))
 
 # Corresponds to Elf32_Sym and Elf64_Sym.
 ElfSym = collections.namedtuple('ElfSym',
     'st_name st_info st_other st_shndx st_value st_size')
 _define_variants(ElfSym,
                  layout32='3I2BH',
-                 layout64='Q2BH2Q',
+                 layout64='I2BH2Q',
                  fields32=('st_name', 'st_value', 'st_size', 'st_info',
                            'st_other', 'st_shndx'),
                  types=dict(st_shndx=ElfShn,
@@ -513,6 +603,24 @@ _define_variants(ElfRela,
                  layout32='3I',
                  layout64='3Q')
 
+class ElfStringTable:
+    "ELF string table."
+    def __init__(self, blob):
+        """Create a new string table backed by the data in the blob.
+
+        blob: a memoryview-like object
+
+        """
+        self.blob = blob
+
+    def get(self, index) -> bytes:
+        blob = self.blob
+        endindex = index
+        while True:
+            if blob[endindex] == 0:
+                return bytes(blob[index:endindex])
+            endindex += 1
+
 class ElfImage:
     "ELF image parser."
     def __init__(self, image):
@@ -526,22 +634,153 @@ class ElfImage:
         ident = self.read(ElfIdent, 0)
         classdata = (ident.ei_class, ident.ei_data)
         # Set self.Ehdr etc. to the subtypes with the right parsers.
-        for typ in (ElfEhdr, ElfPhdr, ElfShdr, ElfSym, ElfRel, ElfRela):
+        for typ in (ElfEhdr, ElfPhdr, ElfShdr, ElfDyn, ElfSym, ElfRel,
+                    ElfRela):
             setattr(self, typ.__name__[3:], typ.variants.get(classdata, None))
 
         if self.Ehdr is not None:
             self.ehdr = self.read(self.Ehdr, 0)
+            self._shdr_num = self._compute_shdr_num()
         else:
             self.ehdr = None
+            self._shdr_num = 0
+
+        self._section = {}
+        self._stringtab = {}
+
+    def _compute_shdr_num(self) -> int:
+        shnum = self.ehdr.e_shnum
+        if shnum == 0:
+            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
+                # No section headers.
+                return 0
+            # Otherwise the extension mechanism is used (which may be
+            # needed because e_shnum is just 16 bits).
+            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
+        return shnum
 
     def read(self, typ, offset):
         return typ.unpack(self.image[offset: offset + typ.size])
 
+    def phdrs(self) -> ElfPhdr:
+        "Generator iterating over the program headers."
+        if self.ehdr is None:
+            return
+        size = self.ehdr.e_phentsize
+        if size != self.Phdr.size:
+            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
+                             .format(size, self.Phdr.size))
+
+        offset = self.ehdr.e_phoff
+        for _ in range(self.ehdr.e_phnum):
+            yield self.read(self.Phdr, offset)
+            offset += size
+
+    def shdrs(self) -> ElfShdr:
+        "Generator iterating over the section headers."
+        if self._shdr_num == 0:
+            return
+        size = self.ehdr.e_shentsize
+        if size != self.Shdr.size:
+            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
+                             .format(size, self.Shdr.size))
+
+        offset = self.ehdr.e_shoff
+        for _ in range(self._shdr_num):
+            yield self.read(self.Shdr, offset)
+            offset += size
+
+    def dynamic(self) -> ElfDyn:
+        "Generator iterating over the dynamic segment."
+        for phdr in self.phdrs():
+            if phdr.p_type == ElfPt.PT_DYNAMIC:
+                # Pick the first dynamic segment, like the loader.
+                if phdr.p_filesz == 0:
+                    # Probably separated debuginfo.
+                    return
+                offset = phdr.p_offset
+                end = offset + phdr.p_memsz
+                size = self.Dyn.size
+                while True:
+                    next_offset = offset + size
+                    if next_offset > end:
+                        raise ValueError(
+                            'Dynamic segment size {} is not a multiple of ElfDyn size {}'.format(
+                                phdr.p_memsz, size))
+                    yield self.read(self.Dyn, offset)
+                    if next_offset == end:
+                        return
+                    offset = next_offset
+
+    def syms(self, shdr: ElfShdr) -> ElfSym:
+        "A generator iterating over a symbol table."
+        assert shdr.sh_type == ElfSht.SHT_SYMTAB
+        size = shdr.sh_entsize
+        if size != self.Sym.size:
+            raise ValueError('Invalid symbol table entry size {}'.format(size))
+        offset = shdr.sh_offset
+        end = shdr.sh_offset + shdr.sh_size
+        while offset < end:
+            yield self.read(self.Sym, offset)
+            offset += size
+        if offset != end:
+            raise ValueError('Symbol table is not a multiple of entry size')
+
+    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
+        try:
+            strtab = self._stringtab[strtab_index]
+        except KeyError:
+            strtab = self._find_stringtab(strtab_index)
+        return strtab.get(strtab_offset)
+
+    def symbol_name(self, section: ElfShdr, sym: ElfSym) -> bytes:
+        return self.lookup_string(section.sh_link, sym.st_name)
+
+    def symbol_address(self, sym: ElfSym) -> int:
+        if sym.st_shndx == ElfShn.SHN_ABS:
+            return sym.st_value
+        if sym.st_shndx in ElfShn:
+            raise ValueError('Symbol address in section {} unknown'.format(
+                sym.st_shndx))
+        try:
+            shdr = self._section[sym.st_shndx.value]
+        except KeyError:
+            shdr = self.find_section(sym.st_shndx)
+        return shdr.sh_addr + sym.st_value
+
+    def find_section(self, shndx: ElfShn) -> ElfShdr:
+        "Return the section header with index shndx, caching the result."
+        try:
+            return self._section[shndx]
+        except KeyError:
+            pass
+        if shndx in ElfShn:
+            raise ValueError('Reserved section index {}'.format(shndx))
+        idx = shndx.value
+        if idx < 0 or idx >= self._shdr_num:
+            raise ValueError('Section index {} out of range [0, {})'.format(
+                idx, self._shdr_num))
+        shdr = self.read(self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
+        self._section[shndx] = shdr
+        return shdr
+
+    def _find_stringtab(self, sh_link: int) -> ElfStringTable:
+        if sh_link < 0 or sh_link >= self._shdr_num:
+            raise ValueError('Section index {} out of range [0, {})'.format(
+                sh_link, self._shdr_num))
+        shdr = self.read(
+            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
+        if shdr.sh_type != ElfSht.SHT_STRTAB:
+            raise ValueError(
+                'Section {} is not a string table: {}'.format(
+                    sh_link, shdr.sh_type))
+        strtab = ElfStringTable(
+            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
+        # This could retain essentially arbitrary amounts of data,
+        # but caching string tables seems important for performance.
+        self._stringtab[sh_link] = strtab
+        return strtab
+
+
 # Only Elf names are exported.
 __all__ = [name for name in dir() if name.startswith('Elf')]
-
-with open('/usr/bin/ld.so', 'rb') as inp:
-    img = ElfImage(memoryview(inp.read()))
-print(img.ehdr)
-print(img.read(img.Shdr, img.ehdr.e_shoff))
-print(img.read(img.Shdr, img.ehdr.e_shoff + img.ehdr.e_shentsize))


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-04-08 18:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-08 18:09 [glibc/fw/glibcelf] Use custom linker script by default (bug 25812) Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).