public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/fw/glibcelf] WIP ELF parser in Python
@ 2022-04-07 19:31 Florian Weimer
0 siblings, 0 replies; only message in thread
From: Florian Weimer @ 2022-04-07 19:31 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=b3fafa0f13765ee3518cc6e19c73750220fcccf8
commit b3fafa0f13765ee3518cc6e19c73750220fcccf8
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Apr 7 21:30:53 2022 +0200
WIP ELF parser in Python
Diff:
---
scripts/glibcelf.py | 547 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 547 insertions(+)
diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
new file mode 100644
index 0000000000..1bb94b51a9
--- /dev/null
+++ b/scripts/glibcelf.py
@@ -0,0 +1,547 @@
+#!/usr/bin/python3
+# ELF support functionality for Python.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import collections
+import enum
+import struct
+
class OpenIntEnum(enum.IntEnum):
    """Integer enumeration that also accepts values without a named member.

    Looking up an integer that has no declared constant does not raise;
    instead an unnamed member of the enumeration class carrying that
    value is produced.
    """

    @classmethod
    def _missing_(cls, value):
        # Modelled on enum.IntFlag._create_pseudo_member_: build an
        # instance directly so that arbitrary integers can be wrapped.
        unnamed = int.__new__(cls, value)
        unnamed._value_ = value
        unnamed._name_ = None
        return unnamed

    def __repr__(self):
        if self._name_ is None:
            return '{}({})'.format(type(self).__name__, self._value_)
        # Constant names carry prefixes such as SHT_, which already
        # identify the enumeration they belong to.
        return self._name_

    def __str__(self):
        return str(self._value_) if self._name_ is None else self._name_
+
class ElfClass(OpenIntEnum):
    """ELF word size.  This is the type of EI_CLASS values."""

    ELFCLASSNONE = 0
    ELFCLASS32 = 1  # Selects the 32-bit structure variants.
    ELFCLASS64 = 2  # Selects the 64-bit structure variants.
+
class ElfData(OpenIntEnum):
    """ELF endianness.  This is the type of EI_DATA values."""

    ELFDATANONE = 0
    ELFDATA2LSB = 1  # Selects the little-endian ('LE') variants.
    ELFDATA2MSB = 2  # Selects the big-endian ('BE') variants.
+
class ElfMachine(OpenIntEnum):
    """ELF machine type.

    This is the type of the values found in the ElfEhdr.e_machine
    field.  Values without a constant below are still representable
    because the enumeration is open.
    """

    # 0 .. 23
    EM_NONE = 0
    EM_M32 = 1
    EM_SPARC = 2
    EM_386 = 3
    EM_68K = 4
    EM_88K = 5
    EM_IAMCU = 6
    EM_860 = 7
    EM_MIPS = 8
    EM_S370 = 9
    EM_MIPS_RS3_LE = 10
    EM_PARISC = 15
    EM_VPP500 = 17
    EM_SPARC32PLUS = 18
    EM_960 = 19
    EM_PPC = 20
    EM_PPC64 = 21
    EM_S390 = 22
    EM_SPU = 23

    # 36 .. 120
    EM_V800 = 36
    EM_FR20 = 37
    EM_RH32 = 38
    EM_RCE = 39
    EM_ARM = 40
    EM_FAKE_ALPHA = 41
    EM_SH = 42
    EM_SPARCV9 = 43
    EM_TRICORE = 44
    EM_ARC = 45
    EM_H8_300 = 46
    EM_H8_300H = 47
    EM_H8S = 48
    EM_H8_500 = 49
    EM_IA_64 = 50
    EM_MIPS_X = 51
    EM_COLDFIRE = 52
    EM_68HC12 = 53
    EM_MMA = 54
    EM_PCP = 55
    EM_NCPU = 56
    EM_NDR1 = 57
    EM_STARCORE = 58
    EM_ME16 = 59
    EM_ST100 = 60
    EM_TINYJ = 61
    EM_X86_64 = 62
    EM_PDSP = 63
    EM_PDP10 = 64
    EM_PDP11 = 65
    EM_FX66 = 66
    EM_ST9PLUS = 67
    EM_ST7 = 68
    EM_68HC16 = 69
    EM_68HC11 = 70
    EM_68HC08 = 71
    EM_68HC05 = 72
    EM_SVX = 73
    EM_ST19 = 74
    EM_VAX = 75
    EM_CRIS = 76
    EM_JAVELIN = 77
    EM_FIREPATH = 78
    EM_ZSP = 79
    EM_MMIX = 80
    EM_HUANY = 81
    EM_PRISM = 82
    EM_AVR = 83
    EM_FR30 = 84
    EM_D10V = 85
    EM_D30V = 86
    EM_V850 = 87
    EM_M32R = 88
    EM_MN10300 = 89
    EM_MN10200 = 90
    EM_PJ = 91
    EM_OPENRISC = 92
    EM_ARC_COMPACT = 93
    EM_XTENSA = 94
    EM_VIDEOCORE = 95
    EM_TMM_GPP = 96
    EM_NS32K = 97
    EM_TPC = 98
    EM_SNP1K = 99
    EM_ST200 = 100
    EM_IP2K = 101
    EM_MAX = 102
    EM_CR = 103
    EM_F2MC16 = 104
    EM_MSP430 = 105
    EM_BLACKFIN = 106
    EM_SE_C33 = 107
    EM_SEP = 108
    EM_ARCA = 109
    EM_UNICORE = 110
    EM_EXCESS = 111
    EM_DXP = 112
    EM_ALTERA_NIOS2 = 113
    EM_CRX = 114
    EM_XGATE = 115
    EM_C166 = 116
    EM_M16C = 117
    EM_DSPIC30F = 118
    EM_CE = 119
    EM_M32C = 120

    # 131 .. 144
    EM_TSK3000 = 131
    EM_RS08 = 132
    EM_SHARC = 133
    EM_ECOG2 = 134
    EM_SCORE7 = 135
    EM_DSP24 = 136
    EM_VIDEOCORE3 = 137
    EM_LATTICEMICO32 = 138
    EM_SE_C17 = 139
    EM_TI_C6000 = 140
    EM_TI_C2000 = 141
    EM_TI_C5500 = 142
    EM_TI_ARP32 = 143
    EM_TI_PRU = 144

    # 160 .. 205
    EM_MMDSP_PLUS = 160
    EM_CYPRESS_M8C = 161
    EM_R32C = 162
    EM_TRIMEDIA = 163
    EM_QDSP6 = 164
    EM_8051 = 165
    EM_STXP7X = 166
    EM_NDS32 = 167
    EM_ECOG1X = 168
    EM_MAXQ30 = 169
    EM_XIMO16 = 170
    EM_MANIK = 171
    EM_CRAYNV2 = 172
    EM_RX = 173
    EM_METAG = 174
    EM_MCST_ELBRUS = 175
    EM_ECOG16 = 176
    EM_CR16 = 177
    EM_ETPU = 178
    EM_SLE9X = 179
    EM_L10M = 180
    EM_K10M = 181
    EM_AARCH64 = 183
    EM_AVR32 = 185
    EM_STM8 = 186
    EM_TILE64 = 187
    EM_TILEPRO = 188
    EM_MICROBLAZE = 189
    EM_CUDA = 190
    EM_TILEGX = 191
    EM_CLOUDSHIELD = 192
    EM_COREA_1ST = 193
    EM_COREA_2ND = 194
    EM_ARCV2 = 195
    EM_OPEN8 = 196
    EM_RL78 = 197
    EM_VIDEOCORE5 = 198
    EM_78KOR = 199
    EM_56800EX = 200
    EM_BA1 = 201
    EM_BA2 = 202
    EM_XCORE = 203
    EM_MCHP_PIC = 204
    EM_INTELGT = 205

    # 210 .. 253
    EM_KM32 = 210
    EM_KMX32 = 211
    EM_EMX16 = 212
    EM_EMX8 = 213
    EM_KVARC = 214
    EM_CDP = 215
    EM_COGE = 216
    EM_COOL = 217
    EM_NORC = 218
    EM_CSR_KALIMBA = 219
    EM_Z80 = 220
    EM_VISIUM = 221
    EM_FT32 = 222
    EM_MOXIE = 223
    EM_AMDGPU = 224
    EM_RISCV = 243
    EM_BPF = 247
    EM_CSKY = 252
    EM_NUM = 253

    # Outside the contiguous range.
    EM_ALPHA = 0x9026
+
class ElfEt(OpenIntEnum):
    """ELF file type.

    This is the type of the ET_* values and of the Ehdr.e_type field.
    """

    ET_NONE = 0
    ET_REL = 1
    ET_EXEC = 2
    ET_DYN = 3
    ET_CORE = 4
+
class ElfShn(OpenIntEnum):
    """ELF reserved section indices.

    Ordinary section indices have no constant here; they are
    represented as unnamed members.
    """

    SHN_UNDEF = 0x0000
    SHN_ABS = 0xfff1
    SHN_COMMON = 0xfff2
    SHN_XINDEX = 0xffff
+
class ElfSht(OpenIntEnum):
    """ELF section types.  This is the type of SHT_* values."""

    # Small, densely numbered types.
    SHT_NULL = 0
    SHT_PROGBITS = 1
    SHT_SYMTAB = 2
    SHT_STRTAB = 3
    SHT_RELA = 4
    SHT_HASH = 5
    SHT_DYNAMIC = 6
    SHT_NOTE = 7
    SHT_NOBITS = 8
    SHT_REL = 9
    SHT_DYNSYM = 11
    SHT_INIT_ARRAY = 14
    SHT_FINI_ARRAY = 15
    SHT_PREINIT_ARRAY = 16
    SHT_GROUP = 17
    SHT_SYMTAB_SHNDX = 18

    # Types in the 0x6fffffxx range.
    SHT_GNU_ATTRIBUTES = 0x6ffffff5
    SHT_GNU_HASH = 0x6ffffff6
    SHT_GNU_LIBLIST = 0x6ffffff7
    SHT_CHECKSUM = 0x6ffffff8
    SHT_GNU_verdef = 0x6ffffffd
    SHT_GNU_verneed = 0x6ffffffe
    SHT_GNU_versym = 0x6fffffff
+
class ElfPf(enum.IntFlag):
    """Program header flags.  This is the type of ElfPhdr.p_flags values."""

    PF_X = 1 << 0
    PF_W = 1 << 1
    PF_R = 1 << 2
+
class ElfShf(enum.IntFlag):
    """Section flags.  This is the type of ElfShdr.sh_flags values."""

    SHF_WRITE = 1 << 0
    SHF_ALLOC = 1 << 1
    SHF_EXECINSTR = 1 << 2
    SHF_MERGE = 1 << 4
    SHF_STRINGS = 1 << 5
    SHF_INFO_LINK = 1 << 6
    SHF_LINK_ORDER = 1 << 7
    SHF_OS_NONCONFORMING = 1 << 8
    SHF_GROUP = 1 << 9
    SHF_TLS = 1 << 10
    SHF_COMPRESSED = 1 << 11
    SHF_GNU_RETAIN = 1 << 21
    SHF_ORDERED = 1 << 30
    # <elf.h> names bit 31 SHF_EXCLUDE.  It is listed first so that it
    # is the canonical name of the flag.
    SHF_EXCLUDE = 1 << 31
    # Alias kept for existing users of the previous (misnamed) constant.
    SHF_RETAIN = 1 << 31
+
class ElfStb(OpenIntEnum):
    """ELF symbol binding type.

    This is the upper four bits of the st_info byte, see ElfStInfo.
    """

    STB_LOCAL = 0
    STB_GLOBAL = 1
    # The ELF specification assigns 2 to STB_WEAK (it was 3 here, so
    # weak symbols were decoded as an unnamed binding).
    STB_WEAK = 2
    STB_GNU_UNIQUE = 10
+
class ElfStt(OpenIntEnum):
    """ELF symbol type.

    This is the lower four bits of the st_info byte, see ElfStInfo.
    """

    STT_NOTYPE = 0
    STT_OBJECT = 1
    STT_FUNC = 2
    STT_SECTION = 3
    STT_FILE = 4
    STT_COMMON = 5
    STT_TLS = 6
    STT_GNU_IFUNC = 10
+
class ElfStInfo:
    """ELF symbol binding and type.  Type of the ElfSym.st_info field.

    An instance can be created in two ways:

      ElfStInfo(st_info)     from the packed integer, binding in the
                             bits above the low four, type in the low
                             four bits
      ElfStInfo(bind, type)  from the two separate components

    Attributes:
      bind: the symbol binding, an ElfStb value
      type: the symbol type, an ElfStt value
    """

    def __init__(self, arg0, arg1=None):
        # Only a plain int on its own is treated as the packed value;
        # enum members are always taken to be the binding component.
        if type(arg0) is int and arg1 is None:
            self.bind = ElfStb(arg0 >> 4)
            self.type = ElfStt(arg0 & 15)
        else:
            self.bind = ElfStb(arg0)
            self.type = ElfStt(arg1)

    def value(self):
        """Return the packed integer representation (inverse of __init__)."""
        # Enum members expose .value as a property, not as a method.
        return (self.bind.value << 4) | self.type.value

    def __repr__(self):
        return 'ElfStInfo({!r}, {!r})'.format(self.bind, self.type)
+
def _define_variants(baseclass: type, layout32: str, layout64: str,
                     types: dict[str, type] | None=None,
                     fields32: tuple[str] | None=None):
    """Generate the word-size and byte-order specific parsers of baseclass.

    For each combination of 32/64 bits and little/big endian, a
    subclass of baseclass is created, named like ElfEhdr64LE.  Each
    subclass has a static method unpack(blob) that decodes a bytes-like
    object into a baseclass instance, and an attribute size with the
    encoded size in bytes.  The four classes are stored in the
    dictionary baseclass.variants under (ElfClass, ElfData) keys.

    baseclass: a named tuple type.  The order of its fields is the
      order of the members in the 64-bit layout.
    layout32, layout64: struct module format strings without a
      byte-order prefix; the prefix is added by this function.
    types: optional mapping from field names to callables.  The raw
      decoded value of the field is replaced by the result of calling
      the callable with it.  The callables need a __name__.
    fields32: optional order of the members in the 32-bit layout, for
      structures whose 32-bit member order differs from the 64-bit one.

    Raises ValueError if a layout does not produce one value per field,
    if types mentions an unknown field, or if fields32 is not a
    permutation of the fields of baseclass.
    """
    base_name = baseclass.__name__
    base_fields = baseclass._fields

    # Check that the struct formats yield the right number of
    # components.  The check uses the standard-size encoding that the
    # generated parsers use; the byte order does not affect the count.
    for layout in (layout32, layout64):
        probe = struct.Struct('<' + layout)
        example = probe.unpack(bytes(probe.size))
        if len(example) != len(base_fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                layout, len(example), len(base_fields)))

    # Check that field names in types are correct.  An empty dict is
    # used for "no conversions" so that only truthiness matters below.
    if types is None:
        types = {}
    for n in types:
        if n not in base_fields:
            raise ValueError('{} does not have field {!r}'.format(
                base_name, n))

    # Comparing sorted lists (rather than sets) also rejects duplicates.
    if fields32 is not None \
       and sorted(fields32) != sorted(base_fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, base_fields))

    # Names that the generated code must not reuse for its own helpers.
    used_names = {base_name, *base_fields,
                  *(t.__name__ for t in types.values())}

    def unique_name(name):
        "Find a name that is not used for a class or field name."
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    indent = ' ' * 8
    field_names = ', '.join(base_fields)

    classes = {}
    for (bits, elfclass, layout, fields) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, classsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            # The prefix selects the byte order and disables native
            # alignment, so the result does not depend on the host.
            compiled = struct.Struct(structprefix + layout)

            env = {
                base_name: baseclass,
                struct_unpack_name: compiled.unpack,
            }
            # Add the type converters.
            for converter in types.values():
                env[converter.__name__] = converter

            classname = '{}{}{}'.format(base_name, bits, classsuffix)
            unpack_call = '{}({})'.format(struct_unpack_name, blob_name)

            lines = [
                'class {}({}):'.format(classname, base_name),
                '    @staticmethod',
                '    def unpack({}):'.format(blob_name),
            ]
            if not types and fields is None:
                # Values are already in field order and need no conversion.
                lines.append('{}return {}(*{})'.format(
                    indent, base_name, unpack_call))
            else:
                # Destructuring tuple assignment, in the order in which
                # the members are encoded.
                order = base_fields if fields is None else fields
                lines.append('{}({},) = {}'.format(
                    indent, ', '.join(order), unpack_call))
                # Perform the type conversions.
                for n in base_fields:
                    if n in types:
                        lines.append('{}{} = {}({})'.format(
                            indent, n, types[n].__name__, n))
                # Create the named tuple.
                lines.append('{}return {}({})'.format(
                    indent, base_name, field_names))

            # The generated source only contains names validated above
            # and supplied by this module, not external input.
            exec('\n'.join(lines) + '\n', env)
            variant = env[classname]
            variant.size = compiled.size
            classes[(elfclass, elfdata)] = variant
    baseclass.variants = classes
+
+
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class ElfIdent(collections.namedtuple('ElfIdent',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """Decoded e_ident array.

    ElfIdent(blob) decodes a 16-byte bytes-like object.  Calling the
    class with any other number of arguments passes them on as the
    field values.  The ei_class and ei_data fields of decoded
    instances are ElfClass and ElfData values.
    """

    # Encoded size in bytes, matching the format used by unpack.
    size = 16

    def __new__(cls, *args):
        if len(args) == 1:
            return cls.unpack(args[0])
        # super() is used instead of cls.__base__ because the latter is
        # this class itself when cls is a subclass, which would recurse
        # without end.
        return super().__new__(cls, *args)

    @staticmethod
    def unpack(blob):
        """Decode blob (16 bytes) and return an ElfIdent instance."""
        (ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion,
         ei_pad) = struct.unpack('4s5B7s', blob)
        return ElfIdent(ei_mag, ElfClass(ei_class), ElfData(ei_data),
                        ei_version, ei_osabi, ei_abiversion, ei_pad)
+
# ELF file header.  Corresponds to Elf32_Ehdr and Elf64_Ehdr.
ElfEhdr = collections.namedtuple('ElfEhdr', [
    'e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff',
    'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum',
    'e_shentsize', 'e_shnum', 'e_shstrndx',
])
_define_variants(
    ElfEhdr,
    layout32='16s2H5I6H',
    layout64='16s2HI3QI6H',
    types={
        'e_ident': ElfIdent,
        'e_machine': ElfMachine,
        'e_type': ElfEt,
    })
+
# Program header.  Corresponds to Elf32_Phdr and Elf64_Phdr.  The field
# order of the named tuple follows the latter; the 32-bit member order
# is given separately.
ElfPhdr = collections.namedtuple('ElfPhdr', [
    'p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz',
    'p_memsz', 'p_align',
])
_define_variants(
    ElfPhdr,
    layout32='8I',
    layout64='2I6Q',
    fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
              'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
    types={'p_flags': ElfPf})
+
+
# Section header.  Corresponds to Elf32_Shdr and Elf64_Shdr.
ElfShdr = collections.namedtuple('ElfShdr', [
    'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset', 'sh_size',
    'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
])
_define_variants(
    ElfShdr,
    layout32='10I',
    layout64='2I4Q2I2Q',
    types={'sh_flags': ElfShf})
+
# Symbol table entry.  Corresponds to Elf32_Sym and Elf64_Sym.  The
# field order of the named tuple follows the latter; the 32-bit member
# order is given by fields32.
ElfSym = collections.namedtuple('ElfSym',
    'st_name st_info st_other st_shndx st_value st_size')
_define_variants(ElfSym,
                 layout32='3I2BH',
                 # st_name is a 32-bit word in Elf64_Sym as well, so the
                 # entry is 4 + 1 + 1 + 2 + 8 + 8 = 24 bytes long.
                 layout64='I2BH2Q',
                 fields32=('st_name', 'st_value', 'st_size', 'st_info',
                           'st_other', 'st_shndx'),
                 types=dict(st_shndx=ElfShn,
                            st_info=ElfStInfo))
+
# Relocation entry without addend.  Corresponds to Elf32_Rel and
# Elf64_Rel.
ElfRel = collections.namedtuple('ElfRel', ['r_offset', 'r_info'])
_define_variants(
    ElfRel,
    layout32='2I',
    layout64='2Q')
+
# Relocation entry with addend.  Corresponds to Elf32_Rela and
# Elf64_Rela.
ElfRela = collections.namedtuple('ElfRela', 'r_offset r_info r_addend')
_define_variants(ElfRela,
                 # r_addend is a signed quantity (Elf32_Sword and
                 # Elf64_Sxword), hence the lower-case format codes.
                 layout32='2Ii',
                 layout64='2Qq')
+
class ElfImage:
    """ELF image parser.

    Attributes set by the constructor:
      image: the object passed to the constructor
      Ehdr, Phdr, Shdr, Sym, Rel, Rela: the parser classes matching the
        word size and byte order of the image, or None if the image
        uses a combination without a parser
      ehdr: the decoded ELF header, or None if Ehdr is None
    """

    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
          subscripting.

        """
        self.image = image
        ident = self.read(ElfIdent, 0)
        variant_key = (ident.ei_class, ident.ei_data)

        # Publish the matching parser classes as self.Ehdr etc.; the
        # attribute name is the type name without its 'Elf' prefix.
        generic_types = (ElfEhdr, ElfPhdr, ElfShdr, ElfSym, ElfRel, ElfRela)
        for generic in generic_types:
            parser = generic.variants.get(variant_key, None)
            setattr(self, generic.__name__[len('Elf'):], parser)

        self.ehdr = None if self.Ehdr is None else self.read(self.Ehdr, 0)

    def read(self, typ, offset):
        """Decode an object of type typ located at offset in the image.

        typ must provide a size attribute and an unpack method.
        """
        end = offset + typ.size
        return typ.unpack(self.image[offset:end])
+
# Only Elf names are exported.
__all__ = [name for name in dir() if name.startswith('Elf')]

# Ad-hoc smoke test.  It only runs when the file is executed as a
# script, so that importing the module neither reads files nor prints.
if __name__ == '__main__':
    import sys

    # The file to inspect can be given on the command line.
    path = sys.argv[1] if len(sys.argv) > 1 else '/usr/bin/ld.so'
    with open(path, 'rb') as inp:
        img = ElfImage(memoryview(inp.read()))
    print(img.ehdr)
    # The first two section headers.
    print(img.read(img.Shdr, img.ehdr.e_shoff))
    print(img.read(img.Shdr, img.ehdr.e_shoff + img.ehdr.e_shentsize))
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-04-07 19:31 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-07 19:31 [glibc/fw/glibcelf] WIP ELF parser in Python Florian Weimer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).