#!python3
"""
Support for dumping enums and macros.

Run within gdb, e.g.

    source "stap-init-gdb.py"
    py dumper = TypeDumper(open("demo.c","w"))
    file /path/to/postgres
    break main
    run
    py dumper.write_header()
    py dumper.dump_macro("PG_VERSION_NUM")
    py dumper.dump_enum("enum WalLevel")

Expects to be told which macros and enum syms to dump. Doesn't seem to be a
way to ask gdb to enumerate all known macros and types unfortunately, not
without parsing 'info macros' and 'info types' output. Sigh. No filter for
objfile etc.

Important limitations that aren't easily fixed:

* This tool can only dump macros and static symbols when the current scope
  gdb invokes them in makes them visible. The Python API doesn't seem to give
  enough access to gdb innards to select an active scope / block / objfile to
  search for macro info, and you can't use gdb.lookup_symbol() for macros. So
  it's up to the script to ensure the current frame is in the target
  compilation unit or scope. For enum value symbols (but not typenames)
  lookup_symbol() can be used on the Objfile target.

[TODO]s

* Doesn't remap enum values and emit the enum type instead yet
* Doesn't load a typelist file yet, must do your own Python calls
* Doesn't auto-prefix "enum" on bare enum typenames, you have to qualify them
* Resolve enums using the name of an enum-value not just the enum type, and
  walk all objfiles to look it up, or allow one objfile to be specified.

See references:

* https://sourceware.org/gdb/onlinedocs/gdb/Python-API.html
"""

import re
import sys
import textwrap

import gdb

# A C identifier: a letter or underscore followed by letters, digits or
# underscores. Anchored with \Z rather than $ so that a trailing newline is
# rejected too; the name is interpolated into a gdb command line.
valid_identifier_regexp = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*\Z")

# Matches (via .match()) any text that has a line starting with "#define".
macro_define_regexp = re.compile(".*^#define", re.MULTILINE|re.DOTALL)

# Element type names that are emitted as string literals rather than as
# hex-formatted integer arrays.
_CHAR_TYPE_NAMES = ("char", "unsigned char", "signed char")


def _format_array_macro(macroname, macrovalue, arraytype):
    """Return a C definition line for a macro that evaluates to an array.

    Only one-dimensional arrays of integer element type are handled.

    Raises:
        TypeError: if the array's element type is not an integer type.
    """
    elemtype = arraytype.target().unqualified()
    if elemtype.code != gdb.TYPE_CODE_INT:
        raise TypeError("don't know how to handle arrays of non-integer types")

    # Type.range() gives the inclusive (low, high) bounds of the array.
    low, high = arraytype.range()
    nelems = high - low + 1

    # must be a better way to handle string constants??
    if str(elemtype) in _CHAR_TYPE_NAMES:
        valuerepr = macrovalue.format_string(raw=True, format='s')
    else:
        valuerepr = macrovalue.format_string(raw=True, format='x')

    # C puts the array bound after the identifier, not after the type name.
    return "{} {}[{}] = {};\n".format(
        elemtype.const(), macroname, nelems, valuerepr)


def _format_scalar_macro(macroname, macrovalue, basetype):
    """Return a C definition line for a macro that evaluates to a scalar.

    Enum and typedef layers are peeled off with Type.target() until an
    integer or floating point type is reached.

    Raises:
        TypeError: if the type does not resolve to an integer or float.
    """
    # NOTE(review): Type.target() raises if gdb has no target type recorded
    # for an enum - confirm against the debuginfo in use.
    while basetype.code in (gdb.TYPE_CODE_ENUM, gdb.TYPE_CODE_TYPEDEF):
        basetype = basetype.target()

    if basetype.code not in (gdb.TYPE_CODE_INT, gdb.TYPE_CODE_FLT):
        raise TypeError(
            "expression basetype {} didn't resolve to integer or float".format(basetype))

    valuerepr = macrovalue.cast(basetype).format_string(raw=True)
    return "{} {} = {};\n".format(basetype.const(), macroname, valuerepr)


class TypeDumper:
    """Writes C constant definitions for enums and macros known to gdb."""

    def __init__(self, outfile):
        """Remember `outfile`, a writable text stream, as the output target."""
        self.out_stream = outfile

    def write(self, out):
        """Allow direct stream writes for prefix etc"""
        self.out_stream.write(out)

    def write_header(self):
        """Write a comment naming the program file and its build-id.

        The build-id is taken from the objfile whose filename equals the
        program space's filename; it is reported as None if there is no such
        objfile.
        """
        progspace = gdb.selected_inferior().progspace
        fn = progspace.filename
        build_id = next(
            (objfile.build_id for objfile in progspace.objfiles()
             if objfile.filename == fn),
            None)
        self.out_stream.write("/*\n * Generated by stap-enum-gdb.py\n * from {}\n * with build-id {}\n */\n\n".format(fn, build_id))

    def dump_enum(self, typename, value_name_prefix=""):
        """
        Dump an enum's fields as const symbols.

        `typename` is looked up with gdb.lookup_type(), so enum tags need the
        "enum " prefix. Typedefs are stripped before the enumerators and the
        underlying type are read. Each enumerator is written as a const
        definition named `value_name_prefix` + enumerator name.

        Raises:
            TypeError: if `typename` does not resolve to an enum type.

        TODO try again with enum prefix on fail
        """
        # Get GDB to describe the type and fields its own way for comment use.
        ptype_output = gdb.execute("ptype " + typename, to_string=True).strip()

        enum_type = gdb.lookup_type(typename)
        # For a typedef, target() would be the enum itself rather than its
        # integer type, so resolve to the real type first.
        resolved_type = enum_type.strip_typedefs()
        if resolved_type.code != gdb.TYPE_CODE_ENUM:
            raise TypeError("{} is not an enum type".format(typename))
        # NOTE(review): Type.target() raises if gdb has no underlying type
        # recorded for the enum - confirm against the debuginfo in use.
        const_base_type = resolved_type.target().const()

        # Prefix every line of gdb's description so multi-line output stays
        # aligned inside the comment.
        parts = ["/*\n", textwrap.indent(ptype_output, " * "), "\n */\n"]
        for (n, v) in resolved_type.items():
            parts.append("{} {}{} = {}; /* {} {} */\n".format(
                const_base_type, value_name_prefix, n, v.enumval,
                enum_type.name, n))

        self.out_stream.write("".join(parts))
        self.out_stream.write("\n")

    def dump_enum_by_value(self, enumname):
        """TODO: given a value of an enum, dump the whole enum if not already dumped"""
        pass

    def dump_macro(self, macroname):
        """
        Output a static variable with the value of the given macro.

        gdb python API doesn't seem to expose sensible access to macros. You
        can use 'info macro' but it's clumsy.

        The macro must be visible from gdb's current scope. Its 'info macro'
        description is written as a comment, followed by a const definition
        holding the value gdb evaluates the macro to.

        Raises:
            ValueError: if `macroname` is not a legal C identifier, or if
                'info macro' output contains no #define for it.
            TypeError: if the macro's value is neither an integer, a float,
                nor a one-dimensional array of integers.
        """
        if not valid_identifier_regexp.match(macroname):
            raise ValueError("{} is not a legal C identifier, refusing to try to evaluate as a macro".format(macroname))

        # Try to double check it's really a macro
        exec_result = gdb.execute("info macro {}".format(macroname), to_string=True)
        if not macro_define_regexp.match(exec_result):
            # TODO handle enums and static constants too
            raise ValueError("\"info macro {}\" output did not appear to contain a #define, maybe this isn't a macro?:\n{}\n\n".format(macroname, exec_result))

        # Write a comment with gdb's description of the macro
        buf = "/*\n"
        buf += textwrap.indent(exec_result.strip(), " * ")
        buf += "\n */\n"

        macrovalue = gdb.parse_and_eval(macroname)
        basetype = macrovalue.type.unqualified()
        if basetype.code == gdb.TYPE_CODE_ARRAY:
            # doesn't handle multidim arrays
            buf += _format_array_macro(macroname, macrovalue, basetype)
        else:
            buf += _format_scalar_macro(macroname, macrovalue, basetype)

        self.out_stream.write(buf)
        self.out_stream.write("\n")


dumper = TypeDumper(sys.stderr)