public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/1] Add support for symbol addition to the Python API
@ 2023-01-06  1:37 dark.ryu.550
  2023-01-06 20:21 ` Simon Marchi
  0 siblings, 1 reply; 6+ messages in thread
From: dark.ryu.550 @ 2023-01-06  1:37 UTC (permalink / raw)
  To: gdb-patches

This patch adds support for symbol creation and registration. It currently
supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols
(VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It
adds the `add_type_symbol`, `add_static_symbol` and `add_label_symbol`
functions to the `gdb.Objfile` type, allowing for the addition of the
aforementioned types of symbols.

This is done by building a new `compunit_symtab` for each symbol that is
to be added, owned by a given objfile and whose lifetime is bound to it. I
might be missing something here, but there doesn't seem to be an intended way
to add new symbols to a compunit_symtab after it's been finished. If there is,
then the efficiency of this method could very much be improved. It could also
be made more efficient by having a way to add whole batches of symbols at
once, which would then all get added to the same `compunit_symtab`.

For now, though, this implementation lets us add symbols that can be used to,
for instance, query registered types through `gdb.lookup_type`, and allows
reverse engineering GDB plugins (such as Pwndbg [0] or decomp2dbg [1]) to add
symbols directly through the Python API instead of having to compile an object
file for the target architecture that they later load through the
add-symbol-file command. [2]

[0] https://github.com/pwndbg/pwndbg/
[1] https://github.com/mahaloz/decomp2dbg
[2] https://github.com/mahaloz/decomp2dbg/blob/055be6b2001954d00db2d683f20e9b714af75880/decomp2dbg/clients/gdb/symbol_mapper.py#L235-L243


---
diff --git a/gdb/python/py-objfile.c b/gdb/python/py-objfile.c
index c278925531b..9b884d4c414 100644
--- a/gdb/python/py-objfile.c
+++ b/gdb/python/py-objfile.c
@@ -25,6 +25,7 @@
 #include "build-id.h"
 #include "symtab.h"
 #include "python.h"
+#include "buildsym.h"
 
 struct objfile_object
 {
@@ -527,6 +528,229 @@ objfpy_lookup_static_symbol (PyObject *self, PyObject
*args, PyObject *kw)
   Py_RETURN_NONE;
 }
 
+/* Create a symbol named NAME on OBJFILE's obstack and register it through
+   a buildsym_compunit created just for this call.  LANGUAGE, DOMAIN, ACLASS
+   and SECTION_INDEX are stored on the symbol.  LAST_ADDR goes to the builder
+   constructor; END_ADDR and SECTION_INDEX go to end_compunit_symtab.  GLOBAL
+   picks the builder's global symbol list instead of its file-local one.
+   PARAMS runs on the symbol before it is listed.  Returns the new symbol.  */
+
+static struct symbol *
+add_new_symbol (struct objfile *objfile, const char *name,
+                enum language language, enum domain_enum domain,
+                enum address_class aclass, short section_index,
+                CORE_ADDR last_addr, CORE_ADDR end_addr, bool global,
+                std::function<void(struct symbol*)> params)
+{
+  struct symbol *symbol = new (&objfile->objfile_obstack) struct symbol ();
+  OBJSTAT (objfile, n_syms++);
+
+  symbol->set_language (language, &objfile->objfile_obstack);
+  symbol->compute_and_set_names (gdb::string_view (name), true,
+                                 objfile->per_bfd);
+  symbol->set_is_objfile_owned (true);
+  symbol->set_section_index (section_index);
+  symbol->set_domain (domain);
+  symbol->set_aclass_index (aclass);
+  params (symbol);
+
+  buildsym_compunit builder (objfile, "", "", language, last_addr);
+  add_symbol_to_list (symbol, global ? builder.get_global_symbols ()
+                                     : builder.get_file_symbols ());
+  builder.end_compunit_symtab (end_addr, section_index);
+  return symbol;
+}
+
+/* Map LANGUAGE, a language name string received from Python, onto an enum
+   language value.  Names missing from the table yield language_unknown.  */
+
+static enum language
+parse_language (const char *language)
+{
+  static const struct
+  {
+    const char *name;
+    enum language value;
+  } known_languages[] =
+    {
+      { "c", language_c },
+      { "objc", language_objc },
+      { "cplus", language_cplus },
+      { "d", language_d },
+      { "go", language_go },
+      { "fortran", language_fortran },
+      { "m2", language_m2 },
+      { "asm", language_asm },
+      { "pascal", language_pascal },
+      { "opencl", language_opencl },
+      { "rust", language_rust },
+      { "ada", language_ada },
+      { "auto", language_auto },
+    };
+
+  for (const auto &entry : known_languages)
+    if (strcmp (language, entry.name) == 0)
+      return entry.value;
+  return language_unknown;
+}
+
+/* Implementation of gdb.Objfile.add_type_symbol (name, type [, language]).
+   Registers a VAR_DOMAIN/LOC_TYPEDEF symbol called NAME in the objfile
+   behind SELF, with TYPE as the symbol's type.  LANGUAGE is a name
+   understood by parse_language and defaults to "auto".  Returns the new
+   symbol wrapped by symbol_to_symbol_object, or nullptr on failure;
+   TypeError is raised when type_object_to_type rejects TYPE without
+   setting an error itself, ValueError when LANGUAGE is not recognised.  */
+
+static PyObject *
+objfpy_add_type_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sO|s";
+  static const char *keywords[] = { "name", "type", "language", NULL };
+
+  const char *name;
+  PyObject *type_object;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &type_object, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  struct type *type = type_object_to_type (type_object);
+  if (type == nullptr)
+    {
+      /* Never hand Python a null result without an exception pending.  */
+      if (!PyErr_Occurred ())
+        PyErr_SetString (PyExc_TypeError, "type argument must be a gdb.Type");
+      return nullptr;
+    }
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  struct symbol *symbol
+    = add_new_symbol (objfile, name, language, VAR_DOMAIN, LOC_TYPEDEF,
+                      0, 0, 0, false,
+                      [&] (struct symbol *temp_symbol)
+                      {
+                        temp_symbol->set_type (type);
+                      });
+
+  return symbol_to_symbol_object (symbol);
+}
+
+/* Implementation of gdb.Objfile.add_label_symbol (name, address [, language]).
+
+   Registers a LABEL_DOMAIN/LOC_LABEL symbol called NAME at ADDRESS in the
+   objfile behind SELF.  LANGUAGE is a name understood by parse_language and
+   defaults to "auto".
+
+   Returns the new symbol wrapped by symbol_to_symbol_object, or nullptr on
+   failure; ValueError is raised when LANGUAGE is not recognised.  */
+
+static PyObject *
+objfpy_add_label_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sK|s";
+  static const char *keywords[] = { "name", "address", "language", NULL };
+
+  const char *name;
+  /* "K" stores through an unsigned long long pointer, so parse into exactly
+     that type; a CORE_ADDR need not have the width "k" (unsigned long)
+     assumes on every host.  */
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  const CORE_ADDR address = raw_address;
+  struct symbol *symbol
+    = add_new_symbol (objfile, name, language, LABEL_DOMAIN, LOC_LABEL,
+                      0, 0, 0, false,
+                      [&] (struct symbol *temp_symbol)
+                      {
+                        temp_symbol->set_value_address (address);
+                      });
+
+  return symbol_to_symbol_object (symbol);
+}
+
+/* Implementation of gdb.Objfile.add_static_symbol (name, address [, language]).
+
+   Registers a VAR_DOMAIN/LOC_STATIC symbol called NAME at ADDRESS in the
+   objfile behind SELF.  LANGUAGE is a name understood by parse_language and
+   defaults to "auto".
+
+   Returns the new symbol wrapped by symbol_to_symbol_object, or nullptr on
+   failure; ValueError is raised when LANGUAGE is not recognised.  */
+
+static PyObject *
+objfpy_add_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sK|s";
+  static const char *keywords[] = { "name", "address", "language", NULL };
+
+  const char *name;
+  /* "K" stores through an unsigned long long pointer, so parse into exactly
+     that type; a CORE_ADDR need not have the width "k" (unsigned long)
+     assumes on every host.  */
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  const CORE_ADDR address = raw_address;
+  struct symbol *symbol
+    = add_new_symbol (objfile, name, language, VAR_DOMAIN, LOC_STATIC,
+                      0, 0, 0, false,
+                      [&] (struct symbol *temp_symbol)
+                      {
+                        temp_symbol->set_value_address (address);
+                      });
+
+  return symbol_to_symbol_object (symbol);
+}
+
 /* Implement repr() for gdb.Objfile.  */
 
 static PyObject *
@@ -704,6 +928,18 @@ objfile_to_objfile_object (struct objfile *objfile)
   return gdbpy_ref<>::new_reference (result);
 }
 
+/* Return the objfile wrapped by SELF, or nullptr when SELF is not an
+   objfile_object; OBJFPY_REQUIRE_VALID (defined elsewhere) is applied too.  */
+struct objfile *
+objfile_object_to_objfile (PyObject *self)
+{
+  if (PyObject_TypeCheck (self, &objfile_object_type) == 0)
+    return nullptr;
+  struct objfile_object *wrapper = (struct objfile_object *) self;
+  OBJFPY_REQUIRE_VALID (wrapper);
+  return wrapper->objfile;
+}
+
 int
 gdbpy_initialize_objfile (void)
 {
@@ -737,6 +973,18 @@ Look up a global symbol in this objfile and return it."
},
     "lookup_static_symbol (name [, domain]).\n\
 Look up a static-linkage global symbol in this objfile and return it." },
 
+  { "add_type_symbol", (PyCFunction) objfpy_add_type_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_type_symbol(name: string, type: gdb.Type, [language: string])\n\
+    Registers a new symbol inside VAR_DOMAIN/LOC_TYPEDEF, with the given
name\
+    referring to the given type." },
+
+  { "add_label_symbol", (PyCFunction) objfpy_add_label_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_label_symbol(name: string, address: int, [language: string])\n\
+    Registers a new symbol inside LABEL_DOMAIN/LOC_LABEL, with the given
name\
+    pointing to the given address." },
+
+  { "add_static_symbol", (PyCFunction) objfpy_add_static_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_static_symbol(name: string, address: int, [language: string])\n\
+    Registers a new symbol inside VAR_DOMAIN/LOC_STATIC, with the given
name\
+    pointing to the given address." },
+
   { NULL }
 };
diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h
index 06357cc8c0b..3877f8a7ca9 100644
--- a/gdb/python/python-internal.h
+++ b/gdb/python/python-internal.h
@@ -481,6 +494,8 @@ struct symtab *symtab_object_to_symtab (PyObject *obj);
 struct symtab_and_line *sal_object_to_symtab_and_line (PyObject *obj);
 frame_info_ptr frame_object_to_frame_info (PyObject *frame_obj);
 struct gdbarch *arch_object_to_gdbarch (PyObject *obj);
+struct objfile *objfile_object_to_objfile (PyObject *self);
 
 /* Convert Python object OBJ to a program_space pointer.  OBJ must be a
    gdb.Progspace reference.  Return nullptr if the gdb.Progspace is not


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] Add support for symbol addition to the Python API
  2023-01-06  1:37 [PATCH 1/1] Add support for symbol addition to the Python API dark.ryu.550
@ 2023-01-06 20:21 ` Simon Marchi
  2023-01-12  2:00   ` [PATCH] " Matheus Branco Borella
  0 siblings, 1 reply; 6+ messages in thread
From: Simon Marchi @ 2023-01-06 20:21 UTC (permalink / raw)
  To: dark.ryu.550, gdb-patches

Same as with the other patches, I can't apply that patch, it seems
misformatted.

On 1/5/23 20:37, dark.ryu.550--- via Gdb-patches wrote:
> This patch adds support for symbol creation and registration. It currently 
> supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols 
> (VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It 
> adds the `add_type_symbol`, `add_static_symbol` and `add_label_symbol`
> functions
> to the `gdb.Objfile` type, allowing for the addition of the aforementioned
> types of
> symbols.
> 
> This is done through building a new `compunit_symtab`s for each symbol that
> is
> to be added, owned by a given objfile and whose lifetimes is bound to it. i 
> might be missing something here, but there doesn't seem to be an intended
> way 
> to add new symbols to a compunit_symtab after it's been finished. if there
> is, 
> then the efficiency of this method could very much be improved. It could
> also be
> made more efficient by having a way to add whole batches of symbols at once,
> 
> which would then all get added to the same `compunit_symtab`.

Indeed, I don't think there's a way today to add symbols to a finished
compunit_symtab.  Maybe it would be worth exploring that.  First, to
avoid creating one compunit_symtab per created user symbol.  But also
because I wonder how user-created symbols interact with existing
symbols.  Let's say I have a symbol that comes from DWARF in an existing
compunit_symtab, and I create a user symbol for that function's address.
The new symbol is in a new compunit_symtab.  This means there is some
overlap in the addresses of two compunit_symtabs.  What would functions
like find_compunit_symtab_by_address return?  Should the new symbol be
added to an existing compunit_symtab, if the address falls into an
existing compunit_symtab's address range?

I think I'll have more questions / worries, but I'll wait until I can
actually apply the patch and read it (I can't read diffs, sorry).

Simon

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Add support for symbol addition to the Python API
  2023-01-06 20:21 ` Simon Marchi
@ 2023-01-12  2:00   ` Matheus Branco Borella
  0 siblings, 0 replies; 6+ messages in thread
From: Matheus Branco Borella @ 2023-01-12  2:00 UTC (permalink / raw)
  To: gdb-patches; +Cc: Matheus Branco Borella


> overlap in the addresses of two compunit_symtabs.  What would functions
> like find_compunit_symtab_by_address return?  Should the new symbol be
> added to an existing compunit_symtab, if the address falls into an
> existing compunit_symtab's address range?

I'm actually not sure. As far as I can tell, `find_compunit_symtab_by_address`
looks into the objfile's qfs, which buildsym_compunit doesn't change. I may
well be wrong, though; this part of the code is still pretty confusing to me.

This patch adds support for symbol creation and registration. It currently
supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols
(VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It
adds the `add_type_symbol`, `add_static_symbol` and `add_label_symbol`
functions to the `gdb.Objfile` type, allowing for the addition of the 
aforementioned types of symbols.

This is done by building a new `compunit_symtab` for each symbol that is
to be added, owned by a given objfile and whose lifetime is bound to it. I
might be missing something here, but there doesn't seem to be an intended way
to add new symbols to a compunit_symtab after it's been finished. If there is,
then the efficiency of this method could very much be improved. It could also be
made more efficient by having a way to add whole batches of symbols at once,
which would then all get added to the same `compunit_symtab`.

For now, though, this implementation lets us add symbols that can be used to,
for instance, query registered types through `gdb.lookup_type`, and allows
reverse engineering GDB plugins (such as Pwndbg [0] or decomp2dbg [1]) to add
symbols directly through the Python API instead of having to compile an object
file for the target architecture that they later load through the
add-symbol-file command. [2]

[0] https://github.com/pwndbg/pwndbg/
[1] https://github.com/mahaloz/decomp2dbg
[2] https://github.com/mahaloz/decomp2dbg/blob/055be6b2001954d00db2d683f20e9b714af75880/decomp2dbg/clients/gdb/symbol_mapper.py#L235-L243
---
 gdb/python/py-objfile.c      | 258 +++++++++++++++++++++++++++++++++++
 gdb/python/python-internal.h |   2 +
 2 files changed, 260 insertions(+)

diff --git a/gdb/python/py-objfile.c b/gdb/python/py-objfile.c
index c278925531b..00fe8de74f1 100644
--- a/gdb/python/py-objfile.c
+++ b/gdb/python/py-objfile.c
@@ -25,6 +25,7 @@
 #include "build-id.h"
 #include "symtab.h"
 #include "python.h"
+#include "buildsym.h"
 
 struct objfile_object
 {
@@ -527,6 +528,233 @@ objfpy_lookup_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
   Py_RETURN_NONE;
 }
 
+/* Create a symbol named NAME on OBJFILE's obstack and register it through
+   a buildsym_compunit created just for this call.  LANGUAGE, DOMAIN, ACLASS
+   and SECTION_INDEX are stored on the symbol.  LAST_ADDR goes to the builder
+   constructor; END_ADDR and SECTION_INDEX go to end_compunit_symtab.  GLOBAL
+   picks the builder's global symbol list instead of its file-local one.
+   PARAMS runs on the symbol before it is listed, so callers can fill in
+   kind-specific fields.  Returns the new symbol.  */
+
+static struct symbol *
+add_new_symbol (struct objfile *objfile, const char *name,
+                enum language language, enum domain_enum domain,
+                enum address_class aclass, short section_index,
+                CORE_ADDR last_addr, CORE_ADDR end_addr, bool global,
+                std::function<void(struct symbol*)> params)
+{
+  struct symbol *symbol = new (&objfile->objfile_obstack) struct symbol ();
+  OBJSTAT (objfile, n_syms++);
+
+  symbol->set_language (language, &objfile->objfile_obstack);
+  symbol->compute_and_set_names (gdb::string_view (name), true,
+                                 objfile->per_bfd);
+  symbol->set_is_objfile_owned (true);
+  symbol->set_section_index (section_index);
+  symbol->set_domain (domain);
+  symbol->set_aclass_index (aclass);
+  params (symbol);
+
+  buildsym_compunit builder (objfile, "", "", language, last_addr);
+  add_symbol_to_list (symbol, global ? builder.get_global_symbols ()
+                                     : builder.get_file_symbols ());
+  builder.end_compunit_symtab (end_addr, section_index);
+
+  return symbol;
+}
+
+/* Map LANGUAGE, a language name string received from Python, onto the
+   matching enum language value.  Names missing from the table below yield
+   language_unknown.  */
+
+static enum language
+parse_language (const char *language)
+{
+  static const struct
+  {
+    const char *name;
+    enum language value;
+  } known_languages[] =
+    {
+      { "c", language_c },
+      { "objc", language_objc },
+      { "cplus", language_cplus },
+      { "d", language_d },
+      { "go", language_go },
+      { "fortran", language_fortran },
+      { "m2", language_m2 },
+      { "asm", language_asm },
+      { "pascal", language_pascal },
+      { "opencl", language_opencl },
+      { "rust", language_rust },
+      { "ada", language_ada },
+      { "auto", language_auto },
+    };
+
+  for (const auto &entry : known_languages)
+    if (strcmp (language, entry.name) == 0)
+      return entry.value;
+
+  return language_unknown;
+}
+
+/* Implementation of gdb.Objfile.add_type_symbol (name, type [, language]).
+   Registers a VAR_DOMAIN/LOC_TYPEDEF symbol called NAME in the objfile
+   behind SELF, with TYPE as the symbol's type.  LANGUAGE is a name
+   understood by parse_language and defaults to "auto".  Returns the new
+   symbol wrapped by symbol_to_symbol_object, or nullptr on failure;
+   TypeError is raised when type_object_to_type rejects TYPE without
+   setting an error itself, ValueError when LANGUAGE is not recognised.  */
+
+static PyObject *
+objfpy_add_type_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sO|s";
+  static const char *keywords[] = { "name", "type", "language", NULL };
+
+  const char *name;
+  PyObject *type_object;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &type_object, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  struct type *type = type_object_to_type (type_object);
+  if (type == nullptr)
+    {
+      /* Never hand Python a null result without an exception pending.  */
+      if (!PyErr_Occurred ())
+        PyErr_SetString (PyExc_TypeError, "type argument must be a gdb.Type");
+      return nullptr;
+    }
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  struct symbol *symbol
+    = add_new_symbol (objfile, name, language, VAR_DOMAIN, LOC_TYPEDEF,
+                      0, 0, 0, false,
+                      [&] (struct symbol *temp_symbol)
+                      {
+                        temp_symbol->set_type (type);
+                      });
+
+  return symbol_to_symbol_object (symbol);
+}
+
+/* Implementation of gdb.Objfile.add_label_symbol (name, address [, language]).
+
+   Registers a LABEL_DOMAIN/LOC_LABEL symbol called NAME at ADDRESS in the
+   objfile behind SELF.  LANGUAGE is a name understood by parse_language and
+   defaults to "auto".
+
+   Returns the new symbol wrapped by symbol_to_symbol_object, or nullptr on
+   failure; ValueError is raised when LANGUAGE is not recognised.  */
+
+static PyObject *
+objfpy_add_label_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sK|s";
+  static const char *keywords[] = { "name", "address", "language", NULL };
+
+  const char *name;
+  /* "K" stores through an unsigned long long pointer, so parse into exactly
+     that type; a CORE_ADDR need not have the width "k" (unsigned long)
+     assumes on every host.  */
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  const CORE_ADDR address = raw_address;
+  struct symbol *symbol
+    = add_new_symbol (objfile, name, language, LABEL_DOMAIN, LOC_LABEL,
+                      0, 0, 0, false,
+                      [&] (struct symbol *temp_symbol)
+                      {
+                        temp_symbol->set_value_address (address);
+                      });
+
+  return symbol_to_symbol_object (symbol);
+}
+
+/* Implementation of gdb.Objfile.add_static_symbol (name, address [, language]).
+
+   Registers a VAR_DOMAIN/LOC_STATIC symbol called NAME at ADDRESS in the
+   objfile behind SELF.  LANGUAGE is a name understood by parse_language and
+   defaults to "auto".
+
+   Returns the new symbol wrapped by symbol_to_symbol_object, or nullptr on
+   failure; ValueError is raised when LANGUAGE is not recognised.  */
+
+static PyObject *
+objfpy_add_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sK|s";
+  static const char *keywords[] = { "name", "address", "language", NULL };
+
+  const char *name;
+  /* "K" stores through an unsigned long long pointer, so parse into exactly
+     that type; a CORE_ADDR need not have the width "k" (unsigned long)
+     assumes on every host.  */
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  const CORE_ADDR address = raw_address;
+  struct symbol *symbol
+    = add_new_symbol (objfile, name, language, VAR_DOMAIN, LOC_STATIC,
+                      0, 0, 0, false,
+                      [&] (struct symbol *temp_symbol)
+                      {
+                        temp_symbol->set_value_address (address);
+                      });
+
+  return symbol_to_symbol_object (symbol);
+}
+
 /* Implement repr() for gdb.Objfile.  */
 
 static PyObject *
@@ -704,6 +932,18 @@ objfile_to_objfile_object (struct objfile *objfile)
   return gdbpy_ref<>::new_reference (result);
 }
 
+/* Return the objfile wrapped by SELF, or nullptr when SELF is not an
+   objfile_object; OBJFPY_REQUIRE_VALID (defined elsewhere) is applied too.  */
+struct objfile *
+objfile_object_to_objfile (PyObject *self)
+{
+  if (PyObject_TypeCheck (self, &objfile_object_type) == 0)
+    return nullptr;
+  struct objfile_object *wrapper = (struct objfile_object *) self;
+  OBJFPY_REQUIRE_VALID (wrapper);
+  return wrapper->objfile;
+}
+
 int
 gdbpy_initialize_objfile (void)
 {
@@ -737,6 +977,24 @@ Look up a global symbol in this objfile and return it." },
     "lookup_static_symbol (name [, domain]).\n\
 Look up a static-linkage global symbol in this objfile and return it." },
 
+  { "add_type_symbol", (PyCFunction) objfpy_add_type_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_type_symbol(name: string, type: gdb.Type, [language: string])\n\
+    Registers a new symbol inside VAR_DOMAIN/LOC_TYPEDEF, with the given name\
+    referring to the given type." },
+
+  { "add_label_symbol", (PyCFunction) objfpy_add_label_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_label_symbol(name: string, address: int, [language: string])\n\
+    Registers a new symbol inside LABEL_DOMAIN/LOC_LABEL, with the given name\
+    pointing to the given address." },
+
+  { "add_static_symbol", (PyCFunction) objfpy_add_static_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_static_symbol(name: string, address: int, [language: string])\n\
+    Registers a new symbol inside VAR_DOMAIN/LOC_STATIC, with the given name\
+    pointing to the given address." },
+
   { NULL }
 };
 
diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h
index 06357cc8c0b..bb10df63077 100644
--- a/gdb/python/python-internal.h
+++ b/gdb/python/python-internal.h
@@ -481,6 +481,8 @@ struct symtab *symtab_object_to_symtab (PyObject *obj);
 struct symtab_and_line *sal_object_to_symtab_and_line (PyObject *obj);
 frame_info_ptr frame_object_to_frame_info (PyObject *frame_obj);
 struct gdbarch *arch_object_to_gdbarch (PyObject *obj);
+struct objfile *objfile_object_to_objfile (PyObject *self);
+struct floatformat *float_format_object_as_float_format (PyObject *self);
 
 /* Convert Python object OBJ to a program_space pointer.  OBJ must be a
    gdb.Progspace reference.  Return nullptr if the gdb.Progspace is not
-- 
2.37.3.windows.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Add support for symbol addition to the Python API
  2023-07-04 15:14 ` Andrew Burgess
@ 2023-07-07 23:13   ` Matheus Branco Borella
  0 siblings, 0 replies; 6+ messages in thread
From: Matheus Branco Borella @ 2023-07-07 23:13 UTC (permalink / raw)
  To: aburgess; +Cc: gdb-patches, Matheus Branco Borella

Andrew Burgess <aburgess@redhat.com> wrote:
> I started taking a look through this.  I didn't manage to build the code
> due to the use of C++17 features, so I've only given a couple of really
> minor bits of feedback.

My bad. My compiler defaults to C++17 and for some reason it didn't 
occur to me that GDB uses an earlier version. It should build with 
C++11 now.

> I think that adding a first simple test would be a solid idea, this will
> give reviewers something to play with, you can always expand the test
> later to cover more cases.

I've added one to py-objfile.exp, it builds a new object and adds a 
symbol to it, then looks it up. It's simple, but it should be able to
illustrate how this feature works.

> Could this make use of `language_enum` (from language.c)?

It could. Are the enum variants available from inside Python? If not, 
should I add them? I can't seem to find them there, but it could be
that I'm just not looking hard enough.

> Is this change really needed?

Nope.

> Likewise, I suspect this change is not needed.

Likewise, you'd be correct.

The branch I'm working on for this patch was spun off the branch for 
another one. I missed that these two had stayed in there when I was
taking out the other patch's changes. I've taken them out now, thanks
for pointing them out.

Anyway, looking forward to hearing your thoughts on this patch.

---
This patch adds support for symbol creation and registration. It currently
supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols
(VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It
adds a new `gdb.ObjfileBuilder` type, with `add_type_symbol`,
`add_static_symbol` and `add_label_symbol` functions, allowing for the addition
of the aforementioned types of symbols.

Symbol addition is achieved by constructing a new objfile with msyms and full
symbols reflecting the symbols that were previously added to the builder through
its methods. This approach lets us get most of the way to full symbol addition
support, but because the objfile is not backed by BFD, it does have a few
limitations, which I will go over here.

PC-based minsym lookup does not work, because it would require a more complete
set of BFD structures than this objfile has. I don't think it would be good
practice to pretend to have them all, only for GDB to crash later on when it
expects things to be there that aren't.

In the same vein, PC-based function name lookup also does not work, although
there may be a way to have the feature work using overlays. However, this patch
does not make an attempt to do so.

For now, though, this implementation lets us add symbols that can be used to,
for instance, query registered types through `gdb.lookup_type`, and allows
reverse engineering GDB plugins (such as Pwndbg [0] or decomp2dbg [1]) to add
symbols directly through the Python API instead of having to compile an object
file for the target architecture that they later load through the
add-symbol-file command. [2]

[0] https://github.com/pwndbg/pwndbg/
[1] https://github.com/mahaloz/decomp2dbg
[2] https://github.com/mahaloz/decomp2dbg/blob/055be6b2001954d00db2d683f20e9b714af75880/decomp2dbg/clients/gdb/symbol_mapper.py#L235-L243
---
 gdb/Makefile.in                         |   1 +
 gdb/python/py-objfile-builder.c         | 642 ++++++++++++++++++++++++
 gdb/testsuite/gdb.python/py-objfile.exp |  11 +
 3 files changed, 654 insertions(+)
 create mode 100644 gdb/python/py-objfile-builder.c

diff --git a/gdb/Makefile.in b/gdb/Makefile.in
index 14b5dd0bad..c0eecb81b6 100644
--- a/gdb/Makefile.in
+++ b/gdb/Makefile.in
@@ -417,6 +417,7 @@ SUBDIR_PYTHON_SRCS = \
 	python/py-micmd.c \
 	python/py-newobjfileevent.c \
 	python/py-objfile.c \
+	python/py-objfile-builder.c \
 	python/py-param.c \
 	python/py-prettyprint.c \
 	python/py-progspace.c \
diff --git a/gdb/python/py-objfile-builder.c b/gdb/python/py-objfile-builder.c
new file mode 100644
index 0000000000..dd93a95138
--- /dev/null
+++ b/gdb/python/py-objfile-builder.c
@@ -0,0 +1,642 @@
+/* Python class allowing users to build and install objfiles.
+
+   Copyright (C) 2013-2023 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+#include "python-internal.h"
+#include "quick-symbol.h"
+#include "objfiles.h"
+#include "minsyms.h"
+#include "buildsym.h"
+#include "observable.h"
+#include <string>
+#include <unordered_map>
+#include <type_traits>
+#include <optional>
+
+/* This module relies on symbols being trivially copyable. The assertion
+ * carries an explicit message because the message-less form of static_assert
+ * is only available from C++17 onwards. */
+static_assert (std::is_trivially_copyable<struct symbol>::value,
+               "struct symbol must be trivially copyable");
+
+/* Interface to be implemented by every kind of symbol definition that can be
+ * added to a gdb.ObjfileBuilder.
+ *
+ * build_new_objfile calls both methods once for each definition: first
+ * register_msymbol, while the minimal symbols are being read, and then
+ * register_symbol, while the full symbols are being built. */
+class symbol_def
+{
+public:
+  virtual ~symbol_def () = default;
+
+  /* Records the minimal symbol (if any) for the definition called NAME into
+   * READER. */
+  virtual void register_msymbol (const std::string& name, 
+                                 struct objfile* objfile,
+                                 minimal_symbol_reader& reader) const = 0;
+
+  /* Creates the full symbol for the definition called NAME and adds it to
+   * BUILDER. */
+  virtual void register_symbol (const std::string& name, 
+                                struct objfile* objfile,
+                                buildsym_compunit& builder) const = 0;
+};
+
+/* Shorthand for a unique_ptr owning a symbol definition (symbol_def). */
+typedef std::unique_ptr<symbol_def> symbol_def_up;
+
+/* Data being held by the gdb.ObjfileBuilder.
+ *
+ * This structure needs to have its constructor run in order for its lifetime
+ * to begin. Because of how Python handles its objects, we can't just reconstruct
+ * the object structure as a whole, as that would overwrite things the runtime
+ * cares about, so these fields had to be broken off into their own structure. */
+struct objfile_builder_data
+{
+  /* Indicates whether the objfile has already been built and added to the
+   * current context. We enforce that objfiles can't be installed twice. */
+  bool installed = false;
+
+  /* The symbols that will be added to the newly built objfile, keyed by
+   * symbol name. */
+  std::unordered_map<std::string, symbol_def_up> symbols;
+
+  /* The name given to this objfile. */
+  std::string name;
+
+  /* Adds a symbol definition with the given name.
+   *
+   * Returns true if the definition was inserted. Returns false if a
+   * definition with the same name is already present, in which case the
+   * existing definition is kept and SYMBOL_DEF is discarded. */
+  bool add_symbol_def (std::string name, symbol_def_up&& symbol_def)
+  {
+    /* NAME was taken by value, so it can be moved into the map instead of
+     * being copied a second time. */
+    auto result = symbols.emplace (std::move (name), std::move (symbol_def));
+    return result.second;
+  }
+};
+
+/* Structure backing the gdb.ObjfileBuilder type.
+ *
+ * The Python object header comes first, followed by the C++ state of the
+ * builder. */
+
+struct objfile_builder_object
+{
+  PyObject_HEAD
+
+  /* See objfile_builder_data. This member is constructed in place by
+   * objbdpy_new and destroyed explicitly by objbdpy_dealloc. */
+  objfile_builder_data inner;
+};
+
+extern PyTypeObject objfile_builder_object_type
+    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("objfile_builder_object_type");
+
+/* Constructs a new objfile from an objfile_builder.
+ *
+ * The resulting objfile is not backed by a BFD. It is given four placeholder
+ * sections (text, data, rodata and bss), the architecture of the selected
+ * frame (or of the target, when there are no stack frames), and the minimal
+ * and full symbols registered by every definition held by BUILDER. The
+ * new_objfile observers are notified before the objfile is returned.
+ *
+ * BUILDER must not have been installed already. */
+static struct objfile *
+build_new_objfile (const objfile_builder_object& builder)
+{
+  /* Number of placeholder sections: text, data, rodata and bss. */
+  constexpr int section_count = 4;
+
+  gdb_assert (!builder.inner.installed);
+
+  auto of = objfile::make (nullptr, builder.inner.name.c_str (),
+                           OBJF_READNOW | OBJF_NOT_FILENAME,
+                           nullptr);
+
+  /* Setup object file sections. */
+  of->sections_start = OBSTACK_CALLOC (&of->objfile_obstack,
+                                       section_count,
+                                       struct obj_section);
+  of->sections_end = of->sections_start + section_count;
+
+  const auto init_section = [&](struct obj_section* sec)
+    {
+      sec->objfile = of;
+      sec->ovly_mapped = false;
+
+      /* We're not being backed by BFD. So we have no real section data to speak
+       * of, but, because specifying sections requires BFD structures, we have to
+       * play a little game of pretend. */
+      auto bfd = obstack_new<bfd_section> (&of->objfile_obstack);
+      bfd->vma = 0;
+      bfd->size = 0;
+      bfd->lma = 0; /* Prevents insert_section_p in objfiles.c from trying to
+                     * dereference the bfd structure we don't have. */
+      sec->the_bfd_section = bfd;
+    };
+
+  /* Initialize every section allocated above, and nothing past the end of
+   * the array. */
+  for (int i = 0; i < section_count; i++)
+    init_section (&of->sections_start[i]);
+
+  of->sect_index_text = 0;
+  of->sect_index_data = 1;
+  of->sect_index_rodata = 2;
+  of->sect_index_bss = 3;
+
+  /* While buildsym_compunit expects the symbol function pointer structure to be
+   * present, it also gracefully handles the case where all of the pointers in
+   * it are set to null. So, make sure we have a valid structure, but there's
+   * no need to do more than that. */
+  of->sf = obstack_new<struct sym_fns> (&of->objfile_obstack);
+
+  /* We need to tell GDB what architecture the objfile uses. */
+  if (has_stack_frames ())
+    of->per_bfd->gdbarch = get_frame_arch (get_selected_frame (nullptr));
+  else
+    of->per_bfd->gdbarch = target_gdbarch ();
+
+  /* Construct the minimal symbols. */
+  minimal_symbol_reader msym (of);
+  for (const auto& element : builder.inner.symbols)
+    element.second->register_msymbol (element.first, of, msym);
+  msym.install ();
+
+  /* Construct the full symbols. */
+  buildsym_compunit fsym (of, builder.inner.name.c_str (), "", language_c, 0);
+  for (const auto& element : builder.inner.symbols)
+    element.second->register_symbol (element.first, of, fsym);
+  fsym.end_compunit_symtab (0);
+
+  /* Notify the rest of GDB this objfile has been created. Requires
+   * OBJF_NOT_FILENAME to be used, to prevent any of the functions attached to
+   * the observable from trying to dereference of->bfd. */
+  gdb::observers::new_objfile.notify (of);
+
+  return of;
+}
+
+/* Implementation of the quick symbol functions used by the objfiles created
+ * using this interface. Very little work is needed here: we can get something
+ * that works by effectively just using no-ops, and the rest of the code will
+ * fall back to using just the minimal and full symbol data. It is important
+ * to note, though, that this only works because we're marking our objfile
+ * with `OBJF_READNOW`.
+ *
+ * NOTE(review): nothing in this file appears to instantiate this class, and
+ * all of its members are private because no access specifier is given.
+ * Confirm whether it is still needed. */
+class runtime_objfile : public quick_symbol_functions
+{
+  /* Always reports that there are no symbols to be read through this
+   * interface. */
+  virtual bool has_symbols (struct objfile*) override
+  {
+    return false;
+  }
+
+  /* Nothing to dump. */
+  virtual void dump (struct objfile *objfile) override
+  {
+  }
+
+  /* No-op: there is nothing to expand. */
+  virtual void expand_matching_symbols
+    (struct objfile *,
+     const lookup_name_info &lookup_name,
+     domain_enum domain,
+     int global,
+     symbol_compare_ftype *ordered_compare) override
+  {
+  }
+
+  /* Expands nothing and unconditionally returns true. */
+  virtual bool expand_symtabs_matching
+    (struct objfile *objfile,
+     gdb::function_view<expand_symtabs_file_matcher_ftype> file_matcher,
+     const lookup_name_info *lookup_name,
+     gdb::function_view<expand_symtabs_symbol_matcher_ftype> symbol_matcher,
+     gdb::function_view<expand_symtabs_exp_notify_ftype> expansion_notify,
+     block_search_flags search_flags,
+     domain_enum domain,
+     enum search_domain kind) override
+  {
+    return true;
+  }
+};
+
+
+/* Create a new symbol allocated in the given objfile.
+ *
+ * The symbol is placed on OBJFILE's obstack and counted in its n_syms
+ * statistic. It is given NAME, LANGUAGE, DOMAIN, the address class ACLASS and
+ * the section index SECTION_INDEX, and is marked as owned by the objfile.
+ * Setting the type or the value of the symbol is left to the caller. */
+
+static struct symbol *
+new_symbol
+  (struct objfile *objfile,
+   const char *name,
+   enum language language,
+   enum domain_enum domain,
+   enum address_class aclass,
+   short section_index)
+{
+  auto symbol = new (&objfile->objfile_obstack) struct symbol ();
+  OBJSTAT (objfile, n_syms++);
+
+  /* NOTE(review): the language is set before the names are computed;
+   * presumably name computation depends on it, so keep this order. */
+  symbol->set_language (language, &objfile->objfile_obstack);
+  symbol->compute_and_set_names (gdb::string_view (name), true, 
+                                 objfile->per_bfd);
+
+  symbol->set_is_objfile_owned (true);
+  symbol->set_section_index (section_index);
+  symbol->set_domain (domain);
+  symbol->set_aclass_index (aclass);
+
+  return symbol;
+}
+
+/* Maps a language name (a string coming from Python) to the matching
+ * language variant. Names that are not recognized yield language_unknown. */
+
+static enum language
+parse_language (const char *language)
+{
+  /* Recognized names, in the order in which they are tried. */
+  static const struct
+  {
+    const char *name;
+    enum language value;
+  }
+  known_languages[] =
+    {
+      { "c", language_c },
+      { "objc", language_objc },
+      { "cplus", language_cplus },
+      { "d", language_d },
+      { "go", language_go },
+      { "fortran", language_fortran },
+      { "m2", language_m2 },
+      { "asm", language_asm },
+      { "pascal", language_pascal },
+      { "opencl", language_opencl },
+      { "rust", language_rust },
+      { "ada", language_ada },
+    };
+
+  for (const auto &entry : known_languages)
+    if (strcmp (language, entry.name) == 0)
+      return entry.value;
+
+  return language_unknown;
+}
+
+/* Convenience function that performs a checked conversion from a PyObject to
+ * a objfile_builder_object structure pointer.
+ *
+ * Returns nullptr, with a Python TypeError set, when SELF is not an instance
+ * of gdb.ObjfileBuilder. Callers can therefore report the failure to Python
+ * by simply returning nullptr themselves. */
+inline static struct objfile_builder_object *
+validate_objfile_builder_object (PyObject *self)
+{
+  if (!PyObject_TypeCheck (self, &objfile_builder_object_type))
+    {
+      /* Returning a null result to Python without a pending exception is an
+       * error in its own right, so always set one here. */
+      PyErr_SetString (PyExc_TypeError,
+                       "expected a gdb.ObjfileBuilder object");
+      return nullptr;
+    }
+  return (struct objfile_builder_object*) self;
+}
+
+/* Registers symbols added with add_type_symbol.
+ *
+ * A type symbol has no minimal symbol. Its full symbol is a LOC_TYPEDEF
+ * symbol in VAR_DOMAIN whose type is the one given by the user. */
+class typedef_symbol_def : public symbol_def
+{
+public:
+  struct type* type;
+  enum language language;
+
+  /* Types have no address, so there is no minimal symbol to record. */
+  virtual void register_msymbol (const std::string& name,
+                                 struct objfile *objfile,
+                                 minimal_symbol_reader& reader) const override
+  {
+  }
+
+  virtual void register_symbol (const std::string& name,
+                                struct objfile *objfile,
+                                buildsym_compunit& builder) const override
+  {
+    /* Type names belong in VAR_DOMAIN; LABEL_DOMAIN is for goto labels. */
+    auto symbol = new_symbol (objfile, name.c_str (), language, VAR_DOMAIN,
+                              LOC_TYPEDEF, objfile->sect_index_text);
+
+    symbol->set_type (type);
+
+    add_symbol_to_list (symbol, builder.get_file_symbols ());
+  }
+};
+
+/* Adds a type (LOC_TYPEDEF) symbol to a given objfile builder.
+ *
+ * Python arguments: name (str), type (gdb.Type) and language (str), all of
+ * them required.
+ *
+ * Returns None on success. Returns nullptr with a Python exception set when
+ * the arguments cannot be parsed, when TYPE is not a gdb.Type, when the
+ * language name is not recognized, or when a symbol with the same name has
+ * already been added to the builder. */
+static PyObject *
+objbdpy_add_type_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sOs";
+  static const char *keywords[] =
+    {
+      "name", "type", "language", NULL
+    };
+
+  PyObject *type_object;
+  const char *name;
+  const char *language_name = nullptr;
+
+  /* Every argument is required by FORMAT, so all three outputs are set once
+   * parsing succeeds. */
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &type_object, &language_name))
+    return nullptr;
+
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  struct type *type = type_object_to_type (type_object);
+  if (type == nullptr)
+    {
+      /* Make sure an exception is pending before reporting the failure. */
+      if (!PyErr_Occurred ())
+        PyErr_SetString (PyExc_TypeError, "type must be a gdb.Type object");
+      return nullptr;
+    }
+
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  auto def = std::unique_ptr<typedef_symbol_def> (new typedef_symbol_def ());
+  def->type = type;
+  def->language = language;
+
+  /* Report name clashes instead of silently dropping the new definition. */
+  if (!builder->inner.add_symbol_def (name, std::move (def)))
+    {
+      PyErr_Format (PyExc_ValueError,
+                    "a symbol named \"%s\" has already been added", name);
+      return nullptr;
+    }
+
+  Py_RETURN_NONE;
+}
+
+
+/* Registers symbols added with add_label_symbol.
+ *
+ * A label is recorded as an mst_text minimal symbol and as a LOC_LABEL full
+ * symbol in LABEL_DOMAIN, both at the address given by the user. */
+class label_symbol_def : public symbol_def
+{
+public:
+  CORE_ADDR address;
+  enum language language;
+
+  virtual void register_msymbol (const std::string& name,
+                                 struct objfile *objfile,
+                                 minimal_symbol_reader& reader) const override
+  {
+    reader.record (name.c_str (),
+                   unrelocated_addr (address),
+                   minimal_symbol_type::mst_text);
+  }
+
+  virtual void register_symbol (const std::string& name,
+                                struct objfile *objfile,
+                                buildsym_compunit& builder) const override
+  {
+    auto symbol = new_symbol (objfile, name.c_str (), language, LABEL_DOMAIN,
+                              LOC_LABEL, objfile->sect_index_text);
+
+    symbol->set_value_address (address);
+
+    add_symbol_to_list (symbol, builder.get_file_symbols ());
+  }
+};
+
+/* Adds a label (LOC_LABEL) symbol to a given objfile builder.
+ *
+ * Python arguments: name (str), address (int) and language (str), all of
+ * them required.
+ *
+ * Returns None on success. Returns nullptr with a Python exception set when
+ * the arguments cannot be parsed, when the language name is not recognized,
+ * or when a symbol with the same name has already been added to the
+ * builder. */
+static PyObject *
+objbdpy_add_label_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  /* "K" stores an unsigned long long. The address is parsed into a variable
+   * of exactly that type and converted afterwards, as CORE_ADDR is not
+   * guaranteed to be the type a format unit writes through its pointer. */
+  static const char *format = "sKs";
+  static const char *keywords[] =
+    {
+      "name", "address", "language", NULL
+    };
+
+  const char *name;
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  /* Every argument is required by FORMAT, so all three outputs are set once
+   * parsing succeeds. */
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  auto def = std::unique_ptr<label_symbol_def> (new label_symbol_def ());
+  def->address = (CORE_ADDR) raw_address;
+  def->language = language;
+
+  /* Report name clashes instead of silently dropping the new definition. */
+  if (!builder->inner.add_symbol_def (name, std::move (def)))
+    {
+      PyErr_Format (PyExc_ValueError,
+                    "a symbol named \"%s\" has already been added", name);
+      return nullptr;
+    }
+
+  Py_RETURN_NONE;
+}
+
+/* Symbol definition produced by add_static_symbol: an mst_bss minimal symbol
+ * paired with a LOC_STATIC full symbol in VAR_DOMAIN, both located at the
+ * address given by the user. */
+class static_symbol_def : public symbol_def
+{
+public:
+  CORE_ADDR address;
+  enum language language;
+
+  void register_msymbol (const std::string& name,
+                         struct objfile *objfile,
+                         minimal_symbol_reader& reader) const override
+  {
+    const char *msym_name = name.c_str ();
+
+    reader.record (msym_name, unrelocated_addr (address),
+                   minimal_symbol_type::mst_bss);
+  }
+
+  void register_symbol (const std::string& name,
+                        struct objfile *objfile,
+                        buildsym_compunit& builder) const override
+  {
+    struct symbol *sym = new_symbol (objfile, name.c_str (), language,
+                                     VAR_DOMAIN, LOC_STATIC,
+                                     objfile->sect_index_bss);
+
+    sym->set_value_address (address);
+    add_symbol_to_list (sym, builder.get_file_symbols ());
+  }
+};
+
+/* Adds a static (LOC_STATIC) symbol to a given objfile builder.
+ *
+ * Python arguments: name (str), address (int) and language (str), all of
+ * them required.
+ *
+ * Returns None on success. Returns nullptr with a Python exception set when
+ * the arguments cannot be parsed, when the language name is not recognized,
+ * or when a symbol with the same name has already been added to the
+ * builder. */
+static PyObject *
+objbdpy_add_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  /* "K" stores an unsigned long long. The address is parsed into a variable
+   * of exactly that type and converted afterwards, as CORE_ADDR is not
+   * guaranteed to be the type a format unit writes through its pointer. */
+  static const char *format = "sKs";
+  static const char *keywords[] =
+    {
+      "name", "address", "language", NULL
+    };
+
+  const char *name;
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  /* Every argument is required by FORMAT, so all three outputs are set once
+   * parsing succeeds. */
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  auto def = std::unique_ptr<static_symbol_def> (new static_symbol_def ());
+  def->address = (CORE_ADDR) raw_address;
+  def->language = language;
+
+  /* Report name clashes instead of silently dropping the new definition. */
+  if (!builder->inner.add_symbol_def (name, std::move (def)))
+    {
+      PyErr_Format (PyExc_ValueError,
+                    "a symbol named \"%s\" has already been added", name);
+      return nullptr;
+    }
+
+  Py_RETURN_NONE;
+}
+
+/* Builds the object file.
+ *
+ * Creates a new objfile out of the symbols held by the builder, marks the
+ * builder as installed and returns a new reference to the gdb.Objfile
+ * wrapping the new objfile.
+ *
+ * Returns nullptr with a Python exception set when SELF is not a builder,
+ * when the builder has already been built, when GDB raises an error while
+ * the objfile is being constructed, or when the gdb.Objfile wrapper cannot
+ * be created. */
+static PyObject *
+objbdpy_build (PyObject *self, PyObject *args)
+{
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  if (builder->inner.installed)
+    {
+      /* Adjacent literals are used instead of a backslash continuation,
+       * which would embed the indentation of the next line in the message. */
+      PyErr_SetString (PyExc_ValueError,
+                       "build() cannot be run twice on the "
+                       "same object");
+      return nullptr;
+    }
+
+  struct objfile *of = nullptr;
+  try
+    {
+      of = build_new_objfile (*builder);
+    }
+  catch (const gdb_exception &except)
+    {
+      /* GDB exceptions must not propagate into the Python interpreter. */
+      GDB_PY_HANDLE_EXCEPTION (except);
+    }
+  builder->inner.installed = true;
+
+  /* Hand the new reference straight over to the caller. The result is null,
+   * with an exception set, if the wrapper could not be created. */
+  return objfile_to_objfile_object (of).release ();
+}
+
+/* Implements the __init__() function.
+ *
+ * Takes a single required "name" argument, stores it as the name of the
+ * objfile to be built and empties the set of symbol definitions. Returns 0
+ * on success and -1 when the arguments cannot be parsed. */
+static int
+objbdpy_init (PyObject *self0, PyObject *args, PyObject *kw)
+{
+  static const char *keywords[] = { "name", NULL };
+  static const char *format = "s";
+
+  const char *objfile_name;
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords,
+                                        &objfile_name))
+    return -1;
+
+  objfile_builder_data &data = ((objfile_builder_object *) self0)->inner;
+  data.symbols.clear ();
+  data.name = objfile_name;
+
+  return 0;
+}
+
+/* The function handling construction of the ObjfileBuilder object.
+ *
+ * We need to have a custom function here as, even though Python manages the
+ * memory backing the object up, it assumes clearing the memory is enough to
+ * begin its lifetime, which is not the case here, and would lead to undefined
+ * behavior as soon as we try to use it in any meaningful way.
+ *
+ * So, what we have to do here is manually begin the lifecycle of our new object
+ * by constructing it in place, using the memory region Python just allocated
+ * for us. This ensures the object will have already started its lifetime by
+ * the time we start using it.
+ *
+ * Returns the new object, or nullptr when the allocation fails. */
+static PyObject *
+objbdpy_new (PyTypeObject *subtype, PyObject *args, PyObject *kwds)
+{
+  objfile_builder_object *region =
+    (objfile_builder_object *) subtype->tp_alloc (subtype, 1);
+
+  /* An allocation failure is reported by a null result; hand it back to
+   * Python instead of aborting GDB on an assertion. */
+  if (region == nullptr)
+    return nullptr;
+
+  gdb_assert ((size_t) region % alignof (objfile_builder_object) == 0);
+
+  new (&region->inner) objfile_builder_data ();
+
+  return (PyObject *) region;
+}
+
+/* The function handling destruction of the ObjfileBuilder object.
+ *
+ * While running the destructor of our object isn't _strictly_ necessary, we
+ * would very much like for the memory it owns to be freed, but, because it was
+ * constructed in place, we have to call its destructor manually here. */
+static void
+objbdpy_dealloc (PyObject *self0)
+{
+  auto self = (objfile_builder_object *) self0;
+
+  self->inner.~objfile_builder_data ();
+
+  /* The type object is statically allocated, so instances do not own a
+   * reference to it. Only deallocators of heap types release their type;
+   * doing so here would unbalance its reference count. */
+  Py_TYPE (self0)->tp_free (self0);
+}
+
+/* Readies the gdb.ObjfileBuilder type and adds it to the gdb module under
+ * the name "ObjfileBuilder". Returns a negative value on failure. */
+static int CPYCHECKER_NEGATIVE_RESULT_SETS_EXCEPTION
+gdbpy_initialize_objfile_builder (void)
+{
+  PyObject *type_object = (PyObject *) &objfile_builder_object_type;
+
+  if (PyType_Ready (&objfile_builder_object_type) >= 0)
+    return gdb_pymodule_addobject (gdb_module, "ObjfileBuilder", type_object);
+
+  return -1;
+}
+
+GDBPY_INITIALIZE_FILE (gdbpy_initialize_objfile_builder);
+
+/* Methods of the gdb.ObjfileBuilder type. The table is terminated by an
+ * entry whose name is NULL. */
+static PyMethodDef objfile_builder_object_methods[] =
+{
+  { "build", (PyCFunction) objbdpy_build, METH_NOARGS,
+    "build ().\n\
+Build a new objfile containing the symbols added to builder." },
+  { "add_type_symbol", (PyCFunction) objbdpy_add_type_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_type_symbol (name [str], type [gdb.Type], language [str]).\n\
+Add a new type symbol in the given language, associated with the given type." },
+  { "add_label_symbol", (PyCFunction) objbdpy_add_label_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_label_symbol (name [str], address [int], language [str]).\n\
+Add a new label symbol in the given language, at the given address." },
+  { "add_static_symbol", (PyCFunction) objbdpy_add_static_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_static_symbol (name [str], address [int], language [str]).\n\
+Add a new static symbol in the given language, at the given address." },
+  { NULL }
+};
+
+/* The statically allocated type object backing gdb.ObjfileBuilder. Instances
+ * are created by objbdpy_new, initialized by objbdpy_init and destroyed by
+ * objbdpy_dealloc. */
+PyTypeObject objfile_builder_object_type = {
+  PyVarObject_HEAD_INIT (NULL, 0)
+  "gdb.ObjfileBuilder",               /* tp_name */
+  sizeof (objfile_builder_object),    /* tp_basicsize */
+  0,                                  /* tp_itemsize */
+  objbdpy_dealloc,                    /* tp_dealloc */
+  0,                                  /* tp_vectorcall_offset */
+  nullptr,                            /* tp_getattr */
+  nullptr,                            /* tp_setattr */
+  nullptr,                            /* tp_as_async (tp_compare in Python 2) */
+  nullptr,                            /* tp_repr */
+  nullptr,                            /* tp_as_number */
+  nullptr,                            /* tp_as_sequence */
+  nullptr,                            /* tp_as_mapping */
+  nullptr,                            /* tp_hash  */
+  nullptr,                            /* tp_call */
+  nullptr,                            /* tp_str */
+  nullptr,                            /* tp_getattro */
+  nullptr,                            /* tp_setattro */
+  nullptr,                            /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+  "GDB object file builder",          /* tp_doc */
+  nullptr,                            /* tp_traverse */
+  nullptr,                            /* tp_clear */
+  nullptr,                            /* tp_richcompare */
+  0,                                  /* tp_weaklistoffset */
+  nullptr,                            /* tp_iter */
+  nullptr,                            /* tp_iternext */
+  objfile_builder_object_methods,     /* tp_methods */
+  nullptr,                            /* tp_members */
+  nullptr,                            /* tp_getset */
+  nullptr,                            /* tp_base */
+  nullptr,                            /* tp_dict */
+  nullptr,                            /* tp_descr_get */
+  nullptr,                            /* tp_descr_set */
+  0,                                  /* tp_dictoffset */
+  objbdpy_init,                       /* tp_init */
+  PyType_GenericAlloc,                /* tp_alloc */
+  objbdpy_new,                        /* tp_new */
+};
+
+
diff --git a/gdb/testsuite/gdb.python/py-objfile.exp b/gdb/testsuite/gdb.python/py-objfile.exp
index 61b9942de7..ab2413e317 100644
--- a/gdb/testsuite/gdb.python/py-objfile.exp
+++ b/gdb/testsuite/gdb.python/py-objfile.exp
@@ -173,3 +173,14 @@ gdb_py_test_silent_cmd "python objfile = gdb.objfiles()\[0\]" \
     "get first objfile" 1
 gdb_file_cmd ${binfile}
 gdb_test "python print(objfile)" "<gdb.Objfile \\\(invalid\\\)>"
+
+# Test adding a new objfile: create a builder, add one static symbol to it,
+# build the objfile and look the symbol up through the resulting gdb.Objfile.
+gdb_py_test_silent_cmd "python builder = gdb.ObjfileBuilder(\"test_objfile\")" \
+    "Create an object file builder" 1
+gdb_test "python print(repr(builder))" "<gdb.ObjfileBuilder .*>"
+
+gdb_py_test_silent_cmd "python builder.add_static_symbol(name = \"test\", address = 0, language = \"c\")" \
+    "Add a static symbol to the object file builder" 1
+gdb_py_test_silent_cmd "python objfile = builder.build()" \
+    "Build an objfile from an object file builder" 1
+gdb_test "python print(repr(objfile.lookup_static_symbol(\"test\")))" "<gdb.Symbol .*>"
-- 
2.40.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Add support for symbol addition to the Python API
  2023-05-27  1:24 Matheus Branco Borella
@ 2023-07-04 15:14 ` Andrew Burgess
  2023-07-07 23:13   ` Matheus Branco Borella
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Burgess @ 2023-07-04 15:14 UTC (permalink / raw)
  To: Matheus Branco Borella via Gdb-patches, gdb-patches
  Cc: Matheus Branco Borella

Matheus Branco Borella via Gdb-patches <gdb-patches@sourceware.org>
writes:

> Disclaimer:
>
> This patch is a rework of a six-month old patch I submitted to the mailing list
> that considerably reduces the hackyness of the original solution to the problem,
> now that I've had more time to read through and understand how symbols are 
> handled and searched for inside GDB. So, I'd like to please ask for comments on 
> things I can still improve in this patch, before I resubmit it. I also plan to 
> add tests to it once I'm more secure about the approach I'm taking to solve the
> problem now.
>
> The interfaces in this patch can be tested like so:
> ```
> (gdb) pi
>>>> builder = gdb.ObjfileBuilder(name = "some_name")
>>>> builder.add_static_symbol(name = "some_sym", address = 0x41414141, 
>         language = "c")
>>>> objfile = builder.build()
> ```
>
> ---
>
> This patch adds support for symbol creation and registration. It currently
> supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols
> (VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It
> adds a new `gdb.ObjfileBuilder` type, with `add_type_symbol`,
> `add_static_symbol` and `add_label_symbol` functions, allowing for the addition
> of the aforementioned types of symbols.
>
> Symbol addition is achieved by constructing a new objfile with msyms and full
> symbols reflecting the symbols that were previously added to the builder through
> its methods. This approach lets us get most of the way to full symbol addition
> support, but due to not being backed up by BFD, it does have a few limitations,
> which I will go over them here.
>
> PC-based minsym lookup does not work, because those would require a more
> complete set of BFD structures than I think would be good practice to pretend to
> have them all and crash GDB later on when it expects things to be there that
> aren't.
>
> In the same vein, PC-based function name lookup also does not work, although
> there may be a way to have the feature work using overlays. However, this patch
> does not make an attempt to do so
>
> For now, though, this implementation lets us add symbols that can be used to,
> for instance, query registered types through `gdb.lookup_type`, and allows
> reverse engineering GDB plugins (such as Pwndbg [0] or decomp2gdb [1]) to add
> symbols directly through the Python API instead of having to compile an object
> file for the target architecture that they later load through the add-symbol-
> file command. [2]

I started taking a look through this.  I didn't manage to build the code
due to the use of C++17 features, so I've only given a couple of really
minor bits of feedback.

I think that adding a first simple test would be a solid idea, this will
give reviewers something to play with, you can always expand the test
later to cover more cases.

>
> [0] https://github.com/pwndbg/pwndbg/
> [1] https://github.com/mahaloz/decomp2dbg
> [2] https://github.com/mahaloz/decomp2dbg/blob/055be6b2001954d00db2d683f20e9b714af75880/decomp2dbg/clients/gdb/symbol_mapper.py#L235-L243]
> ---
>  gdb/Makefile.in                 |   1 +
>  gdb/python/py-objfile-builder.c | 648 ++++++++++++++++++++++++++++++++
>  gdb/python/py-objfile.c         |   1 +
>  gdb/python/python-internal.h    |   1 +
>  4 files changed, 651 insertions(+)
>  create mode 100644 gdb/python/py-objfile-builder.c
>
> diff --git a/gdb/Makefile.in b/gdb/Makefile.in
> index 14b5dd0bad..c0eecb81b6 100644
> --- a/gdb/Makefile.in
> +++ b/gdb/Makefile.in
> @@ -417,6 +417,7 @@ SUBDIR_PYTHON_SRCS = \
>  	python/py-micmd.c \
>  	python/py-newobjfileevent.c \
>  	python/py-objfile.c \
> +	python/py-objfile-builder.c \
>  	python/py-param.c \
>  	python/py-prettyprint.c \
>  	python/py-progspace.c \
> diff --git a/gdb/python/py-objfile-builder.c b/gdb/python/py-objfile-builder.c
> new file mode 100644
> index 0000000000..1e3110c613
> --- /dev/null
> +++ b/gdb/python/py-objfile-builder.c
> @@ -0,0 +1,648 @@
> +/* Python class allowing users to build and install objfiles.
> +
> +   Copyright (C) 2013-2023 Free Software Foundation, Inc.
> +
> +   This file is part of GDB.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include "defs.h"
> +#include "python-internal.h"
> +#include "quick-symbol.h"
> +#include "objfiles.h"
> +#include "minsyms.h"
> +#include "buildsym.h"
> +#include "observable.h"
> +#include <string>
> +#include <unordered_map>
> +#include <type_traits>
> +#include <optional>
> +
> +/* This module relies on symbols being trivially copyable. */
> +static_assert (std::is_trivially_copyable_v<struct symbol>);

I believe that std::is_trivially_copyable_v is a C++17 feature and
(currently) GDB is C++11.  There's actually a bunch of C++17 code in
this patch -- you'll either need to wait until GDB moves to C++17, or
update things to compile with C++11.

> +
> +/* Interface to be implemented for symbol types supported by this interface. */
> +class symbol_def
> +{
> +public:
> +  virtual ~symbol_def () = default;
> +
> +  virtual void register_msymbol (const std::string& name, 
> +                                 struct objfile* objfile,
> +                                 minimal_symbol_reader& reader) const = 0;
> +  virtual void register_symbol (const std::string& name, 
> +                                struct objfile* objfile,
> +                                buildsym_compunit& builder) const = 0;
> +};
> +
> +/* Shorthand for a unique_ptr to a symbol. */
> +typedef std::unique_ptr<symbol_def> symbol_def_up;
> +
> +/* Data being held by the gdb.ObjfileBuilder.
> + *
> + * This structure needs to have its constructor run in order for its lifetime
> + * to begin. Because of how Python handles its objects, we can't just reconstruct
> + * the object structure as a whole, as that would overwrite things the runtime
> + * cares about, so these fields had to be broken off into their own structure. */
> +struct objfile_builder_data
> +{
> +  /* Indicates whether the objfile has already been built and added to the
> +   * current context. We enforce that objfiles can't be installed twice. */
> +  bool installed = false;
> +
> +  /* The symbols that will be added to new newly built objfile. */
> +  std::unordered_map<std::string, symbol_def_up> symbols;
> +
> +  /* The name given to this objfile. */
> +  std::string name;
> +
> +  /* Adds a symbol definition with the given name. */
> +  bool add_symbol_def (std::string name, symbol_def_up&& symbol_def)
> +  {
> +    return std::get<1> (symbols.insert ({name, std::move (symbol_def)}));
> +  }
> +};
> +
> +/* Structure backing the gdb.ObjfileBuilder type. */
> +
> +struct objfile_builder_object
> +{
> +  PyObject_HEAD
> +
> +  /* See objfile_builder_data. */
> +  objfile_builder_data inner;
> +};
> +
> +extern PyTypeObject objfile_builder_object_type
> +    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("objfile_builder_object_type");
> +
> +/* Constructs a new objfile from BUILDER and announces it to the rest of GDB.
> + * The objfile has no backing BFD: it gets four placeholder sections and the
> + * minimal and full symbols recorded in BUILDER, which must not have been
> + * installed yet. Returns the new objfile. */
> +static struct objfile *
> +build_new_objfile (const objfile_builder_object& builder)
> +{
> +  gdb_assert (!builder.inner.installed);
> +
> +  auto of = objfile::make (nullptr, builder.inner.name.c_str (),
> +                           OBJF_READNOW | OBJF_NOT_FILENAME, nullptr);
> +
> +  /* Setup object file sections. */
> +  const int n_sections = 4;
> +  of->sections_start = OBSTACK_CALLOC (&of->objfile_obstack, n_sections,
> +                                       struct obj_section);
> +  of->sections_end = of->sections_start + n_sections;
> +
> +  const auto init_section = [&](struct obj_section* sec)
> +    {
> +      sec->objfile = of;
> +      sec->ovly_mapped = false;
> +
> +      /* We're not being backed by BFD. So we have no real section data to speak
> +       * of, but, because specifying sections requires BFD structures, we have to
> +       * play a little game of pretend. */
> +      auto bfd = obstack_new<bfd_section> (&of->objfile_obstack);
> +      bfd->vma = 0;
> +      bfd->size = 0;
> +      bfd->lma = 0; /* Prevents insert_section_p in objfiles.c from trying to
> +                     * dereference the bfd structure we don't have. */
> +      sec->the_bfd_section = bfd;
> +    };
> +  for (int i = 0; i < n_sections; i++)
> +    init_section (&of->sections_start[i]);
> +
> +  of->sect_index_text = 0;
> +  of->sect_index_data = 1;
> +  of->sect_index_rodata = 2;
> +  of->sect_index_bss = 3;
> +
> +  /* While buildsym_compunit expects the symbol function pointer structure to be
> +   * present, it also gracefully handles the case where all of the pointers in
> +   * it are set to null. So, make sure we have a valid structure, but there's
> +   * no need to do more than that. */
> +  of->sf = obstack_new<struct sym_fns> (&of->objfile_obstack);
> +
> +  /* We need to tell GDB what architecture the objfile uses. */
> +  if (has_stack_frames ())
> +    of->per_bfd->gdbarch = get_frame_arch (get_selected_frame (nullptr));
> +  else
> +    of->per_bfd->gdbarch = target_gdbarch ();
> +
> +  /* Construct the minimal symbols. */
> +  minimal_symbol_reader msym (of);
> +  for (const auto& [name, symbol] : builder.inner.symbols)
> +    symbol->register_msymbol (name, of, msym);
> +  msym.install ();
> +
> +  /* Construct the full symbols. */
> +  buildsym_compunit fsym (of, builder.inner.name.c_str (), "", language_c, 0);
> +  for (const auto& [name, symbol] : builder.inner.symbols)
> +    symbol->register_symbol (name, of, fsym);
> +  fsym.end_compunit_symtab (0);
> +
> +  /* Notify the rest of GDB this objfile has been created. Requires
> +   * OBJF_NOT_FILENAME to be used, to prevent any of the functions attached to
> +   * the observable from trying to dereference of->bfd. */
> +  gdb::observers::new_objfile.notify (of);
> +
> +  return of;
> +}
> +
> +/* Implementation of the quick symbol functions used by the objfiles created
> + * using this interface. Turns out there is very little work to do here, as we
> + * can get something that works by effectively just using no-ops, and the rest
> + * of the code will fall back to using just the minimal and full symbol data. It
> + * is important to note, though, that this only works because we're marking our
> + * objfile with `OBJF_READNOW`. */
> +class runtime_objfile : public quick_symbol_functions
> +{
> +  virtual bool has_symbols (struct objfile*) override
> +  {
> +    return false;
> +  }
> +
> +  virtual void dump (struct objfile *objfile) override
> +  {
> +  }
> +
> +  virtual void expand_matching_symbols
> +    (struct objfile *,
> +     const lookup_name_info &lookup_name,
> +     domain_enum domain,
> +     int global,
> +     symbol_compare_ftype *ordered_compare) override
> +  {
> +  }
> +
> +  virtual bool expand_symtabs_matching
> +    (struct objfile *objfile,
> +     gdb::function_view<expand_symtabs_file_matcher_ftype> file_matcher,
> +     const lookup_name_info *lookup_name,
> +     gdb::function_view<expand_symtabs_symbol_matcher_ftype> symbol_matcher,
> +     gdb::function_view<expand_symtabs_exp_notify_ftype> expansion_notify,
> +     block_search_flags search_flags,
> +     domain_enum domain,
> +     enum search_domain kind) override
> +  {
> +    return true;
> +  }
> +};
> +
> +
> +/* Create a new symbol called NAME, allocated on OBJFILE's obstack and set up
> + * with the given LANGUAGE, DOMAIN, ACLASS and SECTION_INDEX. */
> +static struct symbol *
> +new_symbol
> +  (struct objfile *objfile,
> +   const char *name,
> +   enum language language,
> +   enum domain_enum domain,
> +   enum address_class aclass,
> +   short section_index)
> +{
> +  auto symbol = new (&objfile->objfile_obstack) struct symbol ();
> +  OBJSTAT (objfile, n_syms++);
> +
> +  symbol->set_language (language, &objfile->objfile_obstack);
> +  symbol->compute_and_set_names (gdb::string_view (name), true, 
> +                                 objfile->per_bfd);
> +
> +  symbol->set_is_objfile_owned (true);
> +  symbol->set_section_index (section_index);
> +  symbol->set_domain (domain);
> +  symbol->set_aclass_index (aclass);
> +
> +  return symbol;
> +}
> +
> +/* Parses a language name (coming from Python) into a language variant.
> + * Returns language_unknown if the name is not one of those listed below. */
> +
> +static enum language
> +parse_language (const char *language)
> +{

Could this make use of `language_enum` (from language.c)?

> +  if (strcmp (language, "c") == 0)
> +    return language_c;
> +  else if (strcmp (language, "objc") == 0)
> +    return language_objc;
> +  else if (strcmp (language, "cplus") == 0)
> +    return language_cplus;
> +  else if (strcmp (language, "d") == 0)
> +    return language_d;
> +  else if (strcmp (language, "go") == 0)
> +    return language_go;
> +  else if (strcmp (language, "fortran") == 0)
> +    return language_fortran;
> +  else if (strcmp (language, "m2") == 0)
> +    return language_m2;
> +  else if (strcmp (language, "asm") == 0)
> +    return language_asm;
> +  else if (strcmp (language, "pascal") == 0)
> +    return language_pascal;
> +  else if (strcmp (language, "opencl") == 0)
> +    return language_opencl;
> +  else if (strcmp (language, "rust") == 0)
> +    return language_rust;
> +  else if (strcmp (language, "ada") == 0)
> +    return language_ada;
> +  else
> +    return language_unknown;
> +}
> +
> +/* Performs a checked conversion from a PyObject to an objfile_builder_object
> + * pointer. Returns nullptr, without setting a Python error, on a mismatch. */
> +inline static struct objfile_builder_object *
> +validate_objfile_builder_object (PyObject *self)
> +{
> +  if (!PyObject_TypeCheck (self, &objfile_builder_object_type))
> +    return nullptr;
> +  return (struct objfile_builder_object*) self;
> +}
> +
> +/* Registers symbols added with add_type_symbol. */
> +class typedef_symbol_def : public symbol_def
> +{
> +public:
> +  /* The type the new symbol refers to. */
> +  struct type* type;
> +  /* Language passed to new_symbol when the full symbol is created. */
> +  enum language language;
> +
> +  /* No minimal symbol is recorded for type symbols. */
> +  virtual void register_msymbol (const std::string& name,
> +                                 struct objfile *objfile,
> +                                 minimal_symbol_reader& reader) const override
> +  {
> +  }
> +
> +  /* Creates a VAR_DOMAIN/LOC_TYPEDEF symbol called NAME for TYPE and adds it
> +   * to the file symbols of BUILDER. Type names belong in VAR_DOMAIN; a symbol
> +   * placed in LABEL_DOMAIN would only be found by label lookups. */
> +  virtual void register_symbol (const std::string& name,
> +                                struct objfile *objfile,
> +                                buildsym_compunit& builder) const override
> +  {
> +    auto symbol = new_symbol (objfile, name.c_str (), language, VAR_DOMAIN,
> +                              LOC_TYPEDEF, objfile->sect_index_text);
> +
> +    symbol->set_type (type);
> +
> +    add_symbol_to_list (symbol, builder.get_file_symbols ());
> +  }
> +};
> +
> +/* Implements ObjfileBuilder.add_type_symbol (name, type, language).
> + *
> + * Records a type (LOC_TYPEDEF) symbol called NAME for the gdb.Type TYPE, to be
> + * created when the objfile is built. LANGUAGE must be a name accepted by
> + * parse_language. Returns None on success and nullptr on error. If a symbol
> + * called NAME was already added, the earlier definition is kept. */
> +static PyObject *
> +objbdpy_add_type_symbol (PyObject *self, PyObject *args, PyObject *kw)
> +{
> +  static const char *format = "sOs";
> +  static const char *keywords[] =
> +    {
> +      "name", "type", "language", NULL
> +    };
> +
> +  PyObject *type_object;
> +  const char *name;
> +  const char *language_name = nullptr;
> +
> +  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
> +                                        &type_object, &language_name))
> +    return nullptr;
> +
> +  auto builder = validate_objfile_builder_object (self);
> +  if (builder == nullptr)
> +    return nullptr;
> +
> +  struct type *type = type_object_to_type (type_object);
> +  if (type == nullptr)
> +    {
> +      /* Never hand nullptr back to Python without an exception set. */
> +      if (!PyErr_Occurred ())
> +        PyErr_SetString (PyExc_TypeError, "type must be a gdb.Type");
> +      return nullptr;
> +    }
> +
> +  /* All three arguments are mandatory in FORMAT, so LANGUAGE_NAME has been
> +   * filled in by the time we get here. */
> +  enum language language = parse_language (language_name);
> +  if (language == language_unknown)
> +    {
> +      PyErr_SetString (PyExc_ValueError, "invalid language name");
> +      return nullptr;
> +    }
> +
> +  auto def = std::make_unique<typedef_symbol_def> ();
> +  def->type = type;
> +  def->language = language;
> +
> +  builder->inner.add_symbol_def (name, std::move (def));
> +
> +  Py_RETURN_NONE;
> +}
> +
> +
> +/* Registers symbols added with add_label_symbol. */
> +class label_symbol_def : public symbol_def
> +{
> +public:
> +  /* Address recorded for both the minimal and the full symbol. */
> +  CORE_ADDR address;
> +  /* Language passed to new_symbol when the full symbol is created. */
> +  enum language language;
> +
> +  /* Records a mst_text minimal symbol called NAME at ADDRESS. */
> +  virtual void register_msymbol (const std::string& name,
> +                                 struct objfile *objfile,
> +                                 minimal_symbol_reader& reader) const override
> +  {
> +    reader.record (name.c_str (),
> +                   unrelocated_addr (address),
> +                   minimal_symbol_type::mst_text);
> +  }
> +
> +  /* Creates a LABEL_DOMAIN/LOC_LABEL symbol called NAME at ADDRESS and adds it
> +   * to the file symbols of BUILDER. Writes nothing to stdout. */
> +  virtual void register_symbol (const std::string& name,
> +                                struct objfile *objfile,
> +                                buildsym_compunit& builder) const override
> +  {
> +    auto symbol = new_symbol (objfile, name.c_str (), language, LABEL_DOMAIN,
> +                              LOC_LABEL, objfile->sect_index_text);
> +
> +    symbol->set_value_address (address);
> +
> +    add_symbol_to_list (symbol, builder.get_file_symbols ());
> +  }
> +};
> +
> +/* Implements ObjfileBuilder.add_label_symbol (name, address, language).
> + *
> + * Records a label (LOC_LABEL) symbol called NAME at ADDRESS, to be created when
> + * the objfile is built. LANGUAGE must be a name accepted by parse_language.
> + * Returns None on success and nullptr on error. If a symbol called NAME was
> + * already added, the earlier definition is kept. */
> +static PyObject *
> +objbdpy_add_label_symbol (PyObject *self, PyObject *args, PyObject *kw)
> +{
> +  /* "K" stores an unsigned long long. The "k" unit stores an unsigned long,
> +   * which is not guaranteed to be as wide as a CORE_ADDR, so the address is
> +   * parsed into a local of exactly the type Python writes and then copied. */
> +  static const char *format = "sKs";
> +  static const char *keywords[] =
> +    {
> +      "name", "address", "language", NULL
> +    };
> +
> +  const char *name;
> +  unsigned long long address = 0;
> +  const char *language_name = nullptr;
> +
> +  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
> +                                        &address, &language_name))
> +    return nullptr;
> +
> +  auto builder = validate_objfile_builder_object (self);
> +  if (builder == nullptr)
> +    return nullptr;
> +
> +  /* All three arguments are mandatory in FORMAT, so LANGUAGE_NAME has been
> +   * filled in by the time we get here. */
> +  enum language language = parse_language (language_name);
> +  if (language == language_unknown)
> +    {
> +      PyErr_SetString (PyExc_ValueError, "invalid language name");
> +      return nullptr;
> +    }
> +
> +  auto def = std::make_unique<label_symbol_def> ();
> +  def->address = (CORE_ADDR) address;
> +  def->language = language;
> +
> +  builder->inner.add_symbol_def (name, std::move (def));
> +
> +  Py_RETURN_NONE;
> +}
> +
> +/* Registers symbols added with add_static_symbol. */
> +class static_symbol_def : public symbol_def
> +{
> +public:
> +  /* Address recorded for both the minimal and the full symbol. */
> +  CORE_ADDR address;
> +  /* Language passed to new_symbol when the full symbol is created. */
> +  enum language language;
> +
> +  /* Records a mst_bss minimal symbol called NAME at ADDRESS. */
> +  virtual void register_msymbol (const std::string& name,
> +                                 struct objfile *objfile,
> +                                 minimal_symbol_reader& reader) const override
> +  {
> +    reader.record (name.c_str (), 
> +                   unrelocated_addr (address), 
> +                   minimal_symbol_type::mst_bss);
> +  }
> +
> +  /* Creates a VAR_DOMAIN/LOC_STATIC symbol called NAME at ADDRESS, using the
> +   * bss section index of OBJFILE, and adds it to the file symbols of
> +   * BUILDER. */
> +  virtual void register_symbol (const std::string& name,
> +                                struct objfile *objfile,
> +                                buildsym_compunit& builder) const override
> +  {
> +    auto symbol = new_symbol (objfile, name.c_str (), language, VAR_DOMAIN,
> +                              LOC_STATIC, objfile->sect_index_bss);
> +
> +    symbol->set_value_address (address);
> +
> +    add_symbol_to_list (symbol, builder.get_file_symbols ());
> +  }
> +};
> +
> +/* Implements ObjfileBuilder.add_static_symbol (name, address, language).
> + *
> + * Records a static (LOC_STATIC) symbol called NAME at ADDRESS, to be created
> + * when the objfile is built. LANGUAGE must be a name accepted by
> + * parse_language. Returns None on success and nullptr on error. If a symbol
> + * called NAME was already added, the earlier definition is kept. */
> +static PyObject *
> +objbdpy_add_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
> +{
> +  /* "K" stores an unsigned long long. The "k" unit stores an unsigned long,
> +   * which is not guaranteed to be as wide as a CORE_ADDR, so the address is
> +   * parsed into a local of exactly the type Python writes and then copied. */
> +  static const char *format = "sKs";
> +  static const char *keywords[] =
> +    {
> +      "name", "address", "language", NULL
> +    };
> +
> +  const char *name;
> +  unsigned long long address = 0;
> +  const char *language_name = nullptr;
> +
> +  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
> +                                        &address, &language_name))
> +    return nullptr;
> +
> +  auto builder = validate_objfile_builder_object (self);
> +  if (builder == nullptr)
> +    return nullptr;
> +
> +  /* All three arguments are mandatory in FORMAT, so LANGUAGE_NAME has been
> +   * filled in by the time we get here. */
> +  enum language language = parse_language (language_name);
> +  if (language == language_unknown)
> +    {
> +      PyErr_SetString (PyExc_ValueError, "invalid language name");
> +      return nullptr;
> +    }
> +
> +  auto def = std::make_unique<static_symbol_def> ();
> +  def->address = (CORE_ADDR) address;
> +  def->language = language;
> +
> +  builder->inner.add_symbol_def (name, std::move (def));
> +
> +  Py_RETURN_NONE;
> +}
> +
> +/* Implements ObjfileBuilder.build ().
> + *
> + * Builds and installs an objfile holding the symbols added to the builder so
> + * far, and marks the builder as installed so that it cannot be built again.
> + * Returns a new reference to the gdb.Objfile for the new objfile, or nullptr
> + * on error. */
> +static PyObject *
> +objbdpy_build (PyObject *self, PyObject *args)
> +{
> +  auto builder = validate_objfile_builder_object (self);
> +  if (builder == nullptr)
> +    return nullptr;
> +
> +  if (builder->inner.installed)
> +    {
> +      /* Adjacent literals are used instead of a backslash continuation, which
> +       * would embed the indentation of the second line in the message. */
> +      PyErr_SetString (PyExc_ValueError,
> +                       "build() cannot be run twice on the "
> +                       "same object");
> +      return nullptr;
> +    }
> +
> +  /* GDB exceptions must not propagate into the Python interpreter, so turn
> +   * any raised while building into a Python exception. */
> +  struct objfile *of = nullptr;
> +  try
> +    {
> +      of = build_new_objfile (*builder);
> +    }
> +  catch (const gdb_exception &except)
> +    {
> +      GDB_PY_HANDLE_EXCEPTION (except);
> +    }
> +  builder->inner.installed = true;
> +
> +  /* Hand the reference we were given straight to the caller. Unlike taking
> +   * the raw pointer and calling Py_INCREF on it, this is also safe when no
> +   * object could be created and the reference is null. */
> +  return objfile_to_objfile_object (of).release ();
> +}
> +
> +/* Implements the __init__() function.
> + *
> + * Parses the single mandatory "name" argument, stores it as the name of the
> + * objfile to be built, and drops any symbol definitions already held by the
> + * builder. Returns 0 on success and -1 if argument parsing fails. */
> +static int
> +objbdpy_init (PyObject *self0, PyObject *args, PyObject *kw)
> +{
> +  static const char *format = "s";
> +  static const char *keywords[] =
> +    {
> +      "name", NULL
> +    };
> +
> +  const char *name;
> +  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name))
> +    return -1;
> +
> +  auto self = (objfile_builder_object *)self0;
> +  self->inner.name = name;
> +  self->inner.symbols.clear ();
> +
> +  return 0;
> +}
> +
> +/* The function handling construction of the ObjfileBuilder object.
> + *
> + * We need to have a custom function here as, even though Python manages the
> + * memory backing the object up, it assumes clearing the memory is enough to
> + * begin its lifetime, which is not the case here, and would lead to undefined
> + * behavior as soon as we try to use it in any meaningful way.
> + *
> + * So, what we have to do here is manually begin the lifecycle of our new object
> + * by constructing it in place, using the memory region Python just allocated
> + * for us. This ensures the object will have already started its lifetime by
> + * the time we start using it.
> + *
> + * Returns the new object, or nullptr if the allocation fails. */
> +static PyObject *
> +objbdpy_new (PyTypeObject *subtype, PyObject *args, PyObject *kwds)
> +{
> +  objfile_builder_object *region =
> +    (objfile_builder_object *) subtype->tp_alloc (subtype, 1);
> +
> +  /* A failed allocation is reported to the caller like any other Python
> +   * error, instead of bringing GDB down through an assertion. This check
> +   * also has to come before anything else looks at the pointer. */
> +  if (region == nullptr)
> +    return nullptr;
> +  gdb_assert ((size_t) region % alignof (objfile_builder_object) == 0);
> +
> +  new (&region->inner) objfile_builder_data ();
> +
> +  return (PyObject *) region;
> +}
> +
> +/* The function handling destruction of the ObjfileBuilder object.
> + *
> + * While running the destructor of our object isn't _strictly_ necessary, we
> + * would very much like for the memory it owns to be freed, but, because it was
> + * constructed in place, we have to call its destructor manually here. */
> +static void
> +objbdpy_dealloc (PyObject *self0)
> +{
> +  auto self = (objfile_builder_object *) self0;
> +  PyTypeObject *tp = Py_TYPE (self);
> +
> +  self->inner.~objfile_builder_data ();
> +
> +  /* The type's reference count is deliberately left alone: only instances of
> +   * heap-allocated types own a reference to their type, and
> +   * objfile_builder_object_type is a statically allocated type object. */
> +  tp->tp_free (self);
> +}
> +
> +/* Readies the gdb.ObjfileBuilder type and adds it to the gdb module under the
> + * name "ObjfileBuilder". Returns -1 if PyType_Ready fails; otherwise returns
> + * the result of gdb_pymodule_addobject. */
> +static int CPYCHECKER_NEGATIVE_RESULT_SETS_EXCEPTION
> +gdbpy_initialize_objfile_builder (void)
> +{
> +  if (PyType_Ready (&objfile_builder_object_type) < 0)
> +    return -1;
> +
> +  return gdb_pymodule_addobject (gdb_module, "ObjfileBuilder",
> +				 (PyObject *) &objfile_builder_object_type);
> +}
> +
> +GDBPY_INITIALIZE_FILE (gdbpy_initialize_objfile_builder);
> +
> +/* Methods of gdb.ObjfileBuilder objects. build takes no arguments; the three
> + * add_*_symbol methods accept positional and keyword arguments. */
> +static PyMethodDef objfile_builder_object_methods[] =
> +{
> +  { "build", (PyCFunction) objbdpy_build, METH_NOARGS,
> +    "build ().\n\
> +Build a new objfile containing the symbols added to builder." },
> +  { "add_type_symbol", (PyCFunction) objbdpy_add_type_symbol,
> +    METH_VARARGS | METH_KEYWORDS,
> +    "add_type_symbol (name [str], type [gdb.Type], language [str]).\n\
> +Add a new type symbol in the given language, associated with the given type." },
> +  { "add_label_symbol", (PyCFunction) objbdpy_add_label_symbol,
> +    METH_VARARGS | METH_KEYWORDS,
> +    "add_label_symbol (name [str], address [int], language [str]).\n\
> +Add a new label symbol in the given language, at the given address." },
> +  { "add_static_symbol", (PyCFunction) objbdpy_add_static_symbol,
> +    METH_VARARGS | METH_KEYWORDS,
> +    "add_static_symbol (name [str], address [int], language [str]).\n\
> +Add a new static symbol in the given language, at the given address." },
> +  { NULL }
> +};
> +
> +/* Type object for gdb.ObjfileBuilder. Instances are allocated by objbdpy_new,
> + * initialized by objbdpy_init and torn down by objbdpy_dealloc. */
> +PyTypeObject objfile_builder_object_type = {
> +  PyVarObject_HEAD_INIT (NULL, 0)
> +  "gdb.ObjfileBuilder",               /* tp_name */
> +  sizeof (objfile_builder_object),    /* tp_basicsize */
> +  0,                                  /* tp_itemsize */
> +  objbdpy_dealloc,                    /* tp_dealloc */
> +  0,                                  /* tp_vectorcall_offset */
> +  nullptr,                            /* tp_getattr */
> +  nullptr,                            /* tp_setattr */
> +  nullptr,                            /* tp_compare */
> +  nullptr,                            /* tp_repr */
> +  nullptr,                            /* tp_as_number */
> +  nullptr,                            /* tp_as_sequence */
> +  nullptr,                            /* tp_as_mapping */
> +  nullptr,                            /* tp_hash  */
> +  nullptr,                            /* tp_call */
> +  nullptr,                            /* tp_str */
> +  nullptr,                            /* tp_getattro */
> +  nullptr,                            /* tp_setattro */
> +  nullptr,                            /* tp_as_buffer */
> +  Py_TPFLAGS_DEFAULT,                 /* tp_flags */
> +  "GDB object file builder",          /* tp_doc */
> +  nullptr,                            /* tp_traverse */
> +  nullptr,                            /* tp_clear */
> +  nullptr,                            /* tp_richcompare */
> +  0,                                  /* tp_weaklistoffset */
> +  nullptr,                            /* tp_iter */
> +  nullptr,                            /* tp_iternext */
> +  objfile_builder_object_methods,     /* tp_methods */
> +  nullptr,                            /* tp_members */
> +  nullptr,                            /* tp_getset */
> +  nullptr,                            /* tp_base */
> +  nullptr,                            /* tp_dict */
> +  nullptr,                            /* tp_descr_get */
> +  nullptr,                            /* tp_descr_set */
> +  0,                                  /* tp_dictoffset */
> +  objbdpy_init,                       /* tp_init */
> +  PyType_GenericAlloc,                /* tp_alloc */
> +  objbdpy_new,                        /* tp_new */
> +};
> +
> +
> diff --git a/gdb/python/py-objfile.c b/gdb/python/py-objfile.c
> index ad72f3f042..be21011ce6 100644
> --- a/gdb/python/py-objfile.c
> +++ b/gdb/python/py-objfile.c
> @@ -25,6 +25,7 @@
>  #include "build-id.h"
>  #include "symtab.h"
>  #include "python.h"
> +#include "buildsym.h"
>

Is this change really needed?

>  struct objfile_object
>  {
> diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h
> index dbd33570a7..fbf9b06af5 100644
> --- a/gdb/python/python-internal.h
> +++ b/gdb/python/python-internal.h
> @@ -480,6 +480,7 @@ struct symtab *symtab_object_to_symtab (PyObject *obj);
>  struct symtab_and_line *sal_object_to_symtab_and_line (PyObject *obj);
>  frame_info_ptr frame_object_to_frame_info (PyObject *frame_obj);
>  struct gdbarch *arch_object_to_gdbarch (PyObject *obj);
> +struct floatformat *float_format_object_as_float_format (PyObject *self);

Likewise, I suspect this change is not needed.

>  
>  /* Convert Python object OBJ to a program_space pointer.  OBJ must be a
>     gdb.Progspace reference.  Return nullptr if the gdb.Progspace is not
> -- 
> 2.40.1

Thanks,
Andrew


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Add support for symbol addition to the Python API
@ 2023-05-27  1:24 Matheus Branco Borella
  2023-07-04 15:14 ` Andrew Burgess
  0 siblings, 1 reply; 6+ messages in thread
From: Matheus Branco Borella @ 2023-05-27  1:24 UTC (permalink / raw)
  To: gdb-patches; +Cc: Matheus Branco Borella

Disclaimer:

This patch is a rework of a six-month-old patch I submitted to the mailing list
that considerably reduces the hackiness of the original solution to the problem,
now that I've had more time to read through and understand how symbols are
handled and searched for inside GDB. So, I'd like to ask for comments on
things I can still improve in this patch before I resubmit it. I also plan to
add tests to it once I'm more confident in the approach I'm now taking to solve
the problem.

The interfaces in this patch can be tested like so:
```
(gdb) pi
>>> builder = gdb.ObjfileBuilder(name = "some_name")
>>> builder.add_static_symbol(name = "some_sym", address = 0x41414141, 
        language = "c")
>>> objfile = builder.build()
```

---

This patch adds support for symbol creation and registration. It currently
supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols
(VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It
adds a new `gdb.ObjfileBuilder` type, with `add_type_symbol`,
`add_static_symbol` and `add_label_symbol` functions, allowing for the addition
of the aforementioned types of symbols.

Symbol addition is achieved by constructing a new objfile with msyms and full
symbols reflecting the symbols that were previously added to the builder through
its methods. This approach lets us get most of the way to full symbol addition
support, but because the objfile is not backed by BFD, it does have a few
limitations, which I will go over here.

PC-based minsym lookup does not work, because it would require a more complete
set of BFD structures than this patch provides. I don't think it would be good
practice to pretend to have them all, only for GDB to crash later on when it
expects things to be there that aren't.

In the same vein, PC-based function name lookup also does not work, although
there may be a way to have the feature work using overlays. However, this patch
does not make an attempt to do so.

For now, though, this implementation lets us add symbols that can be used to,
for instance, query registered types through `gdb.lookup_type`, and allows
reverse engineering GDB plugins (such as Pwndbg [0] or decomp2gdb [1]) to add
symbols directly through the Python API instead of having to compile an object
file for the target architecture that they later load through the add-symbol-
file command. [2]

[0] https://github.com/pwndbg/pwndbg/
[1] https://github.com/mahaloz/decomp2dbg
[2] https://github.com/mahaloz/decomp2dbg/blob/055be6b2001954d00db2d683f20e9b714af75880/decomp2dbg/clients/gdb/symbol_mapper.py#L235-L243]
---
 gdb/Makefile.in                 |   1 +
 gdb/python/py-objfile-builder.c | 648 ++++++++++++++++++++++++++++++++
 gdb/python/py-objfile.c         |   1 +
 gdb/python/python-internal.h    |   1 +
 4 files changed, 651 insertions(+)
 create mode 100644 gdb/python/py-objfile-builder.c

diff --git a/gdb/Makefile.in b/gdb/Makefile.in
index 14b5dd0bad..c0eecb81b6 100644
--- a/gdb/Makefile.in
+++ b/gdb/Makefile.in
@@ -417,6 +417,7 @@ SUBDIR_PYTHON_SRCS = \
 	python/py-micmd.c \
 	python/py-newobjfileevent.c \
 	python/py-objfile.c \
+	python/py-objfile-builder.c \
 	python/py-param.c \
 	python/py-prettyprint.c \
 	python/py-progspace.c \
diff --git a/gdb/python/py-objfile-builder.c b/gdb/python/py-objfile-builder.c
new file mode 100644
index 0000000000..1e3110c613
--- /dev/null
+++ b/gdb/python/py-objfile-builder.c
@@ -0,0 +1,648 @@
+/* Python class allowing users to build and install objfiles.
+
+   Copyright (C) 2013-2023 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+#include "python-internal.h"
+#include "quick-symbol.h"
+#include "objfiles.h"
+#include "minsyms.h"
+#include "buildsym.h"
+#include "observable.h"
+#include <string>
+#include <unordered_map>
+#include <type_traits>
+#include <optional>
+
+/* This module relies on symbols being trivially copyable. */
+static_assert (std::is_trivially_copyable_v<struct symbol>);
+
+/* Interface to be implemented by each kind of symbol this module can add. */
+class symbol_def
+{
+public:
+  virtual ~symbol_def () = default;
+
+  virtual void register_msymbol (const std::string& name, 
+                                 struct objfile* objfile,
+                                 minimal_symbol_reader& reader) const = 0;
+  virtual void register_symbol (const std::string& name, 
+                                struct objfile* objfile,
+                                buildsym_compunit& builder) const = 0;
+};
+
+/* Shorthand for a unique_ptr to a symbol definition. */
+typedef std::unique_ptr<symbol_def> symbol_def_up;
+
+/* Data being held by the gdb.ObjfileBuilder.
+ *
+ * This structure needs to have its constructor run in order for its lifetime
+ * to begin. Because of how Python handles its objects, we can't just reconstruct
+ * the object structure as a whole, as that would overwrite things the runtime
+ * cares about, so these fields had to be broken off into their own structure. */
+struct objfile_builder_data
+{
+  /* Indicates whether the objfile has already been built and added to the
+   * current context. We enforce that objfiles can't be installed twice. */
+  bool installed = false;
+
+  /* The symbols that will be added to the newly built objfile, by name. */
+  std::unordered_map<std::string, symbol_def_up> symbols;
+
+  /* The name given to this objfile. */
+  std::string name;
+
+  /* Adds a definition for NAME. Returns false if NAME was already present. */
+  bool add_symbol_def (std::string name, symbol_def_up&& symbol_def)
+  {
+    return std::get<1> (symbols.insert ({name, std::move (symbol_def)}));
+  }
+};
+
+/* Structure backing the gdb.ObjfileBuilder type. */
+
+struct objfile_builder_object
+{
+  PyObject_HEAD
+
+  /* The builder's C++ state; see objfile_builder_data. */
+  objfile_builder_data inner;
+};
+
+extern PyTypeObject objfile_builder_object_type
+    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("objfile_builder_object_type");
+
+/* Constructs a new objfile from BUILDER and announces it to the rest of GDB.
+ * The objfile has no backing BFD: it gets four placeholder sections and the
+ * minimal and full symbols recorded in BUILDER, which must not have been
+ * installed yet. Returns the new objfile. */
+static struct objfile *
+build_new_objfile (const objfile_builder_object& builder)
+{
+  gdb_assert (!builder.inner.installed);
+
+  auto of = objfile::make (nullptr, builder.inner.name.c_str (),
+                           OBJF_READNOW | OBJF_NOT_FILENAME, nullptr);
+
+  /* Setup object file sections. */
+  const int n_sections = 4;
+  of->sections_start = OBSTACK_CALLOC (&of->objfile_obstack, n_sections,
+                                       struct obj_section);
+  of->sections_end = of->sections_start + n_sections;
+
+  const auto init_section = [&](struct obj_section* sec)
+    {
+      sec->objfile = of;
+      sec->ovly_mapped = false;
+
+      /* We're not being backed by BFD. So we have no real section data to speak
+       * of, but, because specifying sections requires BFD structures, we have to
+       * play a little game of pretend. */
+      auto bfd = obstack_new<bfd_section> (&of->objfile_obstack);
+      bfd->vma = 0;
+      bfd->size = 0;
+      bfd->lma = 0; /* Prevents insert_section_p in objfiles.c from trying to
+                     * dereference the bfd structure we don't have. */
+      sec->the_bfd_section = bfd;
+    };
+  for (int i = 0; i < n_sections; i++)
+    init_section (&of->sections_start[i]);
+
+  of->sect_index_text = 0;
+  of->sect_index_data = 1;
+  of->sect_index_rodata = 2;
+  of->sect_index_bss = 3;
+
+  /* While buildsym_compunit expects the symbol function pointer structure to be
+   * present, it also gracefully handles the case where all of the pointers in
+   * it are set to null. So, make sure we have a valid structure, but there's
+   * no need to do more than that. */
+  of->sf = obstack_new<struct sym_fns> (&of->objfile_obstack);
+
+  /* We need to tell GDB what architecture the objfile uses. */
+  if (has_stack_frames ())
+    of->per_bfd->gdbarch = get_frame_arch (get_selected_frame (nullptr));
+  else
+    of->per_bfd->gdbarch = target_gdbarch ();
+
+  /* Construct the minimal symbols. */
+  minimal_symbol_reader msym (of);
+  for (const auto& [name, symbol] : builder.inner.symbols)
+    symbol->register_msymbol (name, of, msym);
+  msym.install ();
+
+  /* Construct the full symbols. */
+  buildsym_compunit fsym (of, builder.inner.name.c_str (), "", language_c, 0);
+  for (const auto& [name, symbol] : builder.inner.symbols)
+    symbol->register_symbol (name, of, fsym);
+  fsym.end_compunit_symtab (0);
+
+  /* Notify the rest of GDB this objfile has been created. Requires
+   * OBJF_NOT_FILENAME to be used, to prevent any of the functions attached to
+   * the observable from trying to dereference of->bfd. */
+  gdb::observers::new_objfile.notify (of);
+
+  return of;
+}
+
+/* Implementation of the quick symbol functions used by the objfiles created
+ * using this interface. Turns out there is very little work to do here, as we
+ * can get something that works by effectively just using no-ops, and the rest
+ * of the code will fall back to using just the minimal and full symbol data. It
+ * is important to note, though, that this only works because we're marking our
+ * objfile with `OBJF_READNOW`. */
+class runtime_objfile : public quick_symbol_functions
+{
+  virtual bool has_symbols (struct objfile*) override
+  {
+    return false;
+  }
+
+  virtual void dump (struct objfile *objfile) override
+  {
+  }
+
+  virtual void expand_matching_symbols
+    (struct objfile *,
+     const lookup_name_info &lookup_name,
+     domain_enum domain,
+     int global,
+     symbol_compare_ftype *ordered_compare) override
+  {
+  }
+
+  virtual bool expand_symtabs_matching
+    (struct objfile *objfile,
+     gdb::function_view<expand_symtabs_file_matcher_ftype> file_matcher,
+     const lookup_name_info *lookup_name,
+     gdb::function_view<expand_symtabs_symbol_matcher_ftype> symbol_matcher,
+     gdb::function_view<expand_symtabs_exp_notify_ftype> expansion_notify,
+     block_search_flags search_flags,
+     domain_enum domain,
+     enum search_domain kind) override
+  {
+    return true;
+  }
+};
+
+
+/* Create a new symbol called NAME, allocated on OBJFILE's obstack and set up
+ * with the given LANGUAGE, DOMAIN, ACLASS and SECTION_INDEX. */
+static struct symbol *
+new_symbol
+  (struct objfile *objfile,
+   const char *name,
+   enum language language,
+   enum domain_enum domain,
+   enum address_class aclass,
+   short section_index)
+{
+  auto symbol = new (&objfile->objfile_obstack) struct symbol ();
+  OBJSTAT (objfile, n_syms++);
+
+  symbol->set_language (language, &objfile->objfile_obstack);
+  symbol->compute_and_set_names (gdb::string_view (name), true, 
+                                 objfile->per_bfd);
+
+  symbol->set_is_objfile_owned (true);
+  symbol->set_section_index (section_index);
+  symbol->set_domain (domain);
+  symbol->set_aclass_index (aclass);
+
+  return symbol;
+}
+
+/* Parses a language name (coming from Python) into a language variant.
+ * Returns language_unknown if the name is not one of those listed below. */
+
+static enum language
+parse_language (const char *language)
+{
+  if (strcmp (language, "c") == 0)
+    return language_c;
+  else if (strcmp (language, "objc") == 0)
+    return language_objc;
+  else if (strcmp (language, "cplus") == 0)
+    return language_cplus;
+  else if (strcmp (language, "d") == 0)
+    return language_d;
+  else if (strcmp (language, "go") == 0)
+    return language_go;
+  else if (strcmp (language, "fortran") == 0)
+    return language_fortran;
+  else if (strcmp (language, "m2") == 0)
+    return language_m2;
+  else if (strcmp (language, "asm") == 0)
+    return language_asm;
+  else if (strcmp (language, "pascal") == 0)
+    return language_pascal;
+  else if (strcmp (language, "opencl") == 0)
+    return language_opencl;
+  else if (strcmp (language, "rust") == 0)
+    return language_rust;
+  else if (strcmp (language, "ada") == 0)
+    return language_ada;
+  else
+    return language_unknown;
+}
+
+/* Convenience function that performs a checked conversion from a PyObject
+   to an objfile_builder_object structure pointer.
+
+   Returns SELF cast to the builder type when it is an instance of
+   objfile_builder_object_type.  Otherwise sets a Python TypeError and
+   returns nullptr, so that callers which simply return nullptr in turn
+   report a proper exception instead of an "error return without exception
+   set" SystemError.  */
+inline static struct objfile_builder_object *
+validate_objfile_builder_object (PyObject *self)
+{
+  if (!PyObject_TypeCheck (self, &objfile_builder_object_type))
+    {
+      PyErr_SetString (PyExc_TypeError,
+                       "expected a gdb.ObjfileBuilder object");
+      return nullptr;
+    }
+
+  return (struct objfile_builder_object *) self;
+}
+
+/* Registers symbols added with add_type_symbol.  */
+class typedef_symbol_def : public symbol_def
+{
+public:
+  /* The type the new symbol names.  */
+  struct type* type;
+
+  /* The language recorded on the new symbol.  */
+  enum language language;
+
+  /* Records nothing: no minimal symbol is created for a type symbol.  */
+  virtual void register_msymbol (const std::string& name,
+                                 struct objfile *objfile,
+                                 minimal_symbol_reader& reader) const override
+  {
+  }
+
+  /* Creates a LOC_TYPEDEF symbol called NAME in OBJFILE, attaches TYPE to
+     it and appends it to BUILDER's file-level symbol list.
+
+     The symbol goes into VAR_DOMAIN, matching the VAR_DOMAIN/LOC_TYPEDEF
+     pairing this patch advertises for type symbols; LABEL_DOMAIN is the
+     domain used for goto labels.  */
+  virtual void register_symbol (const std::string& name,
+                                struct objfile *objfile,
+                                buildsym_compunit& builder) const override
+  {
+    auto symbol = new_symbol (objfile, name.c_str (), language, VAR_DOMAIN,
+                              LOC_TYPEDEF, objfile->sect_index_text);
+
+    symbol->set_type (type);
+
+    add_symbol_to_list (symbol, builder.get_file_symbols ());
+  }
+};
+
+/* Implements ObjfileBuilder.add_type_symbol (name, type, language).
+
+   Queues a type (LOC_TYPEDEF) symbol definition on the builder SELF.  TYPE
+   is converted with type_object_to_type and LANGUAGE with parse_language.
+   Returns None on success.  Returns nullptr when argument parsing, builder
+   validation or type conversion fails, and raises ValueError when the
+   language name is not recognised.  */
+static PyObject *
+objbdpy_add_type_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *keywords[] = { "name", "type", "language", NULL };
+  static const char *format = "sOs";
+
+  const char *name;
+  PyObject *type_object;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &type_object, &language_name))
+    return nullptr;
+
+  struct objfile_builder_object *builder
+    = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  struct type *type = type_object_to_type (type_object);
+  if (type == nullptr)
+    return nullptr;
+
+  /* NOTE(review): FORMAT has no '|' marker, so LANGUAGE looks mandatory and
+     this "auto" fallback is presumably never reached; parse_language as
+     visible in this patch has no "auto" entry either.  Confirm the intended
+     default.  */
+  const char *effective_name
+    = language_name != nullptr ? language_name : "auto";
+
+  enum language language = parse_language (effective_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  auto definition = std::make_unique<typedef_symbol_def> ();
+  definition->type = type;
+  definition->language = language;
+  builder->inner.add_symbol_def (name, std::move (definition));
+
+  Py_RETURN_NONE;
+}
+
+
+/* Registers symbols added with add_label_symbol.  */
+class label_symbol_def : public symbol_def
+{
+public:
+  /* The address the label refers to.  */
+  CORE_ADDR address;
+
+  /* The language recorded on the new symbol.  */
+  enum language language;
+
+  /* Records a minimal symbol of type mst_text called NAME at ADDRESS.  */
+  virtual void register_msymbol (const std::string& name,
+                                 struct objfile *objfile,
+                                 minimal_symbol_reader& reader) const override
+  {
+    reader.record (name.c_str (),
+                   unrelocated_addr (address),
+                   minimal_symbol_type::mst_text);
+  }
+
+  /* Creates a LABEL_DOMAIN/LOC_LABEL symbol called NAME in OBJFILE's text
+     section, points it at ADDRESS and appends it to BUILDER's file-level
+     symbol list.  This runs silently; a stray debugging printf that wrote
+     to stdout for every label has been dropped.  */
+  virtual void register_symbol (const std::string& name,
+                                struct objfile *objfile,
+                                buildsym_compunit& builder) const override
+  {
+    auto symbol = new_symbol (objfile, name.c_str (), language, LABEL_DOMAIN,
+                              LOC_LABEL, objfile->sect_index_text);
+
+    symbol->set_value_address (address);
+
+    add_symbol_to_list (symbol, builder.get_file_symbols ());
+  }
+};
+
+/* Implements ObjfileBuilder.add_label_symbol (name, address, language).
+
+   Queues a label (LOC_LABEL) symbol definition on the builder SELF.
+   Returns None on success.  Returns nullptr when argument parsing or
+   builder validation fails, and raises ValueError when the language name
+   is not recognised.  */
+static PyObject *
+objbdpy_add_label_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  /* The address is parsed with the "K" unit into an unsigned long long and
+     converted afterwards.  Parsing with "k" stores an unsigned long, which
+     may differ in width from CORE_ADDR and would then leave part of the
+     address unset.  */
+  static const char *format = "sKs";
+  static const char *keywords[] =
+    {
+      "name", "address", "language", NULL
+    };
+
+  const char *name;
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  /* NOTE(review): FORMAT has no '|' marker, so LANGUAGE looks mandatory and
+     this fallback is presumably never reached — confirm the intended
+     default.  */
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  auto def = std::make_unique<label_symbol_def> ();
+  def->address = (CORE_ADDR) raw_address;
+  def->language = language;
+
+  builder->inner.add_symbol_def (name, std::move (def));
+
+  Py_RETURN_NONE;
+}
+
+/* Symbol definition queued by add_static_symbol.  */
+class static_symbol_def : public symbol_def
+{
+public:
+  /* The address of the static symbol.  */
+  CORE_ADDR address;
+
+  /* The language recorded on the new symbol.  */
+  enum language language;
+
+  /* Records a minimal symbol of type mst_bss called NAME at ADDRESS.  */
+  void register_msymbol (const std::string &name,
+                         struct objfile *objfile,
+                         minimal_symbol_reader &reader) const override
+  {
+    const char *symbol_name = name.c_str ();
+
+    reader.record (symbol_name, unrelocated_addr (address),
+                   minimal_symbol_type::mst_bss);
+  }
+
+  /* Creates a VAR_DOMAIN/LOC_STATIC symbol called NAME in OBJFILE's bss
+     section, points it at ADDRESS and appends it to BUILDER's file-level
+     symbol list.  */
+  void register_symbol (const std::string &name,
+                        struct objfile *objfile,
+                        buildsym_compunit &builder) const override
+  {
+    struct symbol *sym
+      = new_symbol (objfile, name.c_str (), language, VAR_DOMAIN,
+                    LOC_STATIC, objfile->sect_index_bss);
+
+    sym->set_value_address (address);
+    add_symbol_to_list (sym, builder.get_file_symbols ());
+  }
+};
+
+/* Implements ObjfileBuilder.add_static_symbol (name, address, language).
+
+   Queues a static (LOC_STATIC) symbol definition on the builder SELF.
+   Returns None on success.  Returns nullptr when argument parsing or
+   builder validation fails, and raises ValueError when the language name
+   is not recognised.  */
+static PyObject *
+objbdpy_add_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  /* The address is parsed with the "K" unit into an unsigned long long and
+     converted afterwards.  Parsing with "k" stores an unsigned long, which
+     may differ in width from CORE_ADDR and would then leave part of the
+     address unset.  */
+  static const char *format = "sKs";
+  static const char *keywords[] =
+    {
+      "name", "address", "language", NULL
+    };
+
+  const char *name;
+  unsigned long long raw_address = 0;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &raw_address, &language_name))
+    return nullptr;
+
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  /* NOTE(review): FORMAT has no '|' marker, so LANGUAGE looks mandatory and
+     this fallback is presumably never reached — confirm the intended
+     default.  */
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+    {
+      PyErr_SetString (PyExc_ValueError, "invalid language name");
+      return nullptr;
+    }
+
+  auto def = std::make_unique<static_symbol_def> ();
+  def->address = (CORE_ADDR) raw_address;
+  def->language = language;
+
+  builder->inner.add_symbol_def (name, std::move (def));
+
+  Py_RETURN_NONE;
+}
+
+/* Implements ObjfileBuilder.build ().
+
+   Builds a new objfile from the definitions queued on the builder SELF and
+   returns the Python object wrapping it.  A builder can only be built once:
+   a second call raises ValueError.  Returns nullptr if SELF is not a
+   builder object or if no Python wrapper could be obtained for the new
+   objfile.  ARGS is unused.  */
+static PyObject *
+objbdpy_build (PyObject *self, PyObject *args)
+{
+  auto builder = validate_objfile_builder_object (self);
+  if (builder == nullptr)
+    return nullptr;
+
+  if (builder->inner.installed)
+    {
+      /* Adjacent literals are used instead of a backslash continuation,
+         which would embed the next line's indentation in the message.  */
+      PyErr_SetString (PyExc_ValueError,
+                       "build() cannot be run twice on the "
+                       "same object");
+      return nullptr;
+    }
+
+  auto of = build_new_objfile (*builder);
+  builder->inner.installed = true;
+
+  /* Hand the reference straight to the caller.  This assumes
+     objfile_to_objfile_object returns an owning gdbpy_ref<>, as its use
+     with .get () suggests; releasing it also passes a null result through
+     rather than calling Py_INCREF on it.  */
+  return objfile_to_objfile_object (of).release ();
+}
+
+/* Implements ObjfileBuilder.__init__ (name).
+
+   Stores NAME in the builder's data and empties its list of queued symbol
+   definitions.  Returns 0 on success and -1 if the arguments cannot be
+   parsed.  */
+static int
+objbdpy_init (PyObject *self0, PyObject *args, PyObject *kw)
+{
+  static const char *keywords[] = { "name", NULL };
+  static const char *format = "s";
+
+  const char *name;
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name))
+    return -1;
+
+  auto &data = ((objfile_builder_object *) self0)->inner;
+  data.name = name;
+  data.symbols.clear ();
+
+  return 0;
+}
+
+/* The function handling construction of the ObjfileBuilder object.
+
+   We need to have a custom function here as, even though Python manages
+   the memory backing the object up, it assumes clearing the memory is
+   enough to begin its lifetime, which is not the case here, and would lead
+   to undefined behavior as soon as we try to use it in any meaningful way.
+
+   So, what we have to do here is manually begin the lifecycle of our new
+   object by constructing it in place, using the memory region Python just
+   allocated for us.  This ensures the object will have already started its
+   lifetime by the time we start using it.
+
+   Returns the new object, or nullptr if SUBTYPE's tp_alloc fails.  ARGS
+   and KWDS are unused here.  */
+static PyObject *
+objbdpy_new (PyTypeObject *subtype, PyObject *args, PyObject *kwds)
+{
+  objfile_builder_object *region
+    = (objfile_builder_object *) subtype->tp_alloc (subtype, 1);
+
+  /* An allocation failure is reported to Python by returning nullptr;
+     asserting here would abort the whole session instead.  */
+  if (region == nullptr)
+    return nullptr;
+
+  /* Placement new below needs suitably aligned storage.  */
+  gdb_assert ((size_t) region % alignof (objfile_builder_object) == 0);
+
+  new (&region->inner) objfile_builder_data ();
+
+  return (PyObject *) region;
+}
+
+/* The function handling destruction of the ObjfileBuilder object.
+
+   While running the destructor of our object isn't _strictly_ necessary,
+   we would very much like for the memory it owns to be freed, but, because
+   it was constructed in place, we have to call its destructor manually
+   here.
+
+   The type object's reference count is left alone: the deallocator only
+   owes a Py_DECREF on the type for heap-allocated types, whereas
+   objfile_builder_object_type is a static object that does not set
+   Py_TPFLAGS_BASETYPE.  */
+static void
+objbdpy_dealloc (PyObject *self0)
+{
+  auto self = (objfile_builder_object *) self0;
+  PyTypeObject *tp = Py_TYPE (self);
+
+  self->inner.~objfile_builder_data ();
+
+  tp->tp_free (self);
+}
+
+/* Readies objfile_builder_object_type and adds it to gdb_module under the
+   name "ObjfileBuilder".  Returns -1 if PyType_Ready fails, otherwise the
+   result of gdb_pymodule_addobject.  */
+static int CPYCHECKER_NEGATIVE_RESULT_SETS_EXCEPTION
+gdbpy_initialize_objfile_builder (void)
+{
+  PyTypeObject *builder_type = &objfile_builder_object_type;
+
+  if (PyType_Ready (builder_type) < 0)
+    return -1;
+
+  PyObject *as_object = (PyObject *) builder_type;
+  return gdb_pymodule_addobject (gdb_module, "ObjfileBuilder", as_object);
+}
+
+GDBPY_INITIALIZE_FILE (gdbpy_initialize_objfile_builder);
+
+/* Method table for gdb.ObjfileBuilder.  build takes no arguments; the
+   three add_*_symbol methods accept positional and keyword arguments.  The
+   table ends with a NULL sentinel entry.  */
+static PyMethodDef objfile_builder_object_methods[] =
+{
+  { "build", (PyCFunction) objbdpy_build, METH_NOARGS,
+    "build ().\n\
+Build a new objfile containing the symbols added to builder." },
+  { "add_type_symbol", (PyCFunction) objbdpy_add_type_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_type_symbol (name [str], type [gdb.Type], language [str]).\n\
+Add a new type symbol in the given language, associated with the given type." },
+  { "add_label_symbol", (PyCFunction) objbdpy_add_label_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_label_symbol (name [str], address [int], language [str]).\n\
+Add a new label symbol in the given language, at the given address." },
+  { "add_static_symbol", (PyCFunction) objbdpy_add_static_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_static_symbol (name [str], address [int], language [str]).\n\
+Add a new static symbol in the given language, at the given address." },
+  { NULL }
+};
+
+/* The Python type object behind gdb.ObjfileBuilder.  Instances are
+   allocated with PyType_GenericAlloc, constructed by objbdpy_new,
+   initialised by objbdpy_init and destroyed by objbdpy_dealloc; their
+   methods come from objfile_builder_object_methods.  Only
+   Py_TPFLAGS_DEFAULT is set.  */
+PyTypeObject objfile_builder_object_type = {
+  PyVarObject_HEAD_INIT (NULL, 0)
+  "gdb.ObjfileBuilder",               /* tp_name */
+  sizeof (objfile_builder_object),    /* tp_basicsize */
+  0,                                  /* tp_itemsize */
+  objbdpy_dealloc,                    /* tp_dealloc */
+  0,                                  /* tp_vectorcall_offset */
+  nullptr,                            /* tp_getattr */
+  nullptr,                            /* tp_setattr */
+  nullptr,                            /* tp_as_async (tp_compare in Python 2) */
+  nullptr,                            /* tp_repr */
+  nullptr,                            /* tp_as_number */
+  nullptr,                            /* tp_as_sequence */
+  nullptr,                            /* tp_as_mapping */
+  nullptr,                            /* tp_hash  */
+  nullptr,                            /* tp_call */
+  nullptr,                            /* tp_str */
+  nullptr,                            /* tp_getattro */
+  nullptr,                            /* tp_setattro */
+  nullptr,                            /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+  "GDB object file builder",          /* tp_doc */
+  nullptr,                            /* tp_traverse */
+  nullptr,                            /* tp_clear */
+  nullptr,                            /* tp_richcompare */
+  0,                                  /* tp_weaklistoffset */
+  nullptr,                            /* tp_iter */
+  nullptr,                            /* tp_iternext */
+  objfile_builder_object_methods,     /* tp_methods */
+  nullptr,                            /* tp_members */
+  nullptr,                            /* tp_getset */
+  nullptr,                            /* tp_base */
+  nullptr,                            /* tp_dict */
+  nullptr,                            /* tp_descr_get */
+  nullptr,                            /* tp_descr_set */
+  0,                                  /* tp_dictoffset */
+  objbdpy_init,                       /* tp_init */
+  PyType_GenericAlloc,                /* tp_alloc */
+  objbdpy_new,                        /* tp_new */
+};
+
+
diff --git a/gdb/python/py-objfile.c b/gdb/python/py-objfile.c
index ad72f3f042..be21011ce6 100644
--- a/gdb/python/py-objfile.c
+++ b/gdb/python/py-objfile.c
@@ -25,6 +25,7 @@
 #include "build-id.h"
 #include "symtab.h"
 #include "python.h"
+#include "buildsym.h"
 
 struct objfile_object
 {
diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h
index dbd33570a7..fbf9b06af5 100644
--- a/gdb/python/python-internal.h
+++ b/gdb/python/python-internal.h
@@ -480,6 +480,7 @@ struct symtab *symtab_object_to_symtab (PyObject *obj);
 struct symtab_and_line *sal_object_to_symtab_and_line (PyObject *obj);
 frame_info_ptr frame_object_to_frame_info (PyObject *frame_obj);
 struct gdbarch *arch_object_to_gdbarch (PyObject *obj);
+struct floatformat *float_format_object_as_float_format (PyObject *self);
 
 /* Convert Python object OBJ to a program_space pointer.  OBJ must be a
    gdb.Progspace reference.  Return nullptr if the gdb.Progspace is not
-- 
2.40.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-07-07 23:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-06  1:37 [PATCH 1/1] Add support for symbol addition to the Python API dark.ryu.550
2023-01-06 20:21 ` Simon Marchi
2023-01-12  2:00   ` [PATCH] " Matheus Branco Borella
2023-05-27  1:24 Matheus Branco Borella
2023-07-04 15:14 ` Andrew Burgess
2023-07-07 23:13   ` Matheus Branco Borella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).