public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] ld: Generate PDB string table
@ 2022-11-24  3:40 Mark Harmstone
  2022-11-24  7:24 ` Jan Beulich
  0 siblings, 1 reply; 2+ messages in thread
From: Mark Harmstone @ 2022-11-24  3:40 UTC (permalink / raw)
  To: binutils; +Cc: Mark Harmstone

---
 ld/pdb.c                          | 296 +++++++++++++++++++++++++++++-
 ld/pdb.h                          |  12 ++
 ld/testsuite/ld-pe/pdb-strings.d  |  10 +
 ld/testsuite/ld-pe/pdb-strings1.s |  19 ++
 ld/testsuite/ld-pe/pdb-strings2.s |  19 ++
 ld/testsuite/ld-pe/pdb.exp        | 122 ++++++++++++
 6 files changed, 472 insertions(+), 6 deletions(-)
 create mode 100644 ld/testsuite/ld-pe/pdb-strings.d
 create mode 100644 ld/testsuite/ld-pe/pdb-strings1.s
 create mode 100644 ld/testsuite/ld-pe/pdb-strings2.s

diff --git a/ld/pdb.c b/ld/pdb.c
index 6f69574289d..9e8f0ed1ce3 100644
--- a/ld/pdb.c
+++ b/ld/pdb.c
@@ -41,6 +41,23 @@ struct public
   uint32_t address;
 };
 
+struct string
+{
+  struct string *next;	/* Next entry, in order of insertion.  */
+  uint32_t hash;	/* Value of calc_hash over the LEN bytes of S.  */
+  uint32_t offset;	/* Offset assigned within the string table.  */
+  size_t len;		/* Length of S in bytes; S is not NUL-terminated.  */
+  char s[];		/* The string contents.  */
+};
+
+struct string_table
+{
+  struct string *strings_head;	/* First string added, or NULL if none.  */
+  struct string *strings_tail;	/* Last string added, or NULL if none.  */
+  uint32_t strings_len;		/* Bytes of string data so far; next offset.  */
+  htab_t hashmap;		/* The struct string entries, keyed by contents.  */
+};
+
 /* Add a new stream to the PDB archive, and return its BFD.  */
 static bfd *
 add_stream (bfd *pdb, const char *name, uint16_t *stream_num)
@@ -383,15 +400,170 @@ get_arch_number (bfd *abfd)
   return IMAGE_FILE_MACHINE_I386;
 }
 
+/* Add the LEN-byte string STR to STRINGS, if it's not already there.  */
+static void
+add_string (char *str, size_t len, struct string_table *strings)
+{
+  uint32_t hash = calc_hash (str, len);
+  void **slot;
+
+  slot = htab_find_slot_with_hash (strings->hashmap, str, hash, INSERT);
+  /* A still-empty slot means this string has not been added before.  */
+  if (slot && !*slot)
+    {
+      struct string *s;
+
+      *slot = xmalloc (offsetof (struct string, s) + len);
+
+      s = (struct string *) *slot;
+
+      s->next = NULL;
+      s->hash = hash;
+      s->offset = strings->strings_len;
+      s->len = len;
+      memcpy (s->s, str, len);
+      /* Append to the list, which keeps the strings in insertion order.  */
+      if (strings->strings_tail)
+	strings->strings_tail->next = s;
+      else
+	strings->strings_head = s;
+
+      strings->strings_tail = s;
+      /* Reserve room for the string and the NUL that will follow it.  */
+      strings->strings_len += len + 1;
+    }
+}
+
+/* Return the hash of an entry in the string table.  */
+static hashval_t
+hash_string_table_entry (const void *p)
+{
+  const struct string *s = p;
+  /* The hash was computed by add_string and is cached in the entry.  */
+  return s->hash;
+}
+
+/* Return nonzero if string table entry A equals NUL-terminated string B.  */
+static int
+eq_string_table_entry (const void *a, const void *b)
+{
+  const struct string *s1 = a;
+  const char *s2 = b;
+  size_t s2_len = strlen (s2);
+
+  if (s2_len != s1->len)
+    return 0;
+  /* Entries are stored without a terminator, so compare by length.  */
+  return memcmp (s1->s, s2, s2_len) == 0;
+}
+
+/* Parse the string table within the .debug$S section.  */
+static void
+parse_string_table (bfd_byte *data, size_t size,
+		    struct string_table *strings)
+{
+  while (size > 0)
+    {
+      size_t len = strnlen ((char *) data, size);
+
+      /* Lookups run strlen on the key, so a final string with no NUL
+	 terminator cannot be added safely; stop at it instead.  */
+      if (len == size)
+	break;
+
+      add_string ((char *) data, len, strings);
+      data += len + 1;
+      size -= len + 1;
+    }
+}
+
+/* Parse .debug$S section S of object file MOD, adding strings to STRINGS.  */
+static bool
+handle_debugs_section (asection *s, bfd *mod, struct string_table *strings)
+{
+  bfd_byte *data = NULL;
+  size_t off;
+
+  if (!bfd_get_full_section_contents (mod, s, &data))
+    return false;
+
+  if (!data)
+    return false;
+  /* Sections without the C13 signature are skipped, not rejected.  */
+  if (bfd_getl32 (data) != CV_SIGNATURE_C13)
+    {
+      free (data);
+      return true;
+    }
+  /* Walk the subsections: a 32-bit type, a 32-bit size, then the data.  */
+  off = sizeof (uint32_t);
+
+  while (off + sizeof (uint32_t) <= s->size)
+    {
+      uint32_t type, size;
+
+      type = bfd_getl32 (data + off);
+
+      off += sizeof (uint32_t);
+
+      if (off + sizeof (uint32_t) > s->size)
+	{
+	  free (data);
+	  bfd_set_error (bfd_error_bad_value);
+	  return false;
+	}
+
+      size = bfd_getl32 (data + off);
+
+      off += sizeof (uint32_t);
+      /* Compare by subtraction, as OFF + SIZE could wrap around.  */
+      if (size > s->size - off)
+	{
+	  free (data);
+	  bfd_set_error (bfd_error_bad_value);
+	  return false;
+	}
+
+      switch (type)
+	{
+	case DEBUG_S_STRINGTABLE:
+	  parse_string_table (data + off, size, strings);
+
+	  break;
+	}
+
+      off += size;
+      /* Round OFF up to the next multiple of four bytes.  */
+      if (off % sizeof (uint32_t))
+	off += sizeof (uint32_t) - (off % sizeof (uint32_t));
+    }
+
+  free (data);
+
+  return true;
+}
+
 /* Populate the module stream, which consists of the transformed .debug$S
    data for each object file.  */
 static bool
-populate_module_stream (bfd *stream, uint32_t *sym_byte_size)
+populate_module_stream (bfd *stream, bfd *mod, uint32_t *sym_byte_size,
+			struct string_table *strings)
 {
   uint8_t int_buf[sizeof (uint32_t)];
 
   *sym_byte_size = sizeof (uint32_t);
 
+  /* Process .debug$S section(s).  */
+
+  for (asection *s = mod->sections; s; s = s->next)
+    {
+      if (!strcmp (s->name, ".debug$S") && s->size >= sizeof (uint32_t))
+	{
+	  if (!handle_debugs_section (s, mod, strings))
+	      return false;
+	}
+    }
+
   /* Write the signature.  */
 
   bfd_putl32 (CV_SIGNATURE_C13, int_buf);
@@ -412,7 +584,7 @@ populate_module_stream (bfd *stream, uint32_t *sym_byte_size)
 /* Create the module info substream within the DBI.  */
 static bool
 create_module_info_substream (bfd *abfd, bfd *pdb, void **data,
-			      uint32_t *size)
+			      uint32_t *size, struct string_table *strings)
 {
   uint8_t *ptr;
 
@@ -482,7 +654,8 @@ create_module_info_substream (bfd *abfd, bfd *pdb, void **data,
 	  return false;
 	}
 
-      if (!populate_module_stream (stream, &sym_byte_size))
+      if (!populate_module_stream (stream, in, &sym_byte_size,
+				   strings))
 	{
 	  free (*data);
 	  return false;
@@ -687,14 +860,16 @@ static bool
 populate_dbi_stream (bfd *stream, bfd *abfd, bfd *pdb,
 		     uint16_t section_header_stream_num,
 		     uint16_t sym_rec_stream_num,
-		     uint16_t publics_stream_num)
+		     uint16_t publics_stream_num,
+		     struct string_table *strings)
 {
   struct pdb_dbi_stream_header h;
   struct optional_dbg_header opt;
   void *mod_info, *sc;
   uint32_t mod_info_size, sc_size;
 
-  if (!create_module_info_substream (abfd, pdb, &mod_info, &mod_info_size))
+  if (!create_module_info_substream (abfd, pdb, &mod_info, &mod_info_size,
+				     strings))
     return false;
 
   if (!create_section_contrib_substream (abfd, &sc, &sc_size))
@@ -1107,6 +1282,95 @@ create_section_header_stream (bfd *pdb, bfd *abfd, uint16_t *num)
   return true;
 }
 
+/* Populate the "/names" named stream, which contains the string table.  */
+static bool
+populate_names_stream (bfd *stream, struct string_table *strings)
+{
+  char int_buf[sizeof (uint32_t)];
+  struct string_table_header h;
+  uint32_t num_strings = 0, num_buckets;
+  struct string **buckets;
+
+  bfd_putl32 (STRING_TABLE_SIGNATURE, &h.signature);
+  bfd_putl32 (STRING_TABLE_VERSION, &h.version);
+
+  if (bfd_bwrite (&h, sizeof (h), stream) != sizeof (h))
+    return false;
+  /* Next comes the total size of the string data.  */
+  bfd_putl32 (strings->strings_len, int_buf);
+
+  if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != sizeof (uint32_t))
+    return false;
+  /* The string data starts with a single zero byte.  */
+  int_buf[0] = 0;
+
+  if (bfd_bwrite (int_buf, 1, stream) != 1)
+    return false;
+  /* Write each string followed by a NUL, counting them as we go.  */
+  for (struct string *s = strings->strings_head; s; s = s->next)
+    {
+      if (bfd_bwrite (s->s, s->len, stream) != s->len)
+	return false;
+
+      if (bfd_bwrite (int_buf, 1, stream) != 1)
+	return false;
+
+      num_strings++;
+    }
+  /* Allocate twice as many buckets as there are strings.  */
+  num_buckets = num_strings * 2;
+
+  buckets = xmalloc (sizeof (struct string *) * num_buckets);
+  memset (buckets, 0, sizeof (struct string *) * num_buckets);
+  /* Place each string at hash % num_buckets, or in the next free bucket.  */
+  for (struct string *s = strings->strings_head; s; s = s->next)
+    {
+      uint32_t bucket_num = s->hash % num_buckets;
+
+      while (buckets[bucket_num])
+	{
+	  bucket_num++;
+
+	  if (bucket_num == num_buckets)
+	    bucket_num = 0;
+	}
+
+      buckets[bucket_num] = s;
+    }
+  /* Write the bucket count, then the string offset held by each bucket.  */
+  bfd_putl32 (num_buckets, int_buf);
+
+  if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != sizeof (uint32_t))
+    {
+      free (buckets);
+      return false;
+    }
+
+  for (unsigned int i = 0; i < num_buckets; i++)
+    {
+      if (buckets[i])
+	bfd_putl32 (buckets[i]->offset, int_buf);
+      else
+	bfd_putl32 (0, int_buf);
+
+      if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) !=
+	  sizeof (uint32_t))
+	{
+	  free (buckets);
+	  return false;
+	}
+    }
+
+  free (buckets);
+  /* The stream ends with the number of strings.  */
+  bfd_putl32 (num_strings, int_buf);
+
+  if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != sizeof (uint32_t))
+    return false;
+
+  return true;
+}
+
 /* Create a PDB debugging file for the PE image file abfd with the build ID
    guid, stored at pdb_name.  */
 bool
@@ -1117,6 +1381,7 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
   bfd *info_stream, *dbi_stream, *names_stream, *sym_rec_stream,
     *publics_stream;
   uint16_t section_header_stream_num, sym_rec_stream_num, publics_stream_num;
+  struct string_table strings;
 
   pdb = bfd_openw (pdb_name, "pdb");
   if (!pdb)
@@ -1125,6 +1390,13 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
       return false;
     }
 
+  strings.strings_head = NULL;
+  strings.strings_tail = NULL;
+  strings.strings_len = 1;
+  strings.hashmap = htab_create_alloc (0, hash_string_table_entry,
+				       eq_string_table_entry, free,
+				       xcalloc, free);
+
   bfd_set_format (pdb, bfd_archive);
 
   if (!create_old_directory_stream (pdb))
@@ -1201,13 +1473,23 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
     }
 
   if (!populate_dbi_stream (dbi_stream, abfd, pdb, section_header_stream_num,
-			    sym_rec_stream_num, publics_stream_num))
+			    sym_rec_stream_num, publics_stream_num,
+			    &strings))
     {
       einfo (_("%P: warning: cannot populate DBI stream "
 	       "in PDB file: %E\n"));
       goto end;
     }
 
+  add_string ("", 0, &strings);
+
+  if (!populate_names_stream (names_stream, &strings))
+    {
+      einfo (_("%P: warning: cannot populate names stream "
+	       "in PDB file: %E\n"));
+      goto end;
+    }
+
   if (!populate_publics_stream (publics_stream, abfd, sym_rec_stream))
     {
       einfo (_("%P: warning: cannot populate publics stream "
@@ -1227,5 +1509,7 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
 end:
   bfd_close (pdb);
 
+  htab_delete (strings.hashmap);
+
   return ret;
 }
diff --git a/ld/pdb.h b/ld/pdb.h
index e22dea18eca..611f71041c0 100644
--- a/ld/pdb.h
+++ b/ld/pdb.h
@@ -155,6 +155,18 @@ struct optional_dbg_header
 
 #define CV_SIGNATURE_C13		4
 
+#define DEBUG_S_STRINGTABLE		0xf3
+
+#define STRING_TABLE_SIGNATURE		0xeffeeffe
+#define STRING_TABLE_VERSION		1
+
+/* VHdr in nmt.h */
+struct string_table_header
+{
+  uint32_t signature;	/* Written as STRING_TABLE_SIGNATURE.  */
+  uint32_t version;	/* Written as STRING_TABLE_VERSION.  */
+};
+
 #define SECTION_CONTRIB_VERSION_60	0xf12eba2d
 
 /* SC in dbicommon.h */
diff --git a/ld/testsuite/ld-pe/pdb-strings.d b/ld/testsuite/ld-pe/pdb-strings.d
new file mode 100644
index 00000000000..8be853efb72
--- /dev/null
+++ b/ld/testsuite/ld-pe/pdb-strings.d
@@ -0,0 +1,10 @@
+
+*:     file format binary
+
+Contents of section .data:
+ 0000 feeffeef 01000000 17000000 0000666f  ..............fo
+ 0010 6f006261 72006261 7a007175 78007175  o.bar.baz.qux.qu
+ 0020 7578000c 00000001 0000000a 00000000  ux..............
+ 0030 00000000 00000000 00000012 00000000  ................
+ 0040 00000000 00000002 00000006 00000000  ................
+ 0050 0000000e 00000006 000000             ...........     
\ No newline at end of file
diff --git a/ld/testsuite/ld-pe/pdb-strings1.s b/ld/testsuite/ld-pe/pdb-strings1.s
new file mode 100644
index 00000000000..09eedd93fb3
--- /dev/null
+++ b/ld/testsuite/ld-pe/pdb-strings1.s
@@ -0,0 +1,19 @@
+.equ CV_SIGNATURE_C13, 4
+.equ DEBUG_S_STRINGTABLE, 0xf3
+
+.section ".debug$S", "rn"
+.long CV_SIGNATURE_C13
+.long DEBUG_S_STRINGTABLE
+.long .strings_end - .strings_start
+
+.strings_start:
+
+.asciz ""
+.asciz "foo"
+.asciz "bar"
+.asciz "baz"
+.asciz "qux"
+
+.strings_end:
+
+.balign 4
diff --git a/ld/testsuite/ld-pe/pdb-strings2.s b/ld/testsuite/ld-pe/pdb-strings2.s
new file mode 100644
index 00000000000..33d9215e4c8
--- /dev/null
+++ b/ld/testsuite/ld-pe/pdb-strings2.s
@@ -0,0 +1,19 @@
+.equ CV_SIGNATURE_C13, 4
+.equ DEBUG_S_STRINGTABLE, 0xf3
+
+.section ".debug$S", "rn"
+.long CV_SIGNATURE_C13
+.long DEBUG_S_STRINGTABLE
+.long .strings_end - .strings_start
+
+.strings_start:
+
+.asciz ""
+.asciz "bar"
+.asciz "baz"
+.asciz "qux"
+.asciz "quux"
+
+.strings_end:
+
+.balign 4
diff --git a/ld/testsuite/ld-pe/pdb.exp b/ld/testsuite/ld-pe/pdb.exp
index 0be65e22fb6..09e9b4a8809 100644
--- a/ld/testsuite/ld-pe/pdb.exp
+++ b/ld/testsuite/ld-pe/pdb.exp
@@ -703,5 +703,127 @@ proc test2 { } {
     test_section_contrib $section_contrib
 }
 
+proc find_named_stream { pdb name } {
+    global ar
+    # Return the value stored for NAME in stream 0001 of PDB, or 0 on failure.
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir $pdb 0001"]
+
+    if ![string match "" $exec_output] {
+	return 0
+    }
+
+    set fi [open tmpdir/0001]
+    fconfigure $fi -translation binary
+    # The name strings are preceded by their 32-bit length at offset 0x1c.
+    seek $fi 0x1c
+
+    set data [read $fi 4]
+    binary scan $data i string_len
+
+    set strings [read $fi $string_len]
+
+    set string_off 0
+    # Find the offset of NAME among the NUL-terminated strings.
+    while {[string first \000 $strings $string_off] != -1 } {
+	set str [string range $strings $string_off [expr [string first \000 $strings $string_off] - 1]]
+
+	if { $str eq $name } {
+	    break
+	}
+
+	incr string_off [expr [string length $str] + 1]
+    }
+
+    if { [string length $strings] == $string_off } { # string not found
+	close $fi
+	return 0
+    }
+    # Read the entry count, then skip the following 32-bit field.
+    set data [read $fi 4]
+    binary scan $data i num_entries
+
+    seek $fi 4 current
+    # Skip both bitmaps, whose lengths are counted in 32-bit words.
+    set data [read $fi 4]
+    binary scan $data i present_bitmap_len
+
+    seek $fi [expr $present_bitmap_len * 4] current
+
+    set data [read $fi 4]
+    binary scan $data i deleted_bitmap_len
+
+    seek $fi [expr $deleted_bitmap_len * 4] current
+    # Entries are pairs of 32-bit values: a string offset, then its value.
+    for {set i 0} {$i < $num_entries} {incr i} {
+	set data [read $fi 4]
+	binary scan $data i offset
+
+	if { $offset == $string_off } {
+	    set data [read $fi 4]
+	    binary scan $data i value
+	    close $fi
+
+	    return $value
+	}
+
+	seek $fi 4 current
+    }
+
+    close $fi
+
+    return 0
+}
+
+proc test3 { } {
+    global as
+    global ar
+    global ld
+    global objdump
+    global srcdir
+    global subdir
+    # Link two objects into a PE image with a PDB, then check its /names stream.
+    if ![ld_assemble $as $srcdir/$subdir/pdb-strings1.s tmpdir/pdb-strings1.o] {
+	unsupported "Build pdb-strings1.o"
+	return
+    }
+
+    if ![ld_assemble $as $srcdir/$subdir/pdb-strings2.s tmpdir/pdb-strings2.o] {
+	unsupported "Build pdb-strings2.o"
+	return
+    }
+
+    if ![ld_link $ld "tmpdir/pdb-strings.exe" "--pdb=tmpdir/pdb-strings.pdb tmpdir/pdb-strings1.o tmpdir/pdb-strings2.o"] {
+	unsupported "Create PE image with PDB file"
+	return
+    }
+    # Look up the index of the /names stream; 0 means it was not found.
+    set index [find_named_stream "tmpdir/pdb-strings.pdb" "/names"]
+
+    if { $index == 0 } {
+	fail "Could not find /names stream"
+	return
+    } else {
+	pass "Found /names stream"
+    }
+    # Streams are extracted by their index, written as four hex digits.
+    set index_str [format "%04x" $index]
+
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir tmpdir/pdb-strings.pdb $index_str"]
+
+    if ![string match "" $exec_output] {
+	return 0
+    }
+    # Compare a hex dump of the extracted stream with the expected contents.
+    set exp [file_contents "$srcdir/$subdir/pdb-strings.d"]
+    set got [run_host_cmd "$objdump" "-s --target=binary tmpdir/$index_str"]
+
+    if ![string match $exp $got] {
+	fail "Strings table was not as expected"
+    } else {
+	pass "Strings table was as expected"
+    }
+}
+
 test1
 test2
+test3
-- 
2.37.4


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] ld: Generate PDB string table
  2022-11-24  3:40 [PATCH] ld: Generate PDB string table Mark Harmstone
@ 2022-11-24  7:24 ` Jan Beulich
  0 siblings, 0 replies; 2+ messages in thread
From: Jan Beulich @ 2022-11-24  7:24 UTC (permalink / raw)
  To: Mark Harmstone; +Cc: binutils

On 24.11.2022 04:40, Mark Harmstone wrote:
> ---

A little bit of a description would be nice in almost every patch.
Beyond that just one remark for the moment (I may be able to get to
this later):

> +/* Return the hash of an entry in the string table.  */
> +static hashval_t
> +hash_string_table_entry (const void * p)
> +{
> +  struct string *s = (struct string *) p;

Whenever possible please avoid casting away const (or modifiers in
general). Here I don't see a need for a cast in the first place:
"const void *" converts fine to "const struct ... *" in C.

Jan

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-11-24  7:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-24  3:40 [PATCH] ld: Generate PDB string table Mark Harmstone
2022-11-24  7:24 ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).