public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: Binutils <binutils@sourceware.org>
Cc: Dave Korn <dave.korn.cygwin@gmail.com>
Subject: [PATCH 5/5] COFF: use hash for string table also when copying / stripping
Date: Fri, 6 May 2022 08:55:40 +0200	[thread overview]
Message-ID: <f96d64bf-92d4-c50c-4167-efa4fb466f5a@suse.com> (raw)
In-Reply-To: <4242b48a-f2c3-4af2-db1e-35dbbbdc1b2e@suse.com>

Without hashing, the string table may grow and hence change e.g. a final
binary (observed with PE/COFF ones) even if there really is no change
otherwise. Using the hash in fact also reduces the overall amount of code,
and in particular the number of places which need to remain in sync.

As far as I can see, copying / stripping has no real equivalent of the
"traditional_format" field used when linking, so hashing is always enabled
when copying / stripping.

--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -847,11 +847,12 @@ coff_mangle_symbols (bfd *bfd_ptr)
     }
 }
 
-static void
+static bool
 coff_write_auxent_fname (bfd *abfd,
 			 char *str,
 			 union internal_auxent *auxent,
-			 bfd_size_type *string_size_p)
+			 struct bfd_strtab_hash *strtab,
+			 bool hash)
 {
   unsigned int str_length = strlen (str);
   unsigned int filnmlen = bfd_coff_filnmlen (abfd);
@@ -862,9 +863,13 @@ coff_write_auxent_fname (bfd *abfd,
 	strncpy (auxent->x_file.x_n.x_fname, str, filnmlen);
       else
 	{
-	  auxent->x_file.x_n.x_n.x_offset = *string_size_p + STRING_SIZE_SIZE;
+	  bfd_size_type indx = _bfd_stringtab_add (strtab, str, hash, false);
+
+	  if (indx == (bfd_size_type) -1)
+	    return false;
+
+	  auxent->x_file.x_n.x_n.x_offset = STRING_SIZE_SIZE + indx;
 	  auxent->x_file.x_n.x_n.x_zeroes = 0;
-	  *string_size_p += str_length + 1;
 	}
     }
   else
@@ -873,18 +878,22 @@ coff_write_auxent_fname (bfd *abfd,
       if (str_length > filnmlen)
 	str[filnmlen] = '\0';
     }
+
+  return true;
 }
 
-static void
+static bool
 coff_fix_symbol_name (bfd *abfd,
 		      asymbol *symbol,
 		      combined_entry_type *native,
-		      bfd_size_type *string_size_p,
+		      struct bfd_strtab_hash *strtab,
+		      bool hash,
 		      asection **debug_string_section_p,
 		      bfd_size_type *debug_string_size_p)
 {
   unsigned int name_length;
   char *name = (char *) (symbol->name);
+  bfd_size_type indx;
 
   if (name == NULL)
     {
@@ -900,17 +909,20 @@ coff_fix_symbol_name (bfd *abfd,
     {
       if (bfd_coff_force_symnames_in_strings (abfd))
 	{
-	  native->u.syment._n._n_n._n_offset =
-	      (*string_size_p + STRING_SIZE_SIZE);
+	  indx = _bfd_stringtab_add (strtab, ".file", hash, false);
+	  if (indx == (bfd_size_type) -1)
+	    return false;
+
+	  native->u.syment._n._n_n._n_offset = STRING_SIZE_SIZE + indx;
 	  native->u.syment._n._n_n._n_zeroes = 0;
-	  *string_size_p += 6;  /* strlen(".file") + 1 */
 	}
       else
 	strncpy (native->u.syment._n._n_name, ".file", SYMNMLEN);
 
       BFD_ASSERT (! (native + 1)->is_sym);
-      coff_write_auxent_fname (abfd, name, &(native + 1)->u.auxent,
-			       string_size_p);
+      if (!coff_write_auxent_fname (abfd, name, &(native + 1)->u.auxent,
+			       strtab, hash))
+	return false;
     }
   else
     {
@@ -920,10 +932,12 @@ coff_fix_symbol_name (bfd *abfd,
 
       else if (!bfd_coff_symname_in_debug (abfd, &native->u.syment))
 	{
-	  native->u.syment._n._n_n._n_offset = (*string_size_p
-						+ STRING_SIZE_SIZE);
+	  indx = _bfd_stringtab_add (strtab, name, hash, false);
+	  if (indx == (bfd_size_type) -1)
+	    return false;
+
+	  native->u.syment._n._n_n._n_offset = STRING_SIZE_SIZE + indx;
 	  native->u.syment._n._n_n._n_zeroes = 0;
-	  *string_size_p += name_length + 1;
 	}
       else
 	{
@@ -964,6 +978,8 @@ coff_fix_symbol_name (bfd *abfd,
 	  *debug_string_size_p += name_length + 1 + prefix_len;
 	}
     }
+
+  return true;
 }
 
 /* We need to keep track of the symbol index so that when we write out
@@ -979,7 +995,8 @@ coff_write_symbol (bfd *abfd,
 		   asymbol *symbol,
 		   combined_entry_type *native,
 		   bfd_vma *written,
-		   bfd_size_type *string_size_p,
+		   struct bfd_strtab_hash *strtab,
+		   bool hash,
 		   asection **debug_string_section_p,
 		   bfd_size_type *debug_string_size_p)
 {
@@ -1011,8 +1028,9 @@ coff_write_symbol (bfd *abfd,
     native->u.syment.n_scnum =
       output_section->target_index;
 
-  coff_fix_symbol_name (abfd, symbol, native, string_size_p,
-			debug_string_section_p, debug_string_size_p);
+  if (!coff_fix_symbol_name (abfd, symbol, native, strtab, hash,
+			     debug_string_section_p, debug_string_size_p))
+    return false;
 
   symesz = bfd_coff_symesz (abfd);
   buf = bfd_alloc (abfd, symesz);
@@ -1041,7 +1059,7 @@ coff_write_symbol (bfd *abfd,
 	  if (native->u.syment.n_sclass == C_FILE
 	      && (native + j + 1)->u.auxent.x_file.x_ftype)
 	    coff_write_auxent_fname (abfd, (char *) (native + j + 1)->extrap,
-				     &(native + j + 1)->u.auxent, string_size_p);
+				     &(native + j + 1)->u.auxent, strtab, hash);
 
 	  bfd_coff_swap_aux_out (abfd,
 				 &((native + j + 1)->u.auxent),
@@ -1069,9 +1087,9 @@ bool
 coff_write_alien_symbol (bfd *abfd,
 			 asymbol *symbol,
 			 struct internal_syment *isym,
-			 union internal_auxent *iaux,
 			 bfd_vma *written,
-			 bfd_size_type *string_size_p,
+			 struct bfd_strtab_hash *strtab,
+			 bool hash,
 			 asection **debug_string_section_p,
 			 bfd_size_type *debug_string_size_p)
 {
@@ -1152,12 +1170,10 @@ coff_write_alien_symbol (bfd *abfd,
   else
     native->u.syment.n_sclass = C_EXT;
 
-  ret = coff_write_symbol (abfd, symbol, native, written, string_size_p,
+  ret = coff_write_symbol (abfd, symbol, native, written, strtab, hash,
 			   debug_string_section_p, debug_string_size_p);
   if (isym != NULL)
     *isym = native->u.syment;
-  if (iaux != NULL && native->u.syment.n_numaux)
-    *iaux = native[1].u.auxent;
   return ret;
 }
 
@@ -1167,7 +1183,7 @@ static bool
 coff_write_native_symbol (bfd *abfd,
 			  coff_symbol_type *symbol,
 			  bfd_vma *written,
-			  bfd_size_type *string_size_p,
+			  struct bfd_strtab_hash *strtab,
 			  asection **debug_string_section_p,
 			  bfd_size_type *debug_string_size_p)
 {
@@ -1217,7 +1233,7 @@ coff_write_native_symbol (bfd *abfd,
     }
 
   return coff_write_symbol (abfd, &(symbol->symbol), native, written,
-			    string_size_p, debug_string_section_p,
+			    strtab, true, debug_string_section_p,
 			    debug_string_size_p);
 }
 
@@ -1232,7 +1248,7 @@ null_error_handler (const char *fmt ATTR
 bool
 coff_write_symbols (bfd *abfd)
 {
-  bfd_size_type string_size;
+  struct bfd_strtab_hash *strtab;
   asection *debug_string_section;
   bfd_size_type debug_string_size;
   unsigned int i;
@@ -1240,26 +1256,26 @@ coff_write_symbols (bfd *abfd)
   bfd_vma written = 0;
   asymbol **p;
 
-  string_size = 0;
   debug_string_section = NULL;
   debug_string_size = 0;
 
+  strtab = _bfd_stringtab_init ();
+  if (strtab == NULL)
+    return false;
+
   /* If this target supports long section names, they must be put into
      the string table.  This is supported by PE.  This code must
      handle section names just as they are handled in
-     coff_write_object_contents.  */
+     coff_write_object_contents.  This is why we pass hash as FALSE below.  */
   if (bfd_coff_long_section_names (abfd))
     {
       asection *o;
 
       for (o = abfd->sections; o != NULL; o = o->next)
-	{
-	  size_t len;
-
-	  len = strlen (o->name);
-	  if (len > SCNNMLEN)
-	    string_size += len + 1;
-	}
+	if (strlen (o->name) > SCNNMLEN
+	    && _bfd_stringtab_add (strtab, o->name, false, false)
+	       == (bfd_size_type) -1)
+	  return false;
     }
 
   /* Seek to the right place.  */
@@ -1276,8 +1292,8 @@ coff_write_symbols (bfd *abfd)
       if (c_symbol == (coff_symbol_type *) NULL
 	  || c_symbol->native == (combined_entry_type *) NULL)
 	{
-	  if (!coff_write_alien_symbol (abfd, symbol, NULL, NULL, &written,
-					&string_size, &debug_string_section,
+	  if (!coff_write_alien_symbol (abfd, symbol, NULL, &written,
+					strtab, true, &debug_string_section,
 					&debug_string_size))
 	    return false;
 	}
@@ -1322,7 +1338,7 @@ coff_write_symbols (bfd *abfd)
 	    }
 
 	  if (!coff_write_native_symbol (abfd, c_symbol, &written,
-					 &string_size, &debug_string_section,
+					 strtab, &debug_string_section,
 					 &debug_string_size))
 	    return false;
 	}
@@ -1330,141 +1346,28 @@ coff_write_symbols (bfd *abfd)
 
   obj_raw_syment_count (abfd) = written;
 
-  /* Now write out strings.  */
-  if (string_size != 0)
-    {
-      unsigned int size = string_size + STRING_SIZE_SIZE;
-      bfd_byte buffer[STRING_SIZE_SIZE];
+  /* Now write out strings.
+
+     We would normally not write anything here if there are no strings, but
+     we'll write out 4 so that any stupid coff reader which tries to read the
+     string table even when there isn't one won't croak.  */
+  {
+    bfd_byte buffer[STRING_SIZE_SIZE];
 
 #if STRING_SIZE_SIZE == 4
-      H_PUT_32 (abfd, size, buffer);
+    H_PUT_32 (abfd, _bfd_stringtab_size (strtab) + STRING_SIZE_SIZE, buffer);
 #else
  #error Change H_PUT_32
 #endif
-      if (bfd_bwrite ((void *) buffer, (bfd_size_type) sizeof (buffer), abfd)
-	  != sizeof (buffer))
-	return false;
-
-      /* Handle long section names.  This code must handle section
-	 names just as they are handled in coff_write_object_contents.  */
-      if (bfd_coff_long_section_names (abfd))
-	{
-	  asection *o;
-
-	  for (o = abfd->sections; o != NULL; o = o->next)
-	    {
-	      size_t len;
-
-	      len = strlen (o->name);
-	      if (len > SCNNMLEN)
-		{
-		  if (bfd_bwrite (o->name, (bfd_size_type) (len + 1), abfd)
-		      != len + 1)
-		    return false;
-		}
-	    }
-	}
-
-      for (p = abfd->outsymbols, i = 0;
-	   i < limit;
-	   i++, p++)
-	{
-	  asymbol *q = *p;
-	  size_t name_length = strlen (q->name);
-	  coff_symbol_type *c_symbol = coff_symbol_from (q);
-	  size_t maxlen;
-	  bool is_c_file = false;
-
-	  /* Figure out whether the symbol name should go in the string
-	     table.  Symbol names that are short enough are stored
-	     directly in the syment structure.  File names permit a
-	     different, longer, length in the syment structure.  On
-	     XCOFF, some symbol names are stored in the .debug section
-	     rather than in the string table.  */
-
-	  if (c_symbol == NULL
-	      || c_symbol->native == NULL)
-	    /* This is not a COFF symbol, so it certainly is not a
-	       file name, nor does it go in the .debug section.  */
-	    maxlen = bfd_coff_force_symnames_in_strings (abfd) ? 0 : SYMNMLEN;
-
-	  else if (! c_symbol->native->is_sym)
-	    maxlen = bfd_coff_force_symnames_in_strings (abfd) ? 0 : SYMNMLEN;
-
-	  else if (bfd_coff_symname_in_debug (abfd,
-					      &c_symbol->native->u.syment))
-	    /* This symbol name is in the XCOFF .debug section.
-	       Don't write it into the string table.  */
-	    maxlen = name_length;
-
-	  else if (c_symbol->native->u.syment.n_sclass == C_FILE
-		   && c_symbol->native->u.syment.n_numaux > 0)
-	    {
-	      is_c_file=true;
-	      if (bfd_coff_force_symnames_in_strings (abfd))
-		{
-		  if (bfd_bwrite (".file", (bfd_size_type) 6, abfd) != 6)
-		    return false;
-		}
-	      maxlen = bfd_coff_filnmlen (abfd);
-	    }
-	  else
-	    maxlen = bfd_coff_force_symnames_in_strings (abfd) ? 0 : SYMNMLEN;
-
-	  if (name_length > maxlen)
-	    {
-	      if (bfd_bwrite ((void *) (q->name), (bfd_size_type) name_length + 1,
-			     abfd) != name_length + 1)
-		return false;
-	    }
-
-	  /* Add strings for C_FILE aux entries. */
-	  if (is_c_file
-	      && c_symbol->native->u.syment.n_numaux > 1)
-	    {
-	      for (int j = 1; j < c_symbol->native->u.syment.n_numaux; j++)
-		{
-		  char *str;
-		  size_t str_length;
-
-		  /* Add strings from aux entries only if this isn't the
-		     filename auxiliary entry.  */
-		  if (!c_symbol->native[j + 1].u.auxent.x_file.x_ftype)
-		    continue;
-
-		  if (c_symbol->native[j + 1].u.auxent.x_file.x_n.x_fname[0] != 0)
-		    continue;
-
-		  str = (char *) c_symbol->native[j + 1].extrap;
-		  str_length = strlen (str);
-		  if (str_length > maxlen)
-		    {
-		      if (bfd_bwrite ((void *) (str), (bfd_size_type) str_length + 1,
-				      abfd) != str_length + 1)
-			return false;
-		    }
+    if (bfd_bwrite ((void *) buffer, (bfd_size_type) sizeof (buffer), abfd)
+	!= sizeof (buffer))
+      return false;
 
-		}
-	    }
-	}
-    }
-  else
-    {
-      /* We would normally not write anything here, but we'll write
-	 out 4 so that any stupid coff reader which tries to read the
-	 string table even when there isn't one won't croak.  */
-      unsigned int size = STRING_SIZE_SIZE;
-      bfd_byte buffer[STRING_SIZE_SIZE];
+    if (! _bfd_stringtab_emit (abfd, strtab))
+      return false;
+  }
 
-#if STRING_SIZE_SIZE == 4
-      H_PUT_32 (abfd, size, buffer);
-#else
- #error Change H_PUT_32
-#endif
-      if (bfd_bwrite ((void *) buffer, (bfd_size_type) STRING_SIZE_SIZE, abfd)
-	  != STRING_SIZE_SIZE)
-	return false;
-    }
+  _bfd_stringtab_free (strtab);
 
   /* Make sure the .debug section was created to be the correct size.
      We should create it ourselves on the fly, but we don't because
--- a/bfd/cofflink.c
+++ b/bfd/cofflink.c
@@ -896,10 +896,8 @@ _bfd_coff_final_link (bfd *abfd,
 	      asymbol *sym = bfd_get_outsymbols (sub) [i];
 	      file_ptr pos;
 	      struct internal_syment isym;
-	      union internal_auxent iaux;
-	      bfd_size_type string_size = 0, indx;
 	      bfd_vma written = 0;
-	      bool rewrite = false, hash;
+	      bool rewrite = false;
 
 	      if (! (sym->flags & BSF_LOCAL)
 		  || (sym->flags & (BSF_SECTION_SYM | BSF_DEBUGGING_RELOC
@@ -925,54 +923,12 @@ _bfd_coff_final_link (bfd *abfd,
 					     * symesz;
 	      if (bfd_seek (abfd, pos, SEEK_SET) != 0)
 		goto error_return;
-	      if (! coff_write_alien_symbol(abfd, sym, &isym, &iaux, &written,
-					    &string_size, NULL, NULL))
+	      if (! coff_write_alien_symbol(abfd, sym, &isym, &written,
+					    flaginfo.strtab,
+					    !flaginfo.info->traditional_format,
+					    NULL, NULL))
 		goto error_return;
 
-	      hash = !flaginfo.info->traditional_format;
-
-	      if (string_size >= 6 && isym.n_sclass == C_FILE
-		  && ! isym._n._n_n._n_zeroes && isym.n_numaux)
-		{
-		  indx = _bfd_stringtab_add (flaginfo.strtab, ".file", hash,
-					     false);
-		  if (indx == (bfd_size_type) -1)
-		    goto error_return;
-		  isym._n._n_n._n_offset = STRING_SIZE_SIZE + indx;
-		  bfd_coff_swap_sym_out (abfd, &isym, flaginfo.outsyms);
-		  if (bfd_seek (abfd, pos, SEEK_SET) != 0
-		      || bfd_bwrite (flaginfo.outsyms, symesz,
-				     abfd) != symesz)
-		    goto error_return;
-		  string_size -= 6;
-		}
-
-	      if (string_size)
-		{
-		  indx = _bfd_stringtab_add (flaginfo.strtab,
-					     bfd_asymbol_name (sym), hash,
-					     false);
-		  if (indx == (bfd_size_type) -1)
-		    goto error_return;
-		  if (isym.n_sclass != C_FILE)
-		    {
-		      isym._n._n_n._n_offset = STRING_SIZE_SIZE + indx;
-		      bfd_coff_swap_sym_out (abfd, &isym, flaginfo.outsyms);
-		      rewrite = true;
-		    }
-		  else
-		    {
-		      BFD_ASSERT (isym.n_numaux == 1);
-		      iaux.x_file.x_n.x_n.x_offset = STRING_SIZE_SIZE + indx;
-		      bfd_coff_swap_aux_out (abfd, &iaux, isym.n_type, C_FILE,
-					     0, 1, flaginfo.outsyms + symesz);
-		      if (bfd_seek (abfd, pos + symesz, SEEK_SET) != 0
-			  || bfd_bwrite (flaginfo.outsyms + symesz, symesz,
-					 abfd) != symesz)
-			goto error_return;
-		    }
-		}
-
 	      if (isym.n_sclass == C_FILE)
 		{
 		  if (flaginfo.last_file_index != -1)
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -329,8 +329,8 @@ extern void coff_mangle_symbols
 extern bool coff_write_symbols
   (bfd *);
 extern bool coff_write_alien_symbol
-  (bfd *, asymbol *, struct internal_syment *, union internal_auxent *,
-   bfd_vma *, bfd_size_type *, asection **, bfd_size_type *);
+  (bfd *, asymbol *, struct internal_syment *, bfd_vma *,
+   struct bfd_strtab_hash *, bool, asection **, bfd_size_type *);
 extern bool coff_write_linenumbers
   (bfd *);
 extern alent *coff_get_lineno
--- a/bfd/libcoff.h
+++ b/bfd/libcoff.h
@@ -329,8 +329,8 @@ extern void coff_mangle_symbols
 extern bool coff_write_symbols
   (bfd *);
 extern bool coff_write_alien_symbol
-  (bfd *, asymbol *, struct internal_syment *, union internal_auxent *,
-   bfd_vma *, bfd_size_type *, asection **, bfd_size_type *);
+  (bfd *, asymbol *, struct internal_syment *, bfd_vma *,
+   struct bfd_strtab_hash *, bool, asection **, bfd_size_type *);
 extern bool coff_write_linenumbers
   (bfd *);
 extern alent *coff_get_lineno


      parent reply	other threads:[~2022-05-06  6:55 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-06  6:53 [PATCH 0/5] PE/COFF: assorted adjustments Jan Beulich
2022-05-06  6:54 ` [PATCH 1/5] don't over-align file positions of PE executable sections Jan Beulich
2022-05-06  6:54 ` [PATCH 2/5] COFF: make objcopy / strip honor --keep-file-symbols Jan Beulich
2022-05-06  6:54 ` [PATCH 3/5] COFF/PE: don't leave zero timestamp after objcopy / strip Jan Beulich
2022-05-06  6:55 ` [PATCH 4/5] COFF/PE: keep linker version during " Jan Beulich
2022-05-06  6:55 ` Jan Beulich [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f96d64bf-92d4-c50c-4167-efa4fb466f5a@suse.com \
    --to=jbeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=dave.korn.cygwin@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for URLs of read-only IMAP folder(s) and NNTP newsgroup(s).