public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH V4] optimize handle_COMDAT
@ 2023-08-23 21:59 Oleg Tolmatcev
  2023-08-24  6:21 ` Alan Modra
  0 siblings, 1 reply; 2+ messages in thread
From: Oleg Tolmatcev @ 2023-08-23 21:59 UTC (permalink / raw)
  To: binutils

[-- Attachment #1: Type: text/plain, Size: 131 bytes --]

Thank you Alan, I could reproduce the bugs and I have fixed them.
"make check" does not fail anymore.

Best regards
Oleg Tolmatcev

[-- Attachment #2: 0001-optimize-handle_COMDAT.patch --]
[-- Type: application/octet-stream, Size: 14553 bytes --]

From da7c00b3145b04e1fa208b97cf8233a0e42a4050 Mon Sep 17 00:00:00 2001
From: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
Date: Sun, 18 Jun 2023 19:35:38 +0200
Subject: [PATCH] optimize handle_COMDAT

Signed-off-by: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
---
 bfd/coffcode.h   | 289 +++++++++++++++++++++++++++--------------------
 bfd/coffgen.c    |   8 ++
 bfd/libcoff-in.h |  12 ++
 bfd/peicode.h    |  17 +++
 4 files changed, 202 insertions(+), 124 deletions(-)

diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 6789f7f3cc..35d40a9501 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -352,6 +352,7 @@ CODE_FRAGMENT
 */
 
 #include "libiberty.h"
+#include <string.h>
 
 #ifdef COFF_WITH_PE
 #include "peicode.h"
@@ -852,19 +853,19 @@ styp_to_sec_flags (bfd *abfd,
 
 #else /* COFF_WITH_PE */
 
+static struct comdat_hash_entry *
+find_flags (htab_t comdat_hash, int target_index)
+{
+  struct comdat_hash_entry needle;
+  needle.target_index = target_index;
+
+  return htab_find (comdat_hash, &needle);
+}
+
 static bool
-handle_COMDAT (bfd * abfd,
-	       flagword *sec_flags,
-	       void * hdr,
-	       const char *name,
-	       asection *section)
+fill_comdat_hash (bfd *abfd)
 {
-  struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
   bfd_byte *esymstart, *esym, *esymend;
-  int seen_state = 0;
-  char *target_name = NULL;
-
-  *sec_flags |= SEC_LINK_ONCE;
 
   /* Unfortunately, the PE format stores essential information in
      the symbol table, of all places.  We need to extract that
@@ -895,13 +896,10 @@ handle_COMDAT (bfd * abfd,
     {
       char buf[SYMNMLEN + 1];
       const char *symname;
+      flagword sec_flags = SEC_LINK_ONCE;
 
       bfd_coff_swap_sym_in (abfd, esym, &isym);
 
-      BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN);
-
-      if (isym.n_scnum == section->target_index)
-	{
 	  /* According to the MSVC documentation, the first
 	     TWO entries with the section # are both of
 	     interest to us.  The first one is the "section
@@ -934,121 +932,76 @@ handle_COMDAT (bfd * abfd,
 	    {
 	      _bfd_error_handler (_("%pB: unable to load COMDAT section name"),
 				  abfd);
-	      return false;
+	      continue;
 	    }
 
-	  switch (seen_state)
-	    {
-	    case 0:
-	      {
-		/* The first time we've seen the symbol.  */
-		union internal_auxent aux;
-
-		/* If it isn't the stuff we're expecting, die;
-		   The MS documentation is vague, but it
-		   appears that the second entry serves BOTH
-		   as the comdat symbol and the defining
-		   symbol record (either C_STAT or C_EXT,
-		   possibly with an aux entry with debug
-		   information if it's a function.)  It
-		   appears the only way to find the second one
-		   is to count.  (On Intel, they appear to be
-		   adjacent, but on Alpha, they have been
-		   found separated.)
-
-		   Here, we think we've found the first one,
-		   but there's some checking we can do to be
-		   sure.  */
-
-		if (! ((isym.n_sclass == C_STAT
-			|| isym.n_sclass == C_EXT)
-		       && BTYPE (isym.n_type) == T_NULL
-		       && isym.n_value == 0))
-		  {
-		    /* Malformed input files can trigger this test.
-		       cf PR 21781.  */
-		    _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT section"),
-					abfd, symname);
-		    return false;
-		  }
+      union internal_auxent aux;
 
-		/* FIXME LATER: MSVC generates section names
-		   like .text for comdats.  Gas generates
-		   names like .text$foo__Fv (in the case of a
-		   function).  See comment above for more.  */
+      struct comdat_hash_entry needle;
+      needle.target_index = isym.n_scnum;
 
-		if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0)
-		  /* xgettext:c-format */
-		  _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'"
-					" does not match section name '%s'"),
-				      abfd, symname, name);
-
-		/* This is the section symbol.  */
-		seen_state = 1;
-		target_name = strchr (name, '$');
-		if (target_name != NULL)
-		  {
-		    /* Gas mode.  */
-		    seen_state = 2;
-		    /* Skip the `$'.  */
-		    target_name += 1;
-		  }
+      void **slot
+	  = htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT);
+      if (slot == NULL)
+	return false;
 
-		if (isym.n_numaux == 0)
-		  aux.x_scn.x_comdat = 0;
-		else
-		  {
+      if (*slot == NULL)
+	{
+	  if (isym.n_numaux == 0)
+	    aux.x_scn.x_comdat = 0;
+	  else
+	    {
 		    /* PR 17512: file: e2cfe54f.  */
-		    if (esym + bfd_coff_symesz (abfd) >= esymend)
-		      {
-			/* xgettext:c-format */
-			_bfd_error_handler (_("%pB: warning: no symbol for"
-					      " section '%s' found"),
-					    abfd, symname);
-			break;
-		      }
-		    bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd),
-					  isym.n_type, isym.n_sclass,
-					  0, isym.n_numaux, &aux);
-		  }
+	      if (esym + bfd_coff_symesz (abfd) >= esymend)
+		{
+		  /* xgettext:c-format */
+		  _bfd_error_handler (_ ("%pB: warning: no symbol for"
+					 " section '%s' found"),
+				      abfd, symname);
+		  continue;
+		}
+	      bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)),
+				    isym.n_type, isym.n_sclass, 0,
+				    isym.n_numaux, &aux);
+	    }
 
-		/* FIXME: Microsoft uses NODUPLICATES and
-		   ASSOCIATIVE, but gnu uses ANY and
-		   SAME_SIZE.  Unfortunately, gnu doesn't do
-		   the comdat symbols right.  So, until we can
-		   fix it to do the right thing, we are
-		   temporarily disabling comdats for the MS
-		   types (they're used in DLLs and C++, but we
-		   don't support *their* C++ libraries anyway
-		   - DJ.  */
-
-		/* Cygwin does not follow the MS style, and
-		   uses ANY and SAME_SIZE where NODUPLICATES
-		   and ASSOCIATIVE should be used.  For
-		   Interix, we just do the right thing up
-		   front.  */
-
-		switch (aux.x_scn.x_comdat)
-		  {
-		  case IMAGE_COMDAT_SELECT_NODUPLICATES:
+	  /* FIXME: Microsoft uses NODUPLICATES and
+	     ASSOCIATIVE, but gnu uses ANY and
+	     SAME_SIZE.  Unfortunately, gnu doesn't do
+	     the comdat symbols right.  So, until we can
+	     fix it to do the right thing, we are
+	     temporarily disabling comdats for the MS
+	     types (they're used in DLLs and C++, but we
+	     don't support *their* C++ libraries anyway
+	     - DJ.  */
+
+	  /* Cygwin does not follow the MS style, and
+	     uses ANY and SAME_SIZE where NODUPLICATES
+	     and ASSOCIATIVE should be used.  For
+	     Interix, we just do the right thing up
+	     front.  */
+
+	  switch (aux.x_scn.x_comdat)
+	    {
+	    case IMAGE_COMDAT_SELECT_NODUPLICATES:
 #ifdef STRICT_PE_FORMAT
-		    *sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+	      sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
 #else
-		    *sec_flags &= ~SEC_LINK_ONCE;
+	      sec_flags &= ~SEC_LINK_ONCE;
 #endif
 		    break;
 
 		  case IMAGE_COMDAT_SELECT_ANY:
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
 		    break;
 
 		  case IMAGE_COMDAT_SELECT_SAME_SIZE:
-		    *sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
+	      sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
 		    break;
 
 		  case IMAGE_COMDAT_SELECT_EXACT_MATCH:
 		    /* Not yet fully implemented ??? */
-		    *sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
+	      sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
 		    break;
 
 		    /* debug$S gets this case; other
@@ -1061,24 +1014,42 @@ handle_COMDAT (bfd * abfd,
 		  case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
 #ifdef STRICT_PE_FORMAT
 		    /* FIXME: This is not currently implemented.  */
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
 #else
-		    *sec_flags &= ~SEC_LINK_ONCE;
+	      sec_flags &= ~SEC_LINK_ONCE;
 #endif
 		    break;
 
 		  default:  /* 0 means "no symbol" */
 		    /* debug$F gets this case; other
 		       implications ??? */
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
 		    break;
-		  }
-	      }
-	      break;
+	    }
 
-	    case 2:
+	  *slot = bfd_zmalloc (sizeof (struct comdat_hash_entry));
+	  if (*slot == NULL)
+	    return false;
+	  struct comdat_hash_entry *newentry = *slot;
+	  newentry->sec_flags = sec_flags;
+	  newentry->symname = bfd_strdup (symname);
+	  newentry->target_index = isym.n_scnum;
+	  newentry->isym = isym;
+	  newentry->comdat_symbol = -1;
+	}
+      else
+	{
+	  struct comdat_hash_entry *entry = *slot;
+
+          if (entry->comdat_symbol != -1)
+            continue;
+
+	  char *target_name = strchr (entry->symname, '$');
+	  if (target_name != NULL)
+	    {
 	      /* Gas mode: the first matching on partial name.  */
 
+	      target_name += 1;
 #ifndef TARGET_UNDERSCORE
 #define TARGET_UNDERSCORE 0
 #endif
@@ -1089,16 +1060,26 @@ handle_COMDAT (bfd * abfd,
 		  /* Not the name we're looking for */
 		  continue;
 		}
-	      /* Fall through.  */
-	    case 1:
+	    }
 	      /* MSVC mode: the lexically second symbol (or
 		 drop through from the above).  */
-	      {
 		/* This must the second symbol with the
 		   section #.  It is the actual symbol name.
 		   Intel puts the two adjacent, but Alpha (at
 		   least) spreads them out.  */
 
+	  entry->comdat_symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+	  entry->comdat_name = bfd_strdup (symname);
+	}
+    }
+
+  return true;
+}
+
+static bool
+insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname,
+			 long symbol)
+{
 		struct coff_comdat_info *comdat;
 		size_t len = strlen (symname) + 1;
 
@@ -1107,16 +1088,76 @@ handle_COMDAT (bfd * abfd,
 		  return false;
 
 		coff_section_data (abfd, section)->comdat = comdat;
-		comdat->symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+		comdat->symbol = symbol;
 		char *newname = (char *) (comdat + 1);
 		comdat->name = newname;
 		memcpy (newname, symname, len);
 		return true;
-	      }
-	    }
+}
+
+static bool
+handle_COMDAT (bfd *abfd, flagword *sec_flags, const char *name,
+	       asection *section)
+{
+  if (htab_elements (pe_data (abfd)->comdat_hash) == 0)
+    if (! fill_comdat_hash (abfd))
+      return false;
+
+  struct comdat_hash_entry *found
+      = find_flags (pe_data (abfd)->comdat_hash, section->target_index);
+  if (found != NULL)
+    {
+
+      struct internal_syment isym = found->isym;
+
+      /* If it isn't the stuff we're expecting, die;
+	 The MS documentation is vague, but it
+	 appears that the second entry serves BOTH
+	 as the comdat symbol and the defining
+	 symbol record (either C_STAT or C_EXT,
+	 possibly with an aux entry with debug
+	 information if it's a function.)  It
+	 appears the only way to find the second one
+	 is to count.  (On Intel, they appear to be
+	 adjacent, but on Alpha, they have been
+	 found separated.)
+
+	 Here, we think we've found the first one,
+	 but there's some checking we can do to be
+	 sure.  */
+
+      if (! ((isym.n_sclass == C_STAT || isym.n_sclass == C_EXT)
+	     && BTYPE (isym.n_type) == T_NULL && isym.n_value == 0))
+	{
+	  /* Malformed input files can trigger this test.
+	     cf PR 21781.  */
+	  _bfd_error_handler (
+	      _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd,
+	      found->symname);
+	  return false;
 	}
-    }
 
+      /* FIXME LATER: MSVC generates section names
+	 like .text for comdats.  Gas generates
+	 names like .text$foo__Fv (in the case of a
+	 function).  See comment above for more.  */
+
+      if (isym.n_sclass == C_STAT && strcmp (name, found->symname) != 0)
+	/* xgettext:c-format */
+	_bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'"
+			       " does not match section name '%s'"),
+			    abfd, found->symname, name);
+
+      if (found->comdat_symbol != -1)
+	{
+	  if (! insert_coff_comdat_info (abfd, section, found->comdat_name,
+					 found->comdat_symbol))
+	    return false;
+	}
+      *sec_flags = *sec_flags | found->sec_flags;
+      return true;
+    }
+  *sec_flags = *sec_flags | SEC_LINK_ONCE;
   return true;
 }
 
@@ -1268,7 +1309,7 @@ styp_to_sec_flags (bfd *abfd,
 	  break;
 	case IMAGE_SCN_LNK_COMDAT:
 	  /* COMDAT gets very special treatment.  */
-	  if (!handle_COMDAT (abfd, &sec_flags, hdr, name, section))
+	  if (!handle_COMDAT (abfd, &sec_flags, name, section))
 	    result = false;
 	  break;
 	default:
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index 1ec9a5185c..bf9633a2b3 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd)
 	    htab_delete (td->section_by_index);
 	  if (td->section_by_target_index)
 	    htab_delete (td->section_by_target_index);
+	  if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+	    htab_delete (pe_data (abfd)->comdat_hash);
 	}
     }
 }
@@ -3292,6 +3294,12 @@ _bfd_coff_free_cached_info (bfd *abfd)
 	  tdata->section_by_target_index = NULL;
 	}
 
+      if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+	{
+	  htab_delete (pe_data (abfd)->comdat_hash);
+	  pe_data (abfd)->comdat_hash = NULL;
+	}
+
       _bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info);
       _bfd_stab_cleanup (abfd, &tdata->line_info);
 
diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h
index 4e2203656d..eacfcb3ec3 100644
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -161,10 +161,22 @@ typedef struct pe_tdata
     const char *style;
     asection *sec;
   } build_id;
+
+  htab_t comdat_hash;
 } pe_data_type;
 
 #define pe_data(bfd)		((bfd)->tdata.pe_obj_data)
 
+struct comdat_hash_entry
+{
+  int target_index;
+  struct internal_syment isym;
+  char *symname;
+  flagword sec_flags;
+  char *comdat_name;
+  long comdat_symbol;
+};
+
 /* Tdata for XCOFF files.  */
 
 struct xcoff_tdata
diff --git a/bfd/peicode.h b/bfd/peicode.h
index 2cd020e699..5ac6b0dc53 100644
--- a/bfd/peicode.h
+++ b/bfd/peicode.h
@@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in)
 #endif
 }
 
+static hashval_t
+htab_hash_flags (const void *entry)
+{
+  const struct comdat_hash_entry *fe = entry;
+  return fe->target_index;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+  const struct comdat_hash_entry *fe1 = e1;
+  const struct comdat_hash_entry *fe2 = e2;
+  return fe1->target_index == fe2->target_index;
+}
+
 static bool
 pe_mkobject (bfd * abfd)
 {
@@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd)
   pe->dos_message[14] = 0x24;
   pe->dos_message[15] = 0x0;
 
+  pe->comdat_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
+
   memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr);
 
   bfd_coff_long_section_names (abfd)
-- 
2.41.0.windows.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH V4] optimize handle_COMDAT
  2023-08-23 21:59 [PATCH V4] optimize handle_COMDAT Oleg Tolmatcev
@ 2023-08-24  6:21 ` Alan Modra
  0 siblings, 0 replies; 2+ messages in thread
From: Alan Modra @ 2023-08-24  6:21 UTC (permalink / raw)
  To: Oleg Tolmatcev; +Cc: binutils

On Wed, Aug 23, 2023 at 11:59:59PM +0200, Oleg Tolmatcev via Binutils wrote:
> Thank you Alan, I could reproduce the bugs and I have fixed them.
> "make check" does not fail anymore.

Thanks, I have applied your patch with some formatting fixes.

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-08-24  6:21 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-23 21:59 [PATCH V4] optimize handle_COMDAT Oleg Tolmatcev
2023-08-24  6:21 ` Alan Modra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).