public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH V3] optimize handle_COMDAT
@ 2023-06-18 17:40 Oleg Tolmatcev
  2023-06-30 12:08 ` Nick Clifton
  0 siblings, 1 reply; 6+ messages in thread
From: Oleg Tolmatcev @ 2023-06-18 17:40 UTC (permalink / raw)
  To: binutils

[-- Attachment #1: Type: text/plain, Size: 21701 bytes --]

Hello,

I have improved my patch again. This time I read the MS COFF documentation, I kept all the checks, I successfully ran "make check" in WSL and I formatted the code with clang-format. I used the GNU style and tabs for indentation. I also simplified the patch.

From ddc8c9c7511c7f506761a4354a48f09ed67db326 Mon Sep 17 00:00:00 2001
From: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
Date: Sun, 18 Jun 2023 19:35:38 +0200
Subject: [PATCH] optimize handle_COMDAT

Signed-off-by: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
---
bfd/coffcode.h   | 464 +++++++++++++++++++++++++----------------------
bfd/coffgen.c    |   8 +
bfd/libcoff-in.h |  12 ++
bfd/peicode.h    |  17 ++
4 files changed, 281 insertions(+), 220 deletions(-)

diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 62720255b7..6b0629bb93 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -352,6 +352,7 @@ CODE_FRAGMENT
*/

 #include "libiberty.h"
+#include <string.h>

 #ifdef COFF_WITH_PE
#include "peicode.h"
@@ -852,19 +853,20 @@ styp_to_sec_flags (bfd *abfd,

 #else /* COFF_WITH_PE */

+static struct comdat_hash_entry *
+find_flags (htab_t comdat_hash, int target_index)
+{
+  struct comdat_hash_entry needle;
+  needle.target_index = target_index;
+
+  return htab_find (comdat_hash, &needle);
+}
+
static bool
-handle_COMDAT (bfd * abfd,
-	       flagword *sec_flags,
-	       void * hdr,
-	       const char *name,
-	       asection *section)
+fill_comdat_hash (bfd *abfd, void *hdr)
{
-  struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
+  struct internal_scnhdr *internal_s = (struct internal_scnhdr *)hdr;
   bfd_byte *esymstart, *esym, *esymend;
-  int seen_state = 0;
-  char *target_name = NULL;
-
-  *sec_flags |= SEC_LINK_ONCE;

   /* Unfortunately, the PE format stores essential information in
      the symbol table, of all places.  We need to extract that
@@ -883,252 +885,274 @@ handle_COMDAT (bfd * abfd,
      doesn't seem to be a need to, either, and it would at best be
      rather messy.  */

-  if (! _bfd_coff_get_external_symbols (abfd))
+  if (!_bfd_coff_get_external_symbols (abfd))
     return true;

-  esymstart = esym = (bfd_byte *) obj_coff_external_syms (abfd);
+  esymstart = esym = (bfd_byte *)obj_coff_external_syms (abfd);
   esymend = esym + obj_raw_syment_count (abfd) * bfd_coff_symesz (abfd);

-  for (struct internal_syment isym;
-       esym < esymend;
+  for (struct internal_syment isym; esym < esymend;
        esym += (isym.n_numaux + 1) * bfd_coff_symesz (abfd))
     {
       char buf[SYMNMLEN + 1];
       const char *symname;
+      flagword sec_flags = SEC_LINK_ONCE;

       bfd_coff_swap_sym_in (abfd, esym, &isym);

       BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN);

-      if (isym.n_scnum == section->target_index)
+      /* According to the MSVC documentation, the first
+	 TWO entries with the section # are both of
+	 interest to us.  The first one is the "section
+	 symbol" (section name).  The second is the comdat
+	 symbol name.  Here, we've found the first
+	 qualifying entry; we distinguish it from the
+	 second with a state flag.
+
+	 In the case of gas-generated (at least until that
+	 is fixed) .o files, it isn't necessarily the
+	 second one.  It may be some other later symbol.
+
+	 Since gas also doesn't follow MS conventions and
+	 emits the section similar to .text$<name>, where
+	 <something> is the name we're looking for, we
+	 distinguish the two as follows:
+
+	 If the section name is simply a section name (no
+	 $) we presume it's MS-generated, and look at
+	 precisely the second symbol for the comdat name.
+	 If the section name has a $, we assume it's
+	 gas-generated, and look for <something> (whatever
+	 follows the $) as the comdat symbol.  */
+
+      /* All 3 branches use this.  */
+      symname = _bfd_coff_internal_syment_name (abfd, &isym, buf);
+
+      /* PR 17512 file: 078-11867-0.004  */
+      if (symname == NULL)
	{
-	  /* According to the MSVC documentation, the first
-	     TWO entries with the section # are both of
-	     interest to us.  The first one is the "section
-	     symbol" (section name).  The second is the comdat
-	     symbol name.  Here, we've found the first
-	     qualifying entry; we distinguish it from the
-	     second with a state flag.
-
-	     In the case of gas-generated (at least until that
-	     is fixed) .o files, it isn't necessarily the
-	     second one.  It may be some other later symbol.
-
-	     Since gas also doesn't follow MS conventions and
-	     emits the section similar to .text$<name>, where
-	     <something> is the name we're looking for, we
-	     distinguish the two as follows:
-
-	     If the section name is simply a section name (no
-	     $) we presume it's MS-generated, and look at
-	     precisely the second symbol for the comdat name.
-	     If the section name has a $, we assume it's
-	     gas-generated, and look for <something> (whatever
-	     follows the $) as the comdat symbol.  */
-
-	  /* All 3 branches use this.  */
-	  symname = _bfd_coff_internal_syment_name (abfd, &isym, buf);
-
-	  /* PR 17512 file: 078-11867-0.004  */
-	  if (symname == NULL)
-	    {
-	      _bfd_error_handler (_("%pB: unable to load COMDAT section name"),
-				  abfd);
-	      return false;
-	    }
+	  _bfd_error_handler (_ ("%pB: unable to load COMDAT section name"),
+			      abfd);
+	  continue;
+	}

-	  switch (seen_state)
-	    {
-	    case 0:
-	      {
-		/* The first time we've seen the symbol.  */
-		union internal_auxent aux;
-
-		/* If it isn't the stuff we're expecting, die;
-		   The MS documentation is vague, but it
-		   appears that the second entry serves BOTH
-		   as the comdat symbol and the defining
-		   symbol record (either C_STAT or C_EXT,
-		   possibly with an aux entry with debug
-		   information if it's a function.)  It
-		   appears the only way to find the second one
-		   is to count.  (On Intel, they appear to be
-		   adjacent, but on Alpha, they have been
-		   found separated.)
-
-		   Here, we think we've found the first one,
-		   but there's some checking we can do to be
-		   sure.  */
-
-		if (! ((isym.n_sclass == C_STAT
-			|| isym.n_sclass == C_EXT)
-		       && BTYPE (isym.n_type) == T_NULL
-		       && isym.n_value == 0))
-		  {
-		    /* Malformed input files can trigger this test.
-		       cf PR 21781.  */
-		    _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT section"),
-					abfd, symname);
-		    return false;
-		  }
+      union internal_auxent aux;

-		/* FIXME LATER: MSVC generates section names
-		   like .text for comdats.  Gas generates
-		   names like .text$foo__Fv (in the case of a
-		   function).  See comment above for more.  */
-
-		if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0)
-		  /* xgettext:c-format */
-		  _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'"
-					" does not match section name '%s'"),
-				      abfd, symname, name);
-
-		/* This is the section symbol.  */
-		seen_state = 1;
-		target_name = strchr (name, '$');
-		if (target_name != NULL)
-		  {
-		    /* Gas mode.  */
-		    seen_state = 2;
-		    /* Skip the `$'.  */
-		    target_name += 1;
-		  }
+      bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)), isym.n_type,
+			    isym.n_sclass, 0, isym.n_numaux, &aux);

-		if (isym.n_numaux == 0)
-		  aux.x_scn.x_comdat = 0;
-		else
-		  {
-		    /* PR 17512: file: e2cfe54f.  */
-		    if (esym + bfd_coff_symesz (abfd) >= esymend)
-		      {
-			/* xgettext:c-format */
-			_bfd_error_handler (_("%pB: warning: no symbol for"
-					      " section '%s' found"),
-					    abfd, symname);
-			break;
-		      }
-		    bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd),
-					  isym.n_type, isym.n_sclass,
-					  0, isym.n_numaux, &aux);
-		  }
+      struct comdat_hash_entry needle;
+      needle.target_index = isym.n_scnum;

-		/* FIXME: Microsoft uses NODUPLICATES and
-		   ASSOCIATIVE, but gnu uses ANY and
-		   SAME_SIZE.  Unfortunately, gnu doesn't do
-		   the comdat symbols right.  So, until we can
-		   fix it to do the right thing, we are
-		   temporarily disabling comdats for the MS
-		   types (they're used in DLLs and C++, but we
-		   don't support *their* C++ libraries anyway
-		   - DJ.  */
-
-		/* Cygwin does not follow the MS style, and
-		   uses ANY and SAME_SIZE where NODUPLICATES
-		   and ASSOCIATIVE should be used.  For
-		   Interix, we just do the right thing up
-		   front.  */
-
-		switch (aux.x_scn.x_comdat)
-		  {
-		  case IMAGE_COMDAT_SELECT_NODUPLICATES:
+      void **slot
+	  = htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT);
+      if (slot == NULL)
+	return false;
+
+      if (*slot == NULL)
+	{
+	  if (isym.n_numaux == 0)
+	    aux.x_scn.x_comdat = 0;
+
+	  /* FIXME: Microsoft uses NODUPLICATES and
+	     ASSOCIATIVE, but gnu uses ANY and
+	     SAME_SIZE.  Unfortunately, gnu doesn't do
+	     the comdat symbols right.  So, until we can
+	     fix it to do the right thing, we are
+	     temporarily disabling comdats for the MS
+	     types (they're used in DLLs and C++, but we
+	     don't support *their* C++ libraries anyway
+	     - DJ.  */
+
+	  /* Cygwin does not follow the MS style, and
+	     uses ANY and SAME_SIZE where NODUPLICATES
+	     and ASSOCIATIVE should be used.  For
+	     Interix, we just do the right thing up
+	     front.  */
+
+	  switch (aux.x_scn.x_comdat)
+	    {
+	    case IMAGE_COMDAT_SELECT_NODUPLICATES:
#ifdef STRICT_PE_FORMAT
-		    *sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+	      sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
#else
-		    *sec_flags &= ~SEC_LINK_ONCE;
+	      sec_flags &= ~SEC_LINK_ONCE;
#endif
-		    break;
+	      break;

-		  case IMAGE_COMDAT_SELECT_ANY:
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
-		    break;
+	    case IMAGE_COMDAT_SELECT_ANY:
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      break;

-		  case IMAGE_COMDAT_SELECT_SAME_SIZE:
-		    *sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
-		    break;
+	    case IMAGE_COMDAT_SELECT_SAME_SIZE:
+	      sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
+	      break;

-		  case IMAGE_COMDAT_SELECT_EXACT_MATCH:
-		    /* Not yet fully implemented ??? */
-		    *sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
-		    break;
+	    case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+	      /* Not yet fully implemented ??? */
+	      sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
+	      break;

-		    /* debug$S gets this case; other
-		       implications ??? */
+	      /* debug$S gets this case; other
+		 implications ??? */

-		    /* There may be no symbol... we'll search
-		       the whole table... Is this the right
-		       place to play this game? Or should we do
-		       it when reading it in.  */
-		  case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
+	      /* There may be no symbol... we'll search
+		 the whole table... Is this the right
+		 place to play this game? Or should we do
+		 it when reading it in.  */
+	    case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
#ifdef STRICT_PE_FORMAT
-		    /* FIXME: This is not currently implemented.  */
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      /* FIXME: This is not currently implemented.  */
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
#else
-		    *sec_flags &= ~SEC_LINK_ONCE;
+	      sec_flags &= ~SEC_LINK_ONCE;
#endif
-		    break;
+	      break;

-		  default:  /* 0 means "no symbol" */
-		    /* debug$F gets this case; other
-		       implications ??? */
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
-		    break;
-		  }
-	      }
+	    default: /* 0 means "no symbol" */
+	      /* debug$F gets this case; other
+		 implications ??? */
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
	      break;
+	    }

-	    case 2:
+	  *slot = bfd_zmalloc (sizeof (struct comdat_hash_entry));
+	  if (*slot == NULL)
+	    return false;
+	  struct comdat_hash_entry *newentry = *slot;
+	  newentry->sec_flags = sec_flags;
+	  newentry->symname = bfd_strdup (symname);
+	  newentry->target_index = isym.n_scnum;
+	  newentry->isym = isym;
+	  newentry->comdat_symbol = -1;
+	}
+      else
+	{
+	  struct comdat_hash_entry *entry = *slot;
+	  char *target_name = strchr (entry->symname, '$');
+	  if (target_name != NULL)
+	    {
	      /* Gas mode: the first matching on partial name.  */
-
+	      target_name += 1;
#ifndef TARGET_UNDERSCORE
#define TARGET_UNDERSCORE 0
#endif
	      /* Is this the name we're looking for ?  */
-	      if (strcmp (target_name,
-			  symname + (TARGET_UNDERSCORE ? 1 : 0)) != 0)
-		{
-		  /* Not the name we're looking for */
-		  continue;
-		}
-	      /* Fall through.  */
-	    case 1:
-	      /* MSVC mode: the lexically second symbol (or
-		 drop through from the above).  */
-	      {
-		/* This must the second symbol with the
-		   section #.  It is the actual symbol name.
-		   Intel puts the two adjacent, but Alpha (at
-		   least) spreads them out.  */
+	      if (strcmp (target_name, symname + (TARGET_UNDERSCORE ? 1 : 0))
+		  != 0)
+		/* Not the name we're looking for */
+		continue;
+	    }
+	  /* MSVC mode: the lexically second symbol (or
+	     drop through from the above).  */
+	  /* This must the second symbol with the
+	      section #.  It is the actual symbol name.
+	      Intel puts the two adjacent, but Alpha (at
+	      least) spreads them out.  */
+
+	  entry->comdat_symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+	  entry->comdat_name = bfd_strdup (symname);
+	}
+    }

-		struct coff_comdat_info *comdat;
-		size_t len = strlen (symname) + 1;
+  return true;
+}

-		comdat = bfd_alloc (abfd, sizeof (*comdat) + len);
-		if (comdat == NULL)
-		  return false;
+static bool
+insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname,
+			 long symbol)
+{
+  struct coff_comdat_info *comdat;
+  size_t len = strlen (symname) + 1;

-		coff_section_data (abfd, section)->comdat = comdat;
-		comdat->symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
-		char *newname = (char *) (comdat + 1);
-		comdat->name = newname;
-		memcpy (newname, symname, len);
-		return true;
-	      }
-	    }
+  comdat = bfd_alloc (abfd, sizeof (*comdat) + len);
+  if (comdat == NULL)
+    return false;
+
+  coff_section_data (abfd, section)->comdat = comdat;
+  comdat->symbol = symbol;
+  char *newname = (char *)(comdat + 1);
+  comdat->name = newname;
+  memcpy (newname, symname, len);
+  return true;
+}
+
+static bool
+handle_COMDAT (bfd *abfd, flagword *sec_flags, void *hdr, const char *name,
+	       asection *section)
+{
+  if (htab_elements (pe_data (abfd)->comdat_hash) == 0)
+    if (!fill_comdat_hash (abfd, hdr))
+      return false;
+
+  struct comdat_hash_entry *found
+      = find_flags (pe_data (abfd)->comdat_hash, section->target_index);
+  if (found != NULL)
+    {
+
+      struct internal_syment isym = found->isym;
+
+      /* If it isn't the stuff we're expecting, die;
+	 The MS documentation is vague, but it
+	 appears that the second entry serves BOTH
+	 as the comdat symbol and the defining
+	 symbol record (either C_STAT or C_EXT,
+	 possibly with an aux entry with debug
+	 information if it's a function.)  It
+	 appears the only way to find the second one
+	 is to count.  (On Intel, they appear to be
+	 adjacent, but on Alpha, they have been
+	 found separated.)
+
+	 Here, we think we've found the first one,
+	 but there's some checking we can do to be
+	 sure.  */
+
+      if (!((isym.n_sclass == C_STAT || isym.n_sclass == C_EXT)
+	    && BTYPE (isym.n_type) == T_NULL && isym.n_value == 0))
+	{
+	  /* Malformed input files can trigger this test.
+	     cf PR 21781.  */
+	  _bfd_error_handler (
+	      _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd,
+	      found->symname);
+	  return false;
	}
-    }

+      /* FIXME LATER: MSVC generates section names
+	 like .text for comdats.  Gas generates
+	 names like .text$foo__Fv (in the case of a
+	 function).  See comment above for more.  */
+
+      if (isym.n_sclass == C_STAT && strcmp (name, found->symname) != 0)
+	/* xgettext:c-format */
+	_bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'"
+			       " does not match section name '%s'"),
+			    abfd, found->symname, name);
+
+      if (found->comdat_symbol != -1)
+	{
+	  if (!insert_coff_comdat_info (abfd, section, found->comdat_name,
+					found->comdat_symbol))
+	    return false;
+	}
+      *sec_flags = *sec_flags | found->sec_flags;
+      return true;
+    }
+  *sec_flags = *sec_flags | SEC_LINK_ONCE;
   return true;
}

 
-/* The PE version; see above for the general comments.
+  /* The PE version; see above for the general comments.

-   Since to set the SEC_LINK_ONCE and associated flags, we have to
-   look at the symbol table anyway, we return the symbol table index
-   of the symbol being used as the COMDAT symbol.  This is admittedly
-   ugly, but there's really nowhere else that we have access to the
-   required information.  FIXME: Is the COMDAT symbol index used for
-   any purpose other than objdump?  */
+     Since to set the SEC_LINK_ONCE and associated flags, we have to
+     look at the symbol table anyway, we return the symbol table index
+     of the symbol being used as the COMDAT symbol.  This is admittedly
+     ugly, but there's really nowhere else that we have access to the
+     required information.  FIXME: Is the COMDAT symbol index used for
+     any purpose other than objdump?  */

 static bool
styp_to_sec_flags (bfd *abfd,
@@ -1136,25 +1160,25 @@ styp_to_sec_flags (bfd *abfd,
		   const char *name,
		   asection *section,
		   flagword *flags_ptr)
-{
-  struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
-  unsigned long styp_flags = internal_s->s_flags;
-  flagword sec_flags;
-  bool result = true;
-  bool is_dbg = false;
+  {
+    struct internal_scnhdr *internal_s = (struct internal_scnhdr *)hdr;
+    unsigned long styp_flags = internal_s->s_flags;
+    flagword sec_flags;
+    bool result = true;
+    bool is_dbg = false;

   if (startswith (name, DOT_DEBUG)
       || startswith (name, DOT_ZDEBUG)
#ifdef COFF_LONG_SECTION_NAMES
-      || startswith (name, GNU_LINKONCE_WI)
-      || startswith (name, GNU_LINKONCE_WT)
+	|| startswith (name, GNU_LINKONCE_WI)
+	|| startswith (name, GNU_LINKONCE_WT)
       /* FIXME: These definitions ought to be in a header file.  */
#define GNU_DEBUGLINK		".gnu_debuglink"
#define GNU_DEBUGALTLINK	".gnu_debugaltlink"
-      || startswith (name, GNU_DEBUGLINK)
-      || startswith (name, GNU_DEBUGALTLINK)
+	|| startswith (name, GNU_DEBUGLINK)
+	|| startswith (name, GNU_DEBUGALTLINK)
#endif
-      || startswith (name, ".stab"))
+	|| startswith (name, ".stab"))
     is_dbg = true;
   /* Assume read only unless IMAGE_SCN_MEM_WRITE is specified.  */
   sec_flags = SEC_READONLY;
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index 9d45253178..72e29907c8 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd)
	    htab_delete (td->section_by_index);
	  if (td->section_by_target_index)
	    htab_delete (td->section_by_target_index);
+	  if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+	    htab_delete (pe_data (abfd)->comdat_hash);
	}
     }
}
@@ -3296,6 +3298,12 @@ _bfd_coff_free_cached_info (bfd *abfd)
	  tdata->section_by_target_index = NULL;
	}

+      if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+	{
+	  htab_delete (pe_data (abfd)->comdat_hash);
+	  pe_data (abfd)->comdat_hash = NULL;
+	}
+
       _bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info);
       _bfd_stab_cleanup (abfd, &tdata->line_info);

diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h
index 4e2203656d..eacfcb3ec3 100644
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -161,10 +161,22 @@ typedef struct pe_tdata
     const char *style;
     asection *sec;
   } build_id;
+
+  htab_t comdat_hash;
} pe_data_type;

 #define pe_data(bfd)		((bfd)->tdata.pe_obj_data)

+struct comdat_hash_entry
+{
+  int target_index;
+  struct internal_syment isym;
+  char *symname;
+  flagword sec_flags;
+  char *comdat_name;
+  long comdat_symbol;
+};
+
/* Tdata for XCOFF files.  */

 struct xcoff_tdata
diff --git a/bfd/peicode.h b/bfd/peicode.h
index e2e2be65b5..f5bb5e4310 100644
--- a/bfd/peicode.h
+++ b/bfd/peicode.h
@@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in)
#endif
}

+static hashval_t
+htab_hash_flags (const void *entry)
+{
+  const struct comdat_hash_entry *fe = entry;
+  return fe->target_index;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+  const struct comdat_hash_entry *fe1 = e1;
+  const struct comdat_hash_entry *fe2 = e2;
+  return fe1->target_index == fe2->target_index;
+}
+
static bool
pe_mkobject (bfd * abfd)
{
@@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd)
   pe->dos_message[14] = 0x24;
   pe->dos_message[15] = 0x0;

+  pe->comdat_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
+
   memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr);

   bfd_coff_long_section_names (abfd)
-- 
2.41.0.windows.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] optimize handle_COMDAT
  2023-06-18 17:40 [PATCH V3] optimize handle_COMDAT Oleg Tolmatcev
@ 2023-06-30 12:08 ` Nick Clifton
  2023-07-02 22:09   ` Oleg Tolmatcev
  0 siblings, 1 reply; 6+ messages in thread
From: Nick Clifton @ 2023-06-30 12:08 UTC (permalink / raw)
  To: Oleg Tolmatcev, binutils

Hi Oleg,

> I have improved my patch again. This time I read the MS COFF documentation, I kept all the checks, I successfully ran "make check" in WSL and I formatted the code with clang-format. I used the GNU style and tabs for indentation. I also simplified the patch.

Thank you.  Unfortunately there are still a couple of problems
with the patch,  Firstly, and most importantly, the patch triggers
a new failure in the linker testsuite:

   Running ld/testsuite/ld-linkonce/linkonce.exp ...
   [...]
   FAIL: pr26103


Secondly - the patch cannot be applied from your email.  Its formatting
has been corrupted, forcing me to apply it by hand.

Lastly the patch includes a lot of unnecessary formatting changes to
unrelated code.  This is annoying as it masks the true content of the
patch.

Please could you investigate the testsuite failure and submit a revised
patch as an attachment, rather than inline in the email.

Cheers
   Nick




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] optimize handle_COMDAT
  2023-06-30 12:08 ` Nick Clifton
@ 2023-07-02 22:09   ` Oleg Tolmatcev
  2023-07-31 21:31     ` Oleg Tolmatcev
  2023-08-01  1:11     ` Alan Modra
  0 siblings, 2 replies; 6+ messages in thread
From: Oleg Tolmatcev @ 2023-07-02 22:09 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils

[-- Attachment #1: Type: text/plain, Size: 1574 bytes --]

Am Fr., 30. Juni 2023 um 14:08 Uhr schrieb Nick Clifton <nickc@redhat.com>:
>
> Hi Oleg,

Hi Nick. Thanks for the review.

> > I have improved my patch again. This time I read the MS COFF documentation, I kept all the checks, I successfully ran "make check" in WSL and I formatted the code with clang-format. I used the GNU style and tabs for indentation. I also simplified the patch.
>
> Thank you.  Unfortunately there are still a couple of problems
> with the patch,  Firstly, and most importantly, the patch triggers
> a new failure in the linker testsuite:
>
>    Running ld/testsuite/ld-linkonce/linkonce.exp ...
>    [...]
>    FAIL: pr26103
>

I have run "make check" in WSL and it passed. I then rebased the patch
on top of master and ran "make check" in WSL again and it passed. I
can not reproduce the failure. I develop on Windows, but I even ran it
on my Linux machine with Manjaro Linux and it passed.

> Secondly - the patch cannot be applied from your email.  Its formatting
> has been corrupted, forcing me to apply it by hand.
>
> Lastly the patch includes a lot of unnecessary formatting changes to
> unrelated code.  This is annoying as it masks the true content of the
> patch.

Sorry, I have reverted as many formatting changes as I could. Now some
of the code is indented wrong, but I could format it with clang-format
later.

> Please could you investigate the testsuite failure and submit a revised
> patch as an attachment, rather than inline in the email.

I have attached the new patch.

Best regards
Oleg

[-- Attachment #2: 0001-optimize-handle_COMDAT.patch --]
[-- Type: application/octet-stream, Size: 13951 bytes --]

From 6a3bf2f9ca824087441520551d2f3d898a68f47e Mon Sep 17 00:00:00 2001
From: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
Date: Sun, 18 Jun 2023 19:35:38 +0200
Subject: [PATCH] optimize handle_COMDAT

Signed-off-by: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
---
 bfd/coffcode.h   | 272 ++++++++++++++++++++++++++---------------------
 bfd/coffgen.c    |   8 ++
 bfd/libcoff-in.h |  12 +++
 bfd/peicode.h    |  17 +++
 4 files changed, 187 insertions(+), 122 deletions(-)

diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 62720255b7..ef0189c0ff 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -352,6 +352,7 @@ CODE_FRAGMENT
 */
 
 #include "libiberty.h"
+#include <string.h>
 
 #ifdef COFF_WITH_PE
 #include "peicode.h"
@@ -852,19 +853,20 @@ styp_to_sec_flags (bfd *abfd,
 
 #else /* COFF_WITH_PE */
 
+static struct comdat_hash_entry *
+find_flags (htab_t comdat_hash, int target_index)
+{
+  struct comdat_hash_entry needle;
+  needle.target_index = target_index;
+
+  return htab_find (comdat_hash, &needle);
+}
+
 static bool
-handle_COMDAT (bfd * abfd,
-	       flagword *sec_flags,
-	       void * hdr,
-	       const char *name,
-	       asection *section)
+fill_comdat_hash (bfd *abfd, void *hdr)
 {
   struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
   bfd_byte *esymstart, *esym, *esymend;
-  int seen_state = 0;
-  char *target_name = NULL;
-
-  *sec_flags |= SEC_LINK_ONCE;
 
   /* Unfortunately, the PE format stores essential information in
      the symbol table, of all places.  We need to extract that
@@ -895,13 +897,12 @@ handle_COMDAT (bfd * abfd,
     {
       char buf[SYMNMLEN + 1];
       const char *symname;
+      flagword sec_flags = SEC_LINK_ONCE;
 
       bfd_coff_swap_sym_in (abfd, esym, &isym);
 
       BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN);
 
-      if (isym.n_scnum == section->target_index)
-	{
 	  /* According to the MSVC documentation, the first
 	     TWO entries with the section # are both of
 	     interest to us.  The first one is the "section
@@ -934,121 +935,64 @@ handle_COMDAT (bfd * abfd,
 	    {
 	      _bfd_error_handler (_("%pB: unable to load COMDAT section name"),
 				  abfd);
-	      return false;
+	      continue;
 	    }
 
-	  switch (seen_state)
-	    {
-	    case 0:
-	      {
-		/* The first time we've seen the symbol.  */
-		union internal_auxent aux;
-
-		/* If it isn't the stuff we're expecting, die;
-		   The MS documentation is vague, but it
-		   appears that the second entry serves BOTH
-		   as the comdat symbol and the defining
-		   symbol record (either C_STAT or C_EXT,
-		   possibly with an aux entry with debug
-		   information if it's a function.)  It
-		   appears the only way to find the second one
-		   is to count.  (On Intel, they appear to be
-		   adjacent, but on Alpha, they have been
-		   found separated.)
-
-		   Here, we think we've found the first one,
-		   but there's some checking we can do to be
-		   sure.  */
-
-		if (! ((isym.n_sclass == C_STAT
-			|| isym.n_sclass == C_EXT)
-		       && BTYPE (isym.n_type) == T_NULL
-		       && isym.n_value == 0))
-		  {
-		    /* Malformed input files can trigger this test.
-		       cf PR 21781.  */
-		    _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT section"),
-					abfd, symname);
-		    return false;
-		  }
+      union internal_auxent aux;
 
-		/* FIXME LATER: MSVC generates section names
-		   like .text for comdats.  Gas generates
-		   names like .text$foo__Fv (in the case of a
-		   function).  See comment above for more.  */
-
-		if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0)
-		  /* xgettext:c-format */
-		  _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'"
-					" does not match section name '%s'"),
-				      abfd, symname, name);
-
-		/* This is the section symbol.  */
-		seen_state = 1;
-		target_name = strchr (name, '$');
-		if (target_name != NULL)
-		  {
-		    /* Gas mode.  */
-		    seen_state = 2;
-		    /* Skip the `$'.  */
-		    target_name += 1;
-		  }
+      bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)), isym.n_type,
+			    isym.n_sclass, 0, isym.n_numaux, &aux);
 
-		if (isym.n_numaux == 0)
-		  aux.x_scn.x_comdat = 0;
-		else
-		  {
-		    /* PR 17512: file: e2cfe54f.  */
-		    if (esym + bfd_coff_symesz (abfd) >= esymend)
-		      {
-			/* xgettext:c-format */
-			_bfd_error_handler (_("%pB: warning: no symbol for"
-					      " section '%s' found"),
-					    abfd, symname);
-			break;
-		      }
-		    bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd),
-					  isym.n_type, isym.n_sclass,
-					  0, isym.n_numaux, &aux);
-		  }
+      struct comdat_hash_entry needle;
+      needle.target_index = isym.n_scnum;
 
-		/* FIXME: Microsoft uses NODUPLICATES and
-		   ASSOCIATIVE, but gnu uses ANY and
-		   SAME_SIZE.  Unfortunately, gnu doesn't do
-		   the comdat symbols right.  So, until we can
-		   fix it to do the right thing, we are
-		   temporarily disabling comdats for the MS
-		   types (they're used in DLLs and C++, but we
-		   don't support *their* C++ libraries anyway
-		   - DJ.  */
-
-		/* Cygwin does not follow the MS style, and
-		   uses ANY and SAME_SIZE where NODUPLICATES
-		   and ASSOCIATIVE should be used.  For
-		   Interix, we just do the right thing up
-		   front.  */
-
-		switch (aux.x_scn.x_comdat)
-		  {
-		  case IMAGE_COMDAT_SELECT_NODUPLICATES:
+      void **slot
+	  = htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT);
+      if (slot == NULL)
+	return false;
+
+      if (*slot == NULL)
+	{
+	  if (isym.n_numaux == 0)
+	    aux.x_scn.x_comdat = 0;
+
+	  /* FIXME: Microsoft uses NODUPLICATES and
+	     ASSOCIATIVE, but gnu uses ANY and
+	     SAME_SIZE.  Unfortunately, gnu doesn't do
+	     the comdat symbols right.  So, until we can
+	     fix it to do the right thing, we are
+	     temporarily disabling comdats for the MS
+	     types (they're used in DLLs and C++, but we
+	     don't support *their* C++ libraries anyway
+	     - DJ.  */
+
+	  /* Cygwin does not follow the MS style, and
+	     uses ANY and SAME_SIZE where NODUPLICATES
+	     and ASSOCIATIVE should be used.  For
+	     Interix, we just do the right thing up
+	     front.  */
+
+	  switch (aux.x_scn.x_comdat)
+	    {
+	    case IMAGE_COMDAT_SELECT_NODUPLICATES:
 #ifdef STRICT_PE_FORMAT
-		    *sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+	      sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
 #else
-		    *sec_flags &= ~SEC_LINK_ONCE;
+	      sec_flags &= ~SEC_LINK_ONCE;
 #endif
 		    break;
 
 		  case IMAGE_COMDAT_SELECT_ANY:
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
 		    break;
 
 		  case IMAGE_COMDAT_SELECT_SAME_SIZE:
-		    *sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
+	      sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
 		    break;
 
 		  case IMAGE_COMDAT_SELECT_EXACT_MATCH:
 		    /* Not yet fully implemented ??? */
-		    *sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
+	      sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
 		    break;
 
 		    /* debug$S gets this case; other
@@ -1061,24 +1005,38 @@ handle_COMDAT (bfd * abfd,
 		  case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
 #ifdef STRICT_PE_FORMAT
 		    /* FIXME: This is not currently implemented.  */
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
 #else
-		    *sec_flags &= ~SEC_LINK_ONCE;
+	      sec_flags &= ~SEC_LINK_ONCE;
 #endif
 		    break;
 
 		  default:  /* 0 means "no symbol" */
 		    /* debug$F gets this case; other
 		       implications ??? */
-		    *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+	      sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
 		    break;
-		  }
-	      }
-	      break;
+	    }
 
-	    case 2:
+	  *slot = bfd_zmalloc (sizeof (struct comdat_hash_entry));
+	  if (*slot == NULL)
+	    return false;
+	  struct comdat_hash_entry *newentry = *slot;
+	  newentry->sec_flags = sec_flags;
+	  newentry->symname = bfd_strdup (symname);
+	  newentry->target_index = isym.n_scnum;
+	  newentry->isym = isym;
+	  newentry->comdat_symbol = -1;
+	}
+      else
+	{
+	  struct comdat_hash_entry *entry = *slot;
+	  char *target_name = strchr (entry->symname, '$');
+	  if (target_name != NULL)
+	    {
 	      /* Gas mode: the first matching on partial name.  */
 
+	      target_name += 1;
 #ifndef TARGET_UNDERSCORE
 #define TARGET_UNDERSCORE 0
 #endif
@@ -1089,16 +1047,26 @@ handle_COMDAT (bfd * abfd,
 		  /* Not the name we're looking for */
 		  continue;
 		}
-	      /* Fall through.  */
-	    case 1:
+	    }
 	      /* MSVC mode: the lexically second symbol (or
 		 drop through from the above).  */
-	      {
 		/* This must the second symbol with the
 		   section #.  It is the actual symbol name.
 		   Intel puts the two adjacent, but Alpha (at
 		   least) spreads them out.  */
 
+	  entry->comdat_symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+	  entry->comdat_name = bfd_strdup (symname);
+	}
+    }
+
+  return true;
+}
+
+static bool
+insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname,
+			 long symbol)
+{
 		struct coff_comdat_info *comdat;
 		size_t len = strlen (symname) + 1;
 
@@ -1107,16 +1075,76 @@ handle_COMDAT (bfd * abfd,
 		  return false;
 
 		coff_section_data (abfd, section)->comdat = comdat;
-		comdat->symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+		comdat->symbol = symbol;
 		char *newname = (char *) (comdat + 1);
 		comdat->name = newname;
 		memcpy (newname, symname, len);
 		return true;
-	      }
-	    }
+}
+
+static bool
+handle_COMDAT (bfd *abfd, flagword *sec_flags, void *hdr, const char *name,
+	       asection *section)
+{
+  if (htab_elements (pe_data (abfd)->comdat_hash) == 0)
+    if (! fill_comdat_hash (abfd, hdr))
+      return false;
+
+  struct comdat_hash_entry *found
+      = find_flags (pe_data (abfd)->comdat_hash, section->target_index);
+  if (found != NULL)
+    {
+
+      struct internal_syment isym = found->isym;
+
+      /* If it isn't the stuff we're expecting, die;
+	 The MS documentation is vague, but it
+	 appears that the second entry serves BOTH
+	 as the comdat symbol and the defining
+	 symbol record (either C_STAT or C_EXT,
+	 possibly with an aux entry with debug
+	 information if it's a function.)  It
+	 appears the only way to find the second one
+	 is to count.  (On Intel, they appear to be
+	 adjacent, but on Alpha, they have been
+	 found separated.)
+
+	 Here, we think we've found the first one,
+	 but there's some checking we can do to be
+	 sure.  */
+
+      if (! ((isym.n_sclass == C_STAT || isym.n_sclass == C_EXT)
+	     && BTYPE (isym.n_type) == T_NULL && isym.n_value == 0))
+	{
+	  /* Malformed input files can trigger this test.
+	     cf PR 21781.  */
+	  _bfd_error_handler (
+	      _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd,
+	      found->symname);
+	  return false;
 	}
-    }
 
+      /* FIXME LATER: MSVC generates section names
+	 like .text for comdats.  Gas generates
+	 names like .text$foo__Fv (in the case of a
+	 function).  See comment above for more.  */
+
+      if (isym.n_sclass == C_STAT && strcmp (name, found->symname) != 0)
+	/* xgettext:c-format */
+	_bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'"
+			       " does not match section name '%s'"),
+			    abfd, found->symname, name);
+
+      if (found->comdat_symbol != -1)
+	{
+	  if (! insert_coff_comdat_info (abfd, section, found->comdat_name,
+					 found->comdat_symbol))
+	    return false;
+	}
+      *sec_flags = *sec_flags | found->sec_flags;
+      return true;
+    }
+  *sec_flags = *sec_flags | SEC_LINK_ONCE;
   return true;
 }
 
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index 9d45253178..72e29907c8 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd)
 	    htab_delete (td->section_by_index);
 	  if (td->section_by_target_index)
 	    htab_delete (td->section_by_target_index);
+	  if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+	    htab_delete (pe_data (abfd)->comdat_hash);
 	}
     }
 }
@@ -3296,6 +3298,12 @@ _bfd_coff_free_cached_info (bfd *abfd)
 	  tdata->section_by_target_index = NULL;
 	}
 
+      if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+	{
+	  htab_delete (pe_data (abfd)->comdat_hash);
+	  pe_data (abfd)->comdat_hash = NULL;
+	}
+
       _bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info);
       _bfd_stab_cleanup (abfd, &tdata->line_info);
 
diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h
index 4e2203656d..eacfcb3ec3 100644
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -161,10 +161,22 @@ typedef struct pe_tdata
     const char *style;
     asection *sec;
   } build_id;
+
+  htab_t comdat_hash;
 } pe_data_type;
 
 #define pe_data(bfd)		((bfd)->tdata.pe_obj_data)
 
+struct comdat_hash_entry
+{
+  int target_index;
+  struct internal_syment isym;
+  char *symname;
+  flagword sec_flags;
+  char *comdat_name;
+  long comdat_symbol;
+};
+
 /* Tdata for XCOFF files.  */
 
 struct xcoff_tdata
diff --git a/bfd/peicode.h b/bfd/peicode.h
index e2e2be65b5..f5bb5e4310 100644
--- a/bfd/peicode.h
+++ b/bfd/peicode.h
@@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in)
 #endif
 }
 
+static hashval_t
+htab_hash_flags (const void *entry)
+{
+  const struct comdat_hash_entry *fe = entry;
+  return fe->target_index;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+  const struct comdat_hash_entry *fe1 = e1;
+  const struct comdat_hash_entry *fe2 = e2;
+  return fe1->target_index == fe2->target_index;
+}
+
 static bool
 pe_mkobject (bfd * abfd)
 {
@@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd)
   pe->dos_message[14] = 0x24;
   pe->dos_message[15] = 0x0;
 
+  pe->comdat_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
+
   memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr);
 
   bfd_coff_long_section_names (abfd)
-- 
2.41.0.windows.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] optimize handle_COMDAT
  2023-07-02 22:09   ` Oleg Tolmatcev
@ 2023-07-31 21:31     ` Oleg Tolmatcev
  2023-08-01  1:11     ` Alan Modra
  1 sibling, 0 replies; 6+ messages in thread
From: Oleg Tolmatcev @ 2023-07-31 21:31 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils

Hi Nick,

what is the status of this patch? Can I help get it in?

Best regards
Oleg

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] optimize handle_COMDAT
  2023-07-02 22:09   ` Oleg Tolmatcev
  2023-07-31 21:31     ` Oleg Tolmatcev
@ 2023-08-01  1:11     ` Alan Modra
  2023-08-01 22:22       ` Alan Modra
  1 sibling, 1 reply; 6+ messages in thread
From: Alan Modra @ 2023-08-01  1:11 UTC (permalink / raw)
  To: Oleg Tolmatcev; +Cc: Nick Clifton, binutils

On Mon, Jul 03, 2023 at 12:09:22AM +0200, Oleg Tolmatcev via Binutils wrote:
> Am Fr., 30. Juni 2023 um 14:08 Uhr schrieb Nick Clifton <nickc@redhat.com>:
> > Thank you.  Unfortunately there are still a couple of problems
> > with the patch,  Firstly, and most importantly, the patch triggers
> > a new failure in the linker testsuite:
> >
> >    Running ld/testsuite/ld-linkonce/linkonce.exp ...
> >    [...]
> >    FAIL: pr26103
> >
> 
> I have run "make check" in WSL and it passed. I then rebased the patch
> on top of master and ran "make check" in WSL again and it passed. I
> can not reproduce the failure. I develop on Windows, but I even ran it
> on my Linux machine with Manjaro Linux and it passed.

I applied your latest patch, and it results in failures as Nick said.

aarch64-pe  +FAIL: pr26103
arm-pe  +FAIL: pr26103
arm-wince-pe  +FAIL: pr26103
i686-pe  +FAIL: pr26103
mcore-pe  +FAIL: pr26103
sh-pe  +FAIL: pr26103
x86_64-w64-mingw32  +FAIL: pr26103

This is with mainline binutils built on x86_64-linux using
~/src/binutils-gdb/configure \
--disable-nls \
--disable-gdb --disable-gdbserver --disable-gdbsupport --disable-gprofng \
--disable-libbacktrace --disable-libdecnumber --disable-readline --disable-sim \
--enable-obsolete --target=$target
make && make check

I think something is wrong with your testing.  In ld/ld.log for
x86_64-w64-mingw32 I see

./ld-new   -o tmpdir/pr26103  -L/home/alan/src/binutils-gdb/ld/testsuite/ld-linkonce  tmpdir/ref1.o --start-group tmpdir/sym.a tmpdir/ref2.o --end-group
Executing on host: sh -c {./ld-new   -o tmpdir/pr26103  -L/home/alan/src/binutils-gdb/ld/testsuite/ld-linkonce  tmpdir/ref1.o --start-group tmpdir/sym.a tmpdir/ref2.o --end-group 2>&1}  /dev/null ld.tmp (timeout = 300)
spawn [open ...]
./ld-new: tmpdir/sym.a(sym2.o):fake:(.gnu.linkonce.d.foo[onex]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
./ld-new: tmpdir/sym.a(sym3.o):fake:(.gnu.linkonce.d.foo[oney]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
./ld-new: tmpdir/sym.a(sym2.o):fake:(.gnu.linkonce.d.foo[onex]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
./ld-new: tmpdir/sym.a(sym3.o):fake:(.gnu.linkonce.d.foo[oney]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
FAIL: pr26103

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V3] optimize handle_COMDAT
  2023-08-01  1:11     ` Alan Modra
@ 2023-08-01 22:22       ` Alan Modra
  0 siblings, 0 replies; 6+ messages in thread
From: Alan Modra @ 2023-08-01 22:22 UTC (permalink / raw)
  To: Oleg Tolmatcev; +Cc: Nick Clifton, binutils

On Tue, Aug 01, 2023 at 10:41:55AM +0930, Alan Modra wrote:
> On Mon, Jul 03, 2023 at 12:09:22AM +0200, Oleg Tolmatcev via Binutils wrote:
> > Am Fr., 30. Juni 2023 um 14:08 Uhr schrieb Nick Clifton <nickc@redhat.com>:
> > > Thank you.  Unfortunately there are still a couple of problems
> > > with the patch,  Firstly, and most importantly, the patch triggers
> > > a new failure in the linker testsuite:
> > >
> > >    Running ld/testsuite/ld-linkonce/linkonce.exp ...
> > >    [...]
> > >    FAIL: pr26103
> > >
> > 
> > I have run "make check" in WSL and it passed. I then rebased the patch
> > on top of master and ran "make check" in WSL again and it passed. I
> > can not reproduce the failure. I develop on Windows, but I even ran it
> > on my Linux machine with Manjaro Linux and it passed.
> 
> I applied your latest patch, and it results in failures as Nick said.
> 
> aarch64-pe  +FAIL: pr26103
> arm-pe  +FAIL: pr26103
> arm-wince-pe  +FAIL: pr26103
> i686-pe  +FAIL: pr26103
> mcore-pe  +FAIL: pr26103
> sh-pe  +FAIL: pr26103
> x86_64-w64-mingw32  +FAIL: pr26103
> 
> This is with mainline binutils built on x86_64-linux using
> ~/src/binutils-gdb/configure \
> --disable-nls \
> --disable-gdb --disable-gdbserver --disable-gdbsupport --disable-gprofng \
> --disable-libbacktrace --disable-libdecnumber --disable-readline --disable-sim \
> --enable-obsolete --target=$target
> make && make check
> 
> I think something is wrong with your testing.  In ld/ld.log for
> x86_64-w64-mingw32 I see
> 
> ./ld-new   -o tmpdir/pr26103  -L/home/alan/src/binutils-gdb/ld/testsuite/ld-linkonce  tmpdir/ref1.o --start-group tmpdir/sym.a tmpdir/ref2.o --end-group
> Executing on host: sh -c {./ld-new   -o tmpdir/pr26103  -L/home/alan/src/binutils-gdb/ld/testsuite/ld-linkonce  tmpdir/ref1.o --start-group tmpdir/sym.a tmpdir/ref2.o --end-group 2>&1}  /dev/null ld.tmp (timeout = 300)
> spawn [open ...]
> ./ld-new: tmpdir/sym.a(sym2.o):fake:(.gnu.linkonce.d.foo[onex]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
> ./ld-new: tmpdir/sym.a(sym3.o):fake:(.gnu.linkonce.d.foo[oney]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
> ./ld-new: tmpdir/sym.a(sym2.o):fake:(.gnu.linkonce.d.foo[onex]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
> ./ld-new: tmpdir/sym.a(sym3.o):fake:(.gnu.linkonce.d.foo[oney]+0x0): multiple definition of `foo'; tmpdir/sym.a(sym1.o):fake:(.gnu.linkonce.d.foo[one]+0x0): first defined here
> FAIL: pr26103

There is also this with a -fsanitize=address,undefined build.

Executing on host: sh -c {/home/alan/build/gas-san32/x86_64-w64-mingw32/ld/../binutils/ar   rc tmpdir/sym.a tmpdir/sym1.o tmpdir/sym2.o tmpdir/sym3.o tmpdir/ref2.o  2>&1}  /dev/null ld.tmp (timeout = 300)
spawn [open ...]
=================================================================
==3449699==ERROR: AddressSanitizer: heap-buffer-overflow on address 0xf43036ea at pc 0x569046d2 bp 0xffff86c8 sp 0xffff86b8
READ of size 1 at 0xf43036ea thread T0
    #0 0x569046d1 in bfd_getl32 /home/alan/src/binutils-gdb/bfd/libbfd.c:838
    #1 0x56975f86 in _bfd_pex64i_swap_aux_in /home/alan/build/gas-san32/x86_64-w64-mingw32/bfd/peXXigen.c:319
    #2 0x5695fe88 in fill_comdat_hash /home/alan/src/binutils-gdb/bfd/coffcode.h:943
    #3 0x5695fe88 in handle_COMDAT /home/alan/src/binutils-gdb/bfd/coffcode.h:1090
    #4 0x5695fe88 in styp_to_sec_flags /home/alan/src/binutils-gdb/bfd/coffcode.h:1299
    #5 0x569ec52c in make_a_section_from_file /home/alan/src/binutils-gdb/bfd/coffgen.c:210
    #6 0x569ec52c in coff_real_object_p /home/alan/src/binutils-gdb/bfd/coffgen.c:368
    #7 0x569ee70a in coff_object_p /home/alan/src/binutils-gdb/bfd/coffgen.c:446
    #8 0x568f91c9 in bfd_check_format_matches /home/alan/src/binutils-gdb/bfd/format.c:431
    #9 0x568fc611 in bfd_check_format /home/alan/src/binutils-gdb/bfd/format.c:94
    #10 0x568dd80c in _bfd_write_archive_contents /home/alan/src/binutils-gdb/bfd/archive.c:2180
    #11 0x5690cebb in bfd_close /home/alan/src/binutils-gdb/bfd/opncls.c:892
    #12 0x568c0723 in write_archive /home/alan/src/binutils-gdb/binutils/ar.c:1288
    #13 0x568b0038 in replace_members /home/alan/src/binutils-gdb/binutils/ar.c:1550
    #14 0x568b0038 in main /home/alan/src/binutils-gdb/binutils/ar.c:936


-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-08-01 22:22 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-18 17:40 [PATCH V3] optimize handle_COMDAT Oleg Tolmatcev
2023-06-30 12:08 ` Nick Clifton
2023-07-02 22:09   ` Oleg Tolmatcev
2023-07-31 21:31     ` Oleg Tolmatcev
2023-08-01  1:11     ` Alan Modra
2023-08-01 22:22       ` Alan Modra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).