* [PATCH V4] optimize handle_COMDAT
@ 2023-08-23 21:59 Oleg Tolmatcev
2023-08-24 6:21 ` Alan Modra
0 siblings, 1 reply; 2+ messages in thread
From: Oleg Tolmatcev @ 2023-08-23 21:59 UTC (permalink / raw)
To: binutils
[-- Attachment #1: Type: text/plain, Size: 131 bytes --]
Thank you Alan, I could reproduce the bugs and I have fixed them.
"make check" does not fail anymore.
Best regards
Oleg Tolmatcev
[-- Attachment #2: 0001-optimize-handle_COMDAT.patch --]
[-- Type: application/octet-stream, Size: 14553 bytes --]
From da7c00b3145b04e1fa208b97cf8233a0e42a4050 Mon Sep 17 00:00:00 2001
From: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
Date: Sun, 18 Jun 2023 19:35:38 +0200
Subject: [PATCH] optimize handle_COMDAT
Signed-off-by: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
---
bfd/coffcode.h | 289 +++++++++++++++++++++++++++--------------------
bfd/coffgen.c | 8 ++
bfd/libcoff-in.h | 12 ++
bfd/peicode.h | 17 +++
4 files changed, 202 insertions(+), 124 deletions(-)
diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 6789f7f3cc..35d40a9501 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -352,6 +352,7 @@ CODE_FRAGMENT
*/
#include "libiberty.h"
+#include <string.h>
#ifdef COFF_WITH_PE
#include "peicode.h"
@@ -852,19 +853,19 @@ styp_to_sec_flags (bfd *abfd,
#else /* COFF_WITH_PE */
+static struct comdat_hash_entry *
+find_flags (htab_t comdat_hash, int target_index)
+{
+ struct comdat_hash_entry needle;
+ needle.target_index = target_index;
+
+ return htab_find (comdat_hash, &needle);
+}
+
static bool
-handle_COMDAT (bfd * abfd,
- flagword *sec_flags,
- void * hdr,
- const char *name,
- asection *section)
+fill_comdat_hash (bfd *abfd)
{
- struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
bfd_byte *esymstart, *esym, *esymend;
- int seen_state = 0;
- char *target_name = NULL;
-
- *sec_flags |= SEC_LINK_ONCE;
/* Unfortunately, the PE format stores essential information in
the symbol table, of all places. We need to extract that
@@ -895,13 +896,10 @@ handle_COMDAT (bfd * abfd,
{
char buf[SYMNMLEN + 1];
const char *symname;
+ flagword sec_flags = SEC_LINK_ONCE;
bfd_coff_swap_sym_in (abfd, esym, &isym);
- BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN);
-
- if (isym.n_scnum == section->target_index)
- {
/* According to the MSVC documentation, the first
TWO entries with the section # are both of
interest to us. The first one is the "section
@@ -934,121 +932,76 @@ handle_COMDAT (bfd * abfd,
{
_bfd_error_handler (_("%pB: unable to load COMDAT section name"),
abfd);
- return false;
+ continue;
}
- switch (seen_state)
- {
- case 0:
- {
- /* The first time we've seen the symbol. */
- union internal_auxent aux;
-
- /* If it isn't the stuff we're expecting, die;
- The MS documentation is vague, but it
- appears that the second entry serves BOTH
- as the comdat symbol and the defining
- symbol record (either C_STAT or C_EXT,
- possibly with an aux entry with debug
- information if it's a function.) It
- appears the only way to find the second one
- is to count. (On Intel, they appear to be
- adjacent, but on Alpha, they have been
- found separated.)
-
- Here, we think we've found the first one,
- but there's some checking we can do to be
- sure. */
-
- if (! ((isym.n_sclass == C_STAT
- || isym.n_sclass == C_EXT)
- && BTYPE (isym.n_type) == T_NULL
- && isym.n_value == 0))
- {
- /* Malformed input files can trigger this test.
- cf PR 21781. */
- _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT section"),
- abfd, symname);
- return false;
- }
+ union internal_auxent aux;
- /* FIXME LATER: MSVC generates section names
- like .text for comdats. Gas generates
- names like .text$foo__Fv (in the case of a
- function). See comment above for more. */
+ struct comdat_hash_entry needle;
+ needle.target_index = isym.n_scnum;
- if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0)
- /* xgettext:c-format */
- _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'"
- " does not match section name '%s'"),
- abfd, symname, name);
-
- /* This is the section symbol. */
- seen_state = 1;
- target_name = strchr (name, '$');
- if (target_name != NULL)
- {
- /* Gas mode. */
- seen_state = 2;
- /* Skip the `$'. */
- target_name += 1;
- }
+ void **slot
+ = htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT);
+ if (slot == NULL)
+ return false;
- if (isym.n_numaux == 0)
- aux.x_scn.x_comdat = 0;
- else
- {
+ if (*slot == NULL)
+ {
+ if (isym.n_numaux == 0)
+ aux.x_scn.x_comdat = 0;
+ else
+ {
/* PR 17512: file: e2cfe54f. */
- if (esym + bfd_coff_symesz (abfd) >= esymend)
- {
- /* xgettext:c-format */
- _bfd_error_handler (_("%pB: warning: no symbol for"
- " section '%s' found"),
- abfd, symname);
- break;
- }
- bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd),
- isym.n_type, isym.n_sclass,
- 0, isym.n_numaux, &aux);
- }
+ if (esym + bfd_coff_symesz (abfd) >= esymend)
+ {
+ /* xgettext:c-format */
+ _bfd_error_handler (_ ("%pB: warning: no symbol for"
+ " section '%s' found"),
+ abfd, symname);
+ continue;
+ }
+ bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)),
+ isym.n_type, isym.n_sclass, 0,
+ isym.n_numaux, &aux);
+ }
- /* FIXME: Microsoft uses NODUPLICATES and
- ASSOCIATIVE, but gnu uses ANY and
- SAME_SIZE. Unfortunately, gnu doesn't do
- the comdat symbols right. So, until we can
- fix it to do the right thing, we are
- temporarily disabling comdats for the MS
- types (they're used in DLLs and C++, but we
- don't support *their* C++ libraries anyway
- - DJ. */
-
- /* Cygwin does not follow the MS style, and
- uses ANY and SAME_SIZE where NODUPLICATES
- and ASSOCIATIVE should be used. For
- Interix, we just do the right thing up
- front. */
-
- switch (aux.x_scn.x_comdat)
- {
- case IMAGE_COMDAT_SELECT_NODUPLICATES:
+ /* FIXME: Microsoft uses NODUPLICATES and
+ ASSOCIATIVE, but gnu uses ANY and
+ SAME_SIZE. Unfortunately, gnu doesn't do
+ the comdat symbols right. So, until we can
+ fix it to do the right thing, we are
+ temporarily disabling comdats for the MS
+ types (they're used in DLLs and C++, but we
+ don't support *their* C++ libraries anyway
+ - DJ. */
+
+ /* Cygwin does not follow the MS style, and
+ uses ANY and SAME_SIZE where NODUPLICATES
+ and ASSOCIATIVE should be used. For
+ Interix, we just do the right thing up
+ front. */
+
+ switch (aux.x_scn.x_comdat)
+ {
+ case IMAGE_COMDAT_SELECT_NODUPLICATES:
#ifdef STRICT_PE_FORMAT
- *sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+ sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
#else
- *sec_flags &= ~SEC_LINK_ONCE;
+ sec_flags &= ~SEC_LINK_ONCE;
#endif
break;
case IMAGE_COMDAT_SELECT_ANY:
- *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
break;
case IMAGE_COMDAT_SELECT_SAME_SIZE:
- *sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
+ sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
break;
case IMAGE_COMDAT_SELECT_EXACT_MATCH:
/* Not yet fully implemented ??? */
- *sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
+ sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
break;
/* debug$S gets this case; other
@@ -1061,24 +1014,42 @@ handle_COMDAT (bfd * abfd,
case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
#ifdef STRICT_PE_FORMAT
/* FIXME: This is not currently implemented. */
- *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
#else
- *sec_flags &= ~SEC_LINK_ONCE;
+ sec_flags &= ~SEC_LINK_ONCE;
#endif
break;
default: /* 0 means "no symbol" */
/* debug$F gets this case; other
implications ??? */
- *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
break;
- }
- }
- break;
+ }
- case 2:
+ *slot = bfd_zmalloc (sizeof (struct comdat_hash_entry));
+ if (*slot == NULL)
+ return false;
+ struct comdat_hash_entry *newentry = *slot;
+ newentry->sec_flags = sec_flags;
+ newentry->symname = bfd_strdup (symname);
+ newentry->target_index = isym.n_scnum;
+ newentry->isym = isym;
+ newentry->comdat_symbol = -1;
+ }
+ else
+ {
+ struct comdat_hash_entry *entry = *slot;
+
+ if (entry->comdat_symbol != -1)
+ continue;
+
+ char *target_name = strchr (entry->symname, '$');
+ if (target_name != NULL)
+ {
/* Gas mode: the first matching on partial name. */
+ target_name += 1;
#ifndef TARGET_UNDERSCORE
#define TARGET_UNDERSCORE 0
#endif
@@ -1089,16 +1060,26 @@ handle_COMDAT (bfd * abfd,
/* Not the name we're looking for */
continue;
}
- /* Fall through. */
- case 1:
+ }
/* MSVC mode: the lexically second symbol (or
drop through from the above). */
- {
/* This must the second symbol with the
section #. It is the actual symbol name.
Intel puts the two adjacent, but Alpha (at
least) spreads them out. */
+ entry->comdat_symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+ entry->comdat_name = bfd_strdup (symname);
+ }
+ }
+
+ return true;
+}
+
+static bool
+insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname,
+ long symbol)
+{
struct coff_comdat_info *comdat;
size_t len = strlen (symname) + 1;
@@ -1107,16 +1088,76 @@ handle_COMDAT (bfd * abfd,
return false;
coff_section_data (abfd, section)->comdat = comdat;
- comdat->symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+ comdat->symbol = symbol;
char *newname = (char *) (comdat + 1);
comdat->name = newname;
memcpy (newname, symname, len);
return true;
- }
- }
+}
+
+static bool
+handle_COMDAT (bfd *abfd, flagword *sec_flags, const char *name,
+ asection *section)
+{
+ if (htab_elements (pe_data (abfd)->comdat_hash) == 0)
+ if (! fill_comdat_hash (abfd))
+ return false;
+
+ struct comdat_hash_entry *found
+ = find_flags (pe_data (abfd)->comdat_hash, section->target_index);
+ if (found != NULL)
+ {
+
+ struct internal_syment isym = found->isym;
+
+ /* If it isn't the stuff we're expecting, die;
+ The MS documentation is vague, but it
+ appears that the second entry serves BOTH
+ as the comdat symbol and the defining
+ symbol record (either C_STAT or C_EXT,
+ possibly with an aux entry with debug
+ information if it's a function.) It
+ appears the only way to find the second one
+ is to count. (On Intel, they appear to be
+ adjacent, but on Alpha, they have been
+ found separated.)
+
+ Here, we think we've found the first one,
+ but there's some checking we can do to be
+ sure. */
+
+ if (! ((isym.n_sclass == C_STAT || isym.n_sclass == C_EXT)
+ && BTYPE (isym.n_type) == T_NULL && isym.n_value == 0))
+ {
+ /* Malformed input files can trigger this test.
+ cf PR 21781. */
+ _bfd_error_handler (
+ _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd,
+ found->symname);
+ return false;
}
- }
+ /* FIXME LATER: MSVC generates section names
+ like .text for comdats. Gas generates
+ names like .text$foo__Fv (in the case of a
+ function). See comment above for more. */
+
+ if (isym.n_sclass == C_STAT && strcmp (name, found->symname) != 0)
+ /* xgettext:c-format */
+ _bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'"
+ " does not match section name '%s'"),
+ abfd, found->symname, name);
+
+ if (found->comdat_symbol != -1)
+ {
+ if (! insert_coff_comdat_info (abfd, section, found->comdat_name,
+ found->comdat_symbol))
+ return false;
+ }
+ *sec_flags = *sec_flags | found->sec_flags;
+ return true;
+ }
+ *sec_flags = *sec_flags | SEC_LINK_ONCE;
return true;
}
@@ -1268,7 +1309,7 @@ styp_to_sec_flags (bfd *abfd,
break;
case IMAGE_SCN_LNK_COMDAT:
/* COMDAT gets very special treatment. */
- if (!handle_COMDAT (abfd, &sec_flags, hdr, name, section))
+ if (!handle_COMDAT (abfd, &sec_flags, name, section))
result = false;
break;
default:
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index 1ec9a5185c..bf9633a2b3 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd)
htab_delete (td->section_by_index);
if (td->section_by_target_index)
htab_delete (td->section_by_target_index);
+ if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+ htab_delete (pe_data (abfd)->comdat_hash);
}
}
}
@@ -3292,6 +3294,12 @@ _bfd_coff_free_cached_info (bfd *abfd)
tdata->section_by_target_index = NULL;
}
+ if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+ {
+ htab_delete (pe_data (abfd)->comdat_hash);
+ pe_data (abfd)->comdat_hash = NULL;
+ }
+
_bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info);
_bfd_stab_cleanup (abfd, &tdata->line_info);
diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h
index 4e2203656d..eacfcb3ec3 100644
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -161,10 +161,22 @@ typedef struct pe_tdata
const char *style;
asection *sec;
} build_id;
+
+ htab_t comdat_hash;
} pe_data_type;
#define pe_data(bfd) ((bfd)->tdata.pe_obj_data)
+struct comdat_hash_entry
+{
+ int target_index;
+ struct internal_syment isym;
+ char *symname;
+ flagword sec_flags;
+ char *comdat_name;
+ long comdat_symbol;
+};
+
/* Tdata for XCOFF files. */
struct xcoff_tdata
diff --git a/bfd/peicode.h b/bfd/peicode.h
index 2cd020e699..5ac6b0dc53 100644
--- a/bfd/peicode.h
+++ b/bfd/peicode.h
@@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in)
#endif
}
+static hashval_t
+htab_hash_flags (const void *entry)
+{
+ const struct comdat_hash_entry *fe = entry;
+ return fe->target_index;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+ const struct comdat_hash_entry *fe1 = e1;
+ const struct comdat_hash_entry *fe2 = e2;
+ return fe1->target_index == fe2->target_index;
+}
+
static bool
pe_mkobject (bfd * abfd)
{
@@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd)
pe->dos_message[14] = 0x24;
pe->dos_message[15] = 0x0;
+ pe->comdat_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
+
memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr);
bfd_coff_long_section_names (abfd)
--
2.41.0.windows.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH V4] optimize handle_COMDAT
2023-08-23 21:59 [PATCH V4] optimize handle_COMDAT Oleg Tolmatcev
@ 2023-08-24 6:21 ` Alan Modra
0 siblings, 0 replies; 2+ messages in thread
From: Alan Modra @ 2023-08-24 6:21 UTC (permalink / raw)
To: Oleg Tolmatcev; +Cc: binutils
On Wed, Aug 23, 2023 at 11:59:59PM +0200, Oleg Tolmatcev via Binutils wrote:
> Thank you Alan, I could reproduce the bugs and I have fixed them.
> "make check" does not fail anymore.
Thanks, I have applied your patch with some formatting fixes.
--
Alan Modra
Australia Development Lab, IBM
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-08-24 6:21 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-23 21:59 [PATCH V4] optimize handle_COMDAT Oleg Tolmatcev
2023-08-24 6:21 ` Alan Modra
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).