Hello, I have improved my patch again. This time I read the MS COFF documentation, I kept all the checks, I successfully ran "make check" in WSL and I formatted the code with clang-format. I used the GNU style and tabs for indentation. I also simplified the patch. From ddc8c9c7511c7f506761a4354a48f09ed67db326 Mon Sep 17 00:00:00 2001 From: Oleg Tolmatcev Date: Sun, 18 Jun 2023 19:35:38 +0200 Subject: [PATCH] optimize handle_COMDAT Signed-off-by: Oleg Tolmatcev --- bfd/coffcode.h | 464 +++++++++++++++++++++++++---------------------- bfd/coffgen.c | 8 + bfd/libcoff-in.h | 12 ++ bfd/peicode.h | 17 ++ 4 files changed, 281 insertions(+), 220 deletions(-) diff --git a/bfd/coffcode.h b/bfd/coffcode.h index 62720255b7..6b0629bb93 100644 --- a/bfd/coffcode.h +++ b/bfd/coffcode.h @@ -352,6 +352,7 @@ CODE_FRAGMENT */ #include "libiberty.h" +#include #ifdef COFF_WITH_PE #include "peicode.h" @@ -852,19 +853,20 @@ styp_to_sec_flags (bfd *abfd, #else /* COFF_WITH_PE */ +static struct comdat_hash_entry * +find_flags (htab_t comdat_hash, int target_index) +{ + struct comdat_hash_entry needle; + needle.target_index = target_index; + + return htab_find (comdat_hash, &needle); +} + static bool -handle_COMDAT (bfd * abfd, - flagword *sec_flags, - void * hdr, - const char *name, - asection *section) +fill_comdat_hash (bfd *abfd, void *hdr) { - struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr; + struct internal_scnhdr *internal_s = (struct internal_scnhdr *)hdr; bfd_byte *esymstart, *esym, *esymend; - int seen_state = 0; - char *target_name = NULL; - - *sec_flags |= SEC_LINK_ONCE; /* Unfortunately, the PE format stores essential information in the symbol table, of all places. We need to extract that @@ -883,252 +885,274 @@ handle_COMDAT (bfd * abfd, doesn't seem to be a need to, either, and it would at best be rather messy. */ - if (! _bfd_coff_get_external_symbols (abfd)) + if (!_bfd_coff_get_external_symbols (abfd)) return true; - esymstart = esym = (bfd_byte *) obj_coff_external_syms (abfd); + esymstart = esym = (bfd_byte *)obj_coff_external_syms (abfd); esymend = esym + obj_raw_syment_count (abfd) * bfd_coff_symesz (abfd); - for (struct internal_syment isym; - esym < esymend; + for (struct internal_syment isym; esym < esymend; esym += (isym.n_numaux + 1) * bfd_coff_symesz (abfd)) { char buf[SYMNMLEN + 1]; const char *symname; + flagword sec_flags = SEC_LINK_ONCE; bfd_coff_swap_sym_in (abfd, esym, &isym); BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN); - if (isym.n_scnum == section->target_index) + /* According to the MSVC documentation, the first + TWO entries with the section # are both of + interest to us. The first one is the "section + symbol" (section name). The second is the comdat + symbol name. Here, we've found the first + qualifying entry; we distinguish it from the + second with a state flag. + + In the case of gas-generated (at least until that + is fixed) .o files, it isn't necessarily the + second one. It may be some other later symbol. + + Since gas also doesn't follow MS conventions and + emits the section similar to .text$, where + is the name we're looking for, we + distinguish the two as follows: + + If the section name is simply a section name (no + $) we presume it's MS-generated, and look at + precisely the second symbol for the comdat name. + If the section name has a $, we assume it's + gas-generated, and look for (whatever + follows the $) as the comdat symbol. */ + + /* All 3 branches use this. */ + symname = _bfd_coff_internal_syment_name (abfd, &isym, buf); + + /* PR 17512 file: 078-11867-0.004 */ + if (symname == NULL) { - /* According to the MSVC documentation, the first - TWO entries with the section # are both of - interest to us. The first one is the "section - symbol" (section name). The second is the comdat - symbol name. Here, we've found the first - qualifying entry; we distinguish it from the - second with a state flag. - - In the case of gas-generated (at least until that - is fixed) .o files, it isn't necessarily the - second one. It may be some other later symbol. - - Since gas also doesn't follow MS conventions and - emits the section similar to .text$, where - is the name we're looking for, we - distinguish the two as follows: - - If the section name is simply a section name (no - $) we presume it's MS-generated, and look at - precisely the second symbol for the comdat name. - If the section name has a $, we assume it's - gas-generated, and look for (whatever - follows the $) as the comdat symbol. */ - - /* All 3 branches use this. */ - symname = _bfd_coff_internal_syment_name (abfd, &isym, buf); - - /* PR 17512 file: 078-11867-0.004 */ - if (symname == NULL) - { - _bfd_error_handler (_("%pB: unable to load COMDAT section name"), - abfd); - return false; - } + _bfd_error_handler (_ ("%pB: unable to load COMDAT section name"), + abfd); + continue; + } - switch (seen_state) - { - case 0: - { - /* The first time we've seen the symbol. */ - union internal_auxent aux; - - /* If it isn't the stuff we're expecting, die; - The MS documentation is vague, but it - appears that the second entry serves BOTH - as the comdat symbol and the defining - symbol record (either C_STAT or C_EXT, - possibly with an aux entry with debug - information if it's a function.) It - appears the only way to find the second one - is to count. (On Intel, they appear to be - adjacent, but on Alpha, they have been - found separated.) - - Here, we think we've found the first one, - but there's some checking we can do to be - sure. */ - - if (! ((isym.n_sclass == C_STAT - || isym.n_sclass == C_EXT) - && BTYPE (isym.n_type) == T_NULL - && isym.n_value == 0)) - { - /* Malformed input files can trigger this test. - cf PR 21781. */ - _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT section"), - abfd, symname); - return false; - } + union internal_auxent aux; - /* FIXME LATER: MSVC generates section names - like .text for comdats. Gas generates - names like .text$foo__Fv (in the case of a - function). See comment above for more. */ - - if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0) - /* xgettext:c-format */ - _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'" - " does not match section name '%s'"), - abfd, symname, name); - - /* This is the section symbol. */ - seen_state = 1; - target_name = strchr (name, '$'); - if (target_name != NULL) - { - /* Gas mode. */ - seen_state = 2; - /* Skip the `$'. */ - target_name += 1; - } + bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)), isym.n_type, + isym.n_sclass, 0, isym.n_numaux, &aux); - if (isym.n_numaux == 0) - aux.x_scn.x_comdat = 0; - else - { - /* PR 17512: file: e2cfe54f. */ - if (esym + bfd_coff_symesz (abfd) >= esymend) - { - /* xgettext:c-format */ - _bfd_error_handler (_("%pB: warning: no symbol for" - " section '%s' found"), - abfd, symname); - break; - } - bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd), - isym.n_type, isym.n_sclass, - 0, isym.n_numaux, &aux); - } + struct comdat_hash_entry needle; + needle.target_index = isym.n_scnum; - /* FIXME: Microsoft uses NODUPLICATES and - ASSOCIATIVE, but gnu uses ANY and - SAME_SIZE. Unfortunately, gnu doesn't do - the comdat symbols right. So, until we can - fix it to do the right thing, we are - temporarily disabling comdats for the MS - types (they're used in DLLs and C++, but we - don't support *their* C++ libraries anyway - - DJ. */ - - /* Cygwin does not follow the MS style, and - uses ANY and SAME_SIZE where NODUPLICATES - and ASSOCIATIVE should be used. For - Interix, we just do the right thing up - front. */ - - switch (aux.x_scn.x_comdat) - { - case IMAGE_COMDAT_SELECT_NODUPLICATES: + void **slot + = htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT); + if (slot == NULL) + return false; + + if (*slot == NULL) + { + if (isym.n_numaux == 0) + aux.x_scn.x_comdat = 0; + + /* FIXME: Microsoft uses NODUPLICATES and + ASSOCIATIVE, but gnu uses ANY and + SAME_SIZE. Unfortunately, gnu doesn't do + the comdat symbols right. So, until we can + fix it to do the right thing, we are + temporarily disabling comdats for the MS + types (they're used in DLLs and C++, but we + don't support *their* C++ libraries anyway + - DJ. */ + + /* Cygwin does not follow the MS style, and + uses ANY and SAME_SIZE where NODUPLICATES + and ASSOCIATIVE should be used. For + Interix, we just do the right thing up + front. */ + + switch (aux.x_scn.x_comdat) + { + case IMAGE_COMDAT_SELECT_NODUPLICATES: #ifdef STRICT_PE_FORMAT - *sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY; + sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY; #else - *sec_flags &= ~SEC_LINK_ONCE; + sec_flags &= ~SEC_LINK_ONCE; #endif - break; + break; - case IMAGE_COMDAT_SELECT_ANY: - *sec_flags |= SEC_LINK_DUPLICATES_DISCARD; - break; + case IMAGE_COMDAT_SELECT_ANY: + sec_flags |= SEC_LINK_DUPLICATES_DISCARD; + break; - case IMAGE_COMDAT_SELECT_SAME_SIZE: - *sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE; - break; + case IMAGE_COMDAT_SELECT_SAME_SIZE: + sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE; + break; - case IMAGE_COMDAT_SELECT_EXACT_MATCH: - /* Not yet fully implemented ??? */ - *sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS; - break; + case IMAGE_COMDAT_SELECT_EXACT_MATCH: + /* Not yet fully implemented ??? */ + sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS; + break; - /* debug$S gets this case; other - implications ??? */ + /* debug$S gets this case; other + implications ??? */ - /* There may be no symbol... we'll search - the whole table... Is this the right - place to play this game? Or should we do - it when reading it in. */ - case IMAGE_COMDAT_SELECT_ASSOCIATIVE: + /* There may be no symbol... we'll search + the whole table... Is this the right + place to play this game? Or should we do + it when reading it in. */ + case IMAGE_COMDAT_SELECT_ASSOCIATIVE: #ifdef STRICT_PE_FORMAT - /* FIXME: This is not currently implemented. */ - *sec_flags |= SEC_LINK_DUPLICATES_DISCARD; + /* FIXME: This is not currently implemented. */ + sec_flags |= SEC_LINK_DUPLICATES_DISCARD; #else - *sec_flags &= ~SEC_LINK_ONCE; + sec_flags &= ~SEC_LINK_ONCE; #endif - break; + break; - default: /* 0 means "no symbol" */ - /* debug$F gets this case; other - implications ??? */ - *sec_flags |= SEC_LINK_DUPLICATES_DISCARD; - break; - } - } + default: /* 0 means "no symbol" */ + /* debug$F gets this case; other + implications ??? */ + sec_flags |= SEC_LINK_DUPLICATES_DISCARD; break; + } - case 2: + *slot = bfd_zmalloc (sizeof (struct comdat_hash_entry)); + if (*slot == NULL) + return false; + struct comdat_hash_entry *newentry = *slot; + newentry->sec_flags = sec_flags; + newentry->symname = bfd_strdup (symname); + newentry->target_index = isym.n_scnum; + newentry->isym = isym; + newentry->comdat_symbol = -1; + } + else + { + struct comdat_hash_entry *entry = *slot; + char *target_name = strchr (entry->symname, '$'); + if (target_name != NULL) + { /* Gas mode: the first matching on partial name. */ - + target_name += 1; #ifndef TARGET_UNDERSCORE #define TARGET_UNDERSCORE 0 #endif /* Is this the name we're looking for ? */ - if (strcmp (target_name, - symname + (TARGET_UNDERSCORE ? 1 : 0)) != 0) - { - /* Not the name we're looking for */ - continue; - } - /* Fall through. */ - case 1: - /* MSVC mode: the lexically second symbol (or - drop through from the above). */ - { - /* This must the second symbol with the - section #. It is the actual symbol name. - Intel puts the two adjacent, but Alpha (at - least) spreads them out. */ + if (strcmp (target_name, symname + (TARGET_UNDERSCORE ? 1 : 0)) + != 0) + /* Not the name we're looking for */ + continue; + } + /* MSVC mode: the lexically second symbol (or + drop through from the above). */ + /* This must the second symbol with the + section #. It is the actual symbol name. + Intel puts the two adjacent, but Alpha (at + least) spreads them out. */ + + entry->comdat_symbol = (esym - esymstart) / bfd_coff_symesz (abfd); + entry->comdat_name = bfd_strdup (symname); + } + } - struct coff_comdat_info *comdat; - size_t len = strlen (symname) + 1; + return true; +} - comdat = bfd_alloc (abfd, sizeof (*comdat) + len); - if (comdat == NULL) - return false; +static bool +insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname, + long symbol) +{ + struct coff_comdat_info *comdat; + size_t len = strlen (symname) + 1; - coff_section_data (abfd, section)->comdat = comdat; - comdat->symbol = (esym - esymstart) / bfd_coff_symesz (abfd); - char *newname = (char *) (comdat + 1); - comdat->name = newname; - memcpy (newname, symname, len); - return true; - } - } + comdat = bfd_alloc (abfd, sizeof (*comdat) + len); + if (comdat == NULL) + return false; + + coff_section_data (abfd, section)->comdat = comdat; + comdat->symbol = symbol; + char *newname = (char *)(comdat + 1); + comdat->name = newname; + memcpy (newname, symname, len); + return true; +} + +static bool +handle_COMDAT (bfd *abfd, flagword *sec_flags, void *hdr, const char *name, + asection *section) +{ + if (htab_elements (pe_data (abfd)->comdat_hash) == 0) + if (!fill_comdat_hash (abfd, hdr)) + return false; + + struct comdat_hash_entry *found + = find_flags (pe_data (abfd)->comdat_hash, section->target_index); + if (found != NULL) + { + + struct internal_syment isym = found->isym; + + /* If it isn't the stuff we're expecting, die; + The MS documentation is vague, but it + appears that the second entry serves BOTH + as the comdat symbol and the defining + symbol record (either C_STAT or C_EXT, + possibly with an aux entry with debug + information if it's a function.) It + appears the only way to find the second one + is to count. (On Intel, they appear to be + adjacent, but on Alpha, they have been + found separated.) + + Here, we think we've found the first one, + but there's some checking we can do to be + sure. */ + + if (!((isym.n_sclass == C_STAT || isym.n_sclass == C_EXT) + && BTYPE (isym.n_type) == T_NULL && isym.n_value == 0)) + { + /* Malformed input files can trigger this test. + cf PR 21781. */ + _bfd_error_handler ( + _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd, + found->symname); + return false; } - } + /* FIXME LATER: MSVC generates section names + like .text for comdats. Gas generates + names like .text$foo__Fv (in the case of a + function). See comment above for more. */ + + if (isym.n_sclass == C_STAT && strcmp (name, found->symname) != 0) + /* xgettext:c-format */ + _bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'" + " does not match section name '%s'"), + abfd, found->symname, name); + + if (found->comdat_symbol != -1) + { + if (!insert_coff_comdat_info (abfd, section, found->comdat_name, + found->comdat_symbol)) + return false; + } + *sec_flags = *sec_flags | found->sec_flags; + return true; + } + *sec_flags = *sec_flags | SEC_LINK_ONCE; return true; } -/* The PE version; see above for the general comments. + /* The PE version; see above for the general comments. - Since to set the SEC_LINK_ONCE and associated flags, we have to - look at the symbol table anyway, we return the symbol table index - of the symbol being used as the COMDAT symbol. This is admittedly - ugly, but there's really nowhere else that we have access to the - required information. FIXME: Is the COMDAT symbol index used for - any purpose other than objdump? */ + Since to set the SEC_LINK_ONCE and associated flags, we have to + look at the symbol table anyway, we return the symbol table index + of the symbol being used as the COMDAT symbol. This is admittedly + ugly, but there's really nowhere else that we have access to the + required information. FIXME: Is the COMDAT symbol index used for + any purpose other than objdump? */ static bool styp_to_sec_flags (bfd *abfd, @@ -1136,25 +1160,25 @@ styp_to_sec_flags (bfd *abfd, const char *name, asection *section, flagword *flags_ptr) -{ - struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr; - unsigned long styp_flags = internal_s->s_flags; - flagword sec_flags; - bool result = true; - bool is_dbg = false; + { + struct internal_scnhdr *internal_s = (struct internal_scnhdr *)hdr; + unsigned long styp_flags = internal_s->s_flags; + flagword sec_flags; + bool result = true; + bool is_dbg = false; if (startswith (name, DOT_DEBUG) || startswith (name, DOT_ZDEBUG) #ifdef COFF_LONG_SECTION_NAMES - || startswith (name, GNU_LINKONCE_WI) - || startswith (name, GNU_LINKONCE_WT) + || startswith (name, GNU_LINKONCE_WI) + || startswith (name, GNU_LINKONCE_WT) /* FIXME: These definitions ought to be in a header file. */ #define GNU_DEBUGLINK ".gnu_debuglink" #define GNU_DEBUGALTLINK ".gnu_debugaltlink" - || startswith (name, GNU_DEBUGLINK) - || startswith (name, GNU_DEBUGALTLINK) + || startswith (name, GNU_DEBUGLINK) + || startswith (name, GNU_DEBUGALTLINK) #endif - || startswith (name, ".stab")) + || startswith (name, ".stab")) is_dbg = true; /* Assume read only unless IMAGE_SCN_MEM_WRITE is specified. */ sec_flags = SEC_READONLY; diff --git a/bfd/coffgen.c b/bfd/coffgen.c index 9d45253178..72e29907c8 100644 --- a/bfd/coffgen.c +++ b/bfd/coffgen.c @@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd) htab_delete (td->section_by_index); if (td->section_by_target_index) htab_delete (td->section_by_target_index); + if (obj_pe (abfd) && pe_data (abfd)->comdat_hash) + htab_delete (pe_data (abfd)->comdat_hash); } } } @@ -3296,6 +3298,12 @@ _bfd_coff_free_cached_info (bfd *abfd) tdata->section_by_target_index = NULL; } + if (obj_pe (abfd) && pe_data (abfd)->comdat_hash) + { + htab_delete (pe_data (abfd)->comdat_hash); + pe_data (abfd)->comdat_hash = NULL; + } + _bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info); _bfd_stab_cleanup (abfd, &tdata->line_info); diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h index 4e2203656d..eacfcb3ec3 100644 --- a/bfd/libcoff-in.h +++ b/bfd/libcoff-in.h @@ -161,10 +161,22 @@ typedef struct pe_tdata const char *style; asection *sec; } build_id; + + htab_t comdat_hash; } pe_data_type; #define pe_data(bfd) ((bfd)->tdata.pe_obj_data) +struct comdat_hash_entry +{ + int target_index; + struct internal_syment isym; + char *symname; + flagword sec_flags; + char *comdat_name; + long comdat_symbol; +}; + /* Tdata for XCOFF files. */ struct xcoff_tdata diff --git a/bfd/peicode.h b/bfd/peicode.h index e2e2be65b5..f5bb5e4310 100644 --- a/bfd/peicode.h +++ b/bfd/peicode.h @@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in) #endif } +static hashval_t +htab_hash_flags (const void *entry) +{ + const struct comdat_hash_entry *fe = entry; + return fe->target_index; +} + +static int +htab_eq_flags (const void *e1, const void *e2) +{ + const struct comdat_hash_entry *fe1 = e1; + const struct comdat_hash_entry *fe2 = e2; + return fe1->target_index == fe2->target_index; +} + static bool pe_mkobject (bfd * abfd) { @@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd) pe->dos_message[14] = 0x24; pe->dos_message[15] = 0x0; + pe->comdat_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL); + memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr); bfd_coff_long_section_names (abfd) -- 2.41.0.windows.1