From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1062) id 564433857C41; Thu, 24 Aug 2023 06:24:53 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 564433857C41 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Alan Modra To: bfd-cvs@sourceware.org Subject: [binutils-gdb] optimize handle_COMDAT X-Act-Checkin: binutils-gdb X-Git-Author: Oleg Tolmatcev X-Git-Refname: refs/heads/master X-Git-Oldrev: fb9b7fbf17f50fcfabf6e3d7d06a93e1f17c52b7 X-Git-Newrev: 6aadf8a04d162feb2afe3c41f5b36534d661d447 Message-Id: <20230824062453.564433857C41@sourceware.org> Date: Thu, 24 Aug 2023 06:24:53 +0000 (GMT) X-BeenThere: binutils-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Binutils-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 24 Aug 2023 06:24:53 -0000 https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3D6aadf8a04d16= 2feb2afe3c41f5b36534d661d447 commit 6aadf8a04d162feb2afe3c41f5b36534d661d447 Author: Oleg Tolmatcev Date: Sun Jun 18 19:35:38 2023 +0200 optimize handle_COMDAT =20 Signed-off-by: Oleg Tolmatcev Diff: --- bfd/coffcode.h | 427 ++++++++++++++++++++++++++++++---------------------= ---- bfd/coffgen.c | 8 ++ bfd/libcoff-in.h | 12 ++ bfd/libcoff.h | 12 ++ bfd/peicode.h | 17 +++ 5 files changed, 283 insertions(+), 193 deletions(-) diff --git a/bfd/coffcode.h b/bfd/coffcode.h index 6789f7f3cc6..e3f4afd389f 100644 --- a/bfd/coffcode.h +++ b/bfd/coffcode.h @@ -352,6 +352,7 @@ CODE_FRAGMENT */ =20 #include "libiberty.h" +#include =20 #ifdef COFF_WITH_PE #include "peicode.h" @@ -852,19 +853,19 @@ styp_to_sec_flags (bfd *abfd, =20 #else /* COFF_WITH_PE */ =20 +static struct comdat_hash_entry * +find_flags (htab_t comdat_hash, int target_index) +{ + struct comdat_hash_entry needle; + needle.target_index =3D target_index; + + return htab_find (comdat_hash, &needle); +} + static bool -handle_COMDAT (bfd * abfd, - flagword *sec_flags, - void * hdr, - const char *name, - asection *section) +fill_comdat_hash (bfd *abfd) { - struct internal_scnhdr *internal_s =3D (struct internal_scnhdr *) hdr; bfd_byte *esymstart, *esym, *esymend; - int seen_state =3D 0; - char *target_name =3D NULL; - - *sec_flags |=3D SEC_LINK_ONCE; =20 /* Unfortunately, the PE format stores essential information in the symbol table, of all places. We need to extract that @@ -895,190 +896,160 @@ handle_COMDAT (bfd * abfd, { char buf[SYMNMLEN + 1]; const char *symname; + flagword sec_flags =3D SEC_LINK_ONCE; =20 bfd_coff_swap_sym_in (abfd, esym, &isym); =20 - BFD_ASSERT (sizeof (internal_s->s_name) <=3D SYMNMLEN); - - if (isym.n_scnum =3D=3D section->target_index) + /* According to the MSVC documentation, the first + TWO entries with the section # are both of + interest to us. The first one is the "section + symbol" (section name). The second is the comdat + symbol name. Here, we've found the first + qualifying entry; we distinguish it from the + second with a state flag. + + In the case of gas-generated (at least until that + is fixed) .o files, it isn't necessarily the + second one. It may be some other later symbol. + + Since gas also doesn't follow MS conventions and + emits the section similar to .text$, where + is the name we're looking for, we + distinguish the two as follows: + + If the section name is simply a section name (no + $) we presume it's MS-generated, and look at + precisely the second symbol for the comdat name. + If the section name has a $, we assume it's + gas-generated, and look for (whatever + follows the $) as the comdat symbol. */ + + /* All 3 branches use this. */ + symname =3D _bfd_coff_internal_syment_name (abfd, &isym, buf); + + /* PR 17512 file: 078-11867-0.004 */ + if (symname =3D=3D NULL) { - /* According to the MSVC documentation, the first - TWO entries with the section # are both of - interest to us. The first one is the "section - symbol" (section name). The second is the comdat - symbol name. Here, we've found the first - qualifying entry; we distinguish it from the - second with a state flag. - - In the case of gas-generated (at least until that - is fixed) .o files, it isn't necessarily the - second one. It may be some other later symbol. - - Since gas also doesn't follow MS conventions and - emits the section similar to .text$, where - is the name we're looking for, we - distinguish the two as follows: - - If the section name is simply a section name (no - $) we presume it's MS-generated, and look at - precisely the second symbol for the comdat name. - If the section name has a $, we assume it's - gas-generated, and look for (whatever - follows the $) as the comdat symbol. */ - - /* All 3 branches use this. */ - symname =3D _bfd_coff_internal_syment_name (abfd, &isym, buf); - - /* PR 17512 file: 078-11867-0.004 */ - if (symname =3D=3D NULL) - { - _bfd_error_handler (_("%pB: unable to load COMDAT section name"), - abfd); - return false; - } + _bfd_error_handler (_("%pB: unable to load COMDAT section name"), + abfd); + continue; + } =20 - switch (seen_state) - { - case 0: - { - /* The first time we've seen the symbol. */ - union internal_auxent aux; - - /* If it isn't the stuff we're expecting, die; - The MS documentation is vague, but it - appears that the second entry serves BOTH - as the comdat symbol and the defining - symbol record (either C_STAT or C_EXT, - possibly with an aux entry with debug - information if it's a function.) It - appears the only way to find the second one - is to count. (On Intel, they appear to be - adjacent, but on Alpha, they have been - found separated.) - - Here, we think we've found the first one, - but there's some checking we can do to be - sure. */ - - if (! ((isym.n_sclass =3D=3D C_STAT - || isym.n_sclass =3D=3D C_EXT) - && BTYPE (isym.n_type) =3D=3D T_NULL - && isym.n_value =3D=3D 0)) - { - /* Malformed input files can trigger this test. - cf PR 21781. */ - _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT = section"), - abfd, symname); - return false; - } + union internal_auxent aux; =20 - /* FIXME LATER: MSVC generates section names - like .text for comdats. Gas generates - names like .text$foo__Fv (in the case of a - function). See comment above for more. */ + struct comdat_hash_entry needle; + needle.target_index =3D isym.n_scnum; =20 - if (isym.n_sclass =3D=3D C_STAT && strcmp (name, symname) !=3D 0) - /* xgettext:c-format */ - _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'" - " does not match section name '%s'"), - abfd, symname, name); - - /* This is the section symbol. */ - seen_state =3D 1; - target_name =3D strchr (name, '$'); - if (target_name !=3D NULL) - { - /* Gas mode. */ - seen_state =3D 2; - /* Skip the `$'. */ - target_name +=3D 1; - } + void **slot + =3D htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT); + if (slot =3D=3D NULL) + return false; =20 - if (isym.n_numaux =3D=3D 0) - aux.x_scn.x_comdat =3D 0; - else - { - /* PR 17512: file: e2cfe54f. */ - if (esym + bfd_coff_symesz (abfd) >=3D esymend) - { - /* xgettext:c-format */ - _bfd_error_handler (_("%pB: warning: no symbol for" - " section '%s' found"), - abfd, symname); - break; - } - bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd), - isym.n_type, isym.n_sclass, - 0, isym.n_numaux, &aux); - } + if (*slot =3D=3D NULL) + { + if (isym.n_numaux =3D=3D 0) + aux.x_scn.x_comdat =3D 0; + else + { + /* PR 17512: file: e2cfe54f. */ + if (esym + bfd_coff_symesz (abfd) >=3D esymend) + { + /* xgettext:c-format */ + _bfd_error_handler (_ ("%pB: warning: no symbol for" + " section '%s' found"), + abfd, symname); + continue; + } + bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)), + isym.n_type, isym.n_sclass, 0, + isym.n_numaux, &aux); + } =20 - /* FIXME: Microsoft uses NODUPLICATES and - ASSOCIATIVE, but gnu uses ANY and - SAME_SIZE. Unfortunately, gnu doesn't do - the comdat symbols right. So, until we can - fix it to do the right thing, we are - temporarily disabling comdats for the MS - types (they're used in DLLs and C++, but we - don't support *their* C++ libraries anyway - - DJ. */ - - /* Cygwin does not follow the MS style, and - uses ANY and SAME_SIZE where NODUPLICATES - and ASSOCIATIVE should be used. For - Interix, we just do the right thing up - front. */ - - switch (aux.x_scn.x_comdat) - { - case IMAGE_COMDAT_SELECT_NODUPLICATES: + /* FIXME: Microsoft uses NODUPLICATES and + ASSOCIATIVE, but gnu uses ANY and + SAME_SIZE. Unfortunately, gnu doesn't do + the comdat symbols right. So, until we can + fix it to do the right thing, we are + temporarily disabling comdats for the MS + types (they're used in DLLs and C++, but we + don't support *their* C++ libraries anyway + - DJ. */ + + /* Cygwin does not follow the MS style, and + uses ANY and SAME_SIZE where NODUPLICATES + and ASSOCIATIVE should be used. For + Interix, we just do the right thing up + front. */ + + switch (aux.x_scn.x_comdat) + { + case IMAGE_COMDAT_SELECT_NODUPLICATES: #ifdef STRICT_PE_FORMAT - *sec_flags |=3D SEC_LINK_DUPLICATES_ONE_ONLY; + sec_flags |=3D SEC_LINK_DUPLICATES_ONE_ONLY; #else - *sec_flags &=3D ~SEC_LINK_ONCE; + sec_flags &=3D ~SEC_LINK_ONCE; #endif - break; + break; =20 - case IMAGE_COMDAT_SELECT_ANY: - *sec_flags |=3D SEC_LINK_DUPLICATES_DISCARD; - break; + case IMAGE_COMDAT_SELECT_ANY: + sec_flags |=3D SEC_LINK_DUPLICATES_DISCARD; + break; =20 - case IMAGE_COMDAT_SELECT_SAME_SIZE: - *sec_flags |=3D SEC_LINK_DUPLICATES_SAME_SIZE; - break; + case IMAGE_COMDAT_SELECT_SAME_SIZE: + sec_flags |=3D SEC_LINK_DUPLICATES_SAME_SIZE; + break; =20 - case IMAGE_COMDAT_SELECT_EXACT_MATCH: - /* Not yet fully implemented ??? */ - *sec_flags |=3D SEC_LINK_DUPLICATES_SAME_CONTENTS; - break; + case IMAGE_COMDAT_SELECT_EXACT_MATCH: + /* Not yet fully implemented ??? */ + sec_flags |=3D SEC_LINK_DUPLICATES_SAME_CONTENTS; + break; =20 - /* debug$S gets this case; other - implications ??? */ + /* debug$S gets this case; other + implications ??? */ =20 - /* There may be no symbol... we'll search - the whole table... Is this the right - place to play this game? Or should we do - it when reading it in. */ - case IMAGE_COMDAT_SELECT_ASSOCIATIVE: + /* There may be no symbol... we'll search + the whole table... Is this the right + place to play this game? Or should we do + it when reading it in. */ + case IMAGE_COMDAT_SELECT_ASSOCIATIVE: #ifdef STRICT_PE_FORMAT - /* FIXME: This is not currently implemented. */ - *sec_flags |=3D SEC_LINK_DUPLICATES_DISCARD; + /* FIXME: This is not currently implemented. */ + sec_flags |=3D SEC_LINK_DUPLICATES_DISCARD; #else - *sec_flags &=3D ~SEC_LINK_ONCE; + sec_flags &=3D ~SEC_LINK_ONCE; #endif - break; + break; =20 - default: /* 0 means "no symbol" */ - /* debug$F gets this case; other - implications ??? */ - *sec_flags |=3D SEC_LINK_DUPLICATES_DISCARD; - break; - } - } + default: /* 0 means "no symbol" */ + /* debug$F gets this case; other + implications ??? */ + sec_flags |=3D SEC_LINK_DUPLICATES_DISCARD; break; + } + + *slot =3D bfd_zmalloc (sizeof (struct comdat_hash_entry)); + if (*slot =3D=3D NULL) + return false; + struct comdat_hash_entry *newentry =3D *slot; + newentry->sec_flags =3D sec_flags; + newentry->symname =3D bfd_strdup (symname); + newentry->target_index =3D isym.n_scnum; + newentry->isym =3D isym; + newentry->comdat_symbol =3D -1; + } + else + { + struct comdat_hash_entry *entry =3D *slot; =20 - case 2: + if (entry->comdat_symbol !=3D -1) + continue; + + char *target_name =3D strchr (entry->symname, '$'); + if (target_name !=3D NULL) + { /* Gas mode: the first matching on partial name. */ =20 + target_name +=3D 1; #ifndef TARGET_UNDERSCORE #define TARGET_UNDERSCORE 0 #endif @@ -1089,34 +1060,104 @@ handle_COMDAT (bfd * abfd, /* Not the name we're looking for */ continue; } - /* Fall through. */ - case 1: - /* MSVC mode: the lexically second symbol (or - drop through from the above). */ - { - /* This must the second symbol with the - section #. It is the actual symbol name. - Intel puts the two adjacent, but Alpha (at - least) spreads them out. */ + } + /* MSVC mode: the lexically second symbol (or + drop through from the above). */ + /* This must the second symbol with the + section #. It is the actual symbol name. + Intel puts the two adjacent, but Alpha (at + least) spreads them out. */ + + entry->comdat_symbol =3D (esym - esymstart) / bfd_coff_symesz (abfd); + entry->comdat_name =3D bfd_strdup (symname); + } + } =20 - struct coff_comdat_info *comdat; - size_t len =3D strlen (symname) + 1; + return true; +} =20 - comdat =3D bfd_alloc (abfd, sizeof (*comdat) + len); - if (comdat =3D=3D NULL) - return false; +static bool +insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname, + long symbol) +{ + struct coff_comdat_info *comdat; + size_t len =3D strlen (symname) + 1; =20 - coff_section_data (abfd, section)->comdat =3D comdat; - comdat->symbol =3D (esym - esymstart) / bfd_coff_symesz (abfd); - char *newname =3D (char *) (comdat + 1); - comdat->name =3D newname; - memcpy (newname, symname, len); - return true; - } - } + comdat =3D bfd_alloc (abfd, sizeof (*comdat) + len); + if (comdat =3D=3D NULL) + return false; + + coff_section_data (abfd, section)->comdat =3D comdat; + comdat->symbol =3D symbol; + char *newname =3D (char *) (comdat + 1); + comdat->name =3D newname; + memcpy (newname, symname, len); + return true; +} + +static bool +handle_COMDAT (bfd *abfd, flagword *sec_flags, const char *name, + asection *section) +{ + if (htab_elements (pe_data (abfd)->comdat_hash) =3D=3D 0) + if (! fill_comdat_hash (abfd)) + return false; + + struct comdat_hash_entry *found + =3D find_flags (pe_data (abfd)->comdat_hash, section->target_index); + if (found !=3D NULL) + { + + struct internal_syment isym =3D found->isym; + + /* If it isn't the stuff we're expecting, die; + The MS documentation is vague, but it + appears that the second entry serves BOTH + as the comdat symbol and the defining + symbol record (either C_STAT or C_EXT, + possibly with an aux entry with debug + information if it's a function.) It + appears the only way to find the second one + is to count. (On Intel, they appear to be + adjacent, but on Alpha, they have been + found separated.) + + Here, we think we've found the first one, + but there's some checking we can do to be + sure. */ + + if (! ((isym.n_sclass =3D=3D C_STAT || isym.n_sclass =3D=3D C_EXT) + && BTYPE (isym.n_type) =3D=3D T_NULL && isym.n_value =3D=3D 0)) + { + /* Malformed input files can trigger this test. + cf PR 21781. */ + _bfd_error_handler ( + _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd, + found->symname); + return false; } - } =20 + /* FIXME LATER: MSVC generates section names + like .text for comdats. Gas generates + names like .text$foo__Fv (in the case of a + function). See comment above for more. */ + + if (isym.n_sclass =3D=3D C_STAT && strcmp (name, found->symname) != =3D 0) + /* xgettext:c-format */ + _bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'" + " does not match section name '%s'"), + abfd, found->symname, name); + + if (found->comdat_symbol !=3D -1) + { + if (! insert_coff_comdat_info (abfd, section, found->comdat_name, + found->comdat_symbol)) + return false; + } + *sec_flags =3D *sec_flags | found->sec_flags; + return true; + } + *sec_flags =3D *sec_flags | SEC_LINK_ONCE; return true; } =20 @@ -1268,7 +1309,7 @@ styp_to_sec_flags (bfd *abfd, break; case IMAGE_SCN_LNK_COMDAT: /* COMDAT gets very special treatment. */ - if (!handle_COMDAT (abfd, &sec_flags, hdr, name, section)) + if (!handle_COMDAT (abfd, &sec_flags, name, section)) result =3D false; break; default: diff --git a/bfd/coffgen.c b/bfd/coffgen.c index 1ec9a5185c7..bf9633a2b33 100644 --- a/bfd/coffgen.c +++ b/bfd/coffgen.c @@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd) htab_delete (td->section_by_index); if (td->section_by_target_index) htab_delete (td->section_by_target_index); + if (obj_pe (abfd) && pe_data (abfd)->comdat_hash) + htab_delete (pe_data (abfd)->comdat_hash); } } } @@ -3292,6 +3294,12 @@ _bfd_coff_free_cached_info (bfd *abfd) tdata->section_by_target_index =3D NULL; } =20 + if (obj_pe (abfd) && pe_data (abfd)->comdat_hash) + { + htab_delete (pe_data (abfd)->comdat_hash); + pe_data (abfd)->comdat_hash =3D NULL; + } + _bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info); _bfd_stab_cleanup (abfd, &tdata->line_info); =20 diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h index 4e2203656de..eacfcb3ec39 100644 --- a/bfd/libcoff-in.h +++ b/bfd/libcoff-in.h @@ -161,10 +161,22 @@ typedef struct pe_tdata const char *style; asection *sec; } build_id; + + htab_t comdat_hash; } pe_data_type; =20 #define pe_data(bfd) ((bfd)->tdata.pe_obj_data) =20 +struct comdat_hash_entry +{ + int target_index; + struct internal_syment isym; + char *symname; + flagword sec_flags; + char *comdat_name; + long comdat_symbol; +}; + /* Tdata for XCOFF files. */ =20 struct xcoff_tdata diff --git a/bfd/libcoff.h b/bfd/libcoff.h index b53c3117f50..ad6138e6e3c 100644 --- a/bfd/libcoff.h +++ b/bfd/libcoff.h @@ -165,10 +165,22 @@ typedef struct pe_tdata const char *style; asection *sec; } build_id; + + htab_t comdat_hash; } pe_data_type; =20 #define pe_data(bfd) ((bfd)->tdata.pe_obj_data) =20 +struct comdat_hash_entry +{ + int target_index; + struct internal_syment isym; + char *symname; + flagword sec_flags; + char *comdat_name; + long comdat_symbol; +}; + /* Tdata for XCOFF files. */ =20 struct xcoff_tdata diff --git a/bfd/peicode.h b/bfd/peicode.h index 2cd020e699f..5ac6b0dc53f 100644 --- a/bfd/peicode.h +++ b/bfd/peicode.h @@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in) #endif } =20 +static hashval_t +htab_hash_flags (const void *entry) +{ + const struct comdat_hash_entry *fe =3D entry; + return fe->target_index; +} + +static int +htab_eq_flags (const void *e1, const void *e2) +{ + const struct comdat_hash_entry *fe1 =3D e1; + const struct comdat_hash_entry *fe2 =3D e2; + return fe1->target_index =3D=3D fe2->target_index; +} + static bool pe_mkobject (bfd * abfd) { @@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd) pe->dos_message[14] =3D 0x24; pe->dos_message[15] =3D 0x0; =20 + pe->comdat_hash =3D htab_create (10, htab_hash_flags, htab_eq_flags, NUL= L); + memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr); =20 bfd_coff_long_section_names (abfd)