From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1039) id 8FF973858401; Wed, 3 Apr 2024 16:17:41 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8FF973858401 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1712161061; bh=l0pTXaPWD5226X6hI+kZfNyiYfXbDPfdgavOWWR3+hg=; h=From:To:Subject:Date:From; b=p1P/y3nRbcQFJIlI6v3XCUAXU5kn2dI+Wnqch7UsEjU3/aA/xJze++n/JqWKjM6I8 4INBrbrj4o6zXjmj6Om94iS4jtzzqNGMyGyjU0LH9Tur+8JKDUH1j5AJCXB4ovvmsT s4q8cmvBeI96OWcRRirNXbB6XqLCpGRnjAHwuRtQ= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: H.J. Lu To: binutils-cvs@sourceware.org Subject: [binutils-gdb] elf: Use mmap to map in symbol and relocation tables X-Act-Checkin: binutils-gdb X-Git-Author: H.J. Lu X-Git-Refname: refs/heads/master X-Git-Oldrev: 584b30e4b3538a64d38bc1da9cfa09fc9355378a X-Git-Newrev: c6291d749aece753ae8dcba0a5baff2fe9598541 Message-Id: <20240403161741.8FF973858401@sourceware.org> Date: Wed, 3 Apr 2024 16:17:41 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3Dc6291d749aece753ae8dcba0a5baff2fe9598541 commit c6291d749aece753ae8dcba0a5baff2fe9598541 Author: H.J. Lu Date: Sun Mar 3 08:44:01 2024 -0800 elf: Use mmap to map in symbol and relocation tables =20 Add _bfd_mmap_read_temporary to mmap in symbol tables and relocations whose sizes >=3D 4 * page size. For the final link, allocate an external relocation buffer of 4 * page size to avoid using mmap and munmap on smaller relocation sections. Since _bfd_mmap_read_temporary allocates buffer as needed, its callers don't need to. 
=20
    When mmap is used to map in all ELF sections, data to link the 3.5GB
    clang executable in LLVM 17 debug build on Linux/x86-64 with 32GB RAM
    is:
=20
                    stdio           mmap            improvement
    user            84.79           85.27           -0.5%
    system          10.95           9.09            17%
    total           97.91           94.90           3%
    page faults     4837944         4033778         17%
=20
    and data to link the 275M cc1plus executable in GCC 14 stage 1 build
    is:
=20
    user            5.31            5.33            -0.4%
    system          0.86            0.76            12%
    total           6.19            6.13            1%
    page faults     361273          322491          11%
=20
    * elf.c (bfd_elf_get_elf_syms): Don't allocate buffer for external
    symbol table. Replace bfd_read with _bfd_mmap_read_temporary.
    * elflink.c (elf_link_read_relocs_from_section): Add 2 arguments
    to return mmap memory address and size.
    (_bfd_elf_link_info_read_relocs): Don't allocate buffer for
    external relocation information. Replace bfd_read with
    _bfd_mmap_read_temporary.
    (bfd_elf_final_link): Cache external relocations up to
    _bfd_minimum_mmap_size bytes when mmap is used.
    * libbfd.c (_bfd_mmap_read_temporary): New.
    * libbfd-in.h (_bfd_mmap_read_temporary): Likewise.
    * libbfd.h: Regenerated.
Diff: --- bfd/elf.c | 31 +++++++++++++------------------ bfd/elflink.c | 50 +++++++++++++++++++++++++++----------------------- bfd/libbfd-in.h | 3 +++ bfd/libbfd.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ bfd/libbfd.h | 3 +++ 5 files changed, 98 insertions(+), 41 deletions(-) diff --git a/bfd/elf.c b/bfd/elf.c index 5d2996505f4..868abeccddb 100644 --- a/bfd/elf.c +++ b/bfd/elf.c @@ -460,19 +460,16 @@ bfd_elf_get_elf_syms (bfd *ibfd, goto out; } pos =3D symtab_hdr->sh_offset + symoffset * extsym_size; - if (extsym_buf =3D=3D NULL) - { - alloc_ext =3D bfd_malloc (amt); - extsym_buf =3D alloc_ext; - } - if (extsym_buf =3D=3D NULL - || bfd_seek (ibfd, pos, SEEK_SET) !=3D 0 - || bfd_read (extsym_buf, amt, ibfd) !=3D amt) + size_t alloc_ext_size =3D amt; + if (bfd_seek (ibfd, pos, SEEK_SET) !=3D 0 + || !_bfd_mmap_read_temporary (&extsym_buf, &alloc_ext_size, + &alloc_ext, ibfd, false)) { intsym_buf =3D NULL; goto out; } =20 + size_t alloc_extshndx_size =3D 0; if (shndx_hdr =3D=3D NULL || shndx_hdr->sh_size =3D=3D 0) extshndx_buf =3D NULL; else @@ -483,15 +480,13 @@ bfd_elf_get_elf_syms (bfd *ibfd, intsym_buf =3D NULL; goto out; } + alloc_extshndx_size =3D amt; pos =3D shndx_hdr->sh_offset + symoffset * sizeof (Elf_External_Sym_= Shndx); - if (extshndx_buf =3D=3D NULL) - { - alloc_extshndx =3D (Elf_External_Sym_Shndx *) bfd_malloc (amt); - extshndx_buf =3D alloc_extshndx; - } - if (extshndx_buf =3D=3D NULL - || bfd_seek (ibfd, pos, SEEK_SET) !=3D 0 - || bfd_read (extshndx_buf, amt, ibfd) !=3D amt) + if (bfd_seek (ibfd, pos, SEEK_SET) !=3D 0 + || !_bfd_mmap_read_temporary ((void **) &extshndx_buf, + &alloc_extshndx_size, + (void **) &alloc_extshndx, + ibfd, false)) { intsym_buf =3D NULL; goto out; @@ -530,8 +525,8 @@ bfd_elf_get_elf_syms (bfd *ibfd, } =20 out: - free (alloc_ext); - free (alloc_extshndx); + _bfd_munmap_readonly_temporary (alloc_ext, alloc_ext_size); + _bfd_munmap_readonly_temporary (alloc_extshndx, alloc_extshndx_size); =20 return 
intsym_buf; } diff --git a/bfd/elflink.c b/bfd/elflink.c index 2991e06fe46..7e9f4c7f0b0 100644 --- a/bfd/elflink.c +++ b/bfd/elflink.c @@ -2644,8 +2644,11 @@ _bfd_elf_link_assign_sym_version (struct elf_link_ha= sh_entry *h, void *data) may be either a REL or a RELA section. The relocations are translated into RELA relocations and stored in INTERNAL_RELOCS, which should have already been allocated to contain enough space. - The EXTERNAL_RELOCS are a buffer where the external form of the - relocations should be stored. + The *EXTERNAL_RELOCS_P are a buffer where the external form of the + relocations should be stored. If *EXTERNAL_RELOCS_ADDR is NULL, + *EXTERNAL_RELOCS_ADDR and *EXTERNAL_RELOCS_SIZE returns the mmap + memory address and size. Otherwise, *EXTERNAL_RELOCS_ADDR is + unchanged and *EXTERNAL_RELOCS_SIZE returns 0. =20 Returns FALSE if something goes wrong. */ =20 @@ -2653,7 +2656,8 @@ static bool elf_link_read_relocs_from_section (bfd *abfd, const asection *sec, Elf_Internal_Shdr *shdr, - void *external_relocs, + void **external_relocs_addr, + size_t *external_relocs_size, Elf_Internal_Rela *internal_relocs) { const struct elf_backend_data *bed; @@ -2663,13 +2667,17 @@ elf_link_read_relocs_from_section (bfd *abfd, Elf_Internal_Rela *irela; Elf_Internal_Shdr *symtab_hdr; size_t nsyms; + void *external_relocs =3D *external_relocs_addr; =20 /* Position ourselves at the start of the section. */ if (bfd_seek (abfd, shdr->sh_offset, SEEK_SET) !=3D 0) return false; =20 /* Read the relocations. 
*/ - if (bfd_read (external_relocs, shdr->sh_size, abfd) !=3D shdr->sh_size) + *external_relocs_size =3D shdr->sh_size; + if (!_bfd_mmap_read_temporary (&external_relocs, + external_relocs_size, + external_relocs_addr, abfd, true)) return false; =20 symtab_hdr =3D &elf_tdata (abfd)->symtab_hdr; @@ -2754,6 +2762,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd, bool keep_memory) { void *alloc1 =3D NULL; + size_t alloc1_size; Elf_Internal_Rela *alloc2 =3D NULL; const struct elf_backend_data *bed =3D get_elf_backend_data (abfd); struct bfd_elf_section_data *esdo =3D elf_section_data (o); @@ -2782,26 +2791,12 @@ _bfd_elf_link_info_read_relocs (bfd *abfd, goto error_return; } =20 - if (external_relocs =3D=3D NULL) - { - bfd_size_type size =3D 0; - - if (esdo->rel.hdr) - size +=3D esdo->rel.hdr->sh_size; - if (esdo->rela.hdr) - size +=3D esdo->rela.hdr->sh_size; - - alloc1 =3D bfd_malloc (size); - if (alloc1 =3D=3D NULL) - goto error_return; - external_relocs =3D alloc1; - } - + alloc1 =3D external_relocs; internal_rela_relocs =3D internal_relocs; if (esdo->rel.hdr) { if (!elf_link_read_relocs_from_section (abfd, o, esdo->rel.hdr, - external_relocs, + &alloc1, &alloc1_size, internal_relocs)) goto error_return; external_relocs =3D (((bfd_byte *) external_relocs) @@ -2812,7 +2807,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd, =20 if (esdo->rela.hdr && (!elf_link_read_relocs_from_section (abfd, o, esdo->rela.hdr, - external_relocs, + &alloc1, &alloc1_size, internal_rela_relocs))) goto error_return; =20 @@ -2820,7 +2815,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd, if (keep_memory) esdo->relocs =3D internal_relocs; =20 - free (alloc1); + _bfd_munmap_readonly_temporary (alloc1, alloc1_size); =20 /* Don't free alloc2, since if it was allocated we are passing it back (under the name of internal_relocs). 
*/ @@ -2828,7 +2823,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd, return internal_relocs; =20 error_return: - free (alloc1); + _bfd_munmap_readonly_temporary (alloc1, alloc1_size); if (alloc2 !=3D NULL) { if (keep_memory) @@ -12446,7 +12441,14 @@ bfd_elf_final_link (bfd *abfd, struct bfd_link_inf= o *info) section, so that we know the sizes of the reloc sections. We also figure out some maximum sizes. */ max_contents_size =3D 0; +#ifdef USE_MMAP + /* Mmap is used only if section size >=3D the minimum mmap section + size. max_external_reloc_size covers all relocation sections + smaller than the minimum mmap section size. */ + max_external_reloc_size =3D _bfd_minimum_mmap_size; +#else max_external_reloc_size =3D 0; +#endif max_internal_reloc_count =3D 0; max_sym_count =3D 0; max_sym_shndx_count =3D 0; @@ -12535,8 +12537,10 @@ bfd_elf_final_link (bfd *abfd, struct bfd_link_inf= o *info) if (esdi->rela.hdr !=3D NULL) ext_size +=3D esdi->rela.hdr->sh_size; =20 +#ifndef USE_MMAP if (ext_size > max_external_reloc_size) max_external_reloc_size =3D ext_size; +#endif if (sec->reloc_count > max_internal_reloc_count) max_internal_reloc_count =3D sec->reloc_count; } diff --git a/bfd/libbfd-in.h b/bfd/libbfd-in.h index c5a79cf932c..889b221a950 100644 --- a/bfd/libbfd-in.h +++ b/bfd/libbfd-in.h @@ -905,6 +905,9 @@ extern void _bfd_munmap_readonly_temporary #define _bfd_munmap_readonly_temporary(ptr, rsize) free (ptr) #endif =20 +extern bool _bfd_mmap_read_temporary + (void **, size_t *, void **, bfd *, bool) ATTRIBUTE_HIDDEN; + static inline void * _bfd_malloc_and_read (bfd *abfd, bfd_size_type asize, bfd_size_type rsize) { diff --git a/bfd/libbfd.c b/bfd/libbfd.c index e5147a29d69..869f0ed5c66 100644 --- a/bfd/libbfd.c +++ b/bfd/libbfd.c @@ -1174,6 +1174,58 @@ _bfd_mmap_readonly_persistent (bfd *abfd, size_t rsi= ze) } #endif =20 +/* Attempt to read *SIZE_P bytes from ABFD's iostream to *DATA_P. + Return true if the full the amount has been read. 
If *DATA_P is + NULL, mmap should be used, return the memory address at the + current offset in *DATA_P as well as return mmap address and size + in *MMAP_BASE and *SIZE_P. Otherwise, return NULL in *MMAP_BASE + and 0 in *SIZE_P. If FINAL_LINK is true, this is called from + elf_link_read_relocs_from_section. */ + +bool +_bfd_mmap_read_temporary (void **data_p, size_t *size_p, + void **mmap_base, bfd *abfd, + bool final_link ATTRIBUTE_UNUSED) +{ + void *data =3D *data_p; + size_t size =3D *size_p; + +#ifdef USE_MMAP + /* NB: When FINAL_LINK is true, the size of the preallocated buffer + is _bfd_minimum_mmap_size and use mmap if the data size >=3D + _bfd_minimum_mmap_size. Otherwise, use mmap if ABFD isn't an IR + input or the data size >=3D _bfd_minimum_mmap_size. */ + bool use_mmmap; + bool mmap_size =3D size >=3D _bfd_minimum_mmap_size; + if (final_link) + use_mmmap =3D mmap_size; + else + use_mmmap =3D (mmap_size + && data =3D=3D NULL + && (abfd->flags & BFD_PLUGIN) =3D=3D 0); + if (use_mmmap) + { + data =3D _bfd_mmap_readonly_temporary (abfd, size, mmap_base, + size_p); + if (data =3D=3D NULL || data =3D=3D MAP_FAILED) + abort (); + *data_p =3D data; + return true; + } +#endif + + if (data =3D=3D NULL) + { + data =3D bfd_malloc (size); + if (data =3D=3D NULL) + return false; + *data_p =3D data; + } + *mmap_base =3D NULL; + *size_p =3D 0; + return bfd_read (data, size, abfd) =3D=3D size; +} + /* Default implementation */ =20 bool diff --git a/bfd/libbfd.h b/bfd/libbfd.h index 0caf0f39e61..0676f461382 100644 --- a/bfd/libbfd.h +++ b/bfd/libbfd.h @@ -911,6 +911,9 @@ extern void _bfd_munmap_readonly_temporary #define _bfd_munmap_readonly_temporary(ptr, rsize) free (ptr) #endif =20 +extern bool _bfd_mmap_read_temporary + (void **, size_t *, void **, bfd *, bool) ATTRIBUTE_HIDDEN; + static inline void * _bfd_malloc_and_read (bfd *abfd, bfd_size_type asize, bfd_size_type rsize) {