Subject: [PATCH 1/3] Faster string merging
From: Michael Matz @ 2023-01-18 17:31 UTC
  To: binutils

* use power-of-two hash table
* use better hash function (hashing 32bits at once and with better
  mixing characteristics)
* use input-offset-to-entry maps instead of retaining full input
  contents for later lookups
* don't reread SEC_MERGE sections multiple times
* mind cache behaviour in the hot lookup routine

The overall effect is less time spent in libz (SEC_MERGE sections are no
longer read and decompressed multiple times) and much faster string
merging itself.  On a debug-info-enabled cc1, at the time of this writing
and on the machine I used, this went from 14400 perf samples to 9300 perf
samples, or from 3.7 seconds to 2.4 seconds, i.e. an improvement of about
a third.
---
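For review: the hot relocation-time lookup that the input-offset-to-entry
maps enable looks roughly like the sketch below.  This is illustrative
only, not the code from the merge.c hunk further down; "translate",
"map_idx" and the parameter layout are made up for the example.

  #include <stdint.h>

  #define OFSDIV 32   /* Must match the OFSDIV used when building the maps.  */

  /* map_ofs[] holds the sorted input offsets of the recorded entries and
     is terminated by a sentinel larger than any real offset; map_idx[]
     holds the corresponding output offsets; ofstolowbound[] is indexed by
     offset / OFSDIV and jumps close to the right slot, so only a short
     linear scan remains.  */
  static uint64_t
  translate (uint32_t offset, const uint32_t *map_ofs,
             const uint64_t *map_idx, const unsigned int *ofstolowbound)
  {
    unsigned int lb = ofstolowbound[offset / OFSDIV];
    while (map_ofs[lb] <= offset)   /* The sentinel guarantees termination.  */
      lb++;
    lb--;                           /* Last entry starting at or before OFFSET.  */
    return map_idx[lb] + (offset - map_ofs[lb]);
  }

This keeps a lookup down to one small array access plus a few sequential
reads, which is where the cache friendliness mentioned above comes from.
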
 bfd/config.in    |  15 +
 bfd/configure    | 226 ++++++++++++++
 bfd/configure.ac |   2 +
 bfd/elflink.c    |   7 +
 bfd/merge.c      | 793 +++++++++++++++++++++++++++++------------------
 5 files changed, 739 insertions(+), 304 deletions(-)

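The other central piece is the hash table layout described in merge.c
below: each bucket stores (hash << 32) | len next to the entry pointer,
so a single 64-bit compare rejects nearly every non-matching bucket
before the entry itself is touched.  A minimal sketch of that probe loop
("struct entry" and "probe" are placeholders; the real routine is
sec_merge_hash_lookup in the merge.c hunk):

  #include <stdint.h>
  #include <string.h>

  struct entry { unsigned int len; char str[1]; };

  /* nbuckets is a power of two; key_lens[i] is (hash << 32) | len for the
     entry in values[i], or 0 for an empty bucket.  */
  static struct entry *
  probe (const uint64_t *key_lens, struct entry **values,
         unsigned int nbuckets, const char *string, unsigned int len,
         uint32_t hash)
  {
    uint64_t hlen = ((uint64_t) hash << 32) | len;
    unsigned int i = hash & (nbuckets - 1);       /* Power-of-two masking.  */
    while (key_lens[i] != 0)
      {
        if (key_lens[i] == hlen
            && memcmp (values[i]->str, string, len) == 0)
          return values[i];                       /* Existing entry.  */
        i = (i + 1) & (nbuckets - 1);             /* Linear probing.  */
      }
    return NULL;                                  /* Caller may insert at slot i.  */
  }

Insertion reuses the final probe slot, which is why sec_merge_hash_insert
below asserts that no resize can be needed at that point.
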
diff --git a/bfd/config.in b/bfd/config.in
index e5132df393c..7ff3eeebf8b 100644
--- a/bfd/config.in
+++ b/bfd/config.in
@@ -7,6 +7,9 @@
 #endif
 #define __CONFIG_H__ 1
 
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
 /* Name of host specific core header file to include in elf.c. */
 #undef CORE_HEADER
 
@@ -325,6 +328,18 @@
 /* Version number of package */
 #undef VERSION
 
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
+
 /* Enable large inode numbers on Mac OS X 10.5.  */
 #ifndef _DARWIN_USE_64_BIT_INODE
 # define _DARWIN_USE_64_BIT_INODE 1
diff --git a/bfd/configure b/bfd/configure
index 5af8af2ee37..41d280ef461 100755
--- a/bfd/configure
+++ b/bfd/configure
@@ -12349,6 +12349,231 @@ fi
 
 
 
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
+$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
+if ${ac_cv_c_bigendian+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_c_bigendian=unknown
+    # See if we're dealing with a universal compiler.
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __APPLE_CC__
+	       not a universal capable compiler
+	     #endif
+	     typedef int dummy;
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+	# Check for potential -arch flags.  It is not universal unless
+	# there are at least two -arch flags with different values.
+	ac_arch=
+	ac_prev=
+	for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
+	 if test -n "$ac_prev"; then
+	   case $ac_word in
+	     i?86 | x86_64 | ppc | ppc64)
+	       if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
+		 ac_arch=$ac_word
+	       else
+		 ac_cv_c_bigendian=universal
+		 break
+	       fi
+	       ;;
+	   esac
+	   ac_prev=
+	 elif test "x$ac_word" = "x-arch"; then
+	   ac_prev=arch
+	 fi
+       done
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if sys/param.h defines the BYTE_ORDER macro.
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+	     #include <sys/param.h>
+
+int
+main ()
+{
+#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
+		     && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
+		     && LITTLE_ENDIAN)
+	      bogus endian macros
+	     #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # It does; now see whether it defined to BIG_ENDIAN or not.
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+		#include <sys/param.h>
+
+int
+main ()
+{
+#if BYTE_ORDER != BIG_ENDIAN
+		 not big endian
+		#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_bigendian=yes
+else
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main ()
+{
+#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
+	      bogus endian macros
+	     #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  # It does; now see whether it defined to _BIG_ENDIAN or not.
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main ()
+{
+#ifndef _BIG_ENDIAN
+		 not big endian
+		#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_c_bigendian=yes
+else
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # Compile a test program.
+      if test "$cross_compiling" = yes; then :
+  # Try to guess by grepping values from an object file.
+	 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+short int ascii_mm[] =
+		  { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+		short int ascii_ii[] =
+		  { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+		int use_ascii (int i) {
+		  return ascii_mm[i] + ascii_ii[i];
+		}
+		short int ebcdic_ii[] =
+		  { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+		short int ebcdic_mm[] =
+		  { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+		int use_ebcdic (int i) {
+		  return ebcdic_mm[i] + ebcdic_ii[i];
+		}
+		extern int foo;
+
+int
+main ()
+{
+return use_ascii (foo) == use_ebcdic (foo);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
+	      ac_cv_c_bigendian=yes
+	    fi
+	    if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
+	      if test "$ac_cv_c_bigendian" = unknown; then
+		ac_cv_c_bigendian=no
+	      else
+		# finding both strings is unlikely to happen, but who knows?
+		ac_cv_c_bigendian=unknown
+	      fi
+	    fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+
+	     /* Are we little or big endian?  From Harbison&Steele.  */
+	     union
+	     {
+	       long int l;
+	       char c[sizeof (long int)];
+	     } u;
+	     u.l = 1;
+	     return u.c[sizeof (long int) - 1] == 1;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+  ac_cv_c_bigendian=no
+else
+  ac_cv_c_bigendian=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+    fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
+$as_echo "$ac_cv_c_bigendian" >&6; }
+ case $ac_cv_c_bigendian in #(
+   yes)
+     $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
+;; #(
+   no)
+      ;; #(
+   universal)
+
+$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
+
+     ;; #(
+   *)
+     as_fn_error $? "unknown endianness
+ presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
+ esac
+
+
 host64=false
 target64=false
 bfd_default_target_size=32
@@ -15481,6 +15706,7 @@ if test -z "${INSTALL_LIBBFD_TRUE}" && test -z "${INSTALL_LIBBFD_FALSE}"; then
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
 
+
 : "${CONFIG_STATUS=./config.status}"
 ac_write_fail=0
 ac_clean_files_save=$ac_clean_files
diff --git a/bfd/configure.ac b/bfd/configure.ac
index 82a3d1f832e..f044616f4d9 100644
--- a/bfd/configure.ac
+++ b/bfd/configure.ac
@@ -193,6 +193,8 @@ AM_CONDITIONAL(GENINSRC_NEVER, false)
 AM_INSTALL_LIBBFD
 AC_EXEEXT
 
+AC_C_BIGENDIAN
+
 host64=false
 target64=false
 bfd_default_target_size=32
diff --git a/bfd/elflink.c b/bfd/elflink.c
index 7bf337c7d44..80e3a8d053d 100644
--- a/bfd/elflink.c
+++ b/bfd/elflink.c
@@ -11361,6 +11361,13 @@ elf_link_input_bfd (struct elf_final_link_info *flinfo, bfd *input_bfd)
 	      contents = flinfo->contents;
 	    }
 	}
+      else if (!(o->flags & SEC_RELOC)
+	       && !bed->elf_backend_write_section
+	       && o->sec_info_type == SEC_INFO_TYPE_MERGE)
+	/* A MERGE section that has no relocations doesn't need the
+	   contents anymore, they have been recorded earlier.  Except
+	   if the backend has special provisions for writing sections.  */
+	contents = NULL;
       else
 	{
 	  contents = flinfo->contents;
diff --git a/bfd/merge.c b/bfd/merge.c
index 23e030cc99b..f21154dcd45 100644
--- a/bfd/merge.c
+++ b/bfd/merge.c
@@ -28,16 +28,29 @@
 #include "bfd.h"
 #include "elf-bfd.h"
 #include "libbfd.h"
-#include "hashtab.h"
+#include "objalloc.h"
 #include "libiberty.h"
 
-struct sec_merge_sec_info;
+/* We partition all mergeable input sections into sets of similar
+   characteristics.  These sets are the unit of merging.  All content
+   of the input sections is scanned and inserted into a hash table.
+   We also remember an input-offset-to-entry mapping per input section,
+   but the content itself is then discarded.  After everything is read
+   in we assign output offsets to all hash entries, and when relocations
+   are processed we look up the given input offset per input section,
+   get the matching entry and its output offset (possibly adjusted if
+   the offset points into the middle of an entry).
+
+   The input-offset-to-entry mapping (in map_ofs/map) is sorted, so in
+   principle we could binary search it, but that's not cache-friendly and
+   it's faster to add another lookup structure that gets us very near the
+   correct entry in just one step (that's what ofstolowbound is for) and
+   do a linear search from there.  */
 
 /* An entry in the section merge hash table.  */
 
 struct sec_merge_hash_entry
 {
-  struct bfd_hash_entry root;
   /* Length of this entry.  This includes the zero terminator.  */
   unsigned int len;
   /* Start of this string needs to be aligned to
@@ -50,10 +63,9 @@ struct sec_merge_hash_entry
     /* Entry this is a suffix of (if alignment is 0).  */
     struct sec_merge_hash_entry *suffix;
   } u;
-  /* Which section is it in.  */
-  struct sec_merge_sec_info *secinfo;
-  /* Next entity in the hash table.  */
+  /* Next entity in the hash table (in order of entering).  */
   struct sec_merge_hash_entry *next;
+  char str[1];
 };
 
 /* The section merge hash table.  */
@@ -71,18 +83,51 @@ struct sec_merge_hash
   unsigned int entsize;
   /* Are entries fixed size or zero terminated strings?  */
   bool strings;
+  /* struct-of-array variant of all entries in the hash-table: */
+  unsigned int nbuckets;
+  /* We keep the hash code and length of each entry together in a separate
+     array in such a way that they can be checked with just a single memory
+     reference.  In this way we don't need indirect access to the entries
+     in the normal case.  key_lens[i] is (hashcode << 32) | len for entry
+     i (which is pointed to by values[i]).  */
+  uint64_t *key_lens;
+  struct sec_merge_hash_entry **values;
 };
 
+struct sec_merge_sec_info;
+
+/* Information per merged blob.  This is the unit of merging and is
+   related to (multiple) input sections of similar characteristics
+   (alignment, entity size, strings or blobs).  */
 struct sec_merge_info
 {
   /* Chain of sec_merge_infos.  */
   struct sec_merge_info *next;
-  /* Chain of sec_merge_sec_infos.  */
+  /* Chain of sec_merge_sec_infos.  This first one will be the representative
+     section that conceptually collects all merged content.  */
   struct sec_merge_sec_info *chain;
+  struct sec_merge_sec_info **last;
   /* A hash table used to hold section content.  */
   struct sec_merge_hash *htab;
 };
 
+/* Offsets into input mergeable sections are represented by this type.
+   Note that it doesn't support overly large mergeable sections.  */
+typedef uint32_t mapofs_type;
+
+/* Given a sec_merge_sec_info S this gives the input offset of the IDXth
+   recorded entry.  */
+#define MAP_OFS(S,IDX) (S)->map_ofs[IDX]
+/* And this gives the output offset (in the merged blob representing
+   this S).  */
+#define MAP_IDX(S,IDX) (S)->map[IDX].idx
+/* For quick lookup of the output offset given an input offset we store
+   an array mapping input-offset / OFSDIV to an entry index.
+   16 is better than 8, 32 is roughly the same as 16 but uses less memory,
+   so we use that.  */
+#define OFSDIV 32
+
+/* Information per input merge section.  */
 struct sec_merge_sec_info
 {
   /* Chain of sec_merge_sec_infos.  */
@@ -91,76 +136,212 @@ struct sec_merge_sec_info
   asection *sec;
   /* Pointer to merge_info pointing to us.  */
   void **psecinfo;
-  /* A hash table used to hold section content.  */
-  struct sec_merge_hash *htab;
+  /* The merge entity this is a part of.  */
+  struct sec_merge_info *sinfo;
+  /* The section associated with sinfo (i.e. the representative section).
+     Same as sinfo->chain->sec, but faster to access in the hot function.  */
+  asection *reprsec;
   /* First string in this section.  */
   struct sec_merge_hash_entry *first_str;
-  /* Original section content.  */
-  unsigned char contents[1];
+  /* Sparse mapping from input offset to entry covering that offset:  */
+  unsigned int noffsetmap;  /* Number of these mappings.  */
+  mapofs_type *map_ofs;     /* Input offset.  */
+  union {
+      struct sec_merge_hash_entry *entry;  /* Covering hash entry ... */
+      bfd_size_type idx;                   /* ... or destination offset.  */
+  } *map;
+  /* Quick access: index into map_ofs[].  ofstolowbound[o / OFSDIV]=I is
+     such that map_ofs[I] is the smallest offset higher than
+     rounddown(o, OFSDIV) (and hence I-1 is the largest entry whose offset
+     is smaller than or equal to o/OFSDIV*OFSDIV).  */
+  unsigned int *ofstolowbound;
+  int fast_state;
 };
 
 
-/* Routine to create an entry in a section merge hashtab.  */
+/* Given a merge hash table TABLE and a number of entries to be
+   ADDED, possibly resize the table for this to fit without further
+   resizing.  */
 
-static struct bfd_hash_entry *
-sec_merge_hash_newfunc (struct bfd_hash_entry *entry,
-			struct bfd_hash_table *table, const char *string)
+static bool
+sec_merge_maybe_resize (struct sec_merge_hash *table, unsigned added)
 {
-  /* Allocate the structure if it has not already been allocated by a
-     subclass.  */
-  if (entry == NULL)
-    entry = (struct bfd_hash_entry *)
-	bfd_hash_allocate (table, sizeof (struct sec_merge_hash_entry));
-  if (entry == NULL)
-    return NULL;
+  struct bfd_hash_table *bfdtab = &table->table;
+  if (bfdtab->count + added > table->nbuckets * 2 / 3)
+    {
+      unsigned i;
+      unsigned long newnb = table->nbuckets * 2;
+      struct sec_merge_hash_entry **newv;
+      uint64_t *newl;
+      unsigned long alloc;
 
-  /* Call the allocation method of the superclass.  */
-  entry = bfd_hash_newfunc (entry, table, string);
+      while (bfdtab->count + added > newnb * 2 / 3)
+	{
+	  newnb *= 2;
+	  if (!newnb)
+	    return false;
+	}
 
-  if (entry != NULL)
-    {
-      /* Initialize the local fields.  */
-      struct sec_merge_hash_entry *ret = (struct sec_merge_hash_entry *) entry;
+      alloc = newnb * sizeof (newl[0]);
+      if (alloc / sizeof (newl[0]) != newnb)
+	return false;
+      newl = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
+      if (newl == NULL)
+	return false;
+      memset (newl, 0, alloc);
+      alloc = newnb * sizeof (newv[0]);
+      if (alloc / sizeof (newv[0]) != newnb)
+	return false;
+      newv = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
+      if (newv == NULL)
+	return false;
+      memset (newv, 0, alloc);
 
-      ret->u.suffix = NULL;
-      ret->alignment = 0;
-      ret->secinfo = NULL;
-      ret->next = NULL;
-    }
+      for (i = 0; i < table->nbuckets; i++)
+	{
+	  struct sec_merge_hash_entry *v = table->values[i];
+	  if (v)
+	    {
+	      uint32_t thishash = table->key_lens[i] >> 32;
+	      unsigned idx = thishash & (newnb - 1);
+	      while (newv[idx])
+		idx = (idx + 1) & (newnb - 1);
+	      newl[idx] = table->key_lens[i];
+	      newv[idx] = v;
+	    }
+	}
 
-  return entry;
+      table->key_lens = newl;
+      table->values = newv;
+      table->nbuckets = newnb;
+    }
+  return true;
 }
 
-/* Look up an entry in a section merge hash table.  */
+/* Insert STRING (actually a byte blob of length LEN, with pre-computed
+   HASH and bucket _INDEX) into our hash TABLE.  */
 
 static struct sec_merge_hash_entry *
-sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
-		       unsigned int alignment, bool create)
+sec_merge_hash_insert (struct sec_merge_hash *table,
+		 const char *string,
+		 uint64_t hash, unsigned int len, unsigned int _index)
 {
-  const unsigned char *s;
-  unsigned long hash;
-  unsigned int c;
+  struct bfd_hash_table *bfdtab = &table->table;
   struct sec_merge_hash_entry *hashp;
+
+  hashp = (struct sec_merge_hash_entry *)
+      bfd_hash_allocate (bfdtab, len + sizeof (struct sec_merge_hash_entry));
+  if (hashp == NULL)
+    return NULL;
+
+  memcpy (hashp->str, string, len);
+  hashp->len = len;
+  hashp->alignment = 0;
+  hashp->u.suffix = NULL;
+  hashp->next = NULL;
+  /* We must not need resizing, otherwise _INDEX would be wrong.  */
+  BFD_ASSERT (bfdtab->count + 1 <= table->nbuckets * 2 / 3);
+  bfdtab->count++;
+  table->key_lens[_index] = (hash << 32) | (uint32_t)len;
+  table->values[_index] = hashp;
+
+  return hashp;
+}
+
+/* Read four bytes at STR, interpret them as a 32-bit unsigned
+   little-endian value and return that.  */
+
+static inline uint32_t
+hash_read32 (const char *str)
+{
+  uint32_t i;
+  /* All reasonable compilers will inline this memcpy and generate optimal
+     code on architectures that support unaligned (4-byte) accesses.  */
+  memcpy (&i, str, 4);
+#ifdef WORDS_BIGENDIAN
+  i = (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24);
+#endif
+  return i;
+}
+
+/* Calculate and return a hash value of the bytes at STR[0..LEN-1].
+   All non-zero lengths and all alignments are supported.
+
+   This is somewhat similar to xxh3 (of xxhash), but restricted to 32 bits.
+   On cc1 strings this has quite similar statistical properties, and we
+   don't need to jump through hoops to get fast 64x64->128 multiplies,
+   or 64-bit arithmetic on 32-bit hosts.  We also don't care about seeds
+   or secrets; they improve mixing very little.  */
+
+static uint32_t
+hash_blob (const char *str, unsigned int len)
+{
+  uint32_t ret = 0;
+  uint32_t mul = (1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5) + (1 << 7);
+  mul += (1 << 11) + (1 << 13) + (1 << 17) + (0 << 19) + (1 << 23) + (1 << 29);
+  mul += (1 << 31);
+  if (len >= 8)
+    {
+      uint32_t acc = len * 0x9e3779b1;
+      while (len >= 8)
+	{
+	  uint32_t i1 = hash_read32  (str) ^ (0x396cfeb8 + 1*len);
+	  uint32_t i2 = hash_read32  (str + 4) ^ (0xbe4ba423 + 1*len);
+	  str += 8;
+	  len -= 8;
+	  uint64_t m = (uint64_t)i1 * i2;
+	  acc += (uint32_t)m ^ (uint32_t)(m >> 32);
+	}
+      acc = acc ^ (acc >> 7);
+      uint64_t r = (uint64_t)mul * acc;
+      ret = (uint32_t)r ^ (uint32_t)(r >> 32);
+      if (len == 0)
+	goto end;
+    }
+  if (len >= 4)
+    {
+      uint32_t i1 = hash_read32  (str);
+      uint32_t i2 = hash_read32  (str + len - 4);
+      i1 = ((i1 + len) ^ (i1 >> 7));
+      i2 = i2 ^ (i2 >> 7);
+      uint64_t r = (uint64_t)mul * i1 + i2;
+      ret += r ^ (r >> 32);
+    }
+  else
+    {
+      /* Cleverly read in 1 to 3 bytes without further conditionals.  */
+      unsigned char c1 = str[0];
+      unsigned char c2 = str[len >> 1];
+      unsigned char c3 = str[len - 1];
+      uint32_t i1 = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24)
+		     | ((uint32_t) c3) | (len << 8);
+      i1 = i1 ^ (i1 >> 7);
+      uint64_t r = (uint64_t)mul * i1;
+      ret += r ^ (r >> 32);
+    }
+end:
+  return ret;
+}
+
+/* Given a hash TABLE, return the hash of STRING (a blob described
+   according to info in TABLE, either a character string, or some fixed
+   size entity) and set *PLEN to the length of this blob.  */
+
+static uint32_t
+hashit (struct sec_merge_hash *table, const char *string, unsigned int *plen)
+{
+  const unsigned char *s;
+  uint32_t hash;
   unsigned int len, i;
-  unsigned int _index;
 
-  hash = 0;
-  len = 0;
   s = (const unsigned char *) string;
   if (table->strings)
     {
       if (table->entsize == 1)
-	{
-	  while ((c = *s++) != '\0')
-	    {
-	      hash += c + (c << 17);
-	      hash ^= hash >> 2;
-	      ++len;
-	    }
-	  hash += len + (len << 17);
-	}
+	len = strlen (string) + 1;
       else
 	{
+	  len = 0;
 	  for (;;)
 	    {
 	      for (i = 0; i < table->entsize; ++i)
@@ -168,65 +349,67 @@ sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
 		  break;
 	      if (i == table->entsize)
 		break;
-	      for (i = 0; i < table->entsize; ++i)
-		{
-		  c = *s++;
-		  hash += c + (c << 17);
-		  hash ^= hash >> 2;
-		}
+	      s += table->entsize;
 	      ++len;
 	    }
-	  hash += len + (len << 17);
 	  len *= table->entsize;
+	  len += table->entsize;
 	}
-      hash ^= hash >> 2;
-      len += table->entsize;
     }
   else
-    {
-      for (i = 0; i < table->entsize; ++i)
-	{
-	  c = *s++;
-	  hash += c + (c << 17);
-	  hash ^= hash >> 2;
-	}
-      len = table->entsize;
-    }
+    len = table->entsize;
+  hash = hash_blob (string, len);
+  *plen = len;
+  return hash;
+}
+
+/* Look up or insert a blob STRING (of length LEN, precomputed HASH and
+   input ALIGNMENT) into TABLE.  Return the found or new hash table entry.  */
+
+static struct sec_merge_hash_entry *
+sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
+		       unsigned int len, uint64_t hash,
+		       unsigned int alignment)
+{
+  struct sec_merge_hash_entry *hashp;
+  unsigned int _index;
 
-  _index = hash % table->table.size;
-  for (hashp = (struct sec_merge_hash_entry *) table->table.table[_index];
-       hashp != NULL;
-       hashp = (struct sec_merge_hash_entry *) hashp->root.next)
+  /*printf ("YYY insert 0x%x into %u buckets (%s)\n",
+	  (unsigned)hash, (unsigned)table->nbuckets, string);*/
+  uint64_t *key_lens = table->key_lens;
+  struct sec_merge_hash_entry **values = table->values;
+  uint64_t hlen = (hash << 32) | (uint32_t)len;
+  unsigned int nbuckets = table->nbuckets;
+  _index = hash & (nbuckets - 1);
+  while (1)
     {
-      if (hashp->root.hash == hash
-	  && len == hashp->len
-	  && memcmp (hashp->root.string, string, len) == 0)
+      uint64_t candlen = key_lens[_index];
+      if (candlen == hlen
+	  && !memcmp (values[_index]->str, string, len))
 	{
-	  /* If the string we found does not have at least the required
-	     alignment, we need to insert another copy.  */
+	  hashp = values[_index];
 	  if (hashp->alignment < alignment)
-	    {
-	      if (create)
-		{
-		  /*  Mark the less aligned copy as deleted.  */
-		  hashp->len = 0;
-		  hashp->alignment = 0;
-		}
-	      break;
-	    }
+	    hashp->alignment = alignment;
 	  return hashp;
 	}
+      if (!(candlen & (uint32_t)-1))
+	break;
+      _index = (_index + 1) & (nbuckets - 1);
     }
 
-  if (! create)
-    return NULL;
-
-  hashp = ((struct sec_merge_hash_entry *)
-	   bfd_hash_insert (&table->table, string, hash));
+  hashp = sec_merge_hash_insert (table, string, hash, len, _index);
   if (hashp == NULL)
     return NULL;
-  hashp->len = len;
   hashp->alignment = alignment;
+
+  table->size++;
+  BFD_ASSERT (table->size == table->table.count);
+  if (table->first == NULL)
+    table->first = hashp;
+  else
+    table->last->next = hashp;
+  table->last = hashp;
+
   return hashp;
 }
 
@@ -241,8 +424,8 @@ sec_merge_init (unsigned int entsize, bool strings)
   if (table == NULL)
     return NULL;
 
-  if (! bfd_hash_table_init_n (&table->table, sec_merge_hash_newfunc,
-			       sizeof (struct sec_merge_hash_entry), 16699))
+  if (! bfd_hash_table_init_n (&table->table, NULL,
+			       sizeof (struct sec_merge_hash_entry), 0x2000))
     {
       free (table);
       return NULL;
@@ -254,42 +437,83 @@ sec_merge_init (unsigned int entsize, bool strings)
   table->entsize = entsize;
   table->strings = strings;
 
+  table->nbuckets = 0x2000;
+  table->key_lens = objalloc_alloc ((struct objalloc *) table->table.memory,
+				table->nbuckets * sizeof (table->key_lens[0]));
+  memset (table->key_lens, 0, table->nbuckets * sizeof (table->key_lens[0]));
+  table->values = objalloc_alloc ((struct objalloc *) table->table.memory,
+				table->nbuckets * sizeof (table->values[0]));
+  memset (table->values, 0, table->nbuckets * sizeof (table->values[0]));
+
   return table;
 }
 
-/* Get the index of an entity in a hash table, adding it if it is not
-   already present.  */
+/* Append the mapping from input offset O to the corresponding hash
+   table ENTRY into SECINFO, such that we may later look up the
+   entry just by O.  */
 
-static struct sec_merge_hash_entry *
-sec_merge_add (struct sec_merge_hash *tab, const char *str,
-	       unsigned int alignment, struct sec_merge_sec_info *secinfo)
+static bool
+append_offsetmap (struct sec_merge_sec_info *secinfo,
+		  mapofs_type o,
+		  struct sec_merge_hash_entry *entry)
 {
-  struct sec_merge_hash_entry *entry;
+  if ((secinfo->noffsetmap & 2047) == 0)
+    {
+      bfd_size_type amt;
+      amt = (secinfo->noffsetmap + 2048);
+      secinfo->map_ofs = bfd_realloc (secinfo->map_ofs,
+				      amt * sizeof(secinfo->map_ofs[0]));
+      if (!secinfo->map_ofs)
+	return false;
+      secinfo->map = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
+      if (!secinfo->map)
+	return false;
+    }
+  unsigned int i = secinfo->noffsetmap++;
+  MAP_OFS(secinfo, i) = o;
+  secinfo->map[i].entry = entry;
+  return true;
+}
 
-  entry = sec_merge_hash_lookup (tab, str, alignment, true);
-  if (entry == NULL)
-    return NULL;
+/* Prepare the input-offset-to-entry tables after output offsets are
+   determined.  */
+
+static void
+prepare_offsetmap (struct sec_merge_sec_info *secinfo)
+{
+  unsigned int noffsetmap = secinfo->noffsetmap;
+  unsigned int i, lbi;
+  bfd_size_type l, sz, amt;
+
+  secinfo->fast_state = 1;
+
+  for (i = 0; i < noffsetmap; i++)
+    MAP_IDX(secinfo, i) = secinfo->map[i].entry->u.index;
 
-  if (entry->secinfo == NULL)
+  sz = secinfo->sec->rawsize;
+  amt = (sz / OFSDIV + 1) * sizeof (secinfo->ofstolowbound[0]);
+  secinfo->ofstolowbound = bfd_zmalloc (amt);
+  if (!secinfo->ofstolowbound)
+    return;
+  for (l = lbi = 0; l < sz; l += OFSDIV)
     {
-      tab->size++;
-      entry->secinfo = secinfo;
-      if (tab->first == NULL)
-	tab->first = entry;
-      else
-	tab->last->next = entry;
-      tab->last = entry;
+      /* No need for bounds checking on lbi, as we've added a sentinel that's
+	 larger than any offset.  */
+      while (MAP_OFS(secinfo, lbi) <= l)
+	lbi++;
+      //BFD_ASSERT ((l / OFSDIV) <= (i / OFSDIV));
+      secinfo->ofstolowbound[l / OFSDIV] = lbi;
     }
-
-  return entry;
+  secinfo->fast_state = 2;
 }
 
 static bool
-sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry,
-		unsigned char *contents, file_ptr offset)
+sec_merge_emit (bfd *abfd, struct sec_merge_sec_info *secinfo,
+		unsigned char *contents)
 {
-  struct sec_merge_sec_info *secinfo = entry->secinfo;
+  struct sec_merge_hash_entry *entry = secinfo->first_str;
   asection *sec = secinfo->sec;
+  file_ptr offset = sec->output_offset;
   char *pad = NULL;
   bfd_size_type off = 0;
   unsigned int opb = bfd_octets_per_byte (abfd, sec);
@@ -304,11 +528,14 @@ sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry,
   if (pad == NULL)
     return false;
 
-  for (; entry != NULL && entry->secinfo == secinfo; entry = entry->next)
+  for (; entry != NULL; entry = entry->next)
     {
       const char *str;
       bfd_size_type len;
 
+      if (!entry->len)
+	continue;
+      BFD_ASSERT (entry->alignment);
       len = -off & (entry->alignment - 1);
       if (len != 0)
 	{
@@ -323,7 +550,7 @@ sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry,
 	  off += len;
 	}
 
-      str = entry->root.string;
+      str = entry->str;
       len = entry->len;
 
       if (contents)
@@ -336,10 +563,11 @@ sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry,
 
       off += len;
     }
+  BFD_ASSERT (!entry);
 
   /* Trailing alignment needed?  */
   off = sec->size - off;
-  if (off != 0)
+  if (1 && off != 0)
     {
       BFD_ASSERT (off <= pad_len);
       if (contents)
@@ -365,10 +593,9 @@ _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
 {
   struct sec_merge_info *sinfo;
   struct sec_merge_sec_info *secinfo;
+  asection *repr;
   unsigned int alignment_power;  /* Octets.  */
   unsigned int align;            /* Octets.  */
-  bfd_size_type amt;
-  bfd_byte *contents;
   unsigned int opb = bfd_octets_per_byte (abfd, sec);
 
   if ((abfd->flags & DYNAMIC) != 0
@@ -389,6 +616,12 @@ _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
       return true;
     }
 
+  if (sec->size > (mapofs_type)-1)
+    {
+      /* Input offsets must be representable by mapofs_type.  */
+      return true;
+    }
+
 #ifndef CHAR_BIT
 #define CHAR_BIT 8
 #endif
@@ -412,12 +645,23 @@ _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
       return true;
     }
 
+  /* Initialize the descriptor for this input section.  */
+
+  *psecinfo = secinfo = bfd_zalloc (abfd, sizeof (*secinfo));
+  if (*psecinfo == NULL)
+    goto error_return;
+
+  secinfo->sec = sec;
+  secinfo->psecinfo = psecinfo;
+
+  /* Search for a matching output merged section.  */
   for (sinfo = (struct sec_merge_info *) *psinfo; sinfo; sinfo = sinfo->next)
-    if ((secinfo = sinfo->chain)
-	&& ! ((secinfo->sec->flags ^ sec->flags) & (SEC_MERGE | SEC_STRINGS))
-	&& secinfo->sec->entsize == sec->entsize
-	&& secinfo->sec->alignment_power == sec->alignment_power
-	&& secinfo->sec->output_section == sec->output_section)
+    if (sinfo->chain
+	&& (repr = sinfo->chain->sec)
+	&& ! ((repr->flags ^ sec->flags) & (SEC_MERGE | SEC_STRINGS))
+	&& repr->entsize == sec->entsize
+	&& repr->alignment_power == sec->alignment_power
+	&& repr->output_section == sec->output_section)
       break;
 
   if (sinfo == NULL)
@@ -429,44 +673,18 @@ _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
 	goto error_return;
       sinfo->next = (struct sec_merge_info *) *psinfo;
       sinfo->chain = NULL;
+      sinfo->last = &sinfo->chain;
       *psinfo = sinfo;
       sinfo->htab = sec_merge_init (sec->entsize, (sec->flags & SEC_STRINGS));
       if (sinfo->htab == NULL)
 	goto error_return;
     }
 
-  /* Read the section from abfd.  */
+  *sinfo->last = secinfo;
+  sinfo->last = &secinfo->next;
 
-  amt = sizeof (struct sec_merge_sec_info) - 1 + sec->size;
-  if (sec->flags & SEC_STRINGS)
-    /* Some versions of gcc may emit a string without a zero terminator.
-       See http://gcc.gnu.org/ml/gcc-patches/2006-06/msg01004.html
-       Allocate space for an extra zero.  */
-    amt += sec->entsize;
-  *psecinfo = bfd_alloc (abfd, amt);
-  if (*psecinfo == NULL)
-    goto error_return;
-
-  secinfo = (struct sec_merge_sec_info *) *psecinfo;
-  if (sinfo->chain)
-    {
-      secinfo->next = sinfo->chain->next;
-      sinfo->chain->next = secinfo;
-    }
-  else
-    secinfo->next = secinfo;
-  sinfo->chain = secinfo;
-  secinfo->sec = sec;
-  secinfo->psecinfo = psecinfo;
-  secinfo->htab = sinfo->htab;
-  secinfo->first_str = NULL;
-
-  sec->rawsize = sec->size;
-  if (sec->flags & SEC_STRINGS)
-    memset (secinfo->contents + sec->size, 0, sec->entsize);
-  contents = secinfo->contents;
-  if (! bfd_get_full_section_contents (sec->owner, sec, &contents))
-    goto error_return;
+  secinfo->sinfo = sinfo;
+  secinfo->reprsec = sinfo->chain->sec;
 
   return true;
 
@@ -475,85 +693,88 @@ _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
   return false;
 }
 
-/* Record one section into the hash table.  */
+/* Record one whole input section (described by SECINFO) into the hash table
+   SINFO.  */
+
 static bool
 record_section (struct sec_merge_info *sinfo,
 		struct sec_merge_sec_info *secinfo)
 {
   asection *sec = secinfo->sec;
   struct sec_merge_hash_entry *entry;
-  bool nul;
   unsigned char *p, *end;
   bfd_vma mask, eltalign;
-  unsigned int align, i;
+  unsigned int align;
+  bfd_size_type amt;
+  bfd_byte *contents;
 
-  align = sec->alignment_power;
-  end = secinfo->contents + sec->size;
-  nul = false;
-  mask = ((bfd_vma) 1 << align) - 1;
+  amt = sec->size;
+  if (sec->flags & SEC_STRINGS)
+    /* Some versions of gcc may emit a string without a zero terminator.
+       See http://gcc.gnu.org/ml/gcc-patches/2006-06/msg01004.html
+       Allocate space for an extra zero.  */
+    amt += sec->entsize;
+  contents = bfd_malloc (amt);
+  if (!contents)
+    goto error_return;
+
+  /* Slurp in all section contents (possibly decompressing them).  */
+  sec->rawsize = sec->size;
   if (sec->flags & SEC_STRINGS)
+    memset (contents + sec->size, 0, sec->entsize);
+  if (! bfd_get_full_section_contents (sec->owner, sec, &contents))
+    goto error_return;
+
+  /* Now populate the hash table and offset mapping.  */
+
+  /* Presize the hash table for what we're going to add.  We overestimate
+     quite a bit, but if it turns out to be too much then other sections
+     merged into this area will make use of that as well.  */
+  if (!sec_merge_maybe_resize (sinfo->htab, 1 + sec->size / 2))
     {
-      for (p = secinfo->contents; p < end; )
-	{
-	  eltalign = p - secinfo->contents;
-	  eltalign = ((eltalign ^ (eltalign - 1)) + 1) >> 1;
-	  if (!eltalign || eltalign > mask)
-	    eltalign = mask + 1;
-	  entry = sec_merge_add (sinfo->htab, (char *) p, (unsigned) eltalign,
-				 secinfo);
-	  if (! entry)
-	    goto error_return;
-	  p += entry->len;
-	  if (sec->entsize == 1)
-	    {
-	      while (p < end && *p == 0)
-		{
-		  if (!nul && !((p - secinfo->contents) & mask))
-		    {
-		      nul = true;
-		      entry = sec_merge_add (sinfo->htab, "",
-					     (unsigned) mask + 1, secinfo);
-		      if (! entry)
-			goto error_return;
-		    }
-		  p++;
-		}
-	    }
-	  else
-	    {
-	      while (p < end)
-		{
-		  for (i = 0; i < sec->entsize; i++)
-		    if (p[i] != '\0')
-		      break;
-		  if (i != sec->entsize)
-		    break;
-		  if (!nul && !((p - secinfo->contents) & mask))
-		    {
-		      nul = true;
-		      entry = sec_merge_add (sinfo->htab, (char *) p,
-					     (unsigned) mask + 1, secinfo);
-		      if (! entry)
-			goto error_return;
-		    }
-		  p += sec->entsize;
-		}
-	    }
-	}
+      bfd_set_error (bfd_error_no_memory);
+      goto error_return;
     }
-  else
+
+  /* Walk through the contents, calculate hashes and lengths of all
+     blobs (strings or fixed-size entries) we find and fill the
+     hash and offset tables.  */
+  align = sec->alignment_power;
+  mask = ((bfd_vma) 1 << align) - 1;
+  end = contents + sec->size;
+  for (p = contents; p < end;)
     {
-      for (p = secinfo->contents; p < end; p += sec->entsize)
-	{
-	  entry = sec_merge_add (sinfo->htab, (char *) p, 1, secinfo);
-	  if (! entry)
-	    goto error_return;
-	}
+      unsigned len;
+      uint32_t hash = hashit (sinfo->htab, (char*) p, &len);
+      unsigned int ofs = p - contents;
+      eltalign = ofs;
+      eltalign = ((eltalign ^ (eltalign - 1)) + 1) >> 1;
+      if (!eltalign || eltalign > mask)
+	eltalign = mask + 1;
+      entry = sec_merge_hash_lookup (sinfo->htab, (char *) p, len, hash,
+				     (unsigned) eltalign);
+      if (! entry)
+	goto error_return;
+      if (! append_offsetmap (secinfo, ofs, entry))
+	goto error_return;
+      p += len;
     }
 
+  /* Add a sentinel element that's conceptually behind all others.  */
+  append_offsetmap (secinfo, sec->size, NULL);
+  /* But don't count it.  */
+  secinfo->noffsetmap--;
+
+  free (contents);
+  contents = NULL;
+  /*printf ("ZZZ %s:%s %u entries\n", sec->owner->filename, sec->name,
+	  (unsigned)secinfo->noffsetmap);*/
+
   return true;
 
  error_return:
+  free (contents);
+  contents = NULL;
   for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
     *secinfo->psecinfo = NULL;
   return false;
@@ -569,8 +790,8 @@ strrevcmp (const void *a, const void *b)
   struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
   unsigned int lenA = A->len;
   unsigned int lenB = B->len;
-  const unsigned char *s = (const unsigned char *) A->root.string + lenA - 1;
-  const unsigned char *t = (const unsigned char *) B->root.string + lenB - 1;
+  const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
+  const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
   int l = lenA < lenB ? lenA : lenB;
 
   while (l)
@@ -594,8 +815,8 @@ strrevcmp_align (const void *a, const void *b)
   struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
   unsigned int lenA = A->len;
   unsigned int lenB = B->len;
-  const unsigned char *s = (const unsigned char *) A->root.string + lenA - 1;
-  const unsigned char *t = (const unsigned char *) B->root.string + lenB - 1;
+  const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
+  const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
   int l = lenA < lenB ? lenA : lenB;
   int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1));
 
@@ -622,8 +843,8 @@ is_suffix (const struct sec_merge_hash_entry *A,
        not to be equal by the hash table.  */
     return 0;
 
-  return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len) == 0;
+  return memcmp (A->str + (A->len - B->len),
+		 B->str, B->len) == 0;
 }
 
 /* This is a helper function for _bfd_merge_sections.  It attempts to
@@ -689,21 +910,11 @@ merge_strings (struct sec_merge_info *sinfo)
 
   /* Now assign positions to the strings we want to keep.  */
   size = 0;
-  secinfo = sinfo->htab->first->secinfo;
+  secinfo = sinfo->chain;
   for (e = sinfo->htab->first; e; e = e->next)
     {
-      if (e->secinfo != secinfo)
-	{
-	  secinfo->sec->size = size;
-	  secinfo = e->secinfo;
-	}
       if (e->alignment)
 	{
-	  if (e->secinfo->first_str == NULL)
-	    {
-	      e->secinfo->first_str = e;
-	      size = 0;
-	    }
 	  size = (size + e->alignment - 1) & ~((bfd_vma) e->alignment - 1);
 	  e->u.index = size;
 	  size += e->len;
@@ -721,11 +932,14 @@ merge_strings (struct sec_merge_info *sinfo)
 	*a = e->next;
 	if (e->len)
 	  {
-	    e->secinfo = e->u.suffix->secinfo;
 	    e->alignment = e->u.suffix->alignment;
 	    e->u.index = e->u.suffix->u.index + (e->u.suffix->len - e->len);
 	  }
       }
+
+  BFD_ASSERT (!secinfo->first_str);
+  secinfo->first_str = sinfo->htab->first;
+
   return secinfo;
 }
 
@@ -748,11 +962,6 @@ _bfd_merge_sections (bfd *abfd,
       if (! sinfo->chain)
 	continue;
 
-      /* Move sinfo->chain to head of the chain, terminate it.  */
-      secinfo = sinfo->chain;
-      sinfo->chain = secinfo->next;
-      secinfo->next = NULL;
-
       /* Record the sections into the hash table.  */
       align = 1;
       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
@@ -787,25 +996,23 @@ _bfd_merge_sections (bfd *abfd,
 	}
       else
 	{
-	  struct sec_merge_hash_entry *e;
+	  struct sec_merge_hash_entry *e = sinfo->htab->first;
 	  bfd_size_type size = 0;  /* Octets.  */
 
 	  /* Things are much simpler for non-strings.
 	     Just assign them slots in the section.  */
-	  secinfo = NULL;
+	  secinfo = sinfo->chain;
+	  BFD_ASSERT (!secinfo->first_str);
+	  secinfo->first_str = e;
 	  for (e = sinfo->htab->first; e; e = e->next)
 	    {
-	      if (e->secinfo->first_str == NULL)
+	      if (e->alignment)
 		{
-		  if (secinfo)
-		    secinfo->sec->size = size;
-		  e->secinfo->first_str = e;
-		  size = 0;
+		  size = (size + e->alignment - 1)
+			 & ~((bfd_vma) e->alignment - 1);
+		  e->u.index = size;
+		  size += e->len;
 		}
-	      size = (size + e->alignment - 1) & ~((bfd_vma) e->alignment - 1);
-	      e->u.index = size;
-	      size += e->len;
-	      secinfo = e->secinfo;
 	    }
 	  secinfo->sec->size = size;
 	}
@@ -861,8 +1068,9 @@ _bfd_write_merged_section (bfd *output_bfd, asection *sec, void *psecinfo)
 	return false;
     }
 
-  if (! sec_merge_emit (output_bfd, secinfo->first_str, contents,
-			sec->output_offset))
+  BFD_ASSERT (sec == secinfo->sec);
+  BFD_ASSERT (secinfo == secinfo->sinfo->chain);
+  if (! sec_merge_emit (output_bfd, secinfo, contents))
     return false;
 
   return true;
@@ -877,8 +1085,6 @@ _bfd_merged_section_offset (bfd *output_bfd ATTRIBUTE_UNUSED, asection **psec,
 			    void *psecinfo, bfd_vma offset)
 {
   struct sec_merge_sec_info *secinfo;
-  struct sec_merge_hash_entry *entry;
-  unsigned char *p;
   asection *sec = *psec;
 
   secinfo = (struct sec_merge_sec_info *) psecinfo;
@@ -896,55 +1102,27 @@ _bfd_merged_section_offset (bfd *output_bfd ATTRIBUTE_UNUSED, asection **psec,
       return secinfo->first_str ? sec->size : 0;
     }
 
-  if (secinfo->htab->strings)
-    {
-      if (sec->entsize == 1)
-	{
-	  p = secinfo->contents + offset - 1;
-	  while (p >= secinfo->contents && *p)
-	    --p;
-	  ++p;
-	}
-      else
-	{
-	  p = secinfo->contents + (offset / sec->entsize) * sec->entsize;
-	  p -= sec->entsize;
-	  while (p >= secinfo->contents)
-	    {
-	      unsigned int i;
-
-	      for (i = 0; i < sec->entsize; ++i)
-		if (p[i] != '\0')
-		  break;
-	      if (i == sec->entsize)
-		break;
-	      p -= sec->entsize;
-	    }
-	  p += sec->entsize;
-	}
-    }
-  else
-    {
-      p = secinfo->contents + (offset / sec->entsize) * sec->entsize;
-    }
-  entry = sec_merge_hash_lookup (secinfo->htab, (char *) p, 0, false);
-  if (!entry)
+  if (secinfo->fast_state != 2)
     {
-      if (! secinfo->htab->strings)
-	abort ();
-      /* This should only happen if somebody points into the padding
-	 after a NUL character but before next entity.  */
-      if (*p)
-	abort ();
-      if (! secinfo->htab->first)
-	abort ();
-      entry = secinfo->htab->first;
-      p = (secinfo->contents + (offset / sec->entsize + 1) * sec->entsize
-	   - entry->len);
+      if (!secinfo->fast_state)
+	prepare_offsetmap (secinfo);
+      if (secinfo->fast_state != 2)
+	return offset;
     }
 
-  *psec = entry->secinfo->sec;
-  return entry->u.index + (secinfo->contents + offset - p);
+  long lb = secinfo->ofstolowbound[offset / OFSDIV];
+  *psec = secinfo->reprsec;
+
+  /* No need for bounds checking on lb, as we've added a sentinel that's
+     larger than any offset.  */
+  while (MAP_OFS(secinfo, lb) <= offset)
+    lb++;
+  lb--;
+
+  /*printf ("YYY (%s:%s):%u -> (%s):%u\n",
+	  sec->owner->filename, sec->name, (unsigned)offset,
+	  (*psec)->name, (unsigned)lb);*/
+  return MAP_IDX(secinfo, lb) + offset - MAP_OFS(secinfo, lb);
 }
 
 /* Tidy up when done.  */
@@ -956,6 +1134,13 @@ _bfd_merge_sections_free (void *xsinfo)
 
   for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
     {
+      struct sec_merge_sec_info *secinfo;
+      for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
+	{
+	  free (secinfo->ofstolowbound);
+	  free (secinfo->map);
+	  free (secinfo->map_ofs);
+	}
       bfd_hash_table_free (&sinfo->htab->table);
       free (sinfo->htab);
     }
-- 
2.36.1

