public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/maskray/grte] Redesign the fastload support for additional performance
@ 2021-08-28  0:32 Fangrui Song
  0 siblings, 0 replies; 3+ messages in thread
From: Fangrui Song @ 2021-08-28  0:32 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=af63681769182a8e29568088d6c9cd3c916b22f9

commit af63681769182a8e29568088d6c9cd3c916b22f9
Author: Ambrose Feinstein <ambrose@google.com>
Date:   Wed Sep 11 14:53:28 2019 -0700

    Redesign the fastload support for additional performance

Diff:
---
 elf/dl-close.c             |   3 +
 elf/dl-load.c              | 248 +++++++++++++++----
 elf/dl-lookup.c            | 583 +++++++++++++++++----------------------------
 elf/dl-object.c            |   9 +-
 elf/dl-sort-maps.c         |  27 ++-
 elf/dl-support.c           |   3 -
 elf/rtld.c                 |   8 +-
 include/link.h             |   3 +-
 sysdeps/generic/ldsodefs.h |  24 +-
 9 files changed, 474 insertions(+), 434 deletions(-)

diff --git a/elf/dl-close.c b/elf/dl-close.c
index cbf336609b..3960559b72 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -723,6 +723,9 @@ _dl_close_worker (struct link_map *map, bool force)
 	    _dl_debug_printf ("\nfile=%s [%lu];  destroying link map\n",
 			      imap->l_name, imap->l_ns);
 
+	  /* Remove from hashtables.  */
+	  _dl_hash_del_object (imap);
+
 	  /* This name always is allocated.  */
 	  free (imap->l_name);
 	  /* Remove the list with all the names of the shared object.  */
diff --git a/elf/dl-load.c b/elf/dl-load.c
index f78140a441..1ae72aa325 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -357,8 +357,7 @@ expand_dynamic_string_token (struct link_map *l, const char *s)
 
 /* Add `name' to the list of names for a particular shared object.
    `name' is expected to have been allocated with malloc and will
-   be freed if the shared object already has this name.
-   Returns false if the object already had this name.  */
+   be freed if the shared object already has this name.  */
 static void
 add_name_to_object (struct link_map *l, const char *name)
 {
@@ -804,6 +803,177 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l,
 }
 
 
+/* Hash tables of loaded objects; one keyed on name, one keyed on inode.  */
+
+struct lib_hash_namenode
+{
+  const char *name;
+  struct link_map *lib;
+  struct lib_hash_namenode *next;
+  uint32_t hash;
+};
+
+struct lib_hash_filenode
+{
+  struct link_map *lib;
+  struct lib_hash_filenode *next;
+  uint32_t hash;
+};
+
+/* Must be a power of 2.  */
+#define LIB_HASH_BUCKETS 256
+
+static struct lib_hash_namenode *lib_hash_nametable[LIB_HASH_BUCKETS];
+static struct lib_hash_filenode *lib_hash_filetable[LIB_HASH_BUCKETS];
+
+static size_t
+lib_hash_bucket (uint32_t hash)
+{
+  return hash & (LIB_HASH_BUCKETS-1);
+}
+
+static uint32_t
+hash_mix (uint32_t h, size_t val)
+{
+  h ^= val;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
+  val >>= 4 * sizeof val;
+  h ^= val;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
+  return h;
+}
+
+static struct link_map *
+lib_hash_getname (Lmid_t nsid, const char *name)
+{
+  uint32_t hash = hash_mix (dl_new_hash (name), nsid);
+  struct lib_hash_namenode *n = lib_hash_nametable[lib_hash_bucket (hash)];
+  while (n)
+    {
+      struct link_map *l = n->lib;
+      if (n->hash == hash && !strcmp (n->name, name) && l->l_ns == nsid && !l->l_removed)
+        return l;
+      n = n->next;
+    }
+  return NULL;
+}
+
+static void
+lib_hash_addname (const char *name, struct link_map *lib)
+{
+  if (lib->l_faked)
+    return;
+  struct lib_hash_namenode *n = malloc (sizeof (struct lib_hash_namenode));
+  if (!n)
+    _dl_fatal_printf ("%s: out of memory\n", __func__);
+  uint32_t hash = hash_mix (dl_new_hash (name), lib->l_ns);
+  struct lib_hash_namenode **p = lib_hash_nametable + lib_hash_bucket (hash);
+  n->name = name;
+  n->lib = lib;
+  n->next = *p;
+  n->hash = hash;
+  *p = n;
+}
+
+static void
+lib_hash_delname (const char *name, struct link_map *lib)
+{
+  uint32_t hash = hash_mix (dl_new_hash (name), lib->l_ns);
+  struct lib_hash_namenode **p = lib_hash_nametable + lib_hash_bucket (hash);
+  while (*p)
+    {
+      struct lib_hash_namenode *n = *p;
+      if (n->hash == hash && n->lib == lib &&
+          (n->name == name || !strcmp (n->name, name))) {
+        *p = n->next;
+        free (n);
+        return;
+      }
+      p = &n->next;
+    }
+  _dl_fatal_printf ("%s: can't unhash '%s'\n", __func__, name);
+}
+
+static uint32_t
+hash_file (Lmid_t nsid, dev_t dev, ino64_t ino, off_t off)
+{
+  return hash_mix (hash_mix (hash_mix (hash_mix (0, nsid), dev), ino), off);
+}
+
+static struct link_map *
+lib_hash_getfile (Lmid_t nsid, dev_t dev, ino64_t ino, off_t off)
+{
+  uint32_t hash = hash_file (nsid, dev, ino, off);
+  struct lib_hash_filenode *n = lib_hash_filetable[lib_hash_bucket (hash)];
+  while (n)
+    {
+      struct link_map *l = n->lib;
+      if (n->hash == hash && !l->l_removed && l->l_ns == nsid &&
+          l->l_file_id.dev == dev && l->l_file_id.ino == ino && l->l_off == off)
+        return l;
+      n = n->next;
+    }
+  return NULL;
+}
+
+static void
+lib_hash_addfile (struct link_map *lib)
+{
+  if (lib->l_faked)
+    return;
+  struct lib_hash_filenode *n = malloc (sizeof (struct lib_hash_filenode));
+  if (!n)
+    _dl_fatal_printf ("%s: out of memory\n", __func__);
+  uint32_t hash = hash_file (lib->l_ns, lib->l_file_id.dev, lib->l_file_id.ino, lib->l_off);
+  struct lib_hash_filenode **p = lib_hash_filetable + lib_hash_bucket (hash);
+  n->lib = lib;
+  n->next = *p;
+  n->hash = hash;
+  *p = n;
+}
+
+static void
+lib_hash_delfile (struct link_map *lib)
+{
+  uint32_t hash = hash_file (lib->l_ns, lib->l_file_id.dev, lib->l_file_id.ino, lib->l_off);
+  struct lib_hash_filenode **p = lib_hash_filetable + lib_hash_bucket (hash);
+  while (*p)
+    {
+      struct lib_hash_filenode *n = *p;
+      if (n->hash == hash && n->lib == lib)
+        {
+          *p = n->next;
+          free (n);
+          return;
+        }
+      p = &n->next;
+    }
+  _dl_fatal_printf ("%s: can't unhash '%s'\n", __func__, lib->l_name);
+}
+
+void
+_dl_hash_add_object (struct link_map *lib)
+{
+  lib_hash_addfile (lib);
+  lib_hash_addname (lib->l_name, lib);
+  for (struct libname_list *ln = lib->l_libname; ln; ln = ln->next)
+    {
+      lib_hash_addname (ln->name, lib);
+    }
+}
+
+void
+_dl_hash_del_object (struct link_map *lib)
+{
+  lib_hash_delfile (lib);
+  lib_hash_delname (lib->l_name, lib);
+  for (struct libname_list *ln = lib->l_libname; ln; ln = ln->next) {
+    lib_hash_delname (ln->name, lib);
+  }
+}
+
 /* Map in the shared object NAME, actually located in REALNAME, and already
    opened on FD.  */
 
@@ -841,28 +1011,34 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, off_t of
     }
 
   /* Look again to see if the real name matched another already loaded.  */
-  for (l = GL(dl_ns)[nsid]._ns_loaded; l != NULL; l = l->l_next)
-    if (!l->l_removed && _dl_file_id_match_p (&l->l_file_id, &id)
-        && l->l_off == offset)
-      {
-	/* The object is already loaded.
-	   Just bump its reference count and return it.  */
-	__close (fd);
-
-	/* If the name is not in the list of names for this object add
-	   it.  */
-	free (realname);
-	if (offset == 0)
-	  /* If offset!=0, foo.so/@0x<offset> should be the *only*
-	     name for this object. b/20141439.  */
-	  add_name_to_object (l, name);
-
-	return l;
-      }
+  l = lib_hash_getfile (nsid, id.dev, id.ino, offset);
+  if (l)
+    {
+      /* The object is already loaded.
+         Just bump its reference count and return it.  */
+      __close (fd);
+
+      /* If the name is not in the list of names for this object add
+         it.  */
+      free (realname);
+      if (offset == 0)
+        {
+          /* If offset!=0, foo.so/@0x<offset> should be the *only*
+             name for this object. b/20141439.  */
+          add_name_to_object (l, name);
+          lib_hash_addname (name, l);
+        }
+
+      return l;
+    }
 
 #ifdef SHARED
   /* When loading into a namespace other than the base one we must
      avoid loading ld.so since there can only be one copy.  Ever.  */
+
+  /* Note that this test is wrong in two ways:
+       1) it doesn't consider offset, and
+       2) l_file_id.ino and l_file_id.dev for rtld are always zero!  */
   if (__glibc_unlikely (nsid != LM_ID_BASE)
       && (_dl_file_id_match_p (&id, &GL(dl_rtld_map).l_file_id)
 	  || _dl_name_match_p (name, &GL(dl_rtld_map))))
@@ -1349,10 +1525,7 @@ cannot enable executable stack as shared object requires");
 
   l->l_off = offset;
 
-  /* When we profile the SONAME might be needed for something else but
-     loading.  Add it right away.  */
-  if (__glibc_unlikely (GLRO(dl_profile) != NULL)
-      && l->l_info[DT_SONAME] != NULL)
+  if (l->l_info[DT_SONAME] != NULL)
     add_name_to_object (l, ((const char *) D_PTR (l, l_info[DT_STRTAB])
 			    + l->l_info[DT_SONAME]->d_un.d_val));
 
@@ -1909,26 +2082,7 @@ match_one (const char *name, struct link_map *l)
   if (__builtin_expect (l->l_faked, 0) != 0
       || __builtin_expect (l->l_removed, 0) != 0)
     return 0;
-  if (!_dl_name_match_p (name, l))
-    {
-      const char *soname;
-
-      if (__builtin_expect (l->l_soname_added, 1)
-	  || l->l_info[DT_SONAME] == NULL)
-	return 0;
-
-      soname = ((const char *) D_PTR (l, l_info[DT_STRTAB])
-		+ l->l_info[DT_SONAME]->d_un.d_val);
-      if (strcmp (name, soname) != 0)
-	return 0;
-
-      /* We have a match on a new name -- cache it.  */
-      add_name_to_object (l, soname);
-      l->l_soname_added = 1;
-    }
-
-  /* We have a match.  */
-  return 1;
+  return _dl_name_match_p (name, l);
 }
 
 /* Map in the shared object file NAME.  */
@@ -1960,9 +2114,9 @@ _dl_map_object (struct link_map *loader, const char *name, off_t offset,
     }
   else
     {
-      for (l = _dl_last_entry (&GL(dl_ns)[nsid]); l; l = l->l_prev)
-	if (match_one (name, l))
-	  return l;
+      l = lib_hash_getname(nsid, name);
+      if (l)
+        return l;
     }
 
   /* Display information if we are debugging.  */
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 06db4e2b94..360f9ea3dc 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -25,6 +25,7 @@
 #include <dl-hash.h>
 #include <dl-machine.h>
 #include <sysdep-cancel.h>
+#include <hp-timing.h>
 #include <libc-lock.h>
 #include <tls.h>
 #include <atomic.h>
@@ -330,335 +331,263 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
   result->m = (struct link_map *) map;
 }
 
-/* Bob Jenkins's hash function, free for any use.
-   http://burtleburtle.net/bob/hash/  */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<<8); \
-  c -= a; c -= b; c ^= (b>>13); \
-  a -= b; a -= c; a ^= (c>>12);  \
-  b -= c; b -= a; b ^= (a<<16); \
-  c -= a; c -= b; c ^= (b>>5); \
-  a -= b; a -= c; a ^= (c>>3);  \
-  b -= c; b -= a; b ^= (a<<10); \
-  c -= a; c -= b; c ^= (b>>15); \
-}
-
-typedef unsigned int uint32 __attribute__ ((mode (SI)));
-static uint32
-hash_with_seed (const char *k, uint32 length, uint32 seed)
-{
-   uint32 a, b, c, len;
-
-   /* Set up the internal state */
-   len = length;
-   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-   c = seed;            /* the previous hash value */
-
-   /*---------------------------------------- handle most of the key */
-   while (len >= 12)
-     {
-       a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
-       b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
-       c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
-       mix (a,b,c);
-       k += 12;
-       len -= 12;
-     }
-
-   /*------------------------------------- handle the last 11 bytes */
-   c += length;
-   switch (len)              /* all the case statements fall through */
-     {
-     case 11: c+=((uint32)k[10]<<24);
-     case 10: c+=((uint32)k[9]<<16);
-     case 9 : c+=((uint32)k[8]<<8);
-         /* the first byte of c is reserved for the length */
-     case 8 : b+=((uint32)k[7]<<24);
-     case 7 : b+=((uint32)k[6]<<16);
-     case 6 : b+=((uint32)k[5]<<8);
-     case 5 : b+=k[4];
-     case 4 : a+=((uint32)k[3]<<24);
-     case 3 : a+=((uint32)k[2]<<16);
-     case 2 : a+=((uint32)k[1]<<8);
-     case 1 : a+=k[0];
-       /* case 0: nothing left to add */
-     }
-   mix (a,b,c);
-   /*-------------------------------------------- report the result */
-   return c;
-}
-
 
 /* A hash table used to speed up lookups when we have a lot of shared
    libraries.  We record in the table how many ELF objects in the link
    map we can safely skip, because they are known to not contain the symbol
    we are looking for.
 
-   Values start out at N (num objects in the main_map).  We then go through
-   each object (executable or DSO) in their order in the main_map and insert
-   pos_in_map_map into the table.
+   We go through each object (executable or DSO) in their order in the
+   main_map and insert pos_in_main_map into the table.  Searches for
+   symbols not present in the table start at num_objects, skipping every
+   object preprocessed in this way.
 
    If there is already an earlier (smaller) entry in the table, we leave it
-   alone.  It could be a hash collision, or it could be an earlier
-   instance of the same symbol (eg weak __stdout definition in
-   multiple objects).
+   alone.  It represents an earlier instance of the same symbol (eg weak
+   __stdout definition in multiple objects).  */
 
-   To save space, the table contains uint16_t entries, and so we can't
-   record main map positions over 65535.
-
-   It turns out that even when only 25% of slots are used, collision
-   rate is around 10%, which is quite high, and causes lookups to be slow.
-
-   So we use a second table (using a different hash seed). Hash collisions
-   using two separate hashes are quite rare.
-
-   Note that we allocate the two tables together (a single block twice
-   the size of one table).  */
+struct position_hash
+{
+  /* The Bernstein hash, shifted right one bit.  (This lets us build a table
+     directly from the precomputed values in DT_GNU_HASH sections, in which
+     the low bit is repurposed to terminate hash chains.)  */
+  uint32_t hash;
+  /* The position of the first library with this symbol in the search list.  */
+  uint32_t pos;
+  /* The symbol name.  */
+  const char *name;
+};
+
+static struct position_hash *position_hash_table;
+static size_t position_hash_table_slots;
+static size_t position_hash_table_slots_occupied;
+static int position_hash_lookup_default;
 
-static void
-insert_into_hash_table_1 (int pos_in_main_map, const char *symbol_name,
-			  size_t name_len, dl_position_table_entry_t *table,
-			  int seed)
+static int
+position_hash_put (struct position_hash *table, size_t slots,
+                   struct position_hash *item)
 {
-  uint32 hash = hash_with_seed (symbol_name, name_len, seed);
-  int pos = hash & GLRO(dl_position_hash_mask);
-  if (pos_in_main_map < table[pos])
+  size_t mask = slots - 1;
+  /* Search for a matching entry or a free slot.  This loop always terminates
+     because we don't allow the hashtable to become completely full.  */
+  for (size_t stride = 0, i = item->hash; ; i += ++stride)
     {
-      table[pos] = pos_in_main_map;
-      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-        _dl_debug_printf ("fastload hash for %s: [%u] = %u (table %u)\n",
-                          symbol_name, (unsigned int) pos,
-                          (unsigned int) table[pos],
-                          (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+      struct position_hash *slot = &table[i & mask];
+      if (slot->name == NULL)
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:    put %s hash 0x%x pos %u slot 0x%lx\n",
+                              item->name, item->hash, item->pos, slot - table);
+          slot->hash = item->hash;
+          slot->pos = item->pos;
+          slot->name = item->name;
+          return 1;
+        }
+      else if (slot->hash == item->hash &&
+               (slot->name == item->name || !strcmp (slot->name, item->name)))
+        {
+          if (item->pos < slot->pos)
+            slot->pos = item->pos;
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:    dup %s hash 0x%x pos %u slot 0x%lx\n",
+                              item->name, item->hash, slot->pos, slot - table);
+          return 0;
+        }
     }
-  else
+}
+
+static void
+position_hash_resize (struct position_hash *oldtable,
+                      size_t oldslots, size_t newslots)
+{
+  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+    _dl_debug_printf ("fastload: resizing hashtable to %lu slots\n", newslots);
+  assert (!oldtable == !oldslots);
+  void *ptr = mmap (NULL, newslots * sizeof *oldtable, PROT_READ|PROT_WRITE,
+                    MAP_ANON|MAP_PRIVATE|MAP_POPULATE, -1, 0);
+  if (ptr == MAP_FAILED)
+    _dl_signal_error (errno, NULL, NULL, "cannot mmap fastload hashtable");
+  struct position_hash *newtable = position_hash_table = ptr;
+  position_hash_table_slots = newslots;
+  if (oldtable == NULL)
+    return;
+  for (size_t i = 0; i < oldslots; ++i)
     {
-      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-        _dl_debug_printf
-          ("fastload hash for %s: [%u] already set to %u <= %u (table %u)\n",
-           symbol_name, (unsigned int) pos,
-           (unsigned int) table[pos],
-           (unsigned int) pos_in_main_map,
-           (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+      if (oldtable[i].name)
+        position_hash_put (newtable, newslots, &oldtable[i]);
     }
-}
 
-#define HASH_SEED_A 0
-#define HASH_SEED_B 5381  /* As good as any other value.  */
+  if (munmap (oldtable, oldslots * sizeof *oldtable))
+    _dl_signal_error (errno, NULL, NULL, "cannot munmap fastload hashtable");
+}
 
 static void
-insert_into_hash_table (int pos_in_main_map, const char *symbol_name)
+position_hash_init (int lookup_default)
 {
-  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
-  const int table_size = GLRO(dl_position_hash_mask) + 1;
-  dl_position_table_entry_t *const table2 = table1 + table_size;
-  const size_t name_len = strlen (symbol_name);
-
-  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table1,
-			    HASH_SEED_A);
-  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table2,
-			    HASH_SEED_B);
+  assert (position_hash_table == NULL);
+  position_hash_lookup_default = lookup_default;
+  position_hash_resize (NULL, 0, 8192);
 }
 
-static inline int
-earliest_pos_from_hash_table_1 (const char *symbol_name,
-                                const dl_position_table_entry_t *table,
-                                int seed)
+static void
+position_hash_insert (uint32_t hash, const char *name, uint32_t pos)
 {
-  int pos;
-  uint32 hash;
-
-  hash = hash_with_seed (symbol_name, strlen (symbol_name), seed);
-  pos = hash & GLRO(dl_position_hash_mask);
+  struct position_hash *table = position_hash_table;
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-    _dl_debug_printf
-      ("fastload earliest pos for %s: [%u] = %u (table %u)\n",
-       symbol_name, (unsigned int) pos, (unsigned int) table[pos],
-       (GLRO(dl_position_hash_table) == table) ? 0 : 1);
-
-  return table[pos];
+    _dl_debug_printf ("fastload: insert %s hash 0x%x pos %u\n", name, hash, pos);
+  size_t slots = position_hash_table_slots;
+  struct position_hash item = { hash, pos, name };
+  if (!position_hash_put (table, slots, &item))
+    return;
+  size_t newsize = ++position_hash_table_slots_occupied;
+  if (newsize >= slots / 2)
+    /* The load factor has reached 50%.  Double the table size and rehash.  */
+    position_hash_resize (table, slots, 2 * slots);
 }
 
-static inline int
-earliest_pos_from_hash_table (const char *undef_name)
+static int
+position_hash_lookup (uint32_t hash, const char *name)
 {
-  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
-
-  if (table1 == NULL)
-    return 0;  /* Search from the beginning of the list.  */
-  else
+  struct position_hash *table = position_hash_table;
+  if (table == NULL)
+    return 0;
+  size_t mask = position_hash_table_slots - 1;
+  for (size_t stride = 0, i = hash; ; i += ++stride)
     {
-      const int table_size = GLRO(dl_position_hash_mask) + 1;
-      dl_position_table_entry_t *const table2 = table1 + table_size;
-      const int skip_1 = earliest_pos_from_hash_table_1 (undef_name, table1,
-                                                         HASH_SEED_A);
-      const int skip_2 = earliest_pos_from_hash_table_1 (undef_name, table2,
-                                                         HASH_SEED_B);
-
-      /* Possibilities:
-
-       * Both skip_1 and skip_2 are positive and same:
-         there were no collisions at insertion in either table, or there
-         was double-collision with symbols from the same library.
-
-         If skip_1 == skip_2 == N (num_directly_loaded_objects), then the
-         slot was empty in both tables after they were filled (i.e. there
-         is no symbol that hashes to corresponding slot).  In that case,
-         searching first [0, N) is pointless. See b/5609692.
-
-         True skip_to == skip_1 == skip_2.
-
-       * Both skip_1 and skip_2 are positive but different:
-         there was a collision in one or both tables, or one of the slots
-         remained unfilled after all symbols have been inserted.
-         It is safe to start at max, true skip_to count can not be less
-         than max.  */
-
-      return (skip_1 < skip_2) ? skip_2 : skip_1;
+      struct position_hash *slot = &table[i & mask];
+      if (slot->hash == hash &&
+          (slot->name == name || !strcmp (slot->name, name)))
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:  found %s at slot 0x%lx, pos %u\n",
+                              name, slot - table, slot->pos);
+          return slot->pos;
+        }
+      else if (slot->name == NULL)
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload: missed %s at slot 0x%lx, default pos %u\n",
+                              name, slot - table, position_hash_lookup_default);
+          return position_hash_lookup_default;
+        }
     }
 }
 
-
-static void
-fill_hash_table (const struct link_map *const map, int pos_in_main_map,
-                 int num_symbols)
+static int
+position_hash_include_symbol (const ElfW(Sym) *sym)
 {
-  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
-  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
-  int k;
+  if (sym->st_value == 0 && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
+    return 0;
 
-  /* Iterate through the symbols defined in this map and insert them
-     into the bloom filter.  If the symbol wouldn't pass the tests in
-     do_lookup_x(), then don't insert it.  */
-  for (k = 0; k < num_symbols; k++)
+  switch (ELFW(ST_TYPE) (sym->st_info))
     {
-      const ElfW(Sym) *sym = &symtab[k];
-
-      if (sym->st_value == 0
-          && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
-        continue;
-
-      switch (ELFW(ST_TYPE) (sym->st_info))
-	{
-	case STT_SECTION:
-	case STT_FILE:
-	case STT_COMMON:
-	  continue;
-	default:
-	  break;
-	}
-
-      switch (ELFW(ST_BIND) (sym->st_info))
-	{
-	case STB_LOCAL:  /* Local symbols are ignored.  */
-	  continue;
-	default:
-	  break;
-	}
-
-      insert_into_hash_table (pos_in_main_map, strtab + sym->st_name);
+    case STT_SECTION:
+    case STT_FILE:
+    case STT_COMMON:
+      return 0;
     }
-}
-
-static dl_position_table_entry_t *
-allocate_position_hash_table (size_t table_size, int max_pos)
-{
-  /* Implementation detail: we actually allocate two "logical" tables in a
-     single block.  */
-
-  const size_t total_size = 2 * table_size;
-  dl_position_table_entry_t *table
-    = malloc (total_size * sizeof (dl_position_table_entry_t));
-
-  if (table == NULL)
-    return NULL;
-
-  for (int k = 0; k < total_size; ++k)
-    table[k] = max_pos;
 
-  return table;
+  /* Local symbols are ignored.  */
+  return ELFW(ST_BIND) (sym->st_info) != STB_LOCAL;
 }
 
 static int
-num_dynsyms (const struct link_map *const map)
+last_gnu_hash_bucket (const struct link_map *map)
 {
-  if (map->l_info[DT_HASH] != NULL)
-    {
-      /* If DT_HASH is present, we unconditionally use it, since it already
-         has the answer precomputed and waiting for us.  */
-      const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+  /* The best documentation for GNU_HASH I found:
+     http://www.linker-aliens.org/blogs/ali/entry/gnu_hash_elf_sections/
 
-      if (hash != NULL)
-	{
-	  /* DT_HASH dynamic structure points to a symbol hash table
-	     with the following layout:
+     _dl_setup_hash() has already set things up for us.  */
 
-	     nbucket
-	     nchain       (== number of symbols in dynamic symbol table)
-	     bucket[0]
-	     ...
+  if (__builtin_expect (map->l_nbuckets < 1, 0))
+    /* Paranoia: neither gold nor gnu-ld will construct an empty
+       .gnu.hash, but some other linker just might.  */
+    return 0;
 
-	     http://refspecs.freestandards.org/elf/TIS1.1.pdf (p. 2.19). */
+  /* In the GNU hash the symbol index of a symbol is determined by
+     the offset of the symbol's entry in the hash table itself.
 
-	  return hash[1];
-	}
-    }
-  else if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
-    {
-      /* The best documentation for GNU_HASH I found:
-	 http://blogs.sun.com/ali/entry/gnu_hash_elf_sections
+     We start at the last bucket map->l_gnu_buckets[map->l_nbuckets-1]
+     and loop backward from the end until we find a bucket which is
+     not zero.  */
 
-	 _dl_setup_hash() has already set things up for us.  */
+  int last_bucket_idx = map->l_nbuckets - 1;
 
-      if (__builtin_expect (map->l_nbuckets < 1, 0))
-	{
-	  /* Paranoia: neither gold nor gnu-ld will construct an empty
-	     .gnu.hash, but some other linker just might.  */
-	  return 0;
-	}
+  while (last_bucket_idx > 0 &&
+         map->l_gnu_buckets[last_bucket_idx] == 0)
+    --last_bucket_idx;
 
-      /* In the GNU hash the symbol index of a symbol is determined by
-	 the offset of the symbol's entry in the hash table itself.
+  return map->l_gnu_buckets[last_bucket_idx];
+}
 
-	 We start at the last bucket map->l_gnu_chain_zero[map->l_nbuckets-1]
-	 and loop backward from the end until we find a bucket which is
-	 not zero.
+static void
+position_hash_fill_from_gnu_hash (const struct link_map *map,
+                                  int pos_in_main_map)
+{
+  int last_bucket = last_gnu_hash_bucket (map);
+  if (last_bucket == 0)
+    return;
 
-	 Then we walk forward to find the last entry in the chain which
-	 starts at that bucket.  The terminating entry gives us desired
-	 symbol index.  */
+  /* Start of hash values.  */
+  const Elf32_Word *chains = &map->l_gnu_buckets[map->l_nbuckets];
 
-      int last_bucket_idx = map->l_nbuckets - 1;
+  /* Reconstruct the number of symbols omitted.  */
+  const Elf32_Word *zero = map->l_gnu_chain_zero;
+  int symbias = chains - zero;
 
-      while (last_bucket_idx > 0 &&
-	     map->l_gnu_buckets[last_bucket_idx] == 0)
-	{
-	  --last_bucket_idx;
-	}
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
 
-      const Elf32_Word last_bucket = map->l_gnu_buckets[last_bucket_idx];
+  /* Iterate through the symbols in the GNU hash chains and insert them
+     into the position hash table.  If the symbol wouldn't pass the tests
+     in do_lookup_x(), then don't insert it.  */
+  for (int k = symbias; ; ++k)
+    {
+      uint32_t hash = zero[k];
+      const ElfW(Sym) *sym = &symtab[k];
+      if (position_hash_include_symbol (sym))
+        position_hash_insert (hash >> 1, strtab + sym->st_name, pos_in_main_map);
 
-      if (__builtin_expect (last_bucket == 0, 0))
-	{
-	  /* This shouldn't happen unless the library has only static
-	     variables with global ctors, and nothing else.  */
-	  return 0;
-	}
+      /* Stop when we've passed the start of the last chain and the sentinel bit
+         terminating a chain is set.  This is the end of the section.  */
+      if (k >= last_bucket && (hash & 1))
+        break;
+    }
+}
 
-      /* Start of the hash chain for the last non-zero bucket.  */
-      const Elf32_Word *hasharr = &map->l_gnu_chain_zero[last_bucket];
+static int
+old_hash_nchain (const struct link_map *map)
+{
+  if (map->l_info[DT_HASH] == NULL)
+    return 0;
+  const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+  if (hash == NULL)
+    return 0;
+  /* Read the "nchain" field of the DT_HASH section; see
+     https://refspecs.linuxfoundation.org/elf/gabi4+/ch5.dynamic.html#hash. */
+  return hash[1];
+}
 
-      /* The chain is terminated by an entry with LSBit set.  */
-      while ((*hasharr & 1u) == 0)
-	++hasharr;
+static void
+position_hash_fill_from_symtab (const struct link_map *const map,
+                                int pos_in_main_map)
+{
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+  const int num_symbols = old_hash_nchain (map);
 
-      /* Size of dynamic symbol table is "index of last symbol" + 1.  */
-      return hasharr - map->l_gnu_chain_zero + 1;
+  /* Iterate through the symbols defined in this map and insert them
+     into the position hash table.  If the symbol wouldn't pass the tests
+     in do_lookup_x(), then don't insert it.  */
+  for (int k = 0; k < num_symbols; k++)
+    {
+      const ElfW(Sym) *sym = &symtab[k];
+      if (position_hash_include_symbol (sym))
+        {
+          uint32_t hash = dl_new_hash (strtab + sym->st_name);
+          position_hash_insert (hash >> 1, strtab + sym->st_name, pos_in_main_map);
+        }
     }
-
-  return 0;
 }
 
 /* Create the fastload position hash (if requested).  Given a link_map
@@ -677,8 +606,8 @@ _dl_fill_position_hash (struct link_map *main_map)
      fill in the hash table of earliest known positions for symbols in
      main_map.  We'll use this in do_lookup_x().  */
 
-  /* If cutoff or size is negative, fastload is disabled.  */
-  if (GLRO(dl_position_hash_cutoff) < 0 || GLRO(dl_position_hash_bits) < 0)
+  /* If cutoff is negative, fastload is disabled.  */
+  if (GLRO(dl_position_hash_cutoff) < 0)
     {
       if (__builtin_expect (debug_fastload, 0))
         _dl_debug_printf ("fastload: disabled (configuration)\n");
@@ -695,83 +624,31 @@ _dl_fill_position_hash (struct link_map *main_map)
       return;
     }
 
-  int *ndynsyms = NULL;
+  int timing = (HP_TIMING_AVAIL) && (GLRO(dl_debug_mask) & DL_DEBUG_STATISTICS);
+  hp_timing_t start, stop, elapsed;
+  if (timing)
+    HP_TIMING_NOW (start);
 
-  if (GLRO(dl_position_hash_bits) == 0)
-    {
-      /* Table size has not been specified via LD_FASTLOAD_HASH_BITS.
-         Compute appropriate size for <= 25% load factor.  */
-
-      int total_symbols = 0;
-
-      /* It is safe to call alloca() here, because we are executing in
-	 the context of dl_main, i.e. we are on the main stack, and are
-	 not yet using much of it.  Assuming sane upper limit of 10K direct
-	 dependencies, we'll use at most 40K bytes here.  */
-      ndynsyms = alloca (num_objects * sizeof (int));
-
-      for (k = 0; k < num_objects; ++k)
-        {
-          const struct link_map *const map = main_map->l_searchlist.r_list[k];
-	  const int nsyms = num_dynsyms (map);
+  position_hash_init (num_objects);
 
-	  ndynsyms[k] = nsyms;
-	  total_symbols += nsyms;
-	  if (__builtin_expect (debug_fastload, 0))
-	    _dl_debug_printf ("fastload: %s: %u symbols\n", map->l_name, nsyms);
-	}
-
-      /* Ensure table size is >= 4 * total_symbols.  Generally this
-         over-sizes the table by more than 4, since there usually are lots
-         of duplicate symbols (from different DSOs) as well.  */
-
-      GLRO(dl_position_hash_bits)
-        = (8 * sizeof (total_symbols)) - __builtin_clz (total_symbols) + 2;
-
-      if (__builtin_expect (debug_fastload, 0))
-        _dl_debug_printf ("fastload: will use %u hash bits for %u total "
-                          "symbols\n",
-                          GLRO(dl_position_hash_bits), total_symbols);
-    }
-
-  /* If the requested or computed table size is too large, limit it.  */
-  if (GLRO(dl_position_hash_bits) > DL_POSITION_HASH_BITS_MAX)
-    {
-      if (__builtin_expect (debug_fastload, 0))
-        _dl_debug_printf ("fastload: requested table too large"
-                          " (%u > max %u bits)\n",
-                          GLRO(dl_position_hash_bits),
-                          DL_POSITION_HASH_BITS_MAX);
-      GLRO(dl_position_hash_bits) = DL_POSITION_HASH_BITS_MAX;
-    }
-
-  int table_size = 1 << GLRO(dl_position_hash_bits);
-  GLRO(dl_position_hash_mask) = table_size - 1;
-  if (__builtin_expect (debug_fastload, 0))
-    _dl_debug_printf ("fastload: enabled with %u entry table\n", table_size);
-
-  int max_pos = num_objects < DL_POSITION_MAX ? num_objects : DL_POSITION_MAX;
-  GLRO(dl_position_hash_table) = allocate_position_hash_table (table_size,
-							       max_pos);
-  if (GLRO(dl_position_hash_table) == NULL)
-    {
-      if (__builtin_expect (debug_fastload, 0))
-	_dl_debug_printf ("fastload: failed to allocate table\n");
-      return;
-    }
-
-  /* There is no point in going beyond max_pos: the rest of the table
-     has already been saturated with max_pos value.  */
-  for (k = 0; k < max_pos; ++k)
+  for (k = 0; k < num_objects; ++k)
     {
       const struct link_map *const map = main_map->l_searchlist.r_list[k];
 
-      /* Reuse num_dynsyms() result we computed earlier if possible:
-	 recomputing it may be somewhat expensive with DT_GNU_HASH.  */
-      const int nsyms = ndynsyms ? ndynsyms[k] : num_dynsyms (map);
+      if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
+        position_hash_fill_from_gnu_hash (map, k);
+      else
+        position_hash_fill_from_symtab (map, k);
 
-      fill_hash_table (map, k, nsyms);
     }
+
+  if (!timing)
+    return;
+  HP_TIMING_NOW (stop);
+  HP_TIMING_DIFF (elapsed, start, stop);
+  char buf[80];
+  HP_TIMING_PRINT (buf, sizeof buf, elapsed);
+  _dl_debug_printf ("\t  time to build fastload table: %s\n", buf);
 }
 
 /* Inner part of the lookup functions.  We return a value > 0 if we
@@ -795,13 +672,13 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 
   if (scope == GL(dl_ns)[LM_ID_BASE]._ns_main_searchlist && i == 0)
     {
-      const int skip_to = earliest_pos_from_hash_table (undef_name);
+      const int skip_to = position_hash_lookup (new_hash >> 1, undef_name);
 
       if (skip_to < n)
 	{
 	  i = skip_to;
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-	    _dl_debug_printf ("fastload %s skipping to %u\n",
+	    _dl_debug_printf ("fastload: lookup %s skipping to %u\n",
 			      undef_name, (unsigned int) i);
 	}
       else
@@ -812,7 +689,7 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 	  assert (skip_to == n);
 
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-	    _dl_debug_printf ("fastload %s, %u >= %u (scope->r_nlist)\n",
+	    _dl_debug_printf ("fastload: lookup %s, %u >= %u (scope->r_nlist)\n",
 			      undef_name, (unsigned int) skip_to,
 			      (unsigned int) n);
 	  return 0;
@@ -1023,16 +900,6 @@ skip:
 }
 
 
-static uint_fast32_t
-dl_new_hash (const char *s)
-{
-  uint_fast32_t h = 5381;
-  for (unsigned char c = *s; c != '\0'; c = *++s)
-    h = h * 33 + c;
-  return h & 0xffffffff;
-}
-
-
 /* Add extra dependency on MAP to UNDEF_MAP.  */
 static int
 add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
diff --git a/elf/dl-object.c b/elf/dl-object.c
index b37bcc1295..7becedf9b3 100644
--- a/elf/dl-object.c
+++ b/elf/dl-object.c
@@ -34,9 +34,7 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
 
   if (GL(dl_ns)[nsid]._ns_loaded != NULL)
     {
-      struct link_map *l = GL(dl_ns)[nsid]._ns_loaded;
-      while (l->l_next != NULL)
-	l = l->l_next;
+      struct link_map *l = _dl_last_entry (&GL(dl_ns)[nsid]);
       new->l_prev = l;
       /* new->l_next = NULL;   Would be necessary but we use calloc.  */
       l->l_next = new;
@@ -47,12 +45,13 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
   new->l_serial = GL(dl_load_adds);
   ++GL(dl_load_adds);
 
+  _dl_hash_add_object (new);
+
   __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
 }
 
 
-/* Allocate a `struct link_map' for a new object being loaded,
-   and enter it into the _dl_loaded list.  */
+/* Allocate a `struct link_map' for a new object being loaded.  */
 struct link_map *
 _dl_new_object (char *realname, const char *libname, int type,
 		struct link_map *loader, int mode, Lmid_t nsid)
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index b2a01ede62..54a226531b 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -33,11 +33,32 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
   unsigned int i = 0;
   uint16_t seen[nmaps];
   memset (seen, 0, nmaps * sizeof (seen[0]));
-  while (1)
+
+  /* Mark objects with in-edges.  */
+  for (unsigned j = i; j < nmaps; ++j)
+    maps[j]->l_inedge = 0;
+
+  for (unsigned j = i; j < nmaps; ++j)
+    {
+      for (struct link_map **p = maps[j]->l_initfini; p && *p; ++p)
+        {
+          if (*p != maps[j])  /* Skip self-edges.  */
+            (*p)->l_inedge = 1;
+        }
+    }
+
+  while (i < nmaps)
     {
+      struct link_map *thisp = maps[i];
+      if (!thisp->l_inedge)
+        {
+          /* No dependencies on this object.  */
+          ++i;
+          continue;
+        }
+
       /* Keep track of which object we looked at this round.  */
       ++seen[i];
-      struct link_map *thisp = maps[i];
 
       if (__glibc_unlikely (for_fini))
 	{
@@ -52,7 +73,7 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
 	 with the dependency.  */
       unsigned int k = nmaps - 1;
       while (k > i)
-	{
+        {
 	  struct link_map **runp = maps[k]->l_initfini;
 	  if (runp != NULL)
 	    /* Look through the dependencies of the object.  */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index a097f22a16..e0d464a5fa 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -143,10 +143,7 @@ hp_timing_t _dl_cpuclock_offset;
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 
 /* The merged position hash table used if we have a lot of shared objects.  */
-dl_position_table_entry_t *_dl_position_hash_table;
-int _dl_position_hash_mask;
 int _dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT;
-int _dl_position_hash_bits;
 
 size_t _dl_pagesize = EXEC_PAGESIZE;
 
diff --git a/elf/rtld.c b/elf/rtld.c
index a0b22c4bb3..9c942add0b 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -292,7 +292,6 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
     ._dl_close = _dl_close,
     ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft,
     ._dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT,
-    ._dl_position_hash_bits = 0,
 #ifdef HAVE_DL_DISCOVER_OSVERSION
     ._dl_discover_osversion = _dl_discover_osversion
 #endif
@@ -1382,6 +1381,7 @@ of this helper program; chances are you did not intend to run this program.\n\
   GL(dl_rtld_map).l_type = lt_library;
   main_map->l_next = &GL(dl_rtld_map);
   GL(dl_rtld_map).l_prev = main_map;
+  _dl_hash_add_object (&GL(dl_rtld_map));
   ++GL(dl_ns)[LM_ID_BASE]._ns_nloaded;
   ++GL(dl_load_adds);
 
@@ -2703,12 +2703,6 @@ process_envvars (enum mode *modep)
 	    GLRO(dl_position_hash_cutoff)
               = _dl_strtoul (&envline[16], NULL);;
           break;
-
-        case 18:
-          if (memcmp (envline, "FASTLOAD_HASH_BITS", 18) == 0)
-	    GLRO(dl_position_hash_bits)
-              = _dl_strtoul (&envline[19], NULL);;
-          break;
         }
     }
 
diff --git a/include/link.h b/include/link.h
index 22c3eec73d..8e4ec17e46 100644
--- a/include/link.h
+++ b/include/link.h
@@ -180,8 +180,7 @@ struct link_map
     unsigned int l_reserved:2;	/* Reserved for internal use.  */
     unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed
 					to by `l_phdr' is allocated.  */
-    unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in
-				      the l_libname list.  */
+    unsigned int l_inedge:1;    /* Used temporarily when sorting deps.  */
     unsigned int l_faked:1;	/* Nonzero if this is a faked descriptor
 				   without associated file.  */
     unsigned int l_need_tls_init:1; /* Nonzero if GL(dl_init_static_tls)
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 329a367ded..32647f825a 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -589,23 +589,14 @@ struct rtld_global_ro
      below.  */
   EXTERN struct r_search_path_elem *_dl_init_all_dirs;
 
-  /* The merged hash table used if we have a lot of shared objects. */
-  EXTERN dl_position_table_entry_t *_dl_position_hash_table;
-  EXTERN int _dl_position_hash_mask;
-  EXTERN int _dl_position_hash_bits;
   EXTERN int _dl_position_hash_cutoff;
 
-#define DL_POSITION_HASH_BITS_MAX       27  /* (1 << 27) entries.  */
 #if defined(__powerpc__) && !defined(__powerpc64__)
 #define DL_POSITION_HASH_CUTOFF_DEFAULT -1  /* Disabled.  */
 #else
 #define DL_POSITION_HASH_CUTOFF_DEFAULT 32  /* > 32 shared libs.  */
 #endif
 
-  /* Colon-separated list of absolute paths to ld.so.cache files
-     we'll load.  */
-  EXTERN const char *_google_ld_so_cache_list;
-
 #if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD
   /* Overhead of a high-precision timing measurement.  */
   EXTERN hp_timing_t _dl_hp_timing_overhead;
@@ -969,6 +960,12 @@ extern lookup_t _dl_lookup_symbol_x (const char *undef,
 extern void _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
      attribute_hidden;
 
+/* Update hashtables when objects are loaded or unloaded.  */
+extern void _dl_hash_add_object (struct link_map *lib)
+    attribute_hidden;
+extern void _dl_hash_del_object (struct link_map *lib)
+    attribute_hidden;
+
 /* Allocate a `struct link_map' for a new object being loaded.  */
 extern struct link_map *_dl_new_object (char *realname, const char *libname,
 					int type, struct link_map *loader,
@@ -1257,6 +1254,15 @@ _dl_last_entry (struct link_namespaces *ns)
   return map;
 }
 
+static inline uint_fast32_t
+dl_new_hash (const char *s)
+{
+  uint_fast32_t h = 5381;
+  for (unsigned char c = *s; c != '\0'; c = *++s)
+    h = h * 33 + c;
+  return h & 0xffffffff;
+}
+
 __END_DECLS
 
 #endif /* ldsodefs.h */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [glibc/maskray/grte] Redesign the fastload support for additional performance
@ 2021-08-28  0:27 Fangrui Song
  0 siblings, 0 replies; 3+ messages in thread
From: Fangrui Song @ 2021-08-28  0:27 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=69446cda28ba1e72347368b62d0184090a05d617

commit 69446cda28ba1e72347368b62d0184090a05d617
Author: Ambrose Feinstein <ambrose@google.com>
Date:   Wed Sep 11 14:53:28 2019 -0700

    Redesign the fastload support for additional performance

Diff:
---
 elf/dl-close.c             |   3 +
 elf/dl-load.c              | 248 +++++++++++++++----
 elf/dl-lookup.c            | 583 +++++++++++++++++----------------------------
 elf/dl-object.c            |   9 +-
 elf/dl-sort-maps.c         |  27 ++-
 elf/dl-support.c           |   3 -
 elf/rtld.c                 |   8 +-
 include/link.h             |   3 +-
 sysdeps/generic/ldsodefs.h |  24 +-
 9 files changed, 474 insertions(+), 434 deletions(-)

diff --git a/elf/dl-close.c b/elf/dl-close.c
index cbf336609b..3960559b72 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -723,6 +723,9 @@ _dl_close_worker (struct link_map *map, bool force)
 	    _dl_debug_printf ("\nfile=%s [%lu];  destroying link map\n",
 			      imap->l_name, imap->l_ns);
 
+	  /* Remove from hashtables.  */
+	  _dl_hash_del_object (imap);
+
 	  /* This name always is allocated.  */
 	  free (imap->l_name);
 	  /* Remove the list with all the names of the shared object.  */
diff --git a/elf/dl-load.c b/elf/dl-load.c
index f78140a441..1ae72aa325 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -357,8 +357,7 @@ expand_dynamic_string_token (struct link_map *l, const char *s)
 
 /* Add `name' to the list of names for a particular shared object.
    `name' is expected to have been allocated with malloc and will
-   be freed if the shared object already has this name.
-   Returns false if the object already had this name.  */
+   be freed if the shared object already has this name.  */
 static void
 add_name_to_object (struct link_map *l, const char *name)
 {
@@ -804,6 +803,177 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l,
 }
 
 
+/* Hash tables of loaded objects; one keyed on name, one keyed on inode.  */
+
+struct lib_hash_namenode
+{
+  const char *name;
+  struct link_map *lib;
+  struct lib_hash_namenode *next;
+  uint32_t hash;
+};
+
+struct lib_hash_filenode
+{
+  struct link_map *lib;
+  struct lib_hash_filenode *next;
+  uint32_t hash;
+};
+
+/* Must be a power of 2.  */
+#define LIB_HASH_BUCKETS 256
+
+static struct lib_hash_namenode *lib_hash_nametable[LIB_HASH_BUCKETS];
+static struct lib_hash_filenode *lib_hash_filetable[LIB_HASH_BUCKETS];
+
+static size_t
+lib_hash_bucket (uint32_t hash)
+{
+  return hash & (LIB_HASH_BUCKETS-1);
+}
+
+static uint32_t
+hash_mix (uint32_t h, size_t val)
+{
+  h ^= val;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
+  val >>= 4 * sizeof val;
+  h ^= val;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
+  return h;
+}
+
+static struct link_map *
+lib_hash_getname (Lmid_t nsid, const char *name)
+{
+  uint32_t hash = hash_mix (dl_new_hash (name), nsid);
+  struct lib_hash_namenode *n = lib_hash_nametable[lib_hash_bucket (hash)];
+  while (n)
+    {
+      struct link_map *l = n->lib;
+      if (n->hash == hash && !strcmp (n->name, name) && l->l_ns == nsid && !l->l_removed)
+        return l;
+      n = n->next;
+    }
+  return NULL;
+}
+
+static void
+lib_hash_addname (const char *name, struct link_map *lib)
+{
+  if (lib->l_faked)
+    return;
+  struct lib_hash_namenode *n = malloc (sizeof (struct lib_hash_namenode));
+  if (!n)
+    _dl_fatal_printf ("%s: out of memory\n", __func__);
+  uint32_t hash = hash_mix (dl_new_hash (name), lib->l_ns);
+  struct lib_hash_namenode **p = lib_hash_nametable + lib_hash_bucket (hash);
+  n->name = name;
+  n->lib = lib;
+  n->next = *p;
+  n->hash = hash;
+  *p = n;
+}
+
+static void
+lib_hash_delname (const char *name, struct link_map *lib)
+{
+  uint32_t hash = hash_mix (dl_new_hash (name), lib->l_ns);
+  struct lib_hash_namenode **p = lib_hash_nametable + lib_hash_bucket (hash);
+  while (*p)
+    {
+      struct lib_hash_namenode *n = *p;
+      if (n->hash == hash && n->lib == lib &&
+          (n->name == name || !strcmp (n->name, name))) {
+        *p = n->next;
+        free (n);
+        return;
+      }
+      p = &n->next;
+    }
+  _dl_fatal_printf ("%s: can't unhash '%s'\n", __func__, name);
+}
+
+static uint32_t
+hash_file (Lmid_t nsid, dev_t dev, ino64_t ino, off_t off)
+{
+  return hash_mix (hash_mix (hash_mix (hash_mix (0, nsid), dev), ino), off);
+}
+
+static struct link_map *
+lib_hash_getfile (Lmid_t nsid, dev_t dev, ino64_t ino, off_t off)
+{
+  uint32_t hash = hash_file (nsid, dev, ino, off);
+  struct lib_hash_filenode *n = lib_hash_filetable[lib_hash_bucket (hash)];
+  while (n)
+    {
+      struct link_map *l = n->lib;
+      if (n->hash == hash && !l->l_removed && l->l_ns == nsid &&
+          l->l_file_id.dev == dev && l->l_file_id.ino == ino && l->l_off == off)
+        return l;
+      n = n->next;
+    }
+  return NULL;
+}
+
+static void
+lib_hash_addfile (struct link_map *lib)
+{
+  if (lib->l_faked)
+    return;
+  struct lib_hash_filenode *n = malloc (sizeof (struct lib_hash_filenode));
+  if (!n)
+    _dl_fatal_printf ("%s: out of memory\n", __func__);
+  uint32_t hash = hash_file (lib->l_ns, lib->l_file_id.dev, lib->l_file_id.ino, lib->l_off);
+  struct lib_hash_filenode **p = lib_hash_filetable + lib_hash_bucket (hash);
+  n->lib = lib;
+  n->next = *p;
+  n->hash = hash;
+  *p = n;
+}
+
+static void
+lib_hash_delfile (struct link_map *lib)
+{
+  uint32_t hash = hash_file (lib->l_ns, lib->l_file_id.dev, lib->l_file_id.ino, lib->l_off);
+  struct lib_hash_filenode **p = lib_hash_filetable + lib_hash_bucket (hash);
+  while (*p)
+    {
+      struct lib_hash_filenode *n = *p;
+      if (n->hash == hash && n->lib == lib)
+        {
+          *p = n->next;
+          free (n);
+          return;
+        }
+      p = &n->next;
+    }
+  _dl_fatal_printf ("%s: can't unhash '%s'\n", __func__, lib->l_name);
+}
+
+void
+_dl_hash_add_object (struct link_map *lib)
+{
+  lib_hash_addfile (lib);
+  lib_hash_addname (lib->l_name, lib);
+  for (struct libname_list *ln = lib->l_libname; ln; ln = ln->next)
+    {
+      lib_hash_addname (ln->name, lib);
+    }
+}
+
+void
+_dl_hash_del_object (struct link_map *lib)
+{
+  lib_hash_delfile (lib);
+  lib_hash_delname (lib->l_name, lib);
+  for (struct libname_list *ln = lib->l_libname; ln; ln = ln->next) {
+    lib_hash_delname (ln->name, lib);
+  }
+}
+
 /* Map in the shared object NAME, actually located in REALNAME, and already
    opened on FD.  */
 
@@ -841,28 +1011,34 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, off_t of
     }
 
   /* Look again to see if the real name matched another already loaded.  */
-  for (l = GL(dl_ns)[nsid]._ns_loaded; l != NULL; l = l->l_next)
-    if (!l->l_removed && _dl_file_id_match_p (&l->l_file_id, &id)
-        && l->l_off == offset)
-      {
-	/* The object is already loaded.
-	   Just bump its reference count and return it.  */
-	__close (fd);
-
-	/* If the name is not in the list of names for this object add
-	   it.  */
-	free (realname);
-	if (offset == 0)
-	  /* If offset!=0, foo.so/@0x<offset> should be the *only*
-	     name for this object. b/20141439.  */
-	  add_name_to_object (l, name);
-
-	return l;
-      }
+  l = lib_hash_getfile (nsid, id.dev, id.ino, offset);
+  if (l)
+    {
+      /* The object is already loaded.
+         Just bump its reference count and return it.  */
+      __close (fd);
+
+      /* If the name is not in the list of names for this object add
+         it.  */
+      free (realname);
+      if (offset == 0)
+        {
+          /* If offset!=0, foo.so/@0x<offset> should be the *only*
+             name for this object. b/20141439.  */
+          add_name_to_object (l, name);
+          lib_hash_addname (name, l);
+        }
+
+      return l;
+    }
 
 #ifdef SHARED
   /* When loading into a namespace other than the base one we must
      avoid loading ld.so since there can only be one copy.  Ever.  */
+
+  /* Note that this test is wrong in two ways:
+       1) it doesn't consider offset, and
+       2) l_file_id.ino and l_file_id.dev for rtld are always zero!  */
   if (__glibc_unlikely (nsid != LM_ID_BASE)
       && (_dl_file_id_match_p (&id, &GL(dl_rtld_map).l_file_id)
 	  || _dl_name_match_p (name, &GL(dl_rtld_map))))
@@ -1349,10 +1525,7 @@ cannot enable executable stack as shared object requires");
 
   l->l_off = offset;
 
-  /* When we profile the SONAME might be needed for something else but
-     loading.  Add it right away.  */
-  if (__glibc_unlikely (GLRO(dl_profile) != NULL)
-      && l->l_info[DT_SONAME] != NULL)
+  if (l->l_info[DT_SONAME] != NULL)
     add_name_to_object (l, ((const char *) D_PTR (l, l_info[DT_STRTAB])
 			    + l->l_info[DT_SONAME]->d_un.d_val));
 
@@ -1909,26 +2082,7 @@ match_one (const char *name, struct link_map *l)
   if (__builtin_expect (l->l_faked, 0) != 0
       || __builtin_expect (l->l_removed, 0) != 0)
     return 0;
-  if (!_dl_name_match_p (name, l))
-    {
-      const char *soname;
-
-      if (__builtin_expect (l->l_soname_added, 1)
-	  || l->l_info[DT_SONAME] == NULL)
-	return 0;
-
-      soname = ((const char *) D_PTR (l, l_info[DT_STRTAB])
-		+ l->l_info[DT_SONAME]->d_un.d_val);
-      if (strcmp (name, soname) != 0)
-	return 0;
-
-      /* We have a match on a new name -- cache it.  */
-      add_name_to_object (l, soname);
-      l->l_soname_added = 1;
-    }
-
-  /* We have a match.  */
-  return 1;
+  return _dl_name_match_p (name, l);
 }
 
 /* Map in the shared object file NAME.  */
@@ -1960,9 +2114,9 @@ _dl_map_object (struct link_map *loader, const char *name, off_t offset,
     }
   else
     {
-      for (l = _dl_last_entry (&GL(dl_ns)[nsid]); l; l = l->l_prev)
-	if (match_one (name, l))
-	  return l;
+      l = lib_hash_getname(nsid, name);
+      if (l)
+        return l;
     }
 
   /* Display information if we are debugging.  */
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 06db4e2b94..360f9ea3dc 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -25,6 +25,7 @@
 #include <dl-hash.h>
 #include <dl-machine.h>
 #include <sysdep-cancel.h>
+#include <hp-timing.h>
 #include <libc-lock.h>
 #include <tls.h>
 #include <atomic.h>
@@ -330,335 +331,263 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
   result->m = (struct link_map *) map;
 }
 
-/* Bob Jenkins's hash function, free for any use.
-   http://burtleburtle.net/bob/hash/  */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<<8); \
-  c -= a; c -= b; c ^= (b>>13); \
-  a -= b; a -= c; a ^= (c>>12);  \
-  b -= c; b -= a; b ^= (a<<16); \
-  c -= a; c -= b; c ^= (b>>5); \
-  a -= b; a -= c; a ^= (c>>3);  \
-  b -= c; b -= a; b ^= (a<<10); \
-  c -= a; c -= b; c ^= (b>>15); \
-}
-
-typedef unsigned int uint32 __attribute__ ((mode (SI)));
-static uint32
-hash_with_seed (const char *k, uint32 length, uint32 seed)
-{
-   uint32 a, b, c, len;
-
-   /* Set up the internal state */
-   len = length;
-   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-   c = seed;            /* the previous hash value */
-
-   /*---------------------------------------- handle most of the key */
-   while (len >= 12)
-     {
-       a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
-       b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
-       c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
-       mix (a,b,c);
-       k += 12;
-       len -= 12;
-     }
-
-   /*------------------------------------- handle the last 11 bytes */
-   c += length;
-   switch (len)              /* all the case statements fall through */
-     {
-     case 11: c+=((uint32)k[10]<<24);
-     case 10: c+=((uint32)k[9]<<16);
-     case 9 : c+=((uint32)k[8]<<8);
-         /* the first byte of c is reserved for the length */
-     case 8 : b+=((uint32)k[7]<<24);
-     case 7 : b+=((uint32)k[6]<<16);
-     case 6 : b+=((uint32)k[5]<<8);
-     case 5 : b+=k[4];
-     case 4 : a+=((uint32)k[3]<<24);
-     case 3 : a+=((uint32)k[2]<<16);
-     case 2 : a+=((uint32)k[1]<<8);
-     case 1 : a+=k[0];
-       /* case 0: nothing left to add */
-     }
-   mix (a,b,c);
-   /*-------------------------------------------- report the result */
-   return c;
-}
-
 
 /* A hash table used to speed up lookups when we have a lot of shared
    libraries.  We record in the table how many ELF objects in the link
    map we can safely skip, because they are known to not contain the symbol
    we are looking for.
 
-   Values start out at N (num objects in the main_map).  We then go through
-   each object (executable or DSO) in their order in the main_map and insert
-   pos_in_map_map into the table.
+   We go through each object (executable or DSO) in their order in the
+   main_map and insert pos_in_main_map into the table.  Searches for
+   symbols not present in the table start at num_objects, skipping every
+   object preprocessed in this way.
 
    If there is already an earlier (smaller) entry in the table, we leave it
-   alone.  It could be a hash collision, or it could be an earlier
-   instance of the same symbol (eg weak __stdout definition in
-   multiple objects).
+   alone.  It represents an earlier instance of the same symbol (e.g. a weak
+   __stdout definition in multiple objects).  */
 
-   To save space, the table contains uint16_t entries, and so we can't
-   record main map positions over 65535.
-
-   It turns out that even when only 25% of slots are used, collision
-   rate is around 10%, which is quite high, and causes lookups to be slow.
-
-   So we use a second table (using a different hash seed). Hash collisions
-   using two separate hashes are quite rare.
-
-   Note that we allocate the two tables together (a single block twice
-   the size of one table).  */
+struct position_hash
+{
+  /* The Bernstein hash, shifted right one bit.  (This lets us build a table
+     directly from the precomputed values in DT_GNU_HASH sections, in which
+     the low bit is repurposed to terminate hash chains.)  */
+  uint32_t hash;
+  /* The position of the first library with this symbol in the search list.  */
+  uint32_t pos;
+  /* The symbol name.  */
+  const char *name;
+};
+
+static struct position_hash *position_hash_table;
+static size_t position_hash_table_slots;
+static size_t position_hash_table_slots_occupied;
+static int position_hash_lookup_default;
 
-static void
-insert_into_hash_table_1 (int pos_in_main_map, const char *symbol_name,
-			  size_t name_len, dl_position_table_entry_t *table,
-			  int seed)
+static int
+position_hash_put (struct position_hash *table, size_t slots,
+                   struct position_hash *item)
 {
-  uint32 hash = hash_with_seed (symbol_name, name_len, seed);
-  int pos = hash & GLRO(dl_position_hash_mask);
-  if (pos_in_main_map < table[pos])
+  size_t mask = slots - 1;
+  /* Search for a matching entry or a free slot.  This loop always terminates
+     because we don't allow the hashtable to become completely full.  */
+  for (size_t stride = 0, i = item->hash; ; i += ++stride)
     {
-      table[pos] = pos_in_main_map;
-      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-        _dl_debug_printf ("fastload hash for %s: [%u] = %u (table %u)\n",
-                          symbol_name, (unsigned int) pos,
-                          (unsigned int) table[pos],
-                          (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+      struct position_hash *slot = &table[i & mask];
+      if (slot->name == NULL)
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:    put %s hash 0x%x pos %u slot 0x%lx\n",
+                              item->name, item->hash, item->pos, slot - table);
+          slot->hash = item->hash;
+          slot->pos = item->pos;
+          slot->name = item->name;
+          return 1;
+        }
+      else if (slot->hash == item->hash &&
+               (slot->name == item->name || !strcmp (slot->name, item->name)))
+        {
+          if (item->pos < slot->pos)
+            slot->pos = item->pos;
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:    dup %s hash 0x%x pos %u slot 0x%lx\n",
+                              item->name, item->hash, slot->pos, slot - table);
+          return 0;
+        }
     }
-  else
+}
+
+static void
+position_hash_resize (struct position_hash *oldtable,
+                      size_t oldslots, size_t newslots)
+{
+  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+    _dl_debug_printf ("fastload: resizing hashtable to %lu slots\n", newslots);
+  assert (!oldtable == !oldslots);
+  void *ptr = mmap (NULL, newslots * sizeof *oldtable, PROT_READ|PROT_WRITE,
+                    MAP_ANON|MAP_PRIVATE|MAP_POPULATE, -1, 0);
+  if (ptr == MAP_FAILED)
+    _dl_signal_error (errno, NULL, NULL, "cannot mmap fastload hashtable");
+  struct position_hash *newtable = position_hash_table = ptr;
+  position_hash_table_slots = newslots;
+  if (oldtable == NULL)
+    return;
+  for (size_t i = 0; i < oldslots; ++i)
     {
-      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-        _dl_debug_printf
-          ("fastload hash for %s: [%u] already set to %u <= %u (table %u)\n",
-           symbol_name, (unsigned int) pos,
-           (unsigned int) table[pos],
-           (unsigned int) pos_in_main_map,
-           (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+      if (oldtable[i].name)
+        position_hash_put (newtable, newslots, &oldtable[i]);
     }
-}
 
-#define HASH_SEED_A 0
-#define HASH_SEED_B 5381  /* As good as any other value.  */
+  if (munmap (oldtable, oldslots * sizeof *oldtable))
+    _dl_signal_error (errno, NULL, NULL, "cannot munmap fastload hashtable");
+}
 
 static void
-insert_into_hash_table (int pos_in_main_map, const char *symbol_name)
+position_hash_init (int lookup_default)
 {
-  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
-  const int table_size = GLRO(dl_position_hash_mask) + 1;
-  dl_position_table_entry_t *const table2 = table1 + table_size;
-  const size_t name_len = strlen (symbol_name);
-
-  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table1,
-			    HASH_SEED_A);
-  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table2,
-			    HASH_SEED_B);
+  assert (position_hash_table == NULL);
+  position_hash_lookup_default = lookup_default;
+  position_hash_resize (NULL, 0, 8192);
 }
 
-static inline int
-earliest_pos_from_hash_table_1 (const char *symbol_name,
-                                const dl_position_table_entry_t *table,
-                                int seed)
+static void
+position_hash_insert (uint32_t hash, const char *name, uint32_t pos)
 {
-  int pos;
-  uint32 hash;
-
-  hash = hash_with_seed (symbol_name, strlen (symbol_name), seed);
-  pos = hash & GLRO(dl_position_hash_mask);
+  struct position_hash *table = position_hash_table;
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-    _dl_debug_printf
-      ("fastload earliest pos for %s: [%u] = %u (table %u)\n",
-       symbol_name, (unsigned int) pos, (unsigned int) table[pos],
-       (GLRO(dl_position_hash_table) == table) ? 0 : 1);
-
-  return table[pos];
+    _dl_debug_printf ("fastload: insert %s hash 0x%x pos %u\n", name, hash, pos);
+  size_t slots = position_hash_table_slots;
+  struct position_hash item = { hash, pos, name };
+  if (!position_hash_put (table, slots, &item))
+    return;
+  size_t newsize = ++position_hash_table_slots_occupied;
+  if (newsize >= slots / 2)
+    /* The load factor has reached 50%.  Double the table size and rehash.  */
+    position_hash_resize (table, slots, 2 * slots);
 }
 
-static inline int
-earliest_pos_from_hash_table (const char *undef_name)
+static int
+position_hash_lookup (uint32_t hash, const char *name)
 {
-  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
-
-  if (table1 == NULL)
-    return 0;  /* Search from the beginning of the list.  */
-  else
+  struct position_hash *table = position_hash_table;
+  if (table == NULL)
+    return 0;
+  size_t mask = position_hash_table_slots - 1;
+  for (size_t stride = 0, i = hash; ; i += ++stride)
     {
-      const int table_size = GLRO(dl_position_hash_mask) + 1;
-      dl_position_table_entry_t *const table2 = table1 + table_size;
-      const int skip_1 = earliest_pos_from_hash_table_1 (undef_name, table1,
-                                                         HASH_SEED_A);
-      const int skip_2 = earliest_pos_from_hash_table_1 (undef_name, table2,
-                                                         HASH_SEED_B);
-
-      /* Possibilities:
-
-       * Both skip_1 and skip_2 are positive and same:
-         there were no collisions at insertion in either table, or there
-         was double-collision with symbols from the same library.
-
-         If skip_1 == skip_2 == N (num_directly_loaded_objects), then the
-         slot was empty in both tables after they were filled (i.e. there
-         is no symbol that hashes to corresponding slot).  In that case,
-         searching first [0, N) is pointless. See b/5609692.
-
-         True skip_to == skip_1 == skip_2.
-
-       * Both skip_1 and skip_2 are positive but different:
-         there was a collision in one or both tables, or one of the slots
-         remained unfilled after all symbols have been inserted.
-         It is safe to start at max, true skip_to count can not be less
-         than max.  */
-
-      return (skip_1 < skip_2) ? skip_2 : skip_1;
+      struct position_hash *slot = &table[i & mask];
+      if (slot->hash == hash &&
+          (slot->name == name || !strcmp (slot->name, name)))
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:  found %s at slot 0x%lx, pos %u\n",
+                              name, slot - table, slot->pos);
+          return slot->pos;
+        }
+      else if (slot->name == NULL)
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload: missed %s at slot 0x%lx, default pos %u\n",
+                              name, slot - table, position_hash_lookup_default);
+          return position_hash_lookup_default;
+        }
     }
 }
 
-
-static void
-fill_hash_table (const struct link_map *const map, int pos_in_main_map,
-                 int num_symbols)
+static int
+position_hash_include_symbol (const ElfW(Sym) *sym)
 {
-  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
-  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
-  int k;
+  if (sym->st_value == 0 && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
+    return 0;
 
-  /* Iterate through the symbols defined in this map and insert them
-     into the bloom filter.  If the symbol wouldn't pass the tests in
-     do_lookup_x(), then don't insert it.  */
-  for (k = 0; k < num_symbols; k++)
+  switch (ELFW(ST_TYPE) (sym->st_info))
     {
-      const ElfW(Sym) *sym = &symtab[k];
-
-      if (sym->st_value == 0
-          && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
-        continue;
-
-      switch (ELFW(ST_TYPE) (sym->st_info))
-	{
-	case STT_SECTION:
-	case STT_FILE:
-	case STT_COMMON:
-	  continue;
-	default:
-	  break;
-	}
-
-      switch (ELFW(ST_BIND) (sym->st_info))
-	{
-	case STB_LOCAL:  /* Local symbols are ignored.  */
-	  continue;
-	default:
-	  break;
-	}
-
-      insert_into_hash_table (pos_in_main_map, strtab + sym->st_name);
+    case STT_SECTION:
+    case STT_FILE:
+    case STT_COMMON:
+      return 0;
     }
-}
-
-static dl_position_table_entry_t *
-allocate_position_hash_table (size_t table_size, int max_pos)
-{
-  /* Implementation detail: we actually allocate two "logical" tables in a
-     single block.  */
-
-  const size_t total_size = 2 * table_size;
-  dl_position_table_entry_t *table
-    = malloc (total_size * sizeof (dl_position_table_entry_t));
-
-  if (table == NULL)
-    return NULL;
-
-  for (int k = 0; k < total_size; ++k)
-    table[k] = max_pos;
 
-  return table;
+  /* Local symbols are ignored.  */
+  return ELFW(ST_BIND) (sym->st_info) != STB_LOCAL;
 }
 
 static int
-num_dynsyms (const struct link_map *const map)
+last_gnu_hash_bucket (const struct link_map *map)
 {
-  if (map->l_info[DT_HASH] != NULL)
-    {
-      /* If DT_HASH is present, we unconditionally use it, since it already
-         has the answer precomputed and waiting for us.  */
-      const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+  /* The best documentation for GNU_HASH I found:
+     http://www.linker-aliens.org/blogs/ali/entry/gnu_hash_elf_sections/
 
-      if (hash != NULL)
-	{
-	  /* DT_HASH dynamic structure points to a symbol hash table
-	     with the following layout:
+     _dl_setup_hash() has already set things up for us.  */
 
-	     nbucket
-	     nchain       (== number of symbols in dynamic symbol table)
-	     bucket[0]
-	     ...
+  if (__builtin_expect (map->l_nbuckets < 1, 0))
+    /* Paranoia: neither gold nor gnu-ld will construct an empty
+       .gnu.hash, but some other linker just might.  */
+    return 0;
 
-	     http://refspecs.freestandards.org/elf/TIS1.1.pdf (p. 2.19). */
+  /* In the GNU hash the symbol index of a symbol is determined by
+     the offset of the symbol's entry in the hash table itself.
 
-	  return hash[1];
-	}
-    }
-  else if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
-    {
-      /* The best documentation for GNU_HASH I found:
-	 http://blogs.sun.com/ali/entry/gnu_hash_elf_sections
+     We start at the last bucket map->l_gnu_buckets[map->l_nbuckets-1]
+     and loop backward from the end until we find a bucket which is
+     not zero.  */
 
-	 _dl_setup_hash() has already set things up for us.  */
+  int last_bucket_idx = map->l_nbuckets - 1;
 
-      if (__builtin_expect (map->l_nbuckets < 1, 0))
-	{
-	  /* Paranoia: neither gold nor gnu-ld will construct an empty
-	     .gnu.hash, but some other linker just might.  */
-	  return 0;
-	}
+  while (last_bucket_idx > 0 &&
+         map->l_gnu_buckets[last_bucket_idx] == 0)
+    --last_bucket_idx;
 
-      /* In the GNU hash the symbol index of a symbol is determined by
-	 the offset of the symbol's entry in the hash table itself.
+  return map->l_gnu_buckets[last_bucket_idx];
+}
 
-	 We start at the last bucket map->l_gnu_chain_zero[map->l_nbuckets-1]
-	 and loop backward from the end until we find a bucket which is
-	 not zero.
+static void
+position_hash_fill_from_gnu_hash (const struct link_map *map,
+                                  int pos_in_main_map)
+{
+  int last_bucket = last_gnu_hash_bucket (map);
+  if (last_bucket == 0)
+    return;
 
-	 Then we walk forward to find the last entry in the chain which
-	 starts at that bucket.  The terminating entry gives us desired
-	 symbol index.  */
+  /* Start of hash values.  */
+  const Elf32_Word *chains = &map->l_gnu_buckets[map->l_nbuckets];
 
-      int last_bucket_idx = map->l_nbuckets - 1;
+  /* Reconstruct the number of symbols omitted.  */
+  const Elf32_Word *zero = map->l_gnu_chain_zero;
+  int symbias = chains - zero;
 
-      while (last_bucket_idx > 0 &&
-	     map->l_gnu_buckets[last_bucket_idx] == 0)
-	{
-	  --last_bucket_idx;
-	}
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
 
-      const Elf32_Word last_bucket = map->l_gnu_buckets[last_bucket_idx];
+  /* Iterate through the symbols in the GNU hash chains and insert them
+     into the position hash table.  If the symbol wouldn't pass the tests
+     in do_lookup_x(), then don't insert it.  */
+  for (int k = symbias; ; ++k)
+    {
+      uint32_t hash = zero[k];
+      const ElfW(Sym) *sym = &symtab[k];
+      if (position_hash_include_symbol (sym))
+        position_hash_insert (hash >> 1, strtab + sym->st_name, pos_in_main_map);
 
-      if (__builtin_expect (last_bucket == 0, 0))
-	{
-	  /* This shouldn't happen unless the library has only static
-	     variables with global ctors, and nothing else.  */
-	  return 0;
-	}
+      /* Stop when we've passed the start of the last chain and the sentinel bit
+         terminating a chain is set.  This is the end of the section.  */
+      if (k >= last_bucket && (hash & 1))
+        break;
+    }
+}
 
-      /* Start of the hash chain for the last non-zero bucket.  */
-      const Elf32_Word *hasharr = &map->l_gnu_chain_zero[last_bucket];
+static int
+old_hash_nchain (const struct link_map *map)
+{
+  if (map->l_info[DT_HASH] == NULL)
+    return 0;
+  const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+  if (hash == NULL)
+    return 0;
+  /* Read the "nchain" field of the DT_HASH section; see
+     https://refspecs.linuxfoundation.org/elf/gabi4+/ch5.dynamic.html#hash. */
+  return hash[1];
+}
 
-      /* The chain is terminated by an entry with LSBit set.  */
-      while ((*hasharr & 1u) == 0)
-	++hasharr;
+static void
+position_hash_fill_from_symtab (const struct link_map *const map,
+                                int pos_in_main_map)
+{
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+  const int num_symbols = old_hash_nchain (map);
 
-      /* Size of dynamic symbol table is "index of last symbol" + 1.  */
-      return hasharr - map->l_gnu_chain_zero + 1;
+  /* Iterate through the symbols defined in this map and insert them
+     into the position hash table.  If the symbol wouldn't pass the tests
+     in do_lookup_x(), then don't insert it.  */
+  for (int k = 0; k < num_symbols; k++)
+    {
+      const ElfW(Sym) *sym = &symtab[k];
+      if (position_hash_include_symbol (sym))
+        {
+          uint32_t hash = dl_new_hash (strtab + sym->st_name);
+          position_hash_insert (hash >> 1, strtab + sym->st_name, pos_in_main_map);
+        }
     }
-
-  return 0;
 }
 
 /* Create the fastload position hash (if requested).  Given a link_map
@@ -677,8 +606,8 @@ _dl_fill_position_hash (struct link_map *main_map)
      fill in the hash table of earliest known positions for symbols in
      main_map.  We'll use this in do_lookup_x().  */
 
-  /* If cutoff or size is negative, fastload is disabled.  */
-  if (GLRO(dl_position_hash_cutoff) < 0 || GLRO(dl_position_hash_bits) < 0)
+  /* If cutoff is negative, fastload is disabled.  */
+  if (GLRO(dl_position_hash_cutoff) < 0)
     {
       if (__builtin_expect (debug_fastload, 0))
         _dl_debug_printf ("fastload: disabled (configuration)\n");
@@ -695,83 +624,31 @@ _dl_fill_position_hash (struct link_map *main_map)
       return;
     }
 
-  int *ndynsyms = NULL;
+  int timing = (HP_TIMING_AVAIL) && (GLRO(dl_debug_mask) & DL_DEBUG_STATISTICS);
+  hp_timing_t start, stop, elapsed;
+  if (timing)
+    HP_TIMING_NOW (start);
 
-  if (GLRO(dl_position_hash_bits) == 0)
-    {
-      /* Table size has not been specified via LD_FASTLOAD_HASH_BITS.
-         Compute appropriate size for <= 25% load factor.  */
-
-      int total_symbols = 0;
-
-      /* It is safe to call alloca() here, because we are executing in
-	 the context of dl_main, i.e. we are on the main stack, and are
-	 not yet using much of it.  Assuming sane upper limit of 10K direct
-	 dependencies, we'll use at most 40K bytes here.  */
-      ndynsyms = alloca (num_objects * sizeof (int));
-
-      for (k = 0; k < num_objects; ++k)
-        {
-          const struct link_map *const map = main_map->l_searchlist.r_list[k];
-	  const int nsyms = num_dynsyms (map);
+  position_hash_init (num_objects);
 
-	  ndynsyms[k] = nsyms;
-	  total_symbols += nsyms;
-	  if (__builtin_expect (debug_fastload, 0))
-	    _dl_debug_printf ("fastload: %s: %u symbols\n", map->l_name, nsyms);
-	}
-
-      /* Ensure table size is >= 4 * total_symbols.  Generally this
-         over-sizes the table by more than 4, since there usually are lots
-         of duplicate symbols (from different DSOs) as well.  */
-
-      GLRO(dl_position_hash_bits)
-        = (8 * sizeof (total_symbols)) - __builtin_clz (total_symbols) + 2;
-
-      if (__builtin_expect (debug_fastload, 0))
-        _dl_debug_printf ("fastload: will use %u hash bits for %u total "
-                          "symbols\n",
-                          GLRO(dl_position_hash_bits), total_symbols);
-    }
-
-  /* If the requested or computed table size is too large, limit it.  */
-  if (GLRO(dl_position_hash_bits) > DL_POSITION_HASH_BITS_MAX)
-    {
-      if (__builtin_expect (debug_fastload, 0))
-        _dl_debug_printf ("fastload: requested table too large"
-                          " (%u > max %u bits)\n",
-                          GLRO(dl_position_hash_bits),
-                          DL_POSITION_HASH_BITS_MAX);
-      GLRO(dl_position_hash_bits) = DL_POSITION_HASH_BITS_MAX;
-    }
-
-  int table_size = 1 << GLRO(dl_position_hash_bits);
-  GLRO(dl_position_hash_mask) = table_size - 1;
-  if (__builtin_expect (debug_fastload, 0))
-    _dl_debug_printf ("fastload: enabled with %u entry table\n", table_size);
-
-  int max_pos = num_objects < DL_POSITION_MAX ? num_objects : DL_POSITION_MAX;
-  GLRO(dl_position_hash_table) = allocate_position_hash_table (table_size,
-							       max_pos);
-  if (GLRO(dl_position_hash_table) == NULL)
-    {
-      if (__builtin_expect (debug_fastload, 0))
-	_dl_debug_printf ("fastload: failed to allocate table\n");
-      return;
-    }
-
-  /* There is no point in going beyond max_pos: the rest of the table
-     has already been saturated with max_pos value.  */
-  for (k = 0; k < max_pos; ++k)
+  for (k = 0; k < num_objects; ++k)
     {
       const struct link_map *const map = main_map->l_searchlist.r_list[k];
 
-      /* Reuse num_dynsyms() result we computed earlier if possible:
-	 recomputing it may be somewhat expensive with DT_GNU_HASH.  */
-      const int nsyms = ndynsyms ? ndynsyms[k] : num_dynsyms (map);
+      if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
+        position_hash_fill_from_gnu_hash (map, k);
+      else
+        position_hash_fill_from_symtab (map, k);
 
-      fill_hash_table (map, k, nsyms);
     }
+
+  if (!timing)
+    return;
+  HP_TIMING_NOW (stop);
+  HP_TIMING_DIFF (elapsed, start, stop);
+  char buf[80];
+  HP_TIMING_PRINT (buf, sizeof buf, elapsed);
+  _dl_debug_printf ("\t  time to build fastload table: %s\n", buf);
 }
 
 /* Inner part of the lookup functions.  We return a value > 0 if we
@@ -795,13 +672,13 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 
   if (scope == GL(dl_ns)[LM_ID_BASE]._ns_main_searchlist && i == 0)
     {
-      const int skip_to = earliest_pos_from_hash_table (undef_name);
+      const int skip_to = position_hash_lookup (new_hash >> 1, undef_name);
 
       if (skip_to < n)
 	{
 	  i = skip_to;
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-	    _dl_debug_printf ("fastload %s skipping to %u\n",
+	    _dl_debug_printf ("fastload: lookup %s skipping to %u\n",
 			      undef_name, (unsigned int) i);
 	}
       else
@@ -812,7 +689,7 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 	  assert (skip_to == n);
 
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-	    _dl_debug_printf ("fastload %s, %u >= %u (scope->r_nlist)\n",
+	    _dl_debug_printf ("fastload: lookup %s, %u >= %u (scope->r_nlist)\n",
 			      undef_name, (unsigned int) skip_to,
 			      (unsigned int) n);
 	  return 0;
@@ -1023,16 +900,6 @@ skip:
 }
 
 
-static uint_fast32_t
-dl_new_hash (const char *s)
-{
-  uint_fast32_t h = 5381;
-  for (unsigned char c = *s; c != '\0'; c = *++s)
-    h = h * 33 + c;
-  return h & 0xffffffff;
-}
-
-
 /* Add extra dependency on MAP to UNDEF_MAP.  */
 static int
 add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
diff --git a/elf/dl-object.c b/elf/dl-object.c
index b37bcc1295..7becedf9b3 100644
--- a/elf/dl-object.c
+++ b/elf/dl-object.c
@@ -34,9 +34,7 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
 
   if (GL(dl_ns)[nsid]._ns_loaded != NULL)
     {
-      struct link_map *l = GL(dl_ns)[nsid]._ns_loaded;
-      while (l->l_next != NULL)
-	l = l->l_next;
+      struct link_map *l = _dl_last_entry (&GL(dl_ns)[nsid]);
       new->l_prev = l;
       /* new->l_next = NULL;   Would be necessary but we use calloc.  */
       l->l_next = new;
@@ -47,12 +45,13 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
   new->l_serial = GL(dl_load_adds);
   ++GL(dl_load_adds);
 
+  _dl_hash_add_object (new);
+
   __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
 }
 
 
-/* Allocate a `struct link_map' for a new object being loaded,
-   and enter it into the _dl_loaded list.  */
+/* Allocate a `struct link_map' for a new object being loaded.  */
 struct link_map *
 _dl_new_object (char *realname, const char *libname, int type,
 		struct link_map *loader, int mode, Lmid_t nsid)
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index b2a01ede62..54a226531b 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -33,11 +33,32 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
   unsigned int i = 0;
   uint16_t seen[nmaps];
   memset (seen, 0, nmaps * sizeof (seen[0]));
-  while (1)
+
+  /* Mark objects with in-edges.  */
+  for (unsigned j = i; j < nmaps; ++j)
+    maps[j]->l_inedge = 0;
+
+  for (unsigned j = i; j < nmaps; ++j)
+    {
+      for (struct link_map **p = maps[j]->l_initfini; p && *p; ++p)
+        {
+          if (*p != maps[j])  /* Skip self-edges.  */
+            (*p)->l_inedge = 1;
+        }
+    }
+
+  while (i < nmaps)
     {
+      struct link_map *thisp = maps[i];
+      if (!thisp->l_inedge)
+        {
+          /* No dependencies on this object.  */
+          ++i;
+          continue;
+        }
+
       /* Keep track of which object we looked at this round.  */
       ++seen[i];
-      struct link_map *thisp = maps[i];
 
       if (__glibc_unlikely (for_fini))
 	{
@@ -52,7 +73,7 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
 	 with the dependency.  */
       unsigned int k = nmaps - 1;
       while (k > i)
-	{
+        {
 	  struct link_map **runp = maps[k]->l_initfini;
 	  if (runp != NULL)
 	    /* Look through the dependencies of the object.  */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index a097f22a16..e0d464a5fa 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -143,10 +143,7 @@ hp_timing_t _dl_cpuclock_offset;
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 
 /* The merged position hash table used if we have a lot of shared objects.  */
-dl_position_table_entry_t *_dl_position_hash_table;
-int _dl_position_hash_mask;
 int _dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT;
-int _dl_position_hash_bits;
 
 size_t _dl_pagesize = EXEC_PAGESIZE;
 
diff --git a/elf/rtld.c b/elf/rtld.c
index a0b22c4bb3..9c942add0b 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -292,7 +292,6 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
     ._dl_close = _dl_close,
     ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft,
     ._dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT,
-    ._dl_position_hash_bits = 0,
 #ifdef HAVE_DL_DISCOVER_OSVERSION
     ._dl_discover_osversion = _dl_discover_osversion
 #endif
@@ -1382,6 +1381,7 @@ of this helper program; chances are you did not intend to run this program.\n\
   GL(dl_rtld_map).l_type = lt_library;
   main_map->l_next = &GL(dl_rtld_map);
   GL(dl_rtld_map).l_prev = main_map;
+  _dl_hash_add_object (&GL(dl_rtld_map));
   ++GL(dl_ns)[LM_ID_BASE]._ns_nloaded;
   ++GL(dl_load_adds);
 
@@ -2703,12 +2703,6 @@ process_envvars (enum mode *modep)
 	    GLRO(dl_position_hash_cutoff)
               = _dl_strtoul (&envline[16], NULL);;
           break;
-
-        case 18:
-          if (memcmp (envline, "FASTLOAD_HASH_BITS", 18) == 0)
-	    GLRO(dl_position_hash_bits)
-              = _dl_strtoul (&envline[19], NULL);;
-          break;
         }
     }
 
diff --git a/include/link.h b/include/link.h
index 22c3eec73d..8e4ec17e46 100644
--- a/include/link.h
+++ b/include/link.h
@@ -180,8 +180,7 @@ struct link_map
     unsigned int l_reserved:2;	/* Reserved for internal use.  */
     unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed
 					to by `l_phdr' is allocated.  */
-    unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in
-				      the l_libname list.  */
+    unsigned int l_inedge:1;    /* Used temporarily when sorting deps.  */
     unsigned int l_faked:1;	/* Nonzero if this is a faked descriptor
 				   without associated file.  */
     unsigned int l_need_tls_init:1; /* Nonzero if GL(dl_init_static_tls)
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 329a367ded..32647f825a 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -589,23 +589,14 @@ struct rtld_global_ro
      below.  */
   EXTERN struct r_search_path_elem *_dl_init_all_dirs;
 
-  /* The merged hash table used if we have a lot of shared objects. */
-  EXTERN dl_position_table_entry_t *_dl_position_hash_table;
-  EXTERN int _dl_position_hash_mask;
-  EXTERN int _dl_position_hash_bits;
   EXTERN int _dl_position_hash_cutoff;
 
-#define DL_POSITION_HASH_BITS_MAX       27  /* (1 << 27) entries.  */
 #if defined(__powerpc__) && !defined(__powerpc64__)
 #define DL_POSITION_HASH_CUTOFF_DEFAULT -1  /* Disabled.  */
 #else
 #define DL_POSITION_HASH_CUTOFF_DEFAULT 32  /* > 32 shared libs.  */
 #endif
 
-  /* Colon-separated list of absolute paths to ld.so.cache files
-     we'll load.  */
-  EXTERN const char *_google_ld_so_cache_list;
-
 #if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD
   /* Overhead of a high-precision timing measurement.  */
   EXTERN hp_timing_t _dl_hp_timing_overhead;
@@ -969,6 +960,12 @@ extern lookup_t _dl_lookup_symbol_x (const char *undef,
 extern void _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
      attribute_hidden;
 
+/* Update hashtables when objects are loaded or unloaded.  */
+extern void _dl_hash_add_object (struct link_map *lib)
+    attribute_hidden;
+extern void _dl_hash_del_object (struct link_map *lib)
+    attribute_hidden;
+
 /* Allocate a `struct link_map' for a new object being loaded.  */
 extern struct link_map *_dl_new_object (char *realname, const char *libname,
 					int type, struct link_map *loader,
@@ -1257,6 +1254,15 @@ _dl_last_entry (struct link_namespaces *ns)
   return map;
 }
 
+static inline uint_fast32_t
+dl_new_hash (const char *s)
+{
+  uint_fast32_t h = 5381;
+  for (unsigned char c = *s; c != '\0'; c = *++s)
+    h = h * 33 + c;
+  return h & 0xffffffff;
+}
+
 __END_DECLS
 
 #endif /* ldsodefs.h */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [glibc/maskray/grte] Redesign the fastload support for additional performance
@ 2021-08-27 23:50 Fangrui Song
  0 siblings, 0 replies; 3+ messages in thread
From: Fangrui Song @ 2021-08-27 23:50 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=a0da65566d50dd6b059230c4d2d2906c5905ad37

commit a0da65566d50dd6b059230c4d2d2906c5905ad37
Author: Ambrose Feinstein <ambrose@google.com>
Date:   Wed Sep 11 14:53:28 2019 -0700

    Redesign the fastload support for additional performance

Diff:
---
 elf/dl-close.c             |   3 +
 elf/dl-load.c              | 248 +++++++++++++++----
 elf/dl-lookup.c            | 583 +++++++++++++++++----------------------------
 elf/dl-object.c            |   9 +-
 elf/dl-sort-maps.c         |  27 ++-
 elf/dl-support.c           |   3 -
 elf/rtld.c                 |   8 +-
 include/link.h             |   3 +-
 sysdeps/generic/ldsodefs.h |  24 +-
 9 files changed, 474 insertions(+), 434 deletions(-)

diff --git a/elf/dl-close.c b/elf/dl-close.c
index cbf336609b..3960559b72 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -723,6 +723,9 @@ _dl_close_worker (struct link_map *map, bool force)
 	    _dl_debug_printf ("\nfile=%s [%lu];  destroying link map\n",
 			      imap->l_name, imap->l_ns);
 
+	  /* Remove from hashtables.  */
+	  _dl_hash_del_object (imap);
+
 	  /* This name always is allocated.  */
 	  free (imap->l_name);
 	  /* Remove the list with all the names of the shared object.  */
diff --git a/elf/dl-load.c b/elf/dl-load.c
index f78140a441..1ae72aa325 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -357,8 +357,7 @@ expand_dynamic_string_token (struct link_map *l, const char *s)
 
 /* Add `name' to the list of names for a particular shared object.
    `name' is expected to have been allocated with malloc and will
-   be freed if the shared object already has this name.
-   Returns false if the object already had this name.  */
+   be freed if the shared object already has this name.  */
 static void
 add_name_to_object (struct link_map *l, const char *name)
 {
@@ -804,6 +803,177 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l,
 }
 
 
+/* Hash tables of loaded objects; one keyed on name, one keyed on inode.  */
+
+struct lib_hash_namenode
+{
+  const char *name;
+  struct link_map *lib;
+  struct lib_hash_namenode *next;
+  uint32_t hash;
+};
+
+struct lib_hash_filenode
+{
+  struct link_map *lib;
+  struct lib_hash_filenode *next;
+  uint32_t hash;
+};
+
+/* Must be a power of 2.  */
+#define LIB_HASH_BUCKETS 256
+
+static struct lib_hash_namenode *lib_hash_nametable[LIB_HASH_BUCKETS];
+static struct lib_hash_filenode *lib_hash_filetable[LIB_HASH_BUCKETS];
+
+static size_t
+lib_hash_bucket (uint32_t hash)
+{
+  return hash & (LIB_HASH_BUCKETS-1);
+}
+
+static uint32_t
+hash_mix (uint32_t h, size_t val)
+{
+  h ^= val;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
+  val >>= 4 * sizeof val;
+  h ^= val;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
+  return h;
+}
+
+static struct link_map *
+lib_hash_getname (Lmid_t nsid, const char *name)
+{
+  uint32_t hash = hash_mix (dl_new_hash (name), nsid);
+  struct lib_hash_namenode *n = lib_hash_nametable[lib_hash_bucket (hash)];
+  while (n)
+    {
+      struct link_map *l = n->lib;
+      if (n->hash == hash && !strcmp (n->name, name) && l->l_ns == nsid && !l->l_removed)
+        return l;
+      n = n->next;
+    }
+  return NULL;
+}
+
+static void
+lib_hash_addname (const char *name, struct link_map *lib)
+{
+  if (lib->l_faked)
+    return;
+  struct lib_hash_namenode *n = malloc (sizeof (struct lib_hash_namenode));
+  if (!n)
+    _dl_fatal_printf ("%s: out of memory\n", __func__);
+  uint32_t hash = hash_mix (dl_new_hash (name), lib->l_ns);
+  struct lib_hash_namenode **p = lib_hash_nametable + lib_hash_bucket (hash);
+  n->name = name;
+  n->lib = lib;
+  n->next = *p;
+  n->hash = hash;
+  *p = n;
+}
+
+static void
+lib_hash_delname (const char *name, struct link_map *lib)
+{
+  uint32_t hash = hash_mix (dl_new_hash (name), lib->l_ns);
+  struct lib_hash_namenode **p = lib_hash_nametable + lib_hash_bucket (hash);
+  while (*p)
+    {
+      struct lib_hash_namenode *n = *p;
+      if (n->hash == hash && n->lib == lib &&
+          (n->name == name || !strcmp (n->name, name))) {
+        *p = n->next;
+        free (n);
+        return;
+      }
+      p = &n->next;
+    }
+  _dl_fatal_printf ("%s: can't unhash '%s'\n", __func__, name);
+}
+
+static uint32_t
+hash_file (Lmid_t nsid, dev_t dev, ino64_t ino, off_t off)
+{
+  return hash_mix (hash_mix (hash_mix (hash_mix (0, nsid), dev), ino), off);
+}
+
+static struct link_map *
+lib_hash_getfile (Lmid_t nsid, dev_t dev, ino64_t ino, off_t off)
+{
+  uint32_t hash = hash_file (nsid, dev, ino, off);
+  struct lib_hash_filenode *n = lib_hash_filetable[lib_hash_bucket (hash)];
+  while (n)
+    {
+      struct link_map *l = n->lib;
+      if (n->hash == hash && !l->l_removed && l->l_ns == nsid &&
+          l->l_file_id.dev == dev && l->l_file_id.ino == ino && l->l_off == off)
+        return l;
+      n = n->next;
+    }
+  return NULL;
+}
+
+static void
+lib_hash_addfile (struct link_map *lib)
+{
+  if (lib->l_faked)
+    return;
+  struct lib_hash_filenode *n = malloc (sizeof (struct lib_hash_filenode));
+  if (!n)
+    _dl_fatal_printf ("%s: out of memory\n", __func__);
+  uint32_t hash = hash_file (lib->l_ns, lib->l_file_id.dev, lib->l_file_id.ino, lib->l_off);
+  struct lib_hash_filenode **p = lib_hash_filetable + lib_hash_bucket (hash);
+  n->lib = lib;
+  n->next = *p;
+  n->hash = hash;
+  *p = n;
+}
+
+static void
+lib_hash_delfile (struct link_map *lib)
+{
+  uint32_t hash = hash_file (lib->l_ns, lib->l_file_id.dev, lib->l_file_id.ino, lib->l_off);
+  struct lib_hash_filenode **p = lib_hash_filetable + lib_hash_bucket (hash);
+  while (*p)
+    {
+      struct lib_hash_filenode *n = *p;
+      if (n->hash == hash && n->lib == lib)
+        {
+          *p = n->next;
+          free (n);
+          return;
+        }
+      p = &n->next;
+    }
+  _dl_fatal_printf ("%s: can't unhash '%s'\n", __func__, lib->l_name);
+}
+
+void
+_dl_hash_add_object (struct link_map *lib)
+{
+  lib_hash_addfile (lib);
+  lib_hash_addname (lib->l_name, lib);
+  for (struct libname_list *ln = lib->l_libname; ln; ln = ln->next)
+    {
+      lib_hash_addname (ln->name, lib);
+    }
+}
+
+void
+_dl_hash_del_object (struct link_map *lib)
+{
+  lib_hash_delfile (lib);
+  lib_hash_delname (lib->l_name, lib);
+  for (struct libname_list *ln = lib->l_libname; ln; ln = ln->next) {
+    lib_hash_delname (ln->name, lib);
+  }
+}
+
 /* Map in the shared object NAME, actually located in REALNAME, and already
    opened on FD.  */
 
@@ -841,28 +1011,34 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, off_t of
     }
 
   /* Look again to see if the real name matched another already loaded.  */
-  for (l = GL(dl_ns)[nsid]._ns_loaded; l != NULL; l = l->l_next)
-    if (!l->l_removed && _dl_file_id_match_p (&l->l_file_id, &id)
-        && l->l_off == offset)
-      {
-	/* The object is already loaded.
-	   Just bump its reference count and return it.  */
-	__close (fd);
-
-	/* If the name is not in the list of names for this object add
-	   it.  */
-	free (realname);
-	if (offset == 0)
-	  /* If offset!=0, foo.so/@0x<offset> should be the *only*
-	     name for this object. b/20141439.  */
-	  add_name_to_object (l, name);
-
-	return l;
-      }
+  l = lib_hash_getfile (nsid, id.dev, id.ino, offset);
+  if (l)
+    {
+      /* The object is already loaded.
+         Just bump its reference count and return it.  */
+      __close (fd);
+
+      /* If the name is not in the list of names for this object add
+         it.  */
+      free (realname);
+      if (offset == 0)
+        {
+          /* If offset!=0, foo.so/@0x<offset> should be the *only*
+             name for this object. b/20141439.  */
+          add_name_to_object (l, name);
+          lib_hash_addname (name, l);
+        }
+
+      return l;
+    }
 
 #ifdef SHARED
   /* When loading into a namespace other than the base one we must
      avoid loading ld.so since there can only be one copy.  Ever.  */
+
+  /* Note that this test is wrong in two ways:
+       1) it doesn't consider offset, and
+       2) l_file_id.ino and l_file_id.dev for rtld are always zero!  */
   if (__glibc_unlikely (nsid != LM_ID_BASE)
       && (_dl_file_id_match_p (&id, &GL(dl_rtld_map).l_file_id)
 	  || _dl_name_match_p (name, &GL(dl_rtld_map))))
@@ -1349,10 +1525,7 @@ cannot enable executable stack as shared object requires");
 
   l->l_off = offset;
 
-  /* When we profile the SONAME might be needed for something else but
-     loading.  Add it right away.  */
-  if (__glibc_unlikely (GLRO(dl_profile) != NULL)
-      && l->l_info[DT_SONAME] != NULL)
+  if (l->l_info[DT_SONAME] != NULL)
     add_name_to_object (l, ((const char *) D_PTR (l, l_info[DT_STRTAB])
 			    + l->l_info[DT_SONAME]->d_un.d_val));
 
@@ -1909,26 +2082,7 @@ match_one (const char *name, struct link_map *l)
   if (__builtin_expect (l->l_faked, 0) != 0
       || __builtin_expect (l->l_removed, 0) != 0)
     return 0;
-  if (!_dl_name_match_p (name, l))
-    {
-      const char *soname;
-
-      if (__builtin_expect (l->l_soname_added, 1)
-	  || l->l_info[DT_SONAME] == NULL)
-	return 0;
-
-      soname = ((const char *) D_PTR (l, l_info[DT_STRTAB])
-		+ l->l_info[DT_SONAME]->d_un.d_val);
-      if (strcmp (name, soname) != 0)
-	return 0;
-
-      /* We have a match on a new name -- cache it.  */
-      add_name_to_object (l, soname);
-      l->l_soname_added = 1;
-    }
-
-  /* We have a match.  */
-  return 1;
+  return _dl_name_match_p (name, l);
 }
 
 /* Map in the shared object file NAME.  */
@@ -1960,9 +2114,9 @@ _dl_map_object (struct link_map *loader, const char *name, off_t offset,
     }
   else
     {
-      for (l = _dl_last_entry (&GL(dl_ns)[nsid]); l; l = l->l_prev)
-	if (match_one (name, l))
-	  return l;
+      l = lib_hash_getname(nsid, name);
+      if (l)
+        return l;
     }
 
   /* Display information if we are debugging.  */
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 06db4e2b94..360f9ea3dc 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -25,6 +25,7 @@
 #include <dl-hash.h>
 #include <dl-machine.h>
 #include <sysdep-cancel.h>
+#include <hp-timing.h>
 #include <libc-lock.h>
 #include <tls.h>
 #include <atomic.h>
@@ -330,335 +331,263 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
   result->m = (struct link_map *) map;
 }
 
-/* Bob Jenkins's hash function, free for any use.
-   http://burtleburtle.net/bob/hash/  */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<<8); \
-  c -= a; c -= b; c ^= (b>>13); \
-  a -= b; a -= c; a ^= (c>>12);  \
-  b -= c; b -= a; b ^= (a<<16); \
-  c -= a; c -= b; c ^= (b>>5); \
-  a -= b; a -= c; a ^= (c>>3);  \
-  b -= c; b -= a; b ^= (a<<10); \
-  c -= a; c -= b; c ^= (b>>15); \
-}
-
-typedef unsigned int uint32 __attribute__ ((mode (SI)));
-static uint32
-hash_with_seed (const char *k, uint32 length, uint32 seed)
-{
-   uint32 a, b, c, len;
-
-   /* Set up the internal state */
-   len = length;
-   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-   c = seed;            /* the previous hash value */
-
-   /*---------------------------------------- handle most of the key */
-   while (len >= 12)
-     {
-       a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
-       b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
-       c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
-       mix (a,b,c);
-       k += 12;
-       len -= 12;
-     }
-
-   /*------------------------------------- handle the last 11 bytes */
-   c += length;
-   switch (len)              /* all the case statements fall through */
-     {
-     case 11: c+=((uint32)k[10]<<24);
-     case 10: c+=((uint32)k[9]<<16);
-     case 9 : c+=((uint32)k[8]<<8);
-         /* the first byte of c is reserved for the length */
-     case 8 : b+=((uint32)k[7]<<24);
-     case 7 : b+=((uint32)k[6]<<16);
-     case 6 : b+=((uint32)k[5]<<8);
-     case 5 : b+=k[4];
-     case 4 : a+=((uint32)k[3]<<24);
-     case 3 : a+=((uint32)k[2]<<16);
-     case 2 : a+=((uint32)k[1]<<8);
-     case 1 : a+=k[0];
-       /* case 0: nothing left to add */
-     }
-   mix (a,b,c);
-   /*-------------------------------------------- report the result */
-   return c;
-}
-
 
 /* A hash table used to speed up lookups when we have a lot of shared
    libraries.  We record in the table how many ELF objects in the link
    map we can safely skip, because they are known to not contain the symbol
    we are looking for.
 
-   Values start out at N (num objects in the main_map).  We then go through
-   each object (executable or DSO) in their order in the main_map and insert
-   pos_in_map_map into the table.
+   We go through each object (executable or DSO) in their order in the
+   main_map and insert pos_in_main_map into the table.  Searches for
+   symbols not present in the table start at num_objects, skipping every
+   object preprocessed in this way.
 
    If there is already an earlier (smaller) entry in the table, we leave it
-   alone.  It could be a hash collision, or it could be an earlier
-   instance of the same symbol (eg weak __stdout definition in
-   multiple objects).
+   alone.  It represents an earlier instance of the same symbol (e.g. a weak
+   __stdout definition in multiple objects).  */
 
-   To save space, the table contains uint16_t entries, and so we can't
-   record main map positions over 65535.
-
-   It turns out that even when only 25% of slots are used, collision
-   rate is around 10%, which is quite high, and causes lookups to be slow.
-
-   So we use a second table (using a different hash seed). Hash collisions
-   using two separate hashes are quite rare.
-
-   Note that we allocate the two tables together (a single block twice
-   the size of one table).  */
+struct position_hash
+{
+  /* The Bernstein hash, shifted right one bit.  (This lets us build a table
+     directly from the precomputed values in DT_GNU_HASH sections, in which
+     the low bit is repurposed to terminate hash chains.)  */
+  uint32_t hash;
+  /* The position of the first library with this symbol in the search list.  */
+  uint32_t pos;
+  /* The symbol name.  */
+  const char *name;
+};
+
+static struct position_hash *position_hash_table;
+static size_t position_hash_table_slots;
+static size_t position_hash_table_slots_occupied;
+static int position_hash_lookup_default;
 
-static void
-insert_into_hash_table_1 (int pos_in_main_map, const char *symbol_name,
-			  size_t name_len, dl_position_table_entry_t *table,
-			  int seed)
+static int
+position_hash_put (struct position_hash *table, size_t slots,
+                   struct position_hash *item)
 {
-  uint32 hash = hash_with_seed (symbol_name, name_len, seed);
-  int pos = hash & GLRO(dl_position_hash_mask);
-  if (pos_in_main_map < table[pos])
+  size_t mask = slots - 1;
+  /* Search for a matching entry or a free slot.  This loop always terminates
+     because we don't allow the hashtable to become completely full.  */
+  for (size_t stride = 0, i = item->hash; ; i += ++stride)
     {
-      table[pos] = pos_in_main_map;
-      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-        _dl_debug_printf ("fastload hash for %s: [%u] = %u (table %u)\n",
-                          symbol_name, (unsigned int) pos,
-                          (unsigned int) table[pos],
-                          (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+      struct position_hash *slot = &table[i & mask];
+      if (slot->name == NULL)
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:    put %s hash 0x%x pos %u slot 0x%lx\n",
+                              item->name, item->hash, item->pos, slot - table);
+          slot->hash = item->hash;
+          slot->pos = item->pos;
+          slot->name = item->name;
+          return 1;
+        }
+      else if (slot->hash == item->hash &&
+               (slot->name == item->name || !strcmp (slot->name, item->name)))
+        {
+          if (item->pos < slot->pos)
+            slot->pos = item->pos;
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:    dup %s hash 0x%x pos %u slot 0x%lx\n",
+                              item->name, item->hash, slot->pos, slot - table);
+          return 0;
+        }
     }
-  else
+}
+
+static void
+position_hash_resize (struct position_hash *oldtable,
+                      size_t oldslots, size_t newslots)
+{
+  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+    _dl_debug_printf ("fastload: resizing hashtable to %lu slots\n", newslots);
+  assert (!oldtable == !oldslots);
+  void *ptr = mmap (NULL, newslots * sizeof *oldtable, PROT_READ|PROT_WRITE,
+                    MAP_ANON|MAP_PRIVATE|MAP_POPULATE, -1, 0);
+  if (ptr == MAP_FAILED)
+    _dl_signal_error (errno, NULL, NULL, "cannot mmap fastload hashtable");
+  struct position_hash *newtable = position_hash_table = ptr;
+  position_hash_table_slots = newslots;
+  if (oldtable == NULL)
+    return;
+  for (size_t i = 0; i < oldslots; ++i)
     {
-      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-        _dl_debug_printf
-          ("fastload hash for %s: [%u] already set to %u <= %u (table %u)\n",
-           symbol_name, (unsigned int) pos,
-           (unsigned int) table[pos],
-           (unsigned int) pos_in_main_map,
-           (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+      if (oldtable[i].name)
+        position_hash_put (newtable, newslots, &oldtable[i]);
     }
-}
 
-#define HASH_SEED_A 0
-#define HASH_SEED_B 5381  /* As good as any other value.  */
+  if (munmap (oldtable, oldslots * sizeof *oldtable))
+    _dl_signal_error (errno, NULL, NULL, "cannot munmap fastload hashtable");
+}
 
 static void
-insert_into_hash_table (int pos_in_main_map, const char *symbol_name)
+position_hash_init (int lookup_default)
 {
-  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
-  const int table_size = GLRO(dl_position_hash_mask) + 1;
-  dl_position_table_entry_t *const table2 = table1 + table_size;
-  const size_t name_len = strlen (symbol_name);
-
-  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table1,
-			    HASH_SEED_A);
-  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table2,
-			    HASH_SEED_B);
+  assert (position_hash_table == NULL);
+  position_hash_lookup_default = lookup_default;
+  position_hash_resize (NULL, 0, 8192);
 }
 
-static inline int
-earliest_pos_from_hash_table_1 (const char *symbol_name,
-                                const dl_position_table_entry_t *table,
-                                int seed)
+static void
+position_hash_insert (uint32_t hash, const char *name, uint32_t pos)
 {
-  int pos;
-  uint32 hash;
-
-  hash = hash_with_seed (symbol_name, strlen (symbol_name), seed);
-  pos = hash & GLRO(dl_position_hash_mask);
+  struct position_hash *table = position_hash_table;
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-    _dl_debug_printf
-      ("fastload earliest pos for %s: [%u] = %u (table %u)\n",
-       symbol_name, (unsigned int) pos, (unsigned int) table[pos],
-       (GLRO(dl_position_hash_table) == table) ? 0 : 1);
-
-  return table[pos];
+    _dl_debug_printf ("fastload: insert %s hash 0x%x pos %u\n", name, hash, pos);
+  size_t slots = position_hash_table_slots;
+  struct position_hash item = { hash, pos, name };
+  if (!position_hash_put (table, slots, &item))
+    return;
+  size_t newsize = ++position_hash_table_slots_occupied;
+  if (newsize >= slots / 2)
+    /* The load factor has reached 50%.  Double the table size and rehash.  */
+    position_hash_resize (table, slots, 2 * slots);
 }
 
-static inline int
-earliest_pos_from_hash_table (const char *undef_name)
+static int
+position_hash_lookup (uint32_t hash, const char *name)
 {
-  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
-
-  if (table1 == NULL)
-    return 0;  /* Search from the beginning of the list.  */
-  else
+  struct position_hash *table = position_hash_table;
+  if (table == NULL)
+    return 0;
+  size_t mask = position_hash_table_slots - 1;
+  for (size_t stride = 0, i = hash; ; i += ++stride)
     {
-      const int table_size = GLRO(dl_position_hash_mask) + 1;
-      dl_position_table_entry_t *const table2 = table1 + table_size;
-      const int skip_1 = earliest_pos_from_hash_table_1 (undef_name, table1,
-                                                         HASH_SEED_A);
-      const int skip_2 = earliest_pos_from_hash_table_1 (undef_name, table2,
-                                                         HASH_SEED_B);
-
-      /* Possibilities:
-
-       * Both skip_1 and skip_2 are positive and same:
-         there were no collisions at insertion in either table, or there
-         was double-collision with symbols from the same library.
-
-         If skip_1 == skip_2 == N (num_directly_loaded_objects), then the
-         slot was empty in both tables after they were filled (i.e. there
-         is no symbol that hashes to corresponding slot).  In that case,
-         searching first [0, N) is pointless. See b/5609692.
-
-         True skip_to == skip_1 == skip_2.
-
-       * Both skip_1 and skip_2 are positive but different:
-         there was a collision in one or both tables, or one of the slots
-         remained unfilled after all symbols have been inserted.
-         It is safe to start at max, true skip_to count can not be less
-         than max.  */
-
-      return (skip_1 < skip_2) ? skip_2 : skip_1;
+      struct position_hash *slot = &table[i & mask];
+      if (slot->hash == hash &&
+          (slot->name == name || !strcmp (slot->name, name)))
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload:  found %s at slot 0x%lx, pos %u\n",
+                              name, slot - table, slot->pos);
+          return slot->pos;
+        }
+      else if (slot->name == NULL)
+        {
+          if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+            _dl_debug_printf ("fastload: missed %s at slot 0x%lx, default pos %u\n",
+                              name, slot - table, position_hash_lookup_default);
+          return position_hash_lookup_default;
+        }
     }
 }
 
-
-static void
-fill_hash_table (const struct link_map *const map, int pos_in_main_map,
-                 int num_symbols)
+static int
+position_hash_include_symbol (const ElfW(Sym) *sym)
 {
-  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
-  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
-  int k;
+  if (sym->st_value == 0 && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
+    return 0;
 
-  /* Iterate through the symbols defined in this map and insert them
-     into the bloom filter.  If the symbol wouldn't pass the tests in
-     do_lookup_x(), then don't insert it.  */
-  for (k = 0; k < num_symbols; k++)
+  switch (ELFW(ST_TYPE) (sym->st_info))
     {
-      const ElfW(Sym) *sym = &symtab[k];
-
-      if (sym->st_value == 0
-          && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
-        continue;
-
-      switch (ELFW(ST_TYPE) (sym->st_info))
-	{
-	case STT_SECTION:
-	case STT_FILE:
-	case STT_COMMON:
-	  continue;
-	default:
-	  break;
-	}
-
-      switch (ELFW(ST_BIND) (sym->st_info))
-	{
-	case STB_LOCAL:  /* Local symbols are ignored.  */
-	  continue;
-	default:
-	  break;
-	}
-
-      insert_into_hash_table (pos_in_main_map, strtab + sym->st_name);
+    case STT_SECTION:
+    case STT_FILE:
+    case STT_COMMON:
+      return 0;
     }
-}
-
-static dl_position_table_entry_t *
-allocate_position_hash_table (size_t table_size, int max_pos)
-{
-  /* Implementation detail: we actually allocate two "logical" tables in a
-     single block.  */
-
-  const size_t total_size = 2 * table_size;
-  dl_position_table_entry_t *table
-    = malloc (total_size * sizeof (dl_position_table_entry_t));
-
-  if (table == NULL)
-    return NULL;
-
-  for (int k = 0; k < total_size; ++k)
-    table[k] = max_pos;
 
-  return table;
+  /* Local symbols are ignored.  */
+  return ELFW(ST_BIND) (sym->st_info) != STB_LOCAL;
 }
 
 static int
-num_dynsyms (const struct link_map *const map)
+last_gnu_hash_bucket (const struct link_map *map)
 {
-  if (map->l_info[DT_HASH] != NULL)
-    {
-      /* If DT_HASH is present, we unconditionally use it, since it already
-         has the answer precomputed and waiting for us.  */
-      const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+  /* The best documentation for GNU_HASH I found:
+     http://www.linker-aliens.org/blogs/ali/entry/gnu_hash_elf_sections/
 
-      if (hash != NULL)
-	{
-	  /* DT_HASH dynamic structure points to a symbol hash table
-	     with the following layout:
+     _dl_setup_hash() has already set things up for us.  */
 
-	     nbucket
-	     nchain       (== number of symbols in dynamic symbol table)
-	     bucket[0]
-	     ...
+  if (__builtin_expect (map->l_nbuckets < 1, 0))
+    /* Paranoia: neither gold nor gnu-ld will construct an empty
+       .gnu.hash, but some other linker just might.  */
+    return 0;
 
-	     http://refspecs.freestandards.org/elf/TIS1.1.pdf (p. 2.19). */
+  /* In the GNU hash the symbol index of a symbol is determined by
+     the offset of the symbol's entry in the hash table itself.
 
-	  return hash[1];
-	}
-    }
-  else if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
-    {
-      /* The best documentation for GNU_HASH I found:
-	 http://blogs.sun.com/ali/entry/gnu_hash_elf_sections
+     We start at the last bucket map->l_gnu_buckets[map->l_nbuckets-1]
+     and loop backward from the end until we find a bucket which is
+     not zero.  */
 
-	 _dl_setup_hash() has already set things up for us.  */
+  int last_bucket_idx = map->l_nbuckets - 1;
 
-      if (__builtin_expect (map->l_nbuckets < 1, 0))
-	{
-	  /* Paranoia: neither gold nor gnu-ld will construct an empty
-	     .gnu.hash, but some other linker just might.  */
-	  return 0;
-	}
+  while (last_bucket_idx > 0 &&
+         map->l_gnu_buckets[last_bucket_idx] == 0)
+    --last_bucket_idx;
 
-      /* In the GNU hash the symbol index of a symbol is determined by
-	 the offset of the symbol's entry in the hash table itself.
+  return map->l_gnu_buckets[last_bucket_idx];
+}
 
-	 We start at the last bucket map->l_gnu_chain_zero[map->l_nbuckets-1]
-	 and loop backward from the end until we find a bucket which is
-	 not zero.
+static void
+position_hash_fill_from_gnu_hash (const struct link_map *map,
+                                  int pos_in_main_map)
+{
+  int last_bucket = last_gnu_hash_bucket (map);
+  if (last_bucket == 0)
+    return;
 
-	 Then we walk forward to find the last entry in the chain which
-	 starts at that bucket.  The terminating entry gives us desired
-	 symbol index.  */
+  /* Start of hash values.  */
+  const Elf32_Word *chains = &map->l_gnu_buckets[map->l_nbuckets];
 
-      int last_bucket_idx = map->l_nbuckets - 1;
+  /* Reconstruct the number of symbols omitted.  */
+  const Elf32_Word *zero = map->l_gnu_chain_zero;
+  int symbias = chains - zero;
 
-      while (last_bucket_idx > 0 &&
-	     map->l_gnu_buckets[last_bucket_idx] == 0)
-	{
-	  --last_bucket_idx;
-	}
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
 
-      const Elf32_Word last_bucket = map->l_gnu_buckets[last_bucket_idx];
+  /* Iterate through the symbols in the GNU hash chains and insert them
+     into the position hash table.  If the symbol wouldn't pass the tests
+     in do_lookup_x(), then don't insert it.  */
+  for (int k = symbias; ; ++k)
+    {
+      uint32_t hash = zero[k];
+      const ElfW(Sym) *sym = &symtab[k];
+      if (position_hash_include_symbol (sym))
+        position_hash_insert (hash >> 1, strtab + sym->st_name, pos_in_main_map);
 
-      if (__builtin_expect (last_bucket == 0, 0))
-	{
-	  /* This shouldn't happen unless the library has only static
-	     variables with global ctors, and nothing else.  */
-	  return 0;
-	}
+      /* Stop when we've passed the start of the last chain and the sentinel bit
+         terminating a chain is set.  This is the end of the section.  */
+      if (k >= last_bucket && (hash & 1))
+        break;
+    }
+}
 
-      /* Start of the hash chain for the last non-zero bucket.  */
-      const Elf32_Word *hasharr = &map->l_gnu_chain_zero[last_bucket];
+static int
+old_hash_nchain (const struct link_map *map)
+{
+  if (map->l_info[DT_HASH] == NULL)
+    return 0;
+  const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+  if (hash == NULL)
+    return 0;
+  /* Read the "nchain" field of the DT_HASH section; see
+     https://refspecs.linuxfoundation.org/elf/gabi4+/ch5.dynamic.html#hash. */
+  return hash[1];
+}
 
-      /* The chain is terminated by an entry with LSBit set.  */
-      while ((*hasharr & 1u) == 0)
-	++hasharr;
+static void
+position_hash_fill_from_symtab (const struct link_map *const map,
+                                int pos_in_main_map)
+{
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+  const int num_symbols = old_hash_nchain (map);
 
-      /* Size of dynamic symbol table is "index of last symbol" + 1.  */
-      return hasharr - map->l_gnu_chain_zero + 1;
+  /* Iterate through the symbols defined in this map and insert them
+     into the position hash table.  If the symbol wouldn't pass the tests
+     in do_lookup_x(), then don't insert it.  */
+  for (int k = 0; k < num_symbols; k++)
+    {
+      const ElfW(Sym) *sym = &symtab[k];
+      if (position_hash_include_symbol (sym))
+        {
+          uint32_t hash = dl_new_hash (strtab + sym->st_name);
+          position_hash_insert (hash >> 1, strtab + sym->st_name, pos_in_main_map);
+        }
     }
-
-  return 0;
 }
 
 /* Create the fastload position hash (if requested).  Given a link_map
@@ -677,8 +606,8 @@ _dl_fill_position_hash (struct link_map *main_map)
      fill in the hash table of earliest known positions for symbols in
      main_map.  We'll use this in do_lookup_x().  */
 
-  /* If cutoff or size is negative, fastload is disabled.  */
-  if (GLRO(dl_position_hash_cutoff) < 0 || GLRO(dl_position_hash_bits) < 0)
+  /* If cutoff is negative, fastload is disabled.  */
+  if (GLRO(dl_position_hash_cutoff) < 0)
     {
       if (__builtin_expect (debug_fastload, 0))
         _dl_debug_printf ("fastload: disabled (configuration)\n");
@@ -695,83 +624,31 @@ _dl_fill_position_hash (struct link_map *main_map)
       return;
     }
 
-  int *ndynsyms = NULL;
+  int timing = (HP_TIMING_AVAIL) && (GLRO(dl_debug_mask) & DL_DEBUG_STATISTICS);
+  hp_timing_t start, stop, elapsed;
+  if (timing)
+    HP_TIMING_NOW (start);
 
-  if (GLRO(dl_position_hash_bits) == 0)
-    {
-      /* Table size has not been specified via LD_FASTLOAD_HASH_BITS.
-         Compute appropriate size for <= 25% load factor.  */
-
-      int total_symbols = 0;
-
-      /* It is safe to call alloca() here, because we are executing in
-	 the context of dl_main, i.e. we are on the main stack, and are
-	 not yet using much of it.  Assuming sane upper limit of 10K direct
-	 dependencies, we'll use at most 40K bytes here.  */
-      ndynsyms = alloca (num_objects * sizeof (int));
-
-      for (k = 0; k < num_objects; ++k)
-        {
-          const struct link_map *const map = main_map->l_searchlist.r_list[k];
-	  const int nsyms = num_dynsyms (map);
+  position_hash_init (num_objects);
 
-	  ndynsyms[k] = nsyms;
-	  total_symbols += nsyms;
-	  if (__builtin_expect (debug_fastload, 0))
-	    _dl_debug_printf ("fastload: %s: %u symbols\n", map->l_name, nsyms);
-	}
-
-      /* Ensure table size is >= 4 * total_symbols.  Generally this
-         over-sizes the table by more than 4, since there usually are lots
-         of duplicate symbols (from different DSOs) as well.  */
-
-      GLRO(dl_position_hash_bits)
-        = (8 * sizeof (total_symbols)) - __builtin_clz (total_symbols) + 2;
-
-      if (__builtin_expect (debug_fastload, 0))
-        _dl_debug_printf ("fastload: will use %u hash bits for %u total "
-                          "symbols\n",
-                          GLRO(dl_position_hash_bits), total_symbols);
-    }
-
-  /* If the requested or computed table size is too large, limit it.  */
-  if (GLRO(dl_position_hash_bits) > DL_POSITION_HASH_BITS_MAX)
-    {
-      if (__builtin_expect (debug_fastload, 0))
-        _dl_debug_printf ("fastload: requested table too large"
-                          " (%u > max %u bits)\n",
-                          GLRO(dl_position_hash_bits),
-                          DL_POSITION_HASH_BITS_MAX);
-      GLRO(dl_position_hash_bits) = DL_POSITION_HASH_BITS_MAX;
-    }
-
-  int table_size = 1 << GLRO(dl_position_hash_bits);
-  GLRO(dl_position_hash_mask) = table_size - 1;
-  if (__builtin_expect (debug_fastload, 0))
-    _dl_debug_printf ("fastload: enabled with %u entry table\n", table_size);
-
-  int max_pos = num_objects < DL_POSITION_MAX ? num_objects : DL_POSITION_MAX;
-  GLRO(dl_position_hash_table) = allocate_position_hash_table (table_size,
-							       max_pos);
-  if (GLRO(dl_position_hash_table) == NULL)
-    {
-      if (__builtin_expect (debug_fastload, 0))
-	_dl_debug_printf ("fastload: failed to allocate table\n");
-      return;
-    }
-
-  /* There is no point in going beyond max_pos: the rest of the table
-     has already been saturated with max_pos value.  */
-  for (k = 0; k < max_pos; ++k)
+  for (k = 0; k < num_objects; ++k)
     {
       const struct link_map *const map = main_map->l_searchlist.r_list[k];
 
-      /* Reuse num_dynsyms() result we computed earlier if possible:
-	 recomputing it may be somewhat expensive with DT_GNU_HASH.  */
-      const int nsyms = ndynsyms ? ndynsyms[k] : num_dynsyms (map);
+      if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
+        position_hash_fill_from_gnu_hash (map, k);
+      else
+        position_hash_fill_from_symtab (map, k);
 
-      fill_hash_table (map, k, nsyms);
     }
+
+  if (!timing)
+    return;
+  HP_TIMING_NOW (stop);
+  HP_TIMING_DIFF (elapsed, start, stop);
+  char buf[80];
+  HP_TIMING_PRINT (buf, sizeof buf, elapsed);
+  _dl_debug_printf ("\t  time to build fastload table: %s\n", buf);
 }
 
 /* Inner part of the lookup functions.  We return a value > 0 if we
@@ -795,13 +672,13 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 
   if (scope == GL(dl_ns)[LM_ID_BASE]._ns_main_searchlist && i == 0)
     {
-      const int skip_to = earliest_pos_from_hash_table (undef_name);
+      const int skip_to = position_hash_lookup (new_hash >> 1, undef_name);
 
       if (skip_to < n)
 	{
 	  i = skip_to;
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-	    _dl_debug_printf ("fastload %s skipping to %u\n",
+	    _dl_debug_printf ("fastload: lookup %s skipping to %u\n",
 			      undef_name, (unsigned int) i);
 	}
       else
@@ -812,7 +689,7 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 	  assert (skip_to == n);
 
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
-	    _dl_debug_printf ("fastload %s, %u >= %u (scope->r_nlist)\n",
+	    _dl_debug_printf ("fastload: lookup %s, %u >= %u (scope->r_nlist)\n",
 			      undef_name, (unsigned int) skip_to,
 			      (unsigned int) n);
 	  return 0;
@@ -1023,16 +900,6 @@ skip:
 }
 
 
-static uint_fast32_t
-dl_new_hash (const char *s)
-{
-  uint_fast32_t h = 5381;
-  for (unsigned char c = *s; c != '\0'; c = *++s)
-    h = h * 33 + c;
-  return h & 0xffffffff;
-}
-
-
 /* Add extra dependency on MAP to UNDEF_MAP.  */
 static int
 add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
diff --git a/elf/dl-object.c b/elf/dl-object.c
index b37bcc1295..7becedf9b3 100644
--- a/elf/dl-object.c
+++ b/elf/dl-object.c
@@ -34,9 +34,7 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
 
   if (GL(dl_ns)[nsid]._ns_loaded != NULL)
     {
-      struct link_map *l = GL(dl_ns)[nsid]._ns_loaded;
-      while (l->l_next != NULL)
-	l = l->l_next;
+      struct link_map *l = _dl_last_entry (&GL(dl_ns)[nsid]);
       new->l_prev = l;
       /* new->l_next = NULL;   Would be necessary but we use calloc.  */
       l->l_next = new;
@@ -47,12 +45,13 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
   new->l_serial = GL(dl_load_adds);
   ++GL(dl_load_adds);
 
+  _dl_hash_add_object (new);
+
   __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
 }
 
 
-/* Allocate a `struct link_map' for a new object being loaded,
-   and enter it into the _dl_loaded list.  */
+/* Allocate a `struct link_map' for a new object being loaded.  */
 struct link_map *
 _dl_new_object (char *realname, const char *libname, int type,
 		struct link_map *loader, int mode, Lmid_t nsid)
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index b2a01ede62..54a226531b 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -33,11 +33,32 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
   unsigned int i = 0;
   uint16_t seen[nmaps];
   memset (seen, 0, nmaps * sizeof (seen[0]));
-  while (1)
+
+  /* Mark objects with in-edges.  */
+  for (unsigned j = i; j < nmaps; ++j)
+    maps[j]->l_inedge = 0;
+
+  for (unsigned j = i; j < nmaps; ++j)
+    {
+      for (struct link_map **p = maps[j]->l_initfini; p && *p; ++p)
+        {
+          if (*p != maps[j])  /* Skip self-edges.  */
+            (*p)->l_inedge = 1;
+        }
+    }
+
+  while (i < nmaps)
     {
+      struct link_map *thisp = maps[i];
+      if (!thisp->l_inedge)
+        {
+          /* No dependencies on this object.  */
+          ++i;
+          continue;
+        }
+
       /* Keep track of which object we looked at this round.  */
       ++seen[i];
-      struct link_map *thisp = maps[i];
 
       if (__glibc_unlikely (for_fini))
 	{
@@ -52,7 +73,7 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
 	 with the dependency.  */
       unsigned int k = nmaps - 1;
       while (k > i)
-	{
+        {
 	  struct link_map **runp = maps[k]->l_initfini;
 	  if (runp != NULL)
 	    /* Look through the dependencies of the object.  */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index a097f22a16..e0d464a5fa 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -143,10 +143,7 @@ hp_timing_t _dl_cpuclock_offset;
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 
 /* The merged position hash table used if we have a lot of shared objects.  */
-dl_position_table_entry_t *_dl_position_hash_table;
-int _dl_position_hash_mask;
 int _dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT;
-int _dl_position_hash_bits;
 
 size_t _dl_pagesize = EXEC_PAGESIZE;
 
diff --git a/elf/rtld.c b/elf/rtld.c
index a0b22c4bb3..9c942add0b 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -292,7 +292,6 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
     ._dl_close = _dl_close,
     ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft,
     ._dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT,
-    ._dl_position_hash_bits = 0,
 #ifdef HAVE_DL_DISCOVER_OSVERSION
     ._dl_discover_osversion = _dl_discover_osversion
 #endif
@@ -1382,6 +1381,7 @@ of this helper program; chances are you did not intend to run this program.\n\
   GL(dl_rtld_map).l_type = lt_library;
   main_map->l_next = &GL(dl_rtld_map);
   GL(dl_rtld_map).l_prev = main_map;
+  _dl_hash_add_object (&GL(dl_rtld_map));
   ++GL(dl_ns)[LM_ID_BASE]._ns_nloaded;
   ++GL(dl_load_adds);
 
@@ -2703,12 +2703,6 @@ process_envvars (enum mode *modep)
 	    GLRO(dl_position_hash_cutoff)
               = _dl_strtoul (&envline[16], NULL);;
           break;
-
-        case 18:
-          if (memcmp (envline, "FASTLOAD_HASH_BITS", 18) == 0)
-	    GLRO(dl_position_hash_bits)
-              = _dl_strtoul (&envline[19], NULL);;
-          break;
         }
     }
 
diff --git a/include/link.h b/include/link.h
index 22c3eec73d..8e4ec17e46 100644
--- a/include/link.h
+++ b/include/link.h
@@ -180,8 +180,7 @@ struct link_map
     unsigned int l_reserved:2;	/* Reserved for internal use.  */
     unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed
 					to by `l_phdr' is allocated.  */
-    unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in
-				      the l_libname list.  */
+    unsigned int l_inedge:1;    /* Used temporarily when sorting deps.  */
     unsigned int l_faked:1;	/* Nonzero if this is a faked descriptor
 				   without associated file.  */
     unsigned int l_need_tls_init:1; /* Nonzero if GL(dl_init_static_tls)
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 329a367ded..32647f825a 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -589,23 +589,14 @@ struct rtld_global_ro
      below.  */
   EXTERN struct r_search_path_elem *_dl_init_all_dirs;
 
-  /* The merged hash table used if we have a lot of shared objects. */
-  EXTERN dl_position_table_entry_t *_dl_position_hash_table;
-  EXTERN int _dl_position_hash_mask;
-  EXTERN int _dl_position_hash_bits;
   EXTERN int _dl_position_hash_cutoff;
 
-#define DL_POSITION_HASH_BITS_MAX       27  /* (1 << 27) entries.  */
 #if defined(__powerpc__) && !defined(__powerpc64__)
 #define DL_POSITION_HASH_CUTOFF_DEFAULT -1  /* Disabled.  */
 #else
 #define DL_POSITION_HASH_CUTOFF_DEFAULT 32  /* > 32 shared libs.  */
 #endif
 
-  /* Colon-separated list of absolute paths to ld.so.cache files
-     we'll load.  */
-  EXTERN const char *_google_ld_so_cache_list;
-
 #if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD
   /* Overhead of a high-precision timing measurement.  */
   EXTERN hp_timing_t _dl_hp_timing_overhead;
@@ -969,6 +960,12 @@ extern lookup_t _dl_lookup_symbol_x (const char *undef,
 extern void _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
      attribute_hidden;
 
+/* Update hashtables when objects are loaded or unloaded.  */
+extern void _dl_hash_add_object (struct link_map *lib)
+    attribute_hidden;
+extern void _dl_hash_del_object (struct link_map *lib)
+    attribute_hidden;
+
 /* Allocate a `struct link_map' for a new object being loaded.  */
 extern struct link_map *_dl_new_object (char *realname, const char *libname,
 					int type, struct link_map *loader,
@@ -1257,6 +1254,15 @@ _dl_last_entry (struct link_namespaces *ns)
   return map;
 }
 
+static inline uint_fast32_t
+dl_new_hash (const char *s)
+{
+  uint_fast32_t h = 5381;
+  for (unsigned char c = *s; c != '\0'; c = *++s)
+    h = h * 33 + c;
+  return h & 0xffffffff;
+}
+
 __END_DECLS
 
 #endif /* ldsodefs.h */


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-08-28  0:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-28  0:32 [glibc/maskray/grte] Redesign the fastload support for additional performance Fangrui Song
  -- strict thread matches above, loose matches on Subject: below --
2021-08-28  0:27 Fangrui Song
2021-08-27 23:50 Fangrui Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).