public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/google/grte/v5-2.27/master] Add "fastload" support.
@ 2021-08-28  0:41 Fangrui Song
  0 siblings, 0 replies; only message in thread
From: Fangrui Song @ 2021-08-28  0:41 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=590786950c039260b75db235e81f2ae1633a055e

commit 590786950c039260b75db235e81f2ae1633a055e
Author: Paul Pluzhnikov <ppluzhnikov@google.com>
Date:   Tue Mar 4 19:07:05 2014 -0800

    Add "fastload" support.

Diff:
---
 elf/dl-close.c             |   6 +
 elf/dl-load.c              |  69 ++++---
 elf/dl-lookup.c            | 478 +++++++++++++++++++++++++++++++++++++++++++++
 elf/dl-support.c           |   7 +
 elf/dl-version.c           |  22 ++-
 elf/rtld.c                 |  40 +++-
 sysdeps/generic/ldsodefs.h |  67 ++++++-
 7 files changed, 657 insertions(+), 32 deletions(-)

diff --git a/elf/dl-close.c b/elf/dl-close.c
index ecd6729704..cbf336609b 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -135,6 +135,12 @@ _dl_close_worker (struct link_map *map, bool force)
   Lmid_t nsid = map->l_ns;
   struct link_namespaces *ns = &GL(dl_ns)[nsid];
 
+  /* Recompute _ns_last next time we need it.
+     It is tempting to set _ns_last to map->l_prev, but map->l_prev might also
+     be going away as part of current dlclose, and using it may cause _ns_last
+     to become dangling.  */
+  ns->_ns_last = NULL;
+
  retry:
   dl_close_state = pending;
 
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 1b08f294e8..6f08e8ad13 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1900,6 +1900,37 @@ open_path (const char *name, size_t namelen, off_t offset, int mode,
   return -1;
 }
 
+static int
+match_one (const char *name, struct link_map *l)
+{
+  /* If the requested name matches the soname of a loaded object,
+     use that object.  Elide this check for names that have not
+     yet been opened.  */
+  if (__builtin_expect (l->l_faked, 0) != 0
+      || __builtin_expect (l->l_removed, 0) != 0)
+    return 0;
+  if (!_dl_name_match_p (name, l))
+    {
+      const char *soname;
+
+      if (__builtin_expect (l->l_soname_added, 1)
+	  || l->l_info[DT_SONAME] == NULL)
+	return 0;
+
+      soname = ((const char *) D_PTR (l, l_info[DT_STRTAB])
+		+ l->l_info[DT_SONAME]->d_un.d_val);
+      if (strcmp (name, soname) != 0)
+	return 0;
+
+      /* We have a match on a new name -- cache it.  */
+      add_name_to_object (l, soname);
+      l->l_soname_added = 1;
+    }
+
+  /* We have a match.  */
+  return 1;
+}
+
 /* Map in the shared object file NAME.  */
 
 struct link_map *
@@ -1917,33 +1948,21 @@ _dl_map_object (struct link_map *loader, const char *name, off_t offset,
   assert (nsid < GL(dl_nns));
 
   /* Look for this name among those already loaded.  */
-  for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next)
+  if (!GLRO(dl_enable_fastload) || name[0] == '\0')
     {
-      /* If the requested name matches the soname of a loaded object,
-	 use that object.  Elide this check for names that have not
-	 yet been opened.  */
-      if (__glibc_unlikely ((l->l_faked | l->l_removed) != 0))
-	continue;
-      if (!_dl_name_match_p (name, l))
-	{
-	  const char *soname;
-
-	  if (__glibc_likely (l->l_soname_added)
-	      || l->l_info[DT_SONAME] == NULL)
-	    continue;
-
-	  soname = ((const char *) D_PTR (l, l_info[DT_STRTAB])
-		    + l->l_info[DT_SONAME]->d_un.d_val);
-	  if (strcmp (name, soname) != 0)
-	    continue;
-
-	  /* We have a match on a new name -- cache it.  */
-	  add_name_to_object (l, soname);
-	  l->l_soname_added = 1;
-	}
+      /* Special case: both main exe and vdso can have empty name;
+	 so search from head: it is important to return the map for main
+	 a.out; else dlsym(0, ...) will fail unexpectedly.  */
 
-      /* We have a match.  */
-      return l;
+      for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next)
+	if (match_one (name, l))
+	  return l;
+    }
+  else
+    {
+      for (l = _dl_last_entry (&GL(dl_ns)[nsid]); l; l = l->l_prev)
+	if (match_one (name, l))
+	  return l;
     }
 
   /* Display information if we are debugging.  */
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 41dced0e19..297ff9dec5 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -39,6 +39,11 @@
 
 #define VERSTAG(tag)	(DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGIDX (tag))
 
+#ifndef ADDRIDX
+# define ADDRIDX(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM \
+		       + DT_EXTRANUM + DT_VALNUM + DT_ADDRTAGIDX (tag))
+#endif
+
 
 struct sym_val
   {
@@ -325,6 +330,450 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash,
   result->m = (struct link_map *) map;
 }
 
+/* Bob Jenkins's hash function, free for any use.
+   http://burtleburtle.net/bob/hash/  */
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+typedef unsigned int uint32 __attribute__ ((mode (SI)));
+static uint32
+hash_with_seed (const char *k, uint32 length, uint32 seed)
+{
+   uint32 a, b, c, len;
+
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = seed;            /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)
+     {
+       a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
+       b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
+       c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
+       mix (a,b,c);
+       k += 12;
+       len -= 12;
+     }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch (len)              /* all the case statements fall through */
+     {
+     case 11: c+=((uint32)k[10]<<24);
+     case 10: c+=((uint32)k[9]<<16);
+     case 9 : c+=((uint32)k[8]<<8);
+         /* the first byte of c is reserved for the length */
+     case 8 : b+=((uint32)k[7]<<24);
+     case 7 : b+=((uint32)k[6]<<16);
+     case 6 : b+=((uint32)k[5]<<8);
+     case 5 : b+=k[4];
+     case 4 : a+=((uint32)k[3]<<24);
+     case 3 : a+=((uint32)k[2]<<16);
+     case 2 : a+=((uint32)k[1]<<8);
+     case 1 : a+=k[0];
+       /* case 0: nothing left to add */
+     }
+   mix (a,b,c);
+   /*-------------------------------------------- report the result */
+   return c;
+}
+
+
+/* A hash table used to speed up lookups when we have a lot of shared
+   libraries.  We record in the table how many ELF objects in the link
+   map we can safely skip, because they are known to not contain the symbol
+   we are looking for.
+
+   Values start out at N (num objects in the main_map).  We then go through
+   each object (executable or DSO) in their order in the main_map and insert
+   pos_in_main_map into the table.
+
+   If there is already an earlier (smaller) entry in the table, we leave it
+   alone.  It could be a hash collision, or it could be an earlier
+   instance of the same symbol (eg weak __stdout definition in
+   multiple objects).
+
+   To save space, the table contains uint16_t entries, and so we can't
+   record main map positions over 65535.
+
+   It turns out that even when only 25% of slots are used, collision
+   rate is around 10%, which is quite high, and causes lookups to be slow.
+
+   So we use a second table (using a different hash seed). Hash collisions
+   using two separate hashes are quite rare.
+
+   Note that we allocate the two tables together (a single block twice
+   the size of one table).  */
+
+static void
+insert_into_hash_table_1 (int pos_in_main_map, const char *symbol_name,
+			  size_t name_len, dl_position_table_entry_t *table,
+			  int seed)
+{
+  uint32 hash = hash_with_seed (symbol_name, name_len, seed);
+  int pos = hash & GLRO(dl_position_hash_mask);
+  if (pos_in_main_map < table[pos])
+    {
+      table[pos] = pos_in_main_map;
+      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+        _dl_debug_printf ("fastload hash for %s: [%u] = %u (table %u)\n",
+                          symbol_name, (unsigned int) pos,
+                          (unsigned int) table[pos],
+                          (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+    }
+  else
+    {
+      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+        _dl_debug_printf
+          ("fastload hash for %s: [%u] already set to %u <= %u (table %u)\n",
+           symbol_name, (unsigned int) pos,
+           (unsigned int) table[pos],
+           (unsigned int) pos_in_main_map,
+           (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+    }
+}
+
+#define HASH_SEED_A 0
+#define HASH_SEED_B 5381  /* As good as any other value.  */
+
+static void
+insert_into_hash_table (int pos_in_main_map, const char *symbol_name)
+{
+  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
+  const int table_size = GLRO(dl_position_hash_mask) + 1;
+  dl_position_table_entry_t *const table2 = table1 + table_size;
+  const size_t name_len = strlen (symbol_name);
+
+  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table1,
+			    HASH_SEED_A);
+  insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table2,
+			    HASH_SEED_B);
+}
+
+static inline int
+earliest_pos_from_hash_table_1 (const char *symbol_name,
+                                const dl_position_table_entry_t *table,
+                                int seed)
+{
+  int pos;
+  uint32 hash;
+
+  hash = hash_with_seed (symbol_name, strlen (symbol_name), seed);
+  pos = hash & GLRO(dl_position_hash_mask);
+  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+    _dl_debug_printf
+      ("fastload earliest pos for %s: [%u] = %u (table %u)\n",
+       symbol_name, (unsigned int) pos, (unsigned int) table[pos],
+       (GLRO(dl_position_hash_table) == table) ? 0 : 1);
+
+  return table[pos];
+}
+
+static inline int
+earliest_pos_from_hash_table (const char *undef_name)
+{
+  dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table);
+
+  if (table1 == NULL)
+    return 0;  /* Search from the beginning of the list.  */
+  else
+    {
+      const int table_size = GLRO(dl_position_hash_mask) + 1;
+      dl_position_table_entry_t *const table2 = table1 + table_size;
+      const int skip_1 = earliest_pos_from_hash_table_1 (undef_name, table1,
+                                                         HASH_SEED_A);
+      const int skip_2 = earliest_pos_from_hash_table_1 (undef_name, table2,
+                                                         HASH_SEED_B);
+
+      /* Possibilities:
+
+       * Both skip_1 and skip_2 are positive and same:
+         there were no collisions at insertion in either table, or there
+         was double-collision with symbols from the same library.
+
+         If skip_1 == skip_2 == N (num_directly_loaded_objects), then the
+         slot was empty in both tables after they were filled (i.e. there
+         is no symbol that hashes to corresponding slot).  In that case,
+         searching first [0, N) is pointless. See b/5609692.
+
+         True skip_to == skip_1 == skip_2.
+
+       * Both skip_1 and skip_2 are positive but different:
+         there was a collision in one or both tables, or one of the slots
+         remained unfilled after all symbols have been inserted.
+         It is safe to start at max, true skip_to count can not be less
+         than max.  */
+
+      return (skip_1 < skip_2) ? skip_2 : skip_1;
+    }
+}
+
+
+static void
+fill_hash_table (const struct link_map *const map, int pos_in_main_map,
+                 int num_symbols)
+{
+  const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+  int k;
+
+  /* Iterate through the symbols defined in this map and insert them
+     into the bloom filter.  If the symbol wouldn't pass the tests in
+     do_lookup_x(), then don't insert it.  */
+  for (k = 0; k < num_symbols; k++)
+    {
+      const ElfW(Sym) *sym = &symtab[k];
+
+      if (sym->st_value == 0
+          && ELFW(ST_TYPE) (sym->st_info) != STT_TLS)
+        continue;
+
+      switch (ELFW(ST_TYPE) (sym->st_info))
+	{
+	case STT_SECTION:
+	case STT_FILE:
+	case STT_COMMON:
+	  continue;
+	default:
+	  break;
+	}
+
+      switch (ELFW(ST_BIND) (sym->st_info))
+	{
+	case STB_LOCAL:  /* Local symbols are ignored.  */
+	  continue;
+	default:
+	  break;
+	}
+
+      insert_into_hash_table (pos_in_main_map, strtab + sym->st_name);
+    }
+}
+
+static dl_position_table_entry_t *
+allocate_position_hash_table (size_t table_size, int max_pos)
+{
+  /* Implementation detail: we actually allocate two "logical" tables in a
+     single block.  */
+
+  const size_t total_size = 2 * table_size;
+  dl_position_table_entry_t *table
+    = malloc (total_size * sizeof (dl_position_table_entry_t));
+
+  if (table == NULL)
+    return NULL;
+
+  for (int k = 0; k < total_size; ++k)
+    table[k] = max_pos;
+
+  return table;
+}
+
+static int
+num_dynsyms (const struct link_map *const map)
+{
+  if (map->l_info[DT_HASH] != NULL)
+    {
+      /* If DT_HASH is present, we unconditionally use it, since it already
+         has the answer precomputed and waiting for us.  */
+      const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]);
+
+      if (hash != NULL)
+	{
+	  /* DT_HASH dynamic structure points to a symbol hash table
+	     with the following layout:
+
+	     nbucket
+	     nchain       (== number of symbols in dynamic symbol table)
+	     bucket[0]
+	     ...
+
+	     http://refspecs.freestandards.org/elf/TIS1.1.pdf (p. 2.19). */
+
+	  return hash[1];
+	}
+    }
+  else if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL)
+    {
+      /* The best documentation for GNU_HASH I found:
+	 http://blogs.sun.com/ali/entry/gnu_hash_elf_sections
+
+	 _dl_setup_hash() has already set things up for us.  */
+
+      if (__builtin_expect (map->l_nbuckets < 1, 0))
+	{
+	  /* Paranoia: neither gold nor gnu-ld will construct an empty
+	     .gnu.hash, but some other linker just might.  */
+	  return 0;
+	}
+
+      /* In the GNU hash the symbol index of a symbol is determined by
+	 the offset of the symbol's entry in the hash table itself.
+
+	 We start at the last bucket map->l_gnu_chain_zero[map->l_nbuckets-1]
+	 and loop backward from the end until we find a bucket which is
+	 not zero.
+
+	 Then we walk forward to find the last entry in the chain which
+	 starts at that bucket.  The terminating entry gives us desired
+	 symbol index.  */
+
+      int last_bucket_idx = map->l_nbuckets - 1;
+
+      while (last_bucket_idx > 0 &&
+	     map->l_gnu_buckets[last_bucket_idx] == 0)
+	{
+	  --last_bucket_idx;
+	}
+
+      const Elf32_Word last_bucket = map->l_gnu_buckets[last_bucket_idx];
+
+      if (__builtin_expect (last_bucket == 0, 0))
+	{
+	  /* This shouldn't happen unless the library has only static
+	     variables with global ctors, and nothing else.  */
+	  return 0;
+	}
+
+      /* Start of the hash chain for the last non-zero bucket.  */
+      const Elf32_Word *hasharr = &map->l_gnu_chain_zero[last_bucket];
+
+      /* The chain is terminated by an entry with LSBit set.  */
+      while ((*hasharr & 1u) == 0)
+	++hasharr;
+
+      /* Size of dynamic symbol table is "index of last symbol" + 1.  */
+      return hasharr - map->l_gnu_chain_zero + 1;
+    }
+
+  return 0;
+}
+
+/* Create the fastload position hash (if requested).  Given a link_map
+   containing all required objects, iterate through all symbols. For each
+   symbol which could match in do_lookup_x, insert the index of the
+   providing object in main_map.  */
+
+void
+_dl_fill_position_hash (struct link_map *main_map)
+{
+  const int num_objects = main_map->l_searchlist.r_nlist;
+  const int debug_fastload = GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD;
+  int k;
+
+  /* If we have more than dl_position_hash_cutoff shared libraries,
+     fill in the hash table of earliest known positions for symbols in
+     main_map.  We'll use this in do_lookup_x().  */
+
+  /* If cutoff or size is negative, fastload is disabled.  */
+  if (GLRO(dl_position_hash_cutoff) < 0 || GLRO(dl_position_hash_bits) < 0)
+    {
+      if (__builtin_expect (debug_fastload, 0))
+        _dl_debug_printf ("fastload: disabled (configuration)\n");
+      return;
+    }
+
+  /* If we don't have enough mapped objects, fastload is disabled.  */
+  if (num_objects <= GLRO(dl_position_hash_cutoff))
+    {
+      if (__builtin_expect (debug_fastload, 0))
+        _dl_debug_printf ("fastload: disabled (too few objects,"
+                          " %u <= cutoff %u)\n",
+                          num_objects, GLRO(dl_position_hash_cutoff));
+      return;
+    }
+
+  int *ndynsyms = NULL;
+
+  if (GLRO(dl_position_hash_bits) == 0)
+    {
+      /* Table size has not been specified via LD_FASTLOAD_HASH_BITS.
+         Compute appropriate size for <= 25% load factor.  */
+
+      int total_symbols = 0;
+
+      /* It is safe to call alloca() here, because we are executing in
+	 the context of dl_main, i.e. we are on the main stack, and are
+	 not yet using much of it.  Assuming sane upper limit of 10K direct
+	 dependencies, we'll use at most 40K bytes here.  */
+      ndynsyms = alloca (num_objects * sizeof (int));
+
+      for (k = 0; k < num_objects; ++k)
+        {
+          const struct link_map *const map = main_map->l_searchlist.r_list[k];
+	  const int nsyms = num_dynsyms (map);
+
+	  ndynsyms[k] = nsyms;
+	  total_symbols += nsyms;
+	  if (__builtin_expect (debug_fastload, 0))
+	    _dl_debug_printf ("fastload: %s: %u symbols\n", map->l_name, nsyms);
+	}
+
+      /* Ensure table size is >= 4 * total_symbols.  Generally this
+         over-sizes the table by more than 4, since there usually are lots
+         of duplicate symbols (from different DSOs) as well.  */
+
+      GLRO(dl_position_hash_bits)
+        = (8 * sizeof (total_symbols)) - __builtin_clz (total_symbols) + 2;
+
+      if (__builtin_expect (debug_fastload, 0))
+        _dl_debug_printf ("fastload: will use %u hash bits for %u total "
+                          "symbols\n",
+                          GLRO(dl_position_hash_bits), total_symbols);
+    }
+
+  /* If the requested or computed table size is too large, limit it.  */
+  if (GLRO(dl_position_hash_bits) > DL_POSITION_HASH_BITS_MAX)
+    {
+      if (__builtin_expect (debug_fastload, 0))
+        _dl_debug_printf ("fastload: requested table too large"
+                          " (%u > max %u bits)\n",
+                          GLRO(dl_position_hash_bits),
+                          DL_POSITION_HASH_BITS_MAX);
+      GLRO(dl_position_hash_bits) = DL_POSITION_HASH_BITS_MAX;
+    }
+
+  int table_size = 1 << GLRO(dl_position_hash_bits);
+  GLRO(dl_position_hash_mask) = table_size - 1;
+  if (__builtin_expect (debug_fastload, 0))
+    _dl_debug_printf ("fastload: enabled with %u entry table\n", table_size);
+
+  int max_pos = num_objects < DL_POSITION_MAX ? num_objects : DL_POSITION_MAX;
+  GLRO(dl_position_hash_table) = allocate_position_hash_table (table_size,
+							       max_pos);
+  if (GLRO(dl_position_hash_table) == NULL)
+    {
+      if (__builtin_expect (debug_fastload, 0))
+	_dl_debug_printf ("fastload: failed to allocate table\n");
+      return;
+    }
+
+  /* There is no point in going beyond max_pos: the rest of the table
+     has already been saturated with max_pos value.  */
+  for (k = 0; k < max_pos; ++k)
+    {
+      const struct link_map *const map = main_map->l_searchlist.r_list[k];
+
+      /* Reuse num_dynsyms() result we computed earlier if possible:
+	 recomputing it may be somewhat expensive with DT_GNU_HASH.  */
+      const int nsyms = ndynsyms ? ndynsyms[k] : num_dynsyms (map);
+
+      fill_hash_table (map, k, nsyms);
+    }
+}
+
 /* Inner part of the lookup functions.  We return a value > 0 if we
    found the symbol, the value 0 if nothing is found and < 0 if
    something bad happened.  */
@@ -344,6 +793,35 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
   __asm volatile ("" : "+r" (n), "+m" (scope->r_list));
   struct link_map **list = scope->r_list;
 
+  if (GLRO(dl_enable_fastload))
+  {
+  if (scope == GL(dl_ns)[LM_ID_BASE]._ns_main_searchlist && i == 0)
+    {
+      const int skip_to = earliest_pos_from_hash_table (undef_name);
+
+      if (skip_to < n)
+	{
+	  i = skip_to;
+	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+	    _dl_debug_printf ("fastload %s skipping to %u\n",
+			      undef_name, (unsigned int) i);
+	}
+      else
+	{
+	  /* Symbol was not found in any of the initial libraries,
+	     and no new libraries have been added.  */
+
+	  assert (skip_to == n);
+
+	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0))
+	    _dl_debug_printf ("fastload %s, %u >= %u (scope->r_nlist)\n",
+			      undef_name, (unsigned int) skip_to,
+			      (unsigned int) n);
+	  return 0;
+	}
+    }
+  }
+
   do
     {
       const struct link_map *map = list[i]->l_real;
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 9349134fa0..4f1c9de4c5 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -142,6 +142,13 @@ hp_timing_t _dl_cpuclock_offset;
 
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 
+/* The merged position hash table used if we have a lot of shared objects.  */
+dl_position_table_entry_t *_dl_position_hash_table;
+int _dl_position_hash_mask;
+int _dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT;
+int _dl_position_hash_bits;
+int _dl_enable_fastload = 1;
+
 size_t _dl_pagesize = EXEC_PAGESIZE;
 
 int _dl_inhibit_cache;
diff --git a/elf/dl-version.c b/elf/dl-version.c
index 7860cc817e..6c1d941233 100644
--- a/elf/dl-version.c
+++ b/elf/dl-version.c
@@ -34,11 +34,23 @@ find_needed (const char *name, struct link_map *map)
   struct link_map *tmap;
   unsigned int n;
 
-  for (tmap = GL(dl_ns)[map->l_ns]._ns_loaded; tmap != NULL;
-       tmap = tmap->l_next)
-    if (_dl_name_match_p (name, tmap))
-      return tmap;
-
+  if (!GLRO(dl_enable_fastload) || name[0] == '\0')
+    {
+      /* Special case: both main exe and vdso can have empty name;
+	 so search from head: it is important to return the map for main
+	 a.out; else dlsym(0, ...) will fail unexpectedly.  */
+      for (tmap = GL(dl_ns)[map->l_ns]._ns_loaded; tmap != NULL;
+	   tmap = tmap->l_next)
+	if (_dl_name_match_p (name, tmap))
+	  return tmap;
+    }
+  else
+    {
+      for (tmap = _dl_last_entry (&GL(dl_ns)[map->l_ns]); tmap != NULL;
+	   tmap = tmap->l_prev)
+	if (_dl_name_match_p (name, tmap))
+	  return tmap;
+    }
   /* The required object is not in the global scope, look to see if it is
      a dependency of the current object.  */
   for (n = 0; n < map->l_searchlist.r_nlist; n++)
diff --git a/elf/rtld.c b/elf/rtld.c
index 9468cd60d9..4f61d49d0a 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -291,6 +291,9 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
     ._dl_open = _dl_open,
     ._dl_close = _dl_close,
     ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft,
+    ._dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT,
+    ._dl_position_hash_bits = 0,
+    ._dl_enable_fastload = 1,
 #ifdef HAVE_DL_DISCOVER_OSVERSION
     ._dl_discover_osversion = _dl_discover_osversion
 #endif
@@ -1767,6 +1770,12 @@ ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n",
      dependencies in the executable's searchlist for symbol resolution.  */
   HP_TIMING_NOW (start);
   _dl_map_object_deps (main_map, preloads, npreloads, mode == trace, 0);
+
+  /* We have now finished loading every required (linked-in) object.
+     Set up the position hash if needed.  */
+  if (GLRO(dl_enable_fastload) == 1)
+  _dl_fill_position_hash (main_map);
+
   HP_TIMING_NOW (stop);
   HP_TIMING_DIFF (diff, start, stop);
   HP_TIMING_ACCUM_NT (load_time, diff);
@@ -2394,10 +2403,12 @@ process_dl_debug (const char *dl_debug)
 	DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS },
       { LEN_AND_STR ("scopes"), "display scope information",
 	DL_DEBUG_SCOPES },
+      { LEN_AND_STR ("fastload"), "display fastload information",
+        DL_DEBUG_FASTLOAD },
       { LEN_AND_STR ("all"), "all previous options combined",
 	DL_DEBUG_LIBS | DL_DEBUG_RELOC | DL_DEBUG_FILES | DL_DEBUG_SYMBOLS
 	| DL_DEBUG_BINDINGS | DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS
-	| DL_DEBUG_SCOPES },
+	| DL_DEBUG_SCOPES | DL_DEBUG_FASTLOAD },
       { LEN_AND_STR ("statistics"), "display relocation statistics",
 	DL_DEBUG_STATISTICS },
       { LEN_AND_STR ("unused"), "determined unused DSOs",
@@ -2680,6 +2691,33 @@ process_envvars (enum mode *modep)
 	  EXTRA_LD_ENVVARS
 #endif
 	}
+
+      /* Handle all fastload-related env vars here.  This may duplicate
+         effort with the switch table above, but it localizes changes
+         made by the fastload patch.  On Linux, the '15' case is used
+         by another environment variable (LIBRARY_VERSION) and much
+         change would be needed to add support adding a variable of
+         that length with the existing style.  */
+      switch (len)
+        {
+        case 15:
+          if (memcmp (envline, "FASTLOAD_CUTOFF", 15) == 0)
+	    GLRO(dl_position_hash_cutoff)
+              = _dl_strtoul (&envline[16], NULL);;
+          break;
+
+        case 16:
+          if (memcmp (envline, "FASTLOAD_ENABLED", 16) == 0)
+	    GLRO(dl_enable_fastload)
+              = _dl_strtoul (&envline[17], NULL);;
+          break;
+
+        case 18:
+          if (memcmp (envline, "FASTLOAD_HASH_BITS", 18) == 0)
+	    GLRO(dl_position_hash_bits)
+              = _dl_strtoul (&envline[19], NULL);;
+          break;
+        }
     }
 
   /* The caller wants this information.  */
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 7ea931817b..1df593c96b 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -27,6 +27,7 @@
 #include <stddef.h>
 #include <string.h>
 #include <stdint.h>
+#include <assert.h>
 
 #include <elf.h>
 #include <dlfcn.h>
@@ -275,6 +276,13 @@ typedef void (*receiver_fct) (int, const char *, const char *);
    The `-ldl' library functions in <dlfcn.h> provide a simple
    user interface to run-time dynamic linking.  */
 
+/* To save storage space in the hash table, we use uint16_t, which allows
+   us to support up to 64K of DSOs efficiently.  */
+typedef uint16_t dl_position_table_entry_t;
+
+/* 65535 -- max position that could be recorded in
+   dl_position_table_entry_t.  */
+#define DL_POSITION_MAX UINT16_MAX
 
 #ifndef SHARED
 # define EXTERN extern
@@ -302,6 +310,8 @@ struct rtld_global
   {
     /* A pointer to the map for the main map.  */
     struct link_map *_ns_loaded;
+    /* Tail end of the _ns_loaded link chain.  Computed on demand.  */
+    struct link_map *_ns_last;
     /* Number of object in the _dl_loaded list.  */
     unsigned int _ns_nloaded;
     /* Direct pointer to the searchlist of the main object.  */
@@ -350,7 +360,7 @@ struct rtld_global
   /* The object to be initialized first.  */
   EXTERN struct link_map *_dl_initfirst;
 
-#if HP_SMALL_TIMING_AVAIL
+#if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD
   /* Start time on CPU clock.  */
   EXTERN hp_timing_t _dl_cpuclock_offset;
 #endif
@@ -487,6 +497,9 @@ struct rtld_global_ro
 #define DL_DEBUG_HELP       (1 << 10)
 #define DL_DEBUG_PRELINK    (1 << 11)
 
+/* Google-local.  */
+#define DL_DEBUG_FASTLOAD   (1 << 12)
+
   /* OS version.  */
   EXTERN unsigned int _dl_osversion;
   /* Platform name.  */
@@ -576,6 +589,29 @@ struct rtld_global_ro
      below.  */
   EXTERN struct r_search_path_elem *_dl_init_all_dirs;
 
+  /* The merged hash table used if we have a lot of shared objects. */
+  EXTERN dl_position_table_entry_t *_dl_position_hash_table;
+  EXTERN int _dl_position_hash_mask;
+  EXTERN int _dl_position_hash_bits;
+  EXTERN int _dl_position_hash_cutoff;
+  EXTERN int _dl_enable_fastload;
+
+#define DL_POSITION_HASH_BITS_MAX       27  /* (1 << 27) entries.  */
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#define DL_POSITION_HASH_CUTOFF_DEFAULT -1  /* Disabled.  */
+#else
+#define DL_POSITION_HASH_CUTOFF_DEFAULT 32  /* > 32 shared libs.  */
+#endif
+
+  /* Colon-separated list of absolute paths to ld.so.cache files
+     we'll load.  */
+  EXTERN const char *_google_ld_so_cache_list;
+
+#if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD
+  /* Overhead of a high-precision timing measurement.  */
+  EXTERN hp_timing_t _dl_hp_timing_overhead;
+#endif
+
 #ifdef NEED_DL_SYSINFO
   /* Syscall handling improvements.  This is very specific to x86.  */
   EXTERN uintptr_t _dl_sysinfo;
@@ -750,6 +786,8 @@ _dl_dprintf (int fd, const char *fmt, ...)
     }									      \
   while (1)
 
+/* Fill the position hash table used if we have a lot of shared libraries.  */
+extern void _dl_fill_position_hash (struct link_map *main_map);
 
 /* An exception raised by the _dl_signal_error function family and
    caught by _dl_catch_error function family.  Exceptions themselves
@@ -1057,6 +1095,8 @@ extern ElfW(Addr) _dl_sysdep_start (void **start_argptr,
 
 extern void _dl_sysdep_start_cleanup (void) attribute_hidden;
 
+extern int _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr)
+     attribute_hidden;
 
 /* Determine next available module ID.  */
 extern size_t _dl_next_tls_modid (void) attribute_hidden;
@@ -1193,6 +1233,31 @@ rtld_active (void)
 }
 #endif
 
+/* Find last entry in the given scope.  Cache the result.  */
+static inline struct link_map *
+_dl_last_entry (struct link_namespaces *ns)
+{
+  struct link_map *map = ns->_ns_last;
+  size_t len = 0;
+
+  if (map == NULL)
+    map = ns->_ns_loaded;
+
+  if (map == NULL)
+    return NULL;
+
+  while (map->l_next != NULL)
+    {
+      map = map->l_next;
+      /* If we have more than _ns_nloaded libraries, it's likely we are
+	 looking at a corrupt list.  Fail hard.  */
+      assert (len++ < ns->_ns_nloaded);
+    }
+
+  ns->_ns_last = map;
+  return map;
+}
+
 __END_DECLS
 
 #endif /* ldsodefs.h */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-28  0:41 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-28  0:41 [glibc/google/grte/v5-2.27/master] Add "fastload" support Fangrui Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).