public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [RFC] symbol lookup cache
@ 2014-12-01  8:23 Doug Evans
  2014-12-03  0:50 ` Doug Evans
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Doug Evans @ 2014-12-01  8:23 UTC (permalink / raw)
  To: gdb-patches, brobecker

Hi.

Holidays are nice for having the time to take on
projects one is interested in but doesn't have
time to otherwise do.

I looked at a couple of areas for improvement
in gdb's symbol handling over the last couple
of days.  One is .gdb_index support for tab
completion, which is the topic for another mail.
This email is about a symbol lookup cache.

..gdb_index helps speed things up for one objfile,
but when there are 100s (or 1000s) of shared
libraries, symbol lookup can still take awhile,
even with .gdb_index.
[I'm setting aside a planned change to basic
symbol lookup to use the index better.
We'll still, I think, want a cache even
with that change.]

This patch still needs more testing
and I need to collect more perf data,
but it's a start at caching symbol lookups.
This first pass is just for the case
where we iterate over all objfiles for
lookups in the global or static blocks.
Low hanging fruit.
At the moment it's just RFC.

Joel: I noticed ada-lang.c has its own cache.
I haven't studied it too closely (yet).
I can imagine wanting to keep some variant of both.
This is just a "heads up".

btw, another area that needs caching
is pc lookups (e.g., find_pc_sect_compunit_symtab).
That's the topic of yet another email.

diff --git a/gdb/symtab.c b/gdb/symtab.c
index 345c20d..f9ef72f 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -104,9 +104,78 @@ struct main_info
   enum language language_of_main;
 };
 
+/* Program space key for finding its symbol cache.  */
+
+static const struct program_space_data *symbol_cache_key;
+
+/* The symbol cache size.
+   TODO(dje): One can imagine making this dynamic based on the size of the
+   program, but OTOH this cache consumes minimal space, and there is no extra
+   cpu cost for large N (except when flushing the cache, which is rare).
+   The value here is just a first attempt.  */
+#define SYMBOL_CACHE_SIZE 127
+
+/* symbol_cache_lookup returns this if a previous lookup failed to find the
+   symbol in any objfile.  */
+#define SYMBOL_LOOKUP_FAILED ((struct symbol *) 1)
+
+/* Recording lookups that don't find the symbol is just as important, if not
+   more so, than recording found symbols.  */
+
+enum symbol_cache_slot_state
+{
+  SYMBOL_SLOT_UNUSED,
+  SYMBOL_SLOT_NOT_FOUND,
+  SYMBOL_SLOT_FOUND
+};
+
+/* Symbols don't specify global vs static block.
+   So keep them in separate caches.  */
+
+struct block_symbol_cache
+{
+  unsigned int hits;
+  unsigned int misses;
+
+  struct symbol_cache_slot
+  {
+    enum symbol_cache_slot_state state;
+    union
+    {
+      struct symbol *found;
+      struct
+      {
+	char *name;
+	domain_enum domain;
+      } not_found;
+    } value;
+  } symbols[SYMBOL_CACHE_SIZE];
+};
+
+/* The symbol cache.
+
+   Searching for symbols in the static and global blocks over multiple objfiles
+   again and again can be slow, as can searching very big objfiles.  This is a
+   simple cache to improve symbol lookup performance, which is critical to
+   overall gdb performance.
+
+   Symbols are hashed on the name, its domain, and block.
+   They are also hashed on their objfile for objfile-specific lookups.
+
+   TODO(dje): Collect some numbers to guide the choices here.  */
+
+struct symbol_cache
+{
+  struct block_symbol_cache global_symbols;
+  struct block_symbol_cache static_symbols;
+};
+
 /* When non-zero, print debugging messages related to symtab creation.  */
 unsigned int symtab_create_debug = 0;
 
+/* When non-zero, print debugging messages related to symbol cache lookup.  */
+static unsigned int symbol_cache_debug = 0;
+
 /* Non-zero if a file may be known by two different basenames.
    This is the uncommon case, and significantly slows down gdb.
    Default set to "off" to not slow down the common case.  */
@@ -1089,6 +1158,309 @@ expand_symtab_containing_pc (CORE_ADDR pc, struct obj_section *section)
   }
 }
 \f
+/* Hash function for the symbol cache.  */
+
+static unsigned int
+hash_symbol_entry (const char *name, domain_enum domain)
+{
+  unsigned int hash = 0;
+
+  if (name != NULL)
+    hash += htab_hash_string (name);
+
+  /* TODO(dje): Too simplistic?  */
+  hash += domain;
+
+  return hash;
+}
+
+/* Equality function for the symbol cache.  */
+
+static int
+eq_symbol_entry (const struct symbol_cache_slot *slot,
+		 const char *name, domain_enum domain)
+{
+  const char *slot_name;
+  domain_enum slot_domain;
+
+  if (slot->state == SYMBOL_SLOT_UNUSED)
+    return 0;
+
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    {
+      slot_name = slot->value.not_found.name;
+      slot_domain = slot->value.not_found.domain;
+    }
+  else
+    {
+      slot_name = SYMBOL_LINKAGE_NAME (slot->value.found);
+      slot_domain = SYMBOL_DOMAIN (slot->value.found);
+    }
+
+  /* NULL names match.  */
+  if (slot_name == NULL && name == NULL)
+    ;
+  else if (slot_name != NULL && name != NULL)
+    {
+      if (strcmp (slot_name, name) != 0)
+	return 0;
+    }
+  else
+    {
+      /* Only one name is NULL.  */
+      return 0;
+    }
+
+  if (slot_domain != domain)
+    return 0;
+
+  return 1;
+}
+
+/* Return the symbol cache of PSPACE.
+   Create one if it doesn't exist yet.  */
+
+static struct symbol_cache *
+get_symbol_cache (struct program_space *pspace)
+{
+  struct symbol_cache *cache = program_space_data (pspace, symbol_cache_key);
+
+  if (cache == NULL)
+    {
+      cache = XCNEW (struct symbol_cache);
+      set_program_space_data (pspace, symbol_cache_key, cache);
+    }
+
+  return cache;
+}
+
+/* Delete the symbol cache of PSPACE.
+   Called when PSPACE is destroyed.  */
+
+static void
+symbol_cache_cleanup (struct program_space *pspace, void *data)
+{
+  struct symbol_cache *cache = data;
+
+  xfree (cache);
+}
+
+/* Lookup symbol NAME,DOMAIN in BLOCK in the symbol cache of PSPACE.
+   The result is the symbol if found, SYMBOL_LOOKUP_FAILED if a previous lookup
+   failed (and thus this one will too), or NULL if the symbol is not present
+   in the cache.
+   *SLOT is set to the cache slot of the symbol, whether found or not
+   found.  */
+
+static struct symbol *
+symbol_cache_lookup (struct symbol_cache *cache, int block,
+		     const char *name, domain_enum domain,
+		     struct symbol_cache_slot **slot_ptr)
+{
+  struct block_symbol_cache *bsc;
+  unsigned int hash;
+  struct symbol_cache_slot *slot;
+
+  if (block == GLOBAL_BLOCK)
+    bsc = &cache->global_symbols;
+  else
+    bsc = &cache->static_symbols;
+  hash = hash_symbol_entry (name, domain);
+  slot = bsc->symbols + hash % SYMBOL_CACHE_SIZE;
+  *slot_ptr = slot;
+  if (eq_symbol_entry (slot, name, domain))
+    {
+      if (symbol_cache_debug)
+	fprintf_unfiltered (gdb_stdlog,
+			    "%s block symbol cache hit%s for %s, %d\n",
+			    block == GLOBAL_BLOCK ? "Global" : "Static",
+			    slot->state == SYMBOL_SLOT_NOT_FOUND
+			    ? " (not found)" : "",
+			    name, domain);
+      ++bsc->hits;
+      if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+	return SYMBOL_LOOKUP_FAILED;
+      return slot->value.found;
+    }
+  if (symbol_cache_debug)
+    {
+      fprintf_unfiltered (gdb_stdlog,
+			  "%s block symbol cache miss for %s, %d\n",
+			  block == GLOBAL_BLOCK ? "Global" : "Static",
+			  name, domain);
+    }
+  ++bsc->misses;
+  return NULL;
+}
+
+/* Clear out SLOT.  */
+
+static void
+symbol_cache_clear_slot (struct symbol_cache_slot *slot)
+{
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    xfree (slot->value.not_found.name);
+  slot->state = SYMBOL_SLOT_UNUSED;
+}
+
+/* Mark SYMBOL as found in SLOT.  */
+
+static void
+symbol_cache_mark_found (struct symbol_cache_slot *slot, struct symbol *symbol)
+{
+  symbol_cache_clear_slot (slot);
+  slot->state = SYMBOL_SLOT_FOUND;
+  slot->value.found = symbol;
+}
+
+/* Mark symbol NAME, DOMAIN as not found in SLOT.  */
+
+static void
+symbol_cache_mark_not_found (struct symbol_cache_slot *slot,
+			     const char *name, domain_enum domain)
+{
+  symbol_cache_clear_slot (slot);
+  slot->state = SYMBOL_SLOT_NOT_FOUND;
+  slot->value.not_found.name = xstrdup (name);
+  slot->value.not_found.domain = domain;
+}
+
+/* Flush the symbol cache of PSPACE.  */
+
+static void
+symbol_cache_flush (struct program_space *pspace)
+{
+  struct symbol_cache *cache = get_symbol_cache (pspace);
+  int pass;
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      struct block_symbol_cache *bsc
+	= pass == 0 ? &cache->global_symbols : &cache->static_symbols;
+      int i;
+
+      for (i = 0; i < SYMBOL_CACHE_SIZE; ++i)
+	symbol_cache_clear_slot (&bsc->symbols[i]);
+    }
+
+  memset (&cache->global_symbols, 0, sizeof (cache->global_symbols));
+  memset (&cache->static_symbols, 0, sizeof (cache->static_symbols));
+}
+
+/* Dump CACHE.  */
+
+static void
+symbol_cache_dump (const struct symbol_cache *cache)
+{
+  int pass;
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? &cache->global_symbols : &cache->static_symbols;
+      int i;
+
+      if (pass == 0)
+	printf_filtered ("Global symbols:\n");
+      else
+	printf_filtered ("Static symbols:\n");
+
+      for (i = 0; i < SYMBOL_CACHE_SIZE; ++i)
+	{
+	  switch (bsc->symbols[i].state)
+	    {
+	    case SYMBOL_SLOT_UNUSED:
+	      break;
+	    case SYMBOL_SLOT_NOT_FOUND:
+	      printf_filtered ("  [%-2d] = %s\n", i, /* TODO(dje): "not found" */
+			       bsc->symbols[i].value.not_found.name);
+	      break;
+	    case SYMBOL_SLOT_FOUND:
+	      printf_filtered ("  [%-2d] = %s\n", i,
+			       SYMBOL_PRINT_NAME (bsc->symbols[i].value.found));
+	      break;
+	    }
+	}
+    }
+}
+
+/* The "mt print symbol-cache" command.  */
+
+static void
+maintenance_print_symbol_cache (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache = get_symbol_cache (pspace);
+
+      QUIT;
+      symbol_cache_dump (cache);
+    }
+}
+
+/* Print usage statistics of CACHE.  */
+
+static void
+symbol_cache_stats (struct symbol_cache *cache)
+{
+  int pass;
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? &cache->global_symbols : &cache->static_symbols;
+
+      if (pass == 0)
+	printf_filtered ("Global block cache stats:\n");
+      else
+	printf_filtered ("Static block cache stats:\n");
+
+      printf_filtered ("  size:   %u\n", SYMBOL_CACHE_SIZE);
+      printf_filtered ("  hits:   %u\n", bsc->hits);
+      printf_filtered ("  misses: %u\n", bsc->misses);
+    }
+}
+
+/* The "mt print symbol-cache-statistics" command.  */
+
+static void
+maintenance_print_symbol_cache_statistics (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache = get_symbol_cache (pspace);
+
+      QUIT;
+      printf_filtered (_("Symbol cache statistics for pspace %d, '%s':\n"),
+		       pspace->num,
+		       pspace->symfile_object_file != NULL
+		       ? objfile_name (pspace->symfile_object_file)
+		       : "NULL");
+      symbol_cache_stats (cache);
+    }
+}
+
+/* This module's 'new_objfile' observer.  */
+
+static void
+symtab_new_objfile_observer (struct objfile *objfile)
+{
+  /* Ideally we'd use OBJFILE->pspace, but OBJFILE may be NULL.  */
+  symbol_cache_flush (current_program_space);
+}
+
+/* This module's 'free_objfile' observer.  */
+
+static void
+symtab_free_objfile_observer (struct objfile *objfile)
+{
+  symbol_cache_flush (objfile->pspace);
+}
+\f
 /* Debug symbols usually don't have section information.  We need to dig that
    out of the minimal symbols and stash that in the debug symbol.  */
 
@@ -1818,16 +2190,30 @@ lookup_symbol_in_objfile (struct objfile *objfile, int block_index,
 struct symbol *
 lookup_static_symbol (const char *name, const domain_enum domain)
 {
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
   struct objfile *objfile;
   struct symbol *result;
+  struct symbol_cache_slot *slot;
+
+  result = symbol_cache_lookup (cache, STATIC_BLOCK, name, domain, &slot);
+  if (result != NULL)
+    {
+      if (result == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return result;
+    }
 
   ALL_OBJFILES (objfile)
     {
       result = lookup_symbol_in_objfile (objfile, STATIC_BLOCK, name, domain);
       if (result != NULL)
-	return result;
+	{
+	  symbol_cache_mark_found (slot, result);
+	  return result;
+	}
     }
 
+  symbol_cache_mark_not_found (slot, name, domain);
   return NULL;
 }
 
@@ -1875,9 +2261,11 @@ lookup_global_symbol (const char *name,
 		      const struct block *block,
 		      const domain_enum domain)
 {
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
   struct symbol *sym = NULL;
   struct objfile *objfile = NULL;
   struct global_sym_lookup_data lookup_data;
+  struct symbol_cache_slot *slot;
 
   /* Call library-specific lookup procedure.  */
   objfile = lookup_objfile_from_block (block);
@@ -1886,6 +2274,14 @@ lookup_global_symbol (const char *name,
   if (sym != NULL)
     return sym;
 
+  sym = symbol_cache_lookup (cache, GLOBAL_BLOCK, name, domain, &slot);
+  if (sym != NULL)
+    {
+      if (sym == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return sym;
+    }
+
   memset (&lookup_data, 0, sizeof (lookup_data));
   lookup_data.name = name;
   lookup_data.domain = domain;
@@ -1893,6 +2289,11 @@ lookup_global_symbol (const char *name,
     (objfile != NULL ? get_objfile_arch (objfile) : target_gdbarch (),
      lookup_symbol_global_iterator_cb, &lookup_data, objfile);
 
+  if (lookup_data.result != NULL)
+    symbol_cache_mark_found (slot, lookup_data.result);
+  else
+    symbol_cache_mark_not_found (slot, name, domain);
+
   return lookup_data.result;
 }
 
@@ -5235,6 +5636,9 @@ _initialize_symtab (void)
   main_progspace_key
     = register_program_space_data_with_cleanup (NULL, main_info_cleanup);
 
+  symbol_cache_key
+    = register_program_space_data_with_cleanup (NULL, symbol_cache_cleanup);
+
   add_info ("variables", variables_info, _("\
 All global and static variable names, or those matching REGEXP."));
   if (dbx_commands)
@@ -5302,5 +5706,25 @@ A value greater than 1 provides more verbose information."),
 			     NULL,
 			     &setdebuglist, &showdebuglist);
 
+  add_setshow_zuinteger_cmd ("symbol-cache", no_class, &symbol_cache_debug,
+			     _("Set debugging of symbol cache usage."),
+			     _("Show debugging of symbol cache usage."), _("\
+When enabled (non-zero), debugging messages are printed when looking up\n\
+symbols in the symbol cache."),
+			     NULL,
+			     NULL,
+			     &setdebuglist, &showdebuglist);
+
+  add_cmd ("symbol-cache", class_maintenance, maintenance_print_symbol_cache,
+	   _("Dump the symbol cache for each program space."),
+	   &maintenanceprintlist);
+
+  add_cmd ("symbol-cache-statistics", class_maintenance,
+	   maintenance_print_symbol_cache_statistics,
+	   _("Print symbol cache statistics for each program space."),
+	   &maintenanceprintlist);
+
   observer_attach_executable_changed (symtab_observer_executable_changed);
+  observer_attach_new_objfile (symtab_new_objfile_observer);
+  observer_attach_free_objfile (symtab_free_objfile_observer);
 }

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-01  8:23 [RFC] symbol lookup cache Doug Evans
@ 2014-12-03  0:50 ` Doug Evans
  2014-12-20  7:57   ` [PATCH] " Doug Evans
  2014-12-03  2:46 ` [RFC] " Joel Brobecker
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Doug Evans @ 2014-12-03  0:50 UTC (permalink / raw)
  To: gdb-patches, Joel Brobecker

On Mon, Dec 1, 2014 at 12:23 AM, Doug Evans <dje@google.com> wrote:
>
> Hi.
>
> Holidays are nice for having the time to take on
> projects one is interested in but doesn't have
> time to otherwise do.
>
> I looked at a couple of areas for improvement
> in gdb's symbol handling over the last couple
> of days.  One is .gdb_index support for tab
> completion, which is the topic for another mail.
> This email is about a symbol lookup cache.
>
> ..gdb_index helps speed things up for one objfile,
> but when there are 100s (or 1000s) of shared
> libraries, symbol lookup can still take awhile,
> even with .gdb_index.
> [I'm setting aside a planned change to basic
> symbol lookup to use the index better.
> We'll still, I think, want a cache even
> with that change.]
>
> This patch still needs more testing
> and I need to collect more perf data,
> but it's a start at caching symbol lookups.
> This first pass is just for the case
> where we iterate over all objfiles for
> lookups in the global or static blocks.
> Low hanging fruit.
> At the moment it's just RFC.

[bleah, sorry for the repeat Joel]

One thing this patch is missing is the ability to turn the cache off,
both for performance measurements and for debugging.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-01  8:23 [RFC] symbol lookup cache Doug Evans
  2014-12-03  0:50 ` Doug Evans
@ 2014-12-03  2:46 ` Joel Brobecker
  2014-12-04 16:38 ` Pedro Alves
  2014-12-04 19:16 ` Jan Kratochvil
  3 siblings, 0 replies; 20+ messages in thread
From: Joel Brobecker @ 2014-12-03  2:46 UTC (permalink / raw)
  To: Doug Evans; +Cc: gdb-patches

> Joel: I noticed ada-lang.c has its own cache.
> I haven't studied it too closely (yet).
> I can imagine wanting to keep some variant of both.
> This is just a "heads up".

A priori, I will eventually try to migrate Ada to the standard cache
if we can. If symbol lookup has taught me anything, it's that it is
really costly to do things your way...

-- 
Joel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-01  8:23 [RFC] symbol lookup cache Doug Evans
  2014-12-03  0:50 ` Doug Evans
  2014-12-03  2:46 ` [RFC] " Joel Brobecker
@ 2014-12-04 16:38 ` Pedro Alves
  2014-12-04 19:17   ` Doug Evans
  2014-12-04 19:16 ` Jan Kratochvil
  3 siblings, 1 reply; 20+ messages in thread
From: Pedro Alves @ 2014-12-04 16:38 UTC (permalink / raw)
  To: Doug Evans, gdb-patches, brobecker

On 12/01/2014 08:23 AM, Doug Evans wrote:
> Hi.
> 
> Holidays are nice for having the time to take on
> projects one is interested in but doesn't have
> time to otherwise do.
> 
> I looked at a couple of areas for improvement
> in gdb's symbol handling over the last couple
> of days.  One is .gdb_index support for tab
> completion, which is the topic for another mail.
> This email is about a symbol lookup cache.
> 
> ..gdb_index helps speed things up for one objfile,
> but when there are 100s (or 1000s) of shared
> libraries, symbol lookup can still take awhile,
> even with .gdb_index.
> [I'm setting aside a planned change to basic
> symbol lookup to use the index better.
> We'll still, I think, want a cache even
> with that change.]
> 
> This patch still needs more testing
> and I need to collect more perf data,
> but it's a start at caching symbol lookups.
> This first pass is just for the case
> where we iterate over all objfiles for
> lookups in the global or static blocks.
> Low hanging fruit.
> At the moment it's just RFC.

I don't have particular comments on the patch, but a cache in
principle sounds like a great idea to me.

You've probably thought this through already, but, my only worry
is whether this could affect which symbol is found.  E.g., if
there are multiple instances of a static global symbol in the
program.  ISTR expressions find the instance in the current
module (or CU?) first.  I may be thinking of solib_global_lookup.
Could the cache hit the symbol in another module first?

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-01  8:23 [RFC] symbol lookup cache Doug Evans
                   ` (2 preceding siblings ...)
  2014-12-04 16:38 ` Pedro Alves
@ 2014-12-04 19:16 ` Jan Kratochvil
  2014-12-04 19:24   ` Doug Evans
  3 siblings, 1 reply; 20+ messages in thread
From: Jan Kratochvil @ 2014-12-04 19:16 UTC (permalink / raw)
  To: Doug Evans; +Cc: gdb-patches, brobecker

On Mon, 01 Dec 2014 09:23:49 +0100, Doug Evans wrote:
> ..gdb_index helps speed things up for one objfile,
> but when there are 100s (or 1000s) of shared
> libraries, symbol lookup can still take awhile,
> even with .gdb_index.
> [I'm setting aside a planned change to basic
> symbol lookup to use the index better.
> We'll still, I think, want a cache even
> with that change.]

While I have no numbers personally I do not think GDB would still need a cache
after that change.

"Accelerate lookup_symbol_aux_objfile 85x" is so fast in a fraction of second
because it traverses 146426 symtabs.  So 146426 is OK but 1979341 was not (it
was in some way more than 1979341 before you refactored symtabs).

I was considering in the past to have some merged .gdb_index for all the
shared libraries used by some app on disk (not just in memory).  But that
makes sense for HDD but less for SSD with the regular ~100 libraries.  While
it make more sense for Google 1000+ libraries I am not sure if Google always
uses the same set of 1000+ libraries for a given app.


Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-04 16:38 ` Pedro Alves
@ 2014-12-04 19:17   ` Doug Evans
  0 siblings, 0 replies; 20+ messages in thread
From: Doug Evans @ 2014-12-04 19:17 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches, Joel Brobecker

On Thu, Dec 4, 2014 at 8:38 AM, Pedro Alves <palves@redhat.com> wrote:
>
> On 12/01/2014 08:23 AM, Doug Evans wrote:
> > Hi.
> >
> > Holidays are nice for having the time to take on
> > projects one is interested in but doesn't have
> > time to otherwise do.
> >
> > I looked at a couple of areas for improvement
> > in gdb's symbol handling over the last couple
> > of days.  One is .gdb_index support for tab
> > completion, which is the topic for another mail.
> > This email is about a symbol lookup cache.
> >
> > ..gdb_index helps speed things up for one objfile,
> > but when there are 100s (or 1000s) of shared
> > libraries, symbol lookup can still take awhile,
> > even with .gdb_index.
> > [I'm setting aside a planned change to basic
> > symbol lookup to use the index better.
> > We'll still, I think, want a cache even
> > with that change.]
> >
> > This patch still needs more testing
> > and I need to collect more perf data,
> > but it's a start at caching symbol lookups.
> > This first pass is just for the case
> > where we iterate over all objfiles for
> > lookups in the global or static blocks.
> > Low hanging fruit.
> > At the moment it's just RFC.
>
> I don't have particular comments on the patch, but a cache in
> principle sounds like a great idea to me.
>
> You've probably thought this through already, but, my only worry
> is whether this could affect which symbol is found.  E.g., if
> there are multiple instances of a static global symbol in the
> program.  ISTR expressions find the instance in the current
> module (or CU?) first.  I may be thinking of solib_global_lookup.
> Could the cache hit the symbol in another module first?
>
> Thanks,
> Pedro Alves
>


Oh curse you gmail.  Sorry for the resend.

The intent is to perform the caching only from lookups after all
context-specific searches have been done.
E.g, we specifically do lookups in the current STATIC_BLOCK (via
lookup_symbol_in_static_block) and then call the
"loop-over-all-static-blocks" routine (lookup_static_symbol), and it's
only the latter that employs caching.
context-specific searches are generally pretty quick and
don't(/shouldn't) need caching.

I put the caching checks after the call to solib_global_lookup in
lookup_global_symbol to avoid problems there.
Alas, I'm guessing it's windows's global search routine
(windows_iterate_over_objfiles_in_search_order) that is going to throw
a wrench into the works here.
We'll need to refactor that.
Seems like we have two solutions (solib_global_lookup and
gdbarch_iterate_over_objfiles_in_search_order) for what is essentially
the same problem.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-04 19:16 ` Jan Kratochvil
@ 2014-12-04 19:24   ` Doug Evans
  2014-12-04 19:28     ` Jan Kratochvil
  0 siblings, 1 reply; 20+ messages in thread
From: Doug Evans @ 2014-12-04 19:24 UTC (permalink / raw)
  To: Jan Kratochvil; +Cc: gdb-patches, Joel Brobecker

On Thu, Dec 4, 2014 at 11:15 AM, Jan Kratochvil
<jan.kratochvil@redhat.com> wrote:
> On Mon, 01 Dec 2014 09:23:49 +0100, Doug Evans wrote:
>> ..gdb_index helps speed things up for one objfile,
>> but when there are 100s (or 1000s) of shared
>> libraries, symbol lookup can still take awhile,
>> even with .gdb_index.
>> [I'm setting aside a planned change to basic
>> symbol lookup to use the index better.
>> We'll still, I think, want a cache even
>> with that change.]
>
> While I have no numbers personally I do not think GDB would still need a cache
> after that change.
>
> "Accelerate lookup_symbol_aux_objfile 85x" is so fast in a fraction of second
> because it traverses 146426 symtabs.  So 146426 is OK but 1979341 was not (it
> was in some way more than 1979341 before you refactored symtabs).

Just to make sure I understand correctly, the 85x is dwz-specific, right?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] symbol lookup cache
  2014-12-04 19:24   ` Doug Evans
@ 2014-12-04 19:28     ` Jan Kratochvil
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Kratochvil @ 2014-12-04 19:28 UTC (permalink / raw)
  To: Doug Evans; +Cc: gdb-patches, Joel Brobecker

On Thu, 04 Dec 2014 20:24:28 +0100, Doug Evans wrote:
> On Thu, Dec 4, 2014 at 11:15 AM, Jan Kratochvil <jan.kratochvil@redhat.com> wrote:
> > "Accelerate lookup_symbol_aux_objfile 85x" is so fast in a fraction of second
> > because it traverses 146426 symtabs.  So 146426 is OK but 1979341 was not (it
> > was in some way more than 1979341 before you refactored symtabs).
> 
> Just to make sure I understand correctly, the 85x is dwz-specific, right?

Yes, all my numbers are DWZ specific, I have not tested what/if effect it has
for non-DWZ inferiors.


Jan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] symbol lookup cache
  2014-12-03  0:50 ` Doug Evans
@ 2014-12-20  7:57   ` Doug Evans
  2014-12-20  8:29     ` Doug Evans
  2014-12-20 10:43     ` Eli Zaretskii
  0 siblings, 2 replies; 20+ messages in thread
From: Doug Evans @ 2014-12-20  7:57 UTC (permalink / raw)
  To: gdb-patches, eliz

Here is an updated patch for the symbol cache,
with docs and ChangeLog entry.

I looked into trying to do something special
for gdbarch_iterate_over_objfiles_in_search_order,
but I couldn't come up with anything I was happy with.
This gdbarch method could, theoretically, iterate
over objfiles in any particular order based on what
the current objfile is, and plugging the symbol
cache into that would be awkward.

The solution presented here is not that bad though.
For global symbol lookups it records the objfile
that was current at the time of the lookup, and
a later hit requires the same objfile.
This means a symbol may be recorded multiple times
in the cache, but a larger cache will ameliorate
any performance loss from the duplication.

One can imagine making the size of the cache
dependent on the size of the program, or have
it dynamically adapt, but that's more code and
I have no data to guide such a decision.
For the nonce there is a maint command to adjust
the size.  This will allow experimentation to
at least pick a possibly better default size.

I need to enhance the perf testsuite to allow
demonstrating how much the cache helps.
All the other perf patches make the cache less
and less useful, but I think(!) in the end
we'll still want it, I see programs a lot larger
than most.
I think of the cache as simple proxy for a
symbol table that spans all objfiles.

2014-12-19  Doug Evans  <xdje42@gmail.com>

	Add symbol lookup cache.
	* NEWS: Document new options and commands.
	* symtab.c (symbol_cache_key): New static global.
	(DEFAULT_SYMBOL_CACHE_SIZE, MAX_SYMBOL_CACHE_SIZE): New macro.
	(SYMBOL_LOOKUP_FAILED): New macro.
	(symbol_cache_slot_state): New enum.
	(block_symbol_cache): New struct.
	(symbol_cache): New struct.
	(symbol_cache_debug): New static global.
	(new_symbol_cache_size, symbol_cache_size): New static globals.
	(hash_symbol_entry, eq_symbol_entry): New functions.
	(symbol_cache_byte_size, resize_symbol_cache): New functions.
	(make_symbol_cache, free_symbol_cache): New functions.
	(get_symbol_cache, symbol_cache_cleanup): New function.
	(set_symbol_cache_size, set_symbol_cache_size_handler): New functions.
	(symbol_cache_lookup, symbol_cache_clear_slot): New function.
	(symbol_cache_mark_found, symbol_cache_mark_not_found): New functions.
	(symbol_cache_flush, symbol_cache_dump): New functions.
	(maintenance_print_symbol_cache): New function.
	(maintenance_flush_symbol_cache): New function.
	(symbol_cache_stats): New function.
	(maintenance_print_symbol_cache_statistics): New function.
	(symtab_new_objfile_observer): New function.
	(symtab_free_objfile_observer): New function.
	(lookup_static_symbol, lookup_global_symbol): Use symbol cache.
	(_initialize_symtab): Init symbol_cache_key.
	New parameters debug symbol-cache, maint symbol-cache-size.
	New maint commands print symbol-cache,
	print symbol-cache-statistics, flush-symbol-cache.
	Install new_objfile, free_objfile observers.

	doc/
	* gdb.texinfo (Symbols): Document new commands
	"maint print symbol-cache", "maint print symbol-cache-statistics",
	"maint flush-symbol-cache".  Document new option
	"maint set symbol-cache-size".
	(Debugging Output): Document new option "set debug symbol-cache".

diff --git a/gdb/NEWS b/gdb/NEWS
index c34cf2b..24db095 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -57,6 +57,15 @@ add-auto-load-scripts-directory directory
 maint print user-registers
   List all currently available "user" registers.
 
+maint print symbol-cache
+  Print the contents of the symbol cache.
+
+maint print symbol-cache-statistics
+  Print statistics of symbol cache usage.
+
+maint flush-symbol-cache
+  Flush the contents of the symbol cache.
+
 compile code [-r|-raw] [--] [source code]
   Compile, inject, and execute in the inferior the executable object
   code produced by compiling the provided source code.
@@ -90,6 +99,14 @@ set debug symbol-lookup
 show debug symbol-lookup
   Control display of debugging info regarding symbol lookup.
 
+set debug symbol-cache
+show debug symbol-cache
+  Control display of debugging info regarding the symbol cache.
+
+maint set symbol-cache-size
+maint show symbol-cache-size
+  Control the size of the symbol cache.
+
 * MI changes
 
   ** The -list-thread-groups command outputs an exit-code field for
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index e086c33..78db27d 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -16471,8 +16471,37 @@ line 1574.
 @}
 (@value{GDBP})
 @end smallexample
-@end table
 
+@kindex maint set symbol-cache-size
+@kindex maint show symbol-cache-size
+@cindex symbol cache size
+@item maint set symbol-cache-size @var{size}
+@item maint show symbol-cache-size
+Set the size of the symbol cache to @var{size}.
+The default size is intended to be good enough for debugging
+most applications.  This option exists to allow for experimenting
+with different sizes.
+
+@kindex maint print symbol-cache
+@cindex symbol cache, printing its contents
+@item maint print symbol-cache
+Print the contents of the symbol cache.
+This is useful when debugging symbol cache issues.
+
+@kindex maint print symbol-cache-statistics
+@cindex symbol cache, printing usage statistics
+@item maint print symbol-cache-statistics
+Print symbol cache usage statistics.
+This helps determine how well the cache is being utilized.
+
+@kindex maint flush-symbol-cache
+@cindex symbol cache, flushing
+@item maint flush-symbol-cache
+Flush the contents of the symbol cache, all entries are removed.
+This command is useful when debugging the symbol cache.
+It is also useful when collecting performance data.
+
+@end table
 
 @node Altering
 @chapter Altering Execution
@@ -23186,6 +23215,12 @@ Turns on or off debugging messages for FR-V shared-library code.
 @item show debug solib-frv
 Display the current state of FR-V shared-library code debugging
 messages.
+@item set debug symbol-cache
+@cindex symbol cache
+Turns on or off display of debugging messages related to the symbol cache.
+The default is off.
+@item show debug symbol-cache
+Show the current state of symbol cache debugging messages.
 @item set debug symbol-lookup
 @cindex symbol lookup
 Turns on or off display of debugging messages related to symbol lookup.
diff --git a/gdb/symtab.c b/gdb/symtab.c
index d64fdbd..b759384 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -102,12 +102,118 @@ struct main_info
   enum language language_of_main;
 };
 
+/* Program space key for finding its symbol cache.  */
+
+static const struct program_space_data *symbol_cache_key;
+
+/* The default symbol cache size.
+   There is no extra cpu cost for large N (except when flushing the cache,
+   which is rare).  The value here is just a first attempt.  A better default
+   value may be higher or lower.  A prime number can make up for a bad hash
+   computation, so that's why the number is what it is.  */
+#define DEFAULT_SYMBOL_CACHE_SIZE 1021
+
+/* The maximum symbol cache size.
+   There's no method to the decision of what value to use here, other than
+   there's no point in allowing a user typo to make gdb consume all memory.  */
+#define MAX_SYMBOL_CACHE_SIZE (1024*1024)
+
+/* symbol_cache_lookup returns this if a previous lookup failed to find the
+   symbol in any objfile.  */
+#define SYMBOL_LOOKUP_FAILED ((struct symbol *) 1)
+
+/* Recording lookups that don't find the symbol is just as important, if not
+   more so, than recording found symbols.  */
+
+enum symbol_cache_slot_state
+{
+  SYMBOL_SLOT_UNUSED,
+  SYMBOL_SLOT_NOT_FOUND,
+  SYMBOL_SLOT_FOUND
+};
+
+/* Symbols don't specify global vs static block.
+   So keep them in separate caches.  */
+
+struct block_symbol_cache
+{
+  unsigned int hits;
+  unsigned int misses;
+  unsigned int collisions;
+
+  /* SYMBOLS is a variable length array of this size.
+     One can imagine that in general one cache (global/static) should be a
+     fraction of the size of the other, but there's no data at the moment
+     on which to decide.  */
+  unsigned int size;
+
+  struct symbol_cache_slot
+  {
+    enum symbol_cache_slot_state state;
+
+    /* The objfile that was current when the symbol was looked up.
+       This is only needed for global blocks, but for simplicity's sake
+       we allocate the space for both.  If data shows the extra space used
+       for static blocks is a problem, we can split things up then.
+
+       Global blocks need cache lookup to include the objfile context because
+       we need to account for gdbarch_iterate_over_objfiles_in_search_order
+       which can traverse objfiles in, effectively, any order, depending on
+       the current objfile, thus affecting which symbol is found.  Normally,
+       only the current objfile is searched first, and then the rest are
+       searched in recorded order; but putting cache lookup inside
+       gdbarch_iterate_over_objfiles_in_search_order would be awkward.
+       Instead we just make the current objfile part of the context of
+       cache lookup.  This means we can record the same symbol multiple times,
+       each with a different "current objfile" that was in effect when the
+       lookup was saved in the cache, but cache space is pretty cheap.  */
+    const struct objfile *objfile_context;
+
+    union
+    {
+      struct symbol *found;
+      struct
+      {
+	char *name;
+	domain_enum domain;
+      } not_found;
+    } value;
+  } symbols[1];
+};
+
+/* The symbol cache.
+
+   Searching for symbols in the static and global blocks over multiple objfiles
+   again and again can be slow, as can searching very big objfiles.  This is a
+   simple cache to improve symbol lookup performance, which is critical to
+   overall gdb performance.
+
+   Symbols are hashed on the name, its domain, and block.
+   They are also hashed on their objfile for objfile-specific lookups.  */
+
+struct symbol_cache
+{
+  struct block_symbol_cache *global_symbols;
+  struct block_symbol_cache *static_symbols;
+};
+
 /* When non-zero, print debugging messages related to symtab creation.  */
 unsigned int symtab_create_debug = 0;
 
 /* When non-zero, print debugging messages related to symbol lookup.  */
 unsigned int symbol_lookup_debug = 0;
 
+/* When non-zero, print debugging messages related to symbol cache lookup.  */
+static unsigned int symbol_cache_debug = 0;
+
+/* The size of the cache is staged here.  */
+static unsigned int new_symbol_cache_size = DEFAULT_SYMBOL_CACHE_SIZE;
+
+/* The current value of the symbol cache size.
+   This is saved so that if the user enters a value too big we can restore
+   the original value from here.  */
+static unsigned int symbol_cache_size = DEFAULT_SYMBOL_CACHE_SIZE;
+
 /* Non-zero if a file may be known by two different basenames.
    This is the uncommon case, and significantly slows down gdb.
    Default set to "off" to not slow down the common case.  */
@@ -1058,6 +1164,520 @@ expand_symtab_containing_pc (CORE_ADDR pc, struct obj_section *section)
   }
 }
 \f
+/* Hash function for the symbol cache.  */
+
+static unsigned int
+hash_symbol_entry (const struct objfile *objfile_context,
+		   const char *name, domain_enum domain)
+{
+  unsigned int hash = (uintptr_t) objfile_context;
+
+  if (name != NULL)
+    hash += htab_hash_string (name);
+
+  hash += domain;
+
+  return hash;
+}
+
+/* Equality function for the symbol cache.  */
+
+static int
+eq_symbol_entry (const struct symbol_cache_slot *slot,
+		 const struct objfile *objfile_context,
+		 const char *name, domain_enum domain)
+{
+  const char *slot_name;
+  domain_enum slot_domain;
+
+  if (slot->state == SYMBOL_SLOT_UNUSED)
+    return 0;
+
+  if (slot->objfile_context != objfile_context)
+    return 0;
+
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    {
+      slot_name = slot->value.not_found.name;
+      slot_domain = slot->value.not_found.domain;
+    }
+  else
+    {
+      slot_name = SYMBOL_LINKAGE_NAME (slot->value.found);
+      slot_domain = SYMBOL_DOMAIN (slot->value.found);
+    }
+
+  /* NULL names match.  */
+  if (slot_name == NULL && name == NULL)
+    ;
+  else if (slot_name != NULL && name != NULL)
+    {
+      if (strcmp (slot_name, name) != 0)
+	return 0;
+    }
+  else
+    {
+      /* Only one name is NULL.  */
+      return 0;
+    }
+
+  if (slot_domain != domain)
+    return 0;
+
+  return 1;
+}
+
+/* Given a cache of size SIZE, return the size of the struct (with variable
+   length array) in bytes.  */
+
+static size_t
+symbol_cache_byte_size (unsigned int size)
+{
+  return (sizeof (struct block_symbol_cache)
+	  + ((size - 1) * sizeof (struct symbol_cache_slot)));
+}
+
+/* Resize CACHE.  */
+
+static void
+resize_symbol_cache (struct symbol_cache *cache, unsigned int new_size)
+{
+  /* If there's no change in size, don't do anything.
+     All caches have the same size, so we can just compare with the size
+     of the global symbols cache.  */
+  if ((cache->global_symbols != NULL
+       && cache->global_symbols->size == new_size)
+      || (cache->global_symbols == NULL
+	  && new_size == 0))
+    return;
+
+  xfree (cache->global_symbols);
+  xfree (cache->static_symbols);
+
+  if (new_size == 0)
+    {
+      cache->global_symbols = NULL;
+      cache->static_symbols = NULL;
+    }
+  else
+    {
+      size_t total_size = symbol_cache_byte_size (new_size);
+
+      cache->global_symbols = xcalloc (1, total_size);
+      cache->static_symbols = xcalloc (1, total_size);
+      cache->global_symbols->size = new_size;
+      cache->static_symbols->size = new_size;
+    }
+}
+
+/* Make a symbol cache of size SIZE.  */
+
+static struct symbol_cache *
+make_symbol_cache (unsigned int size)
+{
+  struct symbol_cache *cache;
+
+  cache = XCNEW (struct symbol_cache);
+  resize_symbol_cache (cache, symbol_cache_size);
+  return cache;
+}
+
+/* Free the space used by CACHE.  */
+
+static void
+free_symbol_cache (struct symbol_cache *cache)
+{
+  xfree (cache->global_symbols);
+  xfree (cache->static_symbols);
+  xfree (cache);
+}
+
+/* Return the symbol cache of PSPACE.
+   Create one if it doesn't exist yet.  */
+
+static struct symbol_cache *
+get_symbol_cache (struct program_space *pspace)
+{
+  struct symbol_cache *cache = program_space_data (pspace, symbol_cache_key);
+
+  if (cache == NULL)
+    {
+      cache = make_symbol_cache (symbol_cache_size);
+      set_program_space_data (pspace, symbol_cache_key, cache);
+    }
+
+  return cache;
+}
+
+/* Delete the symbol cache of PSPACE.
+   Called when PSPACE is destroyed.  */
+
+static void
+symbol_cache_cleanup (struct program_space *pspace, void *data)
+{
+  struct symbol_cache *cache = data;
+
+  free_symbol_cache (cache);
+}
+
+/* Set the size of the symbol cache in all program spaces.  */
+
+static void
+set_symbol_cache_size (unsigned int new_size)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache
+	= program_space_data (pspace, symbol_cache_key);
+
+      /* The pspace could have been created but not have a cache yet.  */
+      if (cache != NULL)
+	resize_symbol_cache (cache, new_size);
+    }
+}
+
+/* Called when symbol-cache-size is set.  */
+
+static void
+set_symbol_cache_size_handler (char *args, int from_tty,
+			       struct cmd_list_element *c)
+{
+  if (new_symbol_cache_size > MAX_SYMBOL_CACHE_SIZE)
+    {
+      /* Restore the previous value.
+	 This is the value the "show" command prints.  */
+      new_symbol_cache_size = symbol_cache_size;
+
+      error (_("Symbol cache size is too large, max is %u."),
+	     MAX_SYMBOL_CACHE_SIZE);
+    }
+  symbol_cache_size = new_symbol_cache_size;
+
+  set_symbol_cache_size (symbol_cache_size);
+}
+
+/* Lookup symbol NAME,DOMAIN in BLOCK in the symbol cache of PSPACE.
+   OBJFILE_CONTEXT is the current objfile, which may be NULL.
+   The result is the symbol if found, SYMBOL_LOOKUP_FAILED if a previous lookup
+   failed (and thus this one will too), or NULL if the symbol is not present
+   in the cache.
+   *BSC_PTR, *SLOT_PTR are set to the cache and slot of the symbol, whether
+   found or not found.  */
+
+static struct symbol *
+symbol_cache_lookup (struct symbol_cache *cache,
+		     struct objfile *objfile_context, int block,
+		     const char *name, domain_enum domain,
+		     struct block_symbol_cache **bsc_ptr,
+		     struct symbol_cache_slot **slot_ptr)
+{
+  struct block_symbol_cache *bsc;
+  unsigned int hash;
+  struct symbol_cache_slot *slot;
+
+  if (block == GLOBAL_BLOCK)
+    bsc = cache->global_symbols;
+  else
+    bsc = cache->static_symbols;
+  if (bsc == NULL)
+    {
+      *bsc_ptr = NULL;
+      *slot_ptr = NULL;
+      return NULL;
+    }
+
+  hash = hash_symbol_entry (objfile_context, name, domain);
+  slot = bsc->symbols + hash % bsc->size;
+  *bsc_ptr = bsc;
+  *slot_ptr = slot;
+
+  if (eq_symbol_entry (slot, objfile_context, name, domain))
+    {
+      if (symbol_cache_debug)
+	fprintf_unfiltered (gdb_stdlog,
+			    "%s block symbol cache hit%s for %s, %s\n",
+			    block == GLOBAL_BLOCK ? "Global" : "Static",
+			    slot->state == SYMBOL_SLOT_NOT_FOUND
+			    ? " (not found)" : "",
+			    name, domain_name (domain));
+      ++bsc->hits;
+      if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+	return SYMBOL_LOOKUP_FAILED;
+      return slot->value.found;
+    }
+
+  if (symbol_cache_debug)
+    {
+      fprintf_unfiltered (gdb_stdlog,
+			  "%s block symbol cache miss for %s, %s\n",
+			  block == GLOBAL_BLOCK ? "Global" : "Static",
+			  name, domain_name (domain));
+    }
+  ++bsc->misses;
+  return NULL;
+}
+
+/* Clear out SLOT.  */
+
+static void
+symbol_cache_clear_slot (struct symbol_cache_slot *slot)
+{
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    xfree (slot->value.not_found.name);
+  slot->state = SYMBOL_SLOT_UNUSED;
+}
+
+/* Mark SYMBOL as found in SLOT.  */
+
+static void
+symbol_cache_mark_found (struct block_symbol_cache *bsc,
+			 struct symbol_cache_slot *slot,
+			 struct objfile *objfile, struct symbol *symbol)
+{
+  if (bsc == NULL)
+    return;
+  if (slot->state != SYMBOL_SLOT_UNUSED)
+    {
+      ++bsc->collisions;
+      symbol_cache_clear_slot (slot);
+    }
+  slot->state = SYMBOL_SLOT_FOUND;
+  slot->objfile_context = objfile;
+  slot->value.found = symbol;
+}
+
+/* Mark symbol NAME, DOMAIN as not found in SLOT.  */
+
+static void
+symbol_cache_mark_not_found (struct block_symbol_cache *bsc,
+			     struct symbol_cache_slot *slot,
+			     struct objfile *objfile_context,
+			     const char *name, domain_enum domain)
+{
+  if (bsc == NULL)
+    return;
+  if (slot->state != SYMBOL_SLOT_UNUSED)
+    {
+      ++bsc->collisions;
+      symbol_cache_clear_slot (slot);
+    }
+  slot->state = SYMBOL_SLOT_NOT_FOUND;
+  slot->objfile_context = objfile_context;
+  slot->value.not_found.name = xstrdup (name);
+  slot->value.not_found.domain = domain;
+}
+
+/* Flush the symbol cache of PSPACE.  */
+
+static void
+symbol_cache_flush (struct program_space *pspace)
+{
+  struct symbol_cache *cache = program_space_data (pspace, symbol_cache_key);
+  int pass;
+  size_t total_size;
+
+  if (cache == NULL)
+    return;
+  if (cache->global_symbols == NULL)
+    {
+      gdb_assert (symbol_cache_size == 0);
+      gdb_assert (cache->static_symbols == NULL);
+      return;
+    }
+
+  /* If the cache is untouched since the last flush, early exit.
+     This is important for performance during the startup of a program linked
+     with 100s (or 1000s) of shared libraries.  */
+  if (cache->global_symbols->misses == 0
+      && cache->static_symbols->misses == 0)
+    return;
+
+  gdb_assert (cache->global_symbols->size == symbol_cache_size);
+  gdb_assert (cache->static_symbols->size == symbol_cache_size);
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      struct block_symbol_cache *bsc
+	= pass == 0 ? cache->global_symbols : cache->static_symbols;
+      unsigned int i;
+
+      for (i = 0; i < bsc->size; ++i)
+	symbol_cache_clear_slot (&bsc->symbols[i]);
+    }
+
+  cache->global_symbols->hits = 0;
+  cache->global_symbols->misses = 0;
+  cache->global_symbols->collisions = 0;
+  cache->static_symbols->hits = 0;
+  cache->static_symbols->misses = 0;
+  cache->static_symbols->collisions = 0;
+}
+
+/* Dump CACHE.  */
+
+static void
+symbol_cache_dump (const struct symbol_cache *cache)
+{
+  int pass;
+
+  if (cache->global_symbols == NULL)
+    {
+      printf_filtered ("  <disabled>\n");
+      return;
+    }
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? cache->global_symbols : cache->static_symbols;
+      unsigned int i;
+
+      if (pass == 0)
+	printf_filtered ("Global symbols:\n");
+      else
+	printf_filtered ("Static symbols:\n");
+
+      for (i = 0; i < bsc->size; ++i)
+	{
+	  const struct symbol_cache_slot *slot = &bsc->symbols[i];
+
+	  QUIT;
+
+	  switch (slot->state)
+	    {
+	    case SYMBOL_SLOT_UNUSED:
+	      break;
+	    case SYMBOL_SLOT_NOT_FOUND:
+	      printf_filtered ("  [%-4u] = %s, %s (not found)\n", i,
+			       host_address_to_string (slot->objfile_context),
+			       slot->value.not_found.name);
+	      break;
+	    case SYMBOL_SLOT_FOUND:
+	      printf_filtered ("  [%-4u] = %s, %s\n", i,
+			       host_address_to_string (slot->objfile_context),
+			       SYMBOL_PRINT_NAME (slot->value.found));
+	      break;
+	    }
+	}
+    }
+}
+
+/* The "mt print symbol-cache" command.  */
+
+static void
+maintenance_print_symbol_cache (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache;
+
+      printf_filtered (_("Symbol cache for pspace %d\n%s:\n"),
+		       pspace->num,
+		       pspace->symfile_object_file != NULL
+		       ? objfile_name (pspace->symfile_object_file)
+		       : "(no object file)");
+
+      /* If the cache hasn't been created yet, avoid creating one.  */
+      cache = program_space_data (pspace, symbol_cache_key);
+      if (cache == NULL)
+	printf_filtered ("  <empty>\n");
+      else
+	symbol_cache_dump (cache);
+    }
+}
+
+/* The "mt flush-symbol-cache" command.  */
+
+static void
+maintenance_flush_symbol_cache (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      symbol_cache_flush (pspace);
+    }
+}
+
+/* Print usage statistics of CACHE.  */
+
+static void
+symbol_cache_stats (struct symbol_cache *cache)
+{
+  int pass;
+
+  if (cache->global_symbols == NULL)
+    {
+      printf_filtered ("  <disabled>\n");
+      return;
+    }
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? cache->global_symbols : cache->static_symbols;
+
+      QUIT;
+
+      if (pass == 0)
+	printf_filtered ("Global block cache stats:\n");
+      else
+	printf_filtered ("Static block cache stats:\n");
+
+      printf_filtered ("  size:       %u\n", bsc->size);
+      printf_filtered ("  hits:       %u\n", bsc->hits);
+      printf_filtered ("  misses:     %u\n", bsc->misses);
+      printf_filtered ("  collisions: %u\n", bsc->collisions);
+    }
+}
+
+/* The "mt print symbol-cache-statistics" command.  */
+
+static void
+maintenance_print_symbol_cache_statistics (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache;
+
+      printf_filtered (_("Symbol cache statistics for pspace %d\n%s:\n"),
+		       pspace->num,
+		       pspace->symfile_object_file != NULL
+		       ? objfile_name (pspace->symfile_object_file)
+		       : "(no object file)");
+
+      /* If the cache hasn't been created yet, avoid creating one.  */
+      cache = program_space_data (pspace, symbol_cache_key);
+      if (cache == NULL)
+ 	printf_filtered ("  empty, no stats available\n");
+      else
+	symbol_cache_stats (cache);
+    }
+}
+
+/* This module's 'new_objfile' observer.  */
+
+static void
+symtab_new_objfile_observer (struct objfile *objfile)
+{
+  /* Ideally we'd use OBJFILE->pspace, but OBJFILE may be NULL.  */
+  symbol_cache_flush (current_program_space);
+}
+
+/* This module's 'free_objfile' observer.  */
+
+static void
+symtab_free_objfile_observer (struct objfile *objfile)
+{
+  symbol_cache_flush (objfile->pspace);
+}
+\f
 /* Debug symbols usually don't have section information.  We need to dig that
    out of the minimal symbols and stash that in the debug symbol.  */
 
@@ -1940,16 +2560,36 @@ lookup_symbol_in_objfile (struct objfile *objfile, int block_index,
 struct symbol *
 lookup_static_symbol (const char *name, const domain_enum domain)
 {
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
   struct objfile *objfile;
   struct symbol *result;
+  struct block_symbol_cache *bsc;
+  struct symbol_cache_slot *slot;
+
+  /* Lookup in STATIC_BLOCK is not current-objfile-dependent, so just pass
+     NULL for OBJFILE_CONTEXT.  */
+  result = symbol_cache_lookup (cache, NULL, STATIC_BLOCK, name, domain,
+				&bsc, &slot);
+  if (result != NULL)
+    {
+      if (result == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return result;
+    }
 
   ALL_OBJFILES (objfile)
     {
       result = lookup_symbol_in_objfile (objfile, STATIC_BLOCK, name, domain);
       if (result != NULL)
-	return result;
+	{
+	  /* Still pass NULL for OBJFILE_CONTEXT here.  */
+	  symbol_cache_mark_found (bsc, slot, NULL, result);
+	  return result;
+	}
     }
 
+  /* Still pass NULL for OBJFILE_CONTEXT here.  */
+  symbol_cache_mark_not_found (bsc, slot, NULL, name, domain);
   return NULL;
 }
 
@@ -1997,25 +2637,48 @@ lookup_global_symbol (const char *name,
 		      const struct block *block,
 		      const domain_enum domain)
 {
-  struct symbol *sym = NULL;
-  struct objfile *objfile = NULL;
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
+  struct symbol *sym;
+  struct objfile *objfile;
   struct global_sym_lookup_data lookup_data;
+  struct block_symbol_cache *bsc;
+  struct symbol_cache_slot *slot;
 
-  /* Call library-specific lookup procedure.  */
   objfile = lookup_objfile_from_block (block);
+
+  /* First see if we can find the symbol in the cache.
+     This works because we use the current objfile to qualify the lookup.  */
+  sym = symbol_cache_lookup (cache, objfile, GLOBAL_BLOCK, name, domain,
+			     &bsc, &slot);
+  if (sym != NULL)
+    {
+      if (sym == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return sym;
+    }
+
+  /* Call library-specific lookup procedure.  */
   if (objfile != NULL)
     sym = solib_global_lookup (objfile, name, domain);
-  if (sym != NULL)
-    return sym;
 
-  memset (&lookup_data, 0, sizeof (lookup_data));
-  lookup_data.name = name;
-  lookup_data.domain = domain;
-  gdbarch_iterate_over_objfiles_in_search_order
-    (objfile != NULL ? get_objfile_arch (objfile) : target_gdbarch (),
-     lookup_symbol_global_iterator_cb, &lookup_data, objfile);
+  /* If that didn't work go a global search (of global blocks, heh).  */
+  if (sym == NULL)
+    {
+      memset (&lookup_data, 0, sizeof (lookup_data));
+      lookup_data.name = name;
+      lookup_data.domain = domain;
+      gdbarch_iterate_over_objfiles_in_search_order
+	(objfile != NULL ? get_objfile_arch (objfile) : target_gdbarch (),
+	 lookup_symbol_global_iterator_cb, &lookup_data, objfile);
+      sym = lookup_data.result;
+    }
+
+  if (sym != NULL)
+    symbol_cache_mark_found (bsc, slot, objfile, sym);
+  else
+    symbol_cache_mark_not_found (bsc, slot, objfile, name, domain);
 
-  return lookup_data.result;
+  return sym;
 }
 
 int
@@ -5357,6 +6020,9 @@ _initialize_symtab (void)
   main_progspace_key
     = register_program_space_data_with_cleanup (NULL, main_info_cleanup);
 
+  symbol_cache_key
+    = register_program_space_data_with_cleanup (NULL, symbol_cache_cleanup);
+
   add_info ("variables", variables_info, _("\
 All global and static variable names, or those matching REGEXP."));
   if (dbx_commands)
@@ -5432,5 +6098,39 @@ When enabled (non-zero), symbol lookups are logged."),
 			   NULL, NULL,
 			   &setdebuglist, &showdebuglist);
 
+  add_setshow_zuinteger_cmd ("symbol-cache", no_class, &symbol_cache_debug,
+			     _("Set debugging of symbol cache usage."),
+			     _("Show debugging of symbol cache usage."), _("\
+When enabled (non-zero), debugging messages are printed when looking up\n\
+symbols in the symbol cache."),
+			     NULL, NULL,
+			     &setdebuglist, &showdebuglist);
+
+  add_setshow_zuinteger_cmd ("symbol-cache-size", no_class,
+			     &new_symbol_cache_size,
+			     _("Set the size of the symbol cache."),
+			     _("Show the size of the symbol cache."), _("\
+The size of the symbol cache.\n\
+If zero then the symbol cache is disabled."),
+			     set_symbol_cache_size_handler, NULL,
+			     &maintenance_set_cmdlist,
+			     &maintenance_show_cmdlist);
+
+  add_cmd ("symbol-cache", class_maintenance, maintenance_print_symbol_cache,
+	   _("Dump the symbol cache for each program space."),
+	   &maintenanceprintlist);
+
+  add_cmd ("symbol-cache-statistics", class_maintenance,
+	   maintenance_print_symbol_cache_statistics,
+	   _("Print symbol cache statistics for each program space."),
+	   &maintenanceprintlist);
+
+  add_cmd ("flush-symbol-cache", class_maintenance,
+	   maintenance_flush_symbol_cache,
+	   _("Flush the symbol cache for each program space."),
+	   &maintenancelist);
+
   observer_attach_executable_changed (symtab_observer_executable_changed);
+  observer_attach_new_objfile (symtab_new_objfile_observer);
+  observer_attach_free_objfile (symtab_free_objfile_observer);
 }

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-20  7:57   ` [PATCH] " Doug Evans
@ 2014-12-20  8:29     ` Doug Evans
  2014-12-20 10:43     ` Eli Zaretskii
  1 sibling, 0 replies; 20+ messages in thread
From: Doug Evans @ 2014-12-20  8:29 UTC (permalink / raw)
  To: gdb-patches

On Fri, Dec 19, 2014 at 11:56 PM, Doug Evans <xdje42@gmail.com> wrote:
> Here is an updated patch for the symbol cache,
> with docs and ChangeLog entry.

Oh, btw,
flushing of the cache on new_objfile and free_objfile events
*could* be more intelligent.
Easy enough to do, left for another day.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-20  7:57   ` [PATCH] " Doug Evans
  2014-12-20  8:29     ` Doug Evans
@ 2014-12-20 10:43     ` Eli Zaretskii
  2014-12-20 19:14       ` Doug Evans
  1 sibling, 1 reply; 20+ messages in thread
From: Eli Zaretskii @ 2014-12-20 10:43 UTC (permalink / raw)
  To: Doug Evans; +Cc: gdb-patches

> From: Doug Evans <xdje42@gmail.com>
> Date: Fri, 19 Dec 2014 23:56:23 -0800
> 
> Here is an updated patch for the symbol cache,
> with docs and ChangeLog entry.

Thanks.

> +@kindex maint set symbol-cache-size
> +@kindex maint show symbol-cache-size
> +@cindex symbol cache size
> +@item maint set symbol-cache-size @var{size}
> +@item maint show symbol-cache-size
> +Set the size of the symbol cache to @var{size}.
> +The default size is intended to be good enough for debugging
> +most applications.  This option exists to allow for experimenting
> +with different sizes.

The item is both for "set" and "show", but the text only says "Set".

Otherwise, the documentation parts are OK.

Btw, I wonder if this should be a user option, not a "maint" option.
The heuristics used to determine the cache size tend to be wrong in
some rare corner cases, so letting the user override this should be a
good thing, I think.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-20 10:43     ` Eli Zaretskii
@ 2014-12-20 19:14       ` Doug Evans
  2014-12-20 19:55         ` Eli Zaretskii
  0 siblings, 1 reply; 20+ messages in thread
From: Doug Evans @ 2014-12-20 19:14 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: gdb-patches

On Sat, Dec 20, 2014 at 2:43 AM, Eli Zaretskii <eliz@gnu.org> wrote:
>> From: Doug Evans <xdje42@gmail.com>
>> Date: Fri, 19 Dec 2014 23:56:23 -0800
>>
>> Here is an updated patch for the symbol cache,
>> with docs and ChangeLog entry.
>
> Btw, I wonder if this should be a user option, not a "maint" option.
> The heuristics used to determine the cache size tend to be wrong in
> some rare corner cases, so letting the user override this should be a
> good thing, I think.

The thought is the fewer knobs the user needs the better,
and that's where potentially dynamically adjusting the size comes in.

It's easier to remove/change maint options, so for now I put the size there
until there's data to guide a better choice.
But, ultimately, making it a user-settable option is definitely a possibility.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-20 19:14       ` Doug Evans
@ 2014-12-20 19:55         ` Eli Zaretskii
  2014-12-20 21:04           ` Doug Evans
  0 siblings, 1 reply; 20+ messages in thread
From: Eli Zaretskii @ 2014-12-20 19:55 UTC (permalink / raw)
  To: Doug Evans; +Cc: gdb-patches

> Date: Sat, 20 Dec 2014 11:14:39 -0800
> From: Doug Evans <xdje42@gmail.com>
> Cc: "gdb-patches@sourceware.org" <gdb-patches@sourceware.org>
> 
> > Btw, I wonder if this should be a user option, not a "maint" option.
> > The heuristics used to determine the cache size tend to be wrong in
> > some rare corner cases, so letting the user override this should be a
> > good thing, I think.
> 
> The thought is the fewer knobs the user needs the better,

We are way past the point where this ideal was achievable.  You can
stop worrying about that.  With the gazillion knobs we have already,
one more doesn't change anything.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-20 19:55         ` Eli Zaretskii
@ 2014-12-20 21:04           ` Doug Evans
  2014-12-21  3:34             ` Eli Zaretskii
  0 siblings, 1 reply; 20+ messages in thread
From: Doug Evans @ 2014-12-20 21:04 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: gdb-patches

On Sat, Dec 20, 2014 at 11:55 AM, Eli Zaretskii <eliz@gnu.org> wrote:
>> Date: Sat, 20 Dec 2014 11:14:39 -0800
>> From: Doug Evans <xdje42@gmail.com>
>> Cc: "gdb-patches@sourceware.org" <gdb-patches@sourceware.org>
>>
>> > Btw, I wonder if this should be a user option, not a "maint" option.
>> > The heuristics used to determine the cache size tend to be wrong in
>> > some rare corner cases, so letting the user override this should be a
>> > good thing, I think.
>>
>> The thought is the fewer knobs the user needs the better,
>
> We are way past the point where this ideal was achievable.  You can
> stop worrying about that.  With the gazillion knobs we have already,
> one more doesn't change anything.

There isn't so much an ideal as a process that should be followed.
I still want to vet every new knob that I feel needs vetting.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-20 21:04           ` Doug Evans
@ 2014-12-21  3:34             ` Eli Zaretskii
  2014-12-21 18:51               ` Doug Evans
  0 siblings, 1 reply; 20+ messages in thread
From: Eli Zaretskii @ 2014-12-21  3:34 UTC (permalink / raw)
  To: Doug Evans; +Cc: gdb-patches

> Date: Sat, 20 Dec 2014 13:04:01 -0800
> From: Doug Evans <xdje42@gmail.com>
> Cc: "gdb-patches@sourceware.org" <gdb-patches@sourceware.org>
> 
> On Sat, Dec 20, 2014 at 11:55 AM, Eli Zaretskii <eliz@gnu.org> wrote:
> >> Date: Sat, 20 Dec 2014 11:14:39 -0800
> >> From: Doug Evans <xdje42@gmail.com>
> >> Cc: "gdb-patches@sourceware.org" <gdb-patches@sourceware.org>
> >>
> >> > Btw, I wonder if this should be a user option, not a "maint" option.
> >> > The heuristics used to determine the cache size tend to be wrong in
> >> > some rare corner cases, so letting the user override this should be a
> >> > good thing, I think.
> >>
> >> The thought is the fewer knobs the user needs the better,
> >
> > We are way past the point where this ideal was achievable.  You can
> > stop worrying about that.  With the gazillion knobs we have already,
> > one more doesn't change anything.
> 
> There isn't so much an ideal as a process that should be followed.
> I still want to vet every new knob that I feel needs vetting.

And the considerations, such as those I described, by others that
users might want this option -- do these have any bearing on your
decisions?  IOW, is there any hope to convince you in these matters?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-21  3:34             ` Eli Zaretskii
@ 2014-12-21 18:51               ` Doug Evans
  2014-12-21 21:01                 ` Joel Brobecker
  0 siblings, 1 reply; 20+ messages in thread
From: Doug Evans @ 2014-12-21 18:51 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: gdb-patches

On Sat, Dec 20, 2014 at 7:33 PM, Eli Zaretskii <eliz@gnu.org> wrote:
>> Date: Sat, 20 Dec 2014 13:04:01 -0800
>> From: Doug Evans <xdje42@gmail.com>
>> Cc: "gdb-patches@sourceware.org" <gdb-patches@sourceware.org>
>>
>> On Sat, Dec 20, 2014 at 11:55 AM, Eli Zaretskii <eliz@gnu.org> wrote:
>> >> Date: Sat, 20 Dec 2014 11:14:39 -0800
>> >> From: Doug Evans <xdje42@gmail.com>
>> >> Cc: "gdb-patches@sourceware.org" <gdb-patches@sourceware.org>
>> >>
>> >> > Btw, I wonder if this should be a user option, not a "maint" option.
>> >> > The heuristics used to determine the cache size tend to be wrong in
>> >> > some rare corner cases, so letting the user override this should be a
>> >> > good thing, I think.
>> >>
>> >> The thought is the fewer knobs the user needs the better,
>> >
>> > We are way past the point where this ideal was achievable.  You can
>> > stop worrying about that.  With the gazillion knobs we have already,
>> > one more doesn't change anything.
>>
>> There isn't so much an ideal as a process that should be followed.
>> I still want to vet every new knob that I feel needs vetting.
>
> And the considerations, such as those I described, by others that
> users might want this option -- do these have any bearing on your
> decisions?  IOW, is there any hope to convince you in these matters?

I'm not in any rush to add such an option.
For example, if gdb can adequately dynamically adjust the size of the
cache on its own then the need for such an option is much less (and
users would be even happier than having to manually adjust the cache
size on their own).

Let's first verify the efficacy of the cache, collect some data, and
go from there.
And the next step after that, besides improving the efficiency of the
cache (*1), if the data suggests it, is I think to explore dynamically
adjusting the cache size.
And if, after that, there are still some important cases where gdb
can't do a good enough job on its own, *then* I'd be happy to add some
knobs to control the cache size.  Maybe the knob we will want is not
so much to control the cache size but how it grows.

----
(*1): E.g., one can imagine adding support to not kick out frequently
used items, and there are various, umm, ways to do this.  [Bad pun
intended - just trying to keep it light. :-)]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-21 18:51               ` Doug Evans
@ 2014-12-21 21:01                 ` Joel Brobecker
  2014-12-22  2:04                   ` Doug Evans
  0 siblings, 1 reply; 20+ messages in thread
From: Joel Brobecker @ 2014-12-21 21:01 UTC (permalink / raw)
  To: Doug Evans; +Cc: Eli Zaretskii, gdb-patches

> Let's first verify the efficacy of the cache, collect some data, and
> go from there.
> And the next step after that, besides improving the efficiency of the
> cache (*1), if the data suggests it, is I think to explore dynamically
> adjusting the cache size.
> And if, after that, there are still some important cases where gdb
> can't do a good enough job on its own, *then* I'd be happy to add some
> knobs to control the cache size.  Maybe the knob we will want is not
> so much to control the cache size but how it grows.

do you see the amount of data in the cache being all that much?
I haven't looked at your implementation, but for Ada, the cache is
updated when symbols are looked up, usually due to a user querying
something. So, I don't the cache to be all that big, and FWIW,
we have never even had to think of having a dynamic hash size.

-- 
Joel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-21 21:01                 ` Joel Brobecker
@ 2014-12-22  2:04                   ` Doug Evans
  2014-12-22 12:48                     ` Joel Brobecker
  0 siblings, 1 reply; 20+ messages in thread
From: Doug Evans @ 2014-12-22  2:04 UTC (permalink / raw)
  To: Joel Brobecker; +Cc: Eli Zaretskii, gdb-patches

On Sun, Dec 21, 2014 at 1:01 PM, Joel Brobecker <brobecker@adacore.com> wrote:
>> Let's first verify the efficacy of the cache, collect some data, and
>> go from there.
>> And the next step after that, besides improving the efficiency of the
>> cache (*1), if the data suggests it, is I think to explore dynamically
>> adjusting the cache size.
>> And if, after that, there are still some important cases where gdb
>> can't do a good enough job on its own, *then* I'd be happy to add some
>> knobs to control the cache size.  Maybe the knob we will want is not
>> so much to control the cache size but how it grows.
>
> do you see the amount of data in the cache being all that much?

Nope, and I've noted that in the code.
Plus I recently gained back 40MB for free (16 * #minsyms) on one of my
monster benchmarks.
ref: https://sourceware.org/ml/gdb-patches/2014-11/msg00638.html
We could spend 1/10 that on the cache (which would still be a rather
large cache) and still be way ahead.

> I haven't looked at your implementation, but for Ada, the cache is
> updated when symbols are looked up, usually due to a user querying
> something. So, I don't the cache to be all that big, and FWIW,
> we have never even had to think of having a dynamic hash size.

But, *if* one-size-fits-all *wasn't* working (*1), and I think I can
reasonably assume we're agreed we're not there yet, then would you
immediately go the route of providing a user option to allow changing
the size, or first see if gdb could do better on its own?

---
(*1): I'm only suggesting exploring dynamically adjusting the cache
size (I realize you wrote hash size) *if* the data suggests we need
it. IMO we're not there yet.  But, and here's where the disagreement
is (AFAICT), *if* we do get there, I'd rather see if gdb could do
better on its own first, before adding a knob that the user has to
tweak to get the desired performance.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-22  2:04                   ` Doug Evans
@ 2014-12-22 12:48                     ` Joel Brobecker
  2015-01-11 19:11                       ` Doug Evans
  0 siblings, 1 reply; 20+ messages in thread
From: Joel Brobecker @ 2014-12-22 12:48 UTC (permalink / raw)
  To: Doug Evans; +Cc: Eli Zaretskii, gdb-patches

> But, *if* one-size-fits-all *wasn't* working (*1), and I think I can
> reasonably assume we're agreed we're not there yet, then would you
> immediately go the route of providing a user option to allow changing
> the size, or first see if gdb could do better on its own?
> 
> ---
> (*1): I'm only suggesting exploring dynamically adjusting the cache
> size (I realize you wrote hash size) *if* the data suggests we need
> it. IMO we're not there yet.  But, and here's where the disagreement
> is (AFAICT), *if* we do get there, I'd rather see if gdb could do
> better on its own first, before adding a knob that the user has to
> tweak to get the desired performance.

On that, I don't really have an opinion, at least not yet; and since
I don't see us getting there, I propose we do not decide now :).

-- 
Joel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] symbol lookup cache
  2014-12-22 12:48                     ` Joel Brobecker
@ 2015-01-11 19:11                       ` Doug Evans
  0 siblings, 0 replies; 20+ messages in thread
From: Doug Evans @ 2015-01-11 19:11 UTC (permalink / raw)
  To: Joel Brobecker; +Cc: Eli Zaretskii, gdb-patches

Joel Brobecker <brobecker@adacore.com> writes:
>> But, *if* one-size-fits-all *wasn't* working (*1), and I think I can
>> reasonably assume we're agreed we're not there yet, then would you
>> immediately go the route of providing a user option to allow changing
>> the size, or first see if gdb could do better on its own?
>> 
>> ---
>> (*1): I'm only suggesting exploring dynamically adjusting the cache
>> size (I realize you wrote hash size) *if* the data suggests we need
>> it. IMO we're not there yet.  But, and here's where the disagreement
>> is (AFAICT), *if* we do get there, I'd rather see if gdb could do
>> better on its own first, before adding a knob that the user has to
>> tweak to get the desired performance.
>
> On that, I don't really have an opinion, at least not yet; and since
> I don't see us getting there, I propose we do not decide now :).

Hi.

Here's what I committed.
[regression tested again on amd64-linux]

I have a couple of followup patches, which I'll get in today.

2015-01-10  Doug Evans  <xdje42@gmail.com>

	Add symbol lookup cache.
	* NEWS: Document new options and commands.
	* symtab.c (symbol_cache_key): New static global.
	(DEFAULT_SYMBOL_CACHE_SIZE, MAX_SYMBOL_CACHE_SIZE): New macros.
	(SYMBOL_LOOKUP_FAILED): New macro.
	(symbol_cache_slot_state): New enum.
	(block_symbol_cache): New struct.
	(symbol_cache): New struct.
	(new_symbol_cache_size, symbol_cache_size): New static globals.
	(hash_symbol_entry, eq_symbol_entry): New functions.
	(symbol_cache_byte_size, resize_symbol_cache): New functions.
	(make_symbol_cache, free_symbol_cache): New functions.
	(get_symbol_cache, symbol_cache_cleanup): New function.
	(set_symbol_cache_size, set_symbol_cache_size_handler): New functions.
	(symbol_cache_lookup, symbol_cache_clear_slot): New function.
	(symbol_cache_mark_found, symbol_cache_mark_not_found): New functions.
	(symbol_cache_flush, symbol_cache_dump): New functions.
	(maintenance_print_symbol_cache): New function.
	(maintenance_flush_symbol_cache): New function.
	(symbol_cache_stats): New function.
	(maintenance_print_symbol_cache_statistics): New function.
	(symtab_new_objfile_observer): New function.
	(symtab_free_objfile_observer): New function.
	(lookup_static_symbol, lookup_global_symbol): Use symbol cache.
	(_initialize_symtab): Init symbol_cache_key.  New parameter
	maint symbol-cache-size.  New maint commands print symbol-cache,
	print symbol-cache-statistics, flush-symbol-cache.
	Install new_objfile, free_objfile observers.

        doc/
	* gdb.texinfo (Symbols): Document new commands
	"maint print symbol-cache", "maint print symbol-cache-statistics",
	"maint flush-symbol-cache".  Document new option
	"maint set symbol-cache-size".

diff --git a/gdb/NEWS b/gdb/NEWS
index 9a668c4..f866a5a 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -57,6 +57,15 @@ add-auto-load-scripts-directory directory
 maint print user-registers
   List all currently available "user" registers.
 
+maint print symbol-cache
+  Print the contents of the symbol cache.
+
+maint print symbol-cache-statistics
+  Print statistics of symbol cache usage.
+
+maint flush-symbol-cache
+  Flush the contents of the symbol cache.
+
 compile code [-r|-raw] [--] [source code]
   Compile, inject, and execute in the inferior the executable object
   code produced by compiling the provided source code.
@@ -90,6 +99,10 @@ set debug symbol-lookup
 show debug symbol-lookup
   Control display of debugging info regarding symbol lookup.
 
+maint set symbol-cache-size
+maint show symbol-cache-size
+  Control the size of the symbol cache.
+
 * MI changes
 
   ** The -list-thread-groups command outputs an exit-code field for
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index f4d7132..b752524 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -16471,8 +16471,39 @@ line 1574.
 @}
 (@value{GDBP})
 @end smallexample
-@end table
 
+@kindex maint set symbol-cache-size
+@cindex symbol cache size
+@item maint set symbol-cache-size @var{size}
+Set the size of the symbol cache to @var{size}.
+The default size is intended to be good enough for debugging
+most applications.  This option exists to allow for experimenting
+with different sizes.
+
+@kindex maint show symbol-cache-size
+@item maint show symbol-cache-size
+Show the size of the symbol cache.
+
+@kindex maint print symbol-cache
+@cindex symbol cache, printing its contents
+@item maint print symbol-cache
+Print the contents of the symbol cache.
+This is useful when debugging symbol cache issues.
+
+@kindex maint print symbol-cache-statistics
+@cindex symbol cache, printing usage statistics
+@item maint print symbol-cache-statistics
+Print symbol cache usage statistics.
+This helps determine how well the cache is being utilized.
+
+@kindex maint flush-symbol-cache
+@cindex symbol cache, flushing
+@item maint flush-symbol-cache
+Flush the contents of the symbol cache, all entries are removed.
+This command is useful when debugging the symbol cache.
+It is also useful when collecting performance data.
+
+@end table
 
 @node Altering
 @chapter Altering Execution
diff --git a/gdb/symtab.c b/gdb/symtab.c
index b5d8d63..7193131 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -102,12 +102,115 @@ struct main_info
   enum language language_of_main;
 };
 
+/* Program space key for finding its symbol cache.  */
+
+static const struct program_space_data *symbol_cache_key;
+
+/* The default symbol cache size.
+   There is no extra cpu cost for large N (except when flushing the cache,
+   which is rare).  The value here is just a first attempt.  A better default
+   value may be higher or lower.  A prime number can make up for a bad hash
+   computation, so that's why the number is what it is.  */
+#define DEFAULT_SYMBOL_CACHE_SIZE 1021
+
+/* The maximum symbol cache size.
+   There's no method to the decision of what value to use here, other than
+   there's no point in allowing a user typo to make gdb consume all memory.  */
+#define MAX_SYMBOL_CACHE_SIZE (1024*1024)
+
+/* symbol_cache_lookup returns this if a previous lookup failed to find the
+   symbol in any objfile.  */
+#define SYMBOL_LOOKUP_FAILED ((struct symbol *) 1)
+
+/* Recording lookups that don't find the symbol is just as important, if not
+   more so, than recording found symbols.  */
+
+enum symbol_cache_slot_state
+{
+  SYMBOL_SLOT_UNUSED,
+  SYMBOL_SLOT_NOT_FOUND,
+  SYMBOL_SLOT_FOUND
+};
+
+/* Symbols don't specify global vs static block.
+   So keep them in separate caches.  */
+
+struct block_symbol_cache
+{
+  unsigned int hits;
+  unsigned int misses;
+  unsigned int collisions;
+
+  /* SYMBOLS is a variable length array of this size.
+     One can imagine that in general one cache (global/static) should be a
+     fraction of the size of the other, but there's no data at the moment
+     on which to decide.  */
+  unsigned int size;
+
+  struct symbol_cache_slot
+  {
+    enum symbol_cache_slot_state state;
+
+    /* The objfile that was current when the symbol was looked up.
+       This is only needed for global blocks, but for simplicity's sake
+       we allocate the space for both.  If data shows the extra space used
+       for static blocks is a problem, we can split things up then.
+
+       Global blocks need cache lookup to include the objfile context because
+       we need to account for gdbarch_iterate_over_objfiles_in_search_order
+       which can traverse objfiles in, effectively, any order, depending on
+       the current objfile, thus affecting which symbol is found.  Normally,
+       only the current objfile is searched first, and then the rest are
+       searched in recorded order; but putting cache lookup inside
+       gdbarch_iterate_over_objfiles_in_search_order would be awkward.
+       Instead we just make the current objfile part of the context of
+       cache lookup.  This means we can record the same symbol multiple times,
+       each with a different "current objfile" that was in effect when the
+       lookup was saved in the cache, but cache space is pretty cheap.  */
+    const struct objfile *objfile_context;
+
+    union
+    {
+      struct symbol *found;
+      struct
+      {
+	char *name;
+	domain_enum domain;
+      } not_found;
+    } value;
+  } symbols[1];
+};
+
+/* The symbol cache.
+
+   Searching for symbols in the static and global blocks over multiple objfiles
+   again and again can be slow, as can searching very big objfiles.  This is a
+   simple cache to improve symbol lookup performance, which is critical to
+   overall gdb performance.
+
+   Symbols are hashed on the name, its domain, and block.
+   They are also hashed on their objfile for objfile-specific lookups.  */
+
+struct symbol_cache
+{
+  struct block_symbol_cache *global_symbols;
+  struct block_symbol_cache *static_symbols;
+};
+
 /* When non-zero, print debugging messages related to symtab creation.  */
 unsigned int symtab_create_debug = 0;
 
 /* When non-zero, print debugging messages related to symbol lookup.  */
 unsigned int symbol_lookup_debug = 0;
 
+/* The size of the cache is staged here.  */
+static unsigned int new_symbol_cache_size = DEFAULT_SYMBOL_CACHE_SIZE;
+
+/* The current value of the symbol cache size.
+   This is saved so that if the user enters a value too big we can restore
+   the original value from here.  */
+static unsigned int symbol_cache_size = DEFAULT_SYMBOL_CACHE_SIZE;
+
 /* Non-zero if a file may be known by two different basenames.
    This is the uncommon case, and significantly slows down gdb.
    Default set to "off" to not slow down the common case.  */
@@ -1058,6 +1161,520 @@ expand_symtab_containing_pc (CORE_ADDR pc, struct obj_section *section)
   }
 }
 \f
+/* Hash function for the symbol cache.  */
+
+static unsigned int
+hash_symbol_entry (const struct objfile *objfile_context,
+		   const char *name, domain_enum domain)
+{
+  unsigned int hash = (uintptr_t) objfile_context;
+
+  if (name != NULL)
+    hash += htab_hash_string (name);
+
+  hash += domain;
+
+  return hash;
+}
+
+/* Equality function for the symbol cache.  */
+
+static int
+eq_symbol_entry (const struct symbol_cache_slot *slot,
+		 const struct objfile *objfile_context,
+		 const char *name, domain_enum domain)
+{
+  const char *slot_name;
+  domain_enum slot_domain;
+
+  if (slot->state == SYMBOL_SLOT_UNUSED)
+    return 0;
+
+  if (slot->objfile_context != objfile_context)
+    return 0;
+
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    {
+      slot_name = slot->value.not_found.name;
+      slot_domain = slot->value.not_found.domain;
+    }
+  else
+    {
+      slot_name = SYMBOL_LINKAGE_NAME (slot->value.found);
+      slot_domain = SYMBOL_DOMAIN (slot->value.found);
+    }
+
+  /* NULL names match.  */
+  if (slot_name == NULL && name == NULL)
+    ;
+  else if (slot_name != NULL && name != NULL)
+    {
+      if (strcmp (slot_name, name) != 0)
+	return 0;
+    }
+  else
+    {
+      /* Only one name is NULL.  */
+      return 0;
+    }
+
+  if (slot_domain != domain)
+    return 0;
+
+  return 1;
+}
+
+/* Given a cache of size SIZE, return the size of the struct (with variable
+   length array) in bytes.  */
+
+static size_t
+symbol_cache_byte_size (unsigned int size)
+{
+  return (sizeof (struct block_symbol_cache)
+	  + ((size - 1) * sizeof (struct symbol_cache_slot)));
+}
+
+/* Resize CACHE.  */
+
+static void
+resize_symbol_cache (struct symbol_cache *cache, unsigned int new_size)
+{
+  /* If there's no change in size, don't do anything.
+     All caches have the same size, so we can just compare with the size
+     of the global symbols cache.  */
+  if ((cache->global_symbols != NULL
+       && cache->global_symbols->size == new_size)
+      || (cache->global_symbols == NULL
+	  && new_size == 0))
+    return;
+
+  xfree (cache->global_symbols);
+  xfree (cache->static_symbols);
+
+  if (new_size == 0)
+    {
+      cache->global_symbols = NULL;
+      cache->static_symbols = NULL;
+    }
+  else
+    {
+      size_t total_size = symbol_cache_byte_size (new_size);
+
+      cache->global_symbols = xcalloc (1, total_size);
+      cache->static_symbols = xcalloc (1, total_size);
+      cache->global_symbols->size = new_size;
+      cache->static_symbols->size = new_size;
+    }
+}
+
+/* Make a symbol cache of size SIZE.  */
+
+static struct symbol_cache *
+make_symbol_cache (unsigned int size)
+{
+  struct symbol_cache *cache;
+
+  cache = XCNEW (struct symbol_cache);
+  resize_symbol_cache (cache, symbol_cache_size);
+  return cache;
+}
+
+/* Free the space used by CACHE.  */
+
+static void
+free_symbol_cache (struct symbol_cache *cache)
+{
+  xfree (cache->global_symbols);
+  xfree (cache->static_symbols);
+  xfree (cache);
+}
+
+/* Return the symbol cache of PSPACE.
+   Create one if it doesn't exist yet.  */
+
+static struct symbol_cache *
+get_symbol_cache (struct program_space *pspace)
+{
+  struct symbol_cache *cache = program_space_data (pspace, symbol_cache_key);
+
+  if (cache == NULL)
+    {
+      cache = make_symbol_cache (symbol_cache_size);
+      set_program_space_data (pspace, symbol_cache_key, cache);
+    }
+
+  return cache;
+}
+
+/* Delete the symbol cache of PSPACE.
+   Called when PSPACE is destroyed.  */
+
+static void
+symbol_cache_cleanup (struct program_space *pspace, void *data)
+{
+  struct symbol_cache *cache = data;
+
+  free_symbol_cache (cache);
+}
+
+/* Set the size of the symbol cache in all program spaces.  */
+
+static void
+set_symbol_cache_size (unsigned int new_size)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache
+	= program_space_data (pspace, symbol_cache_key);
+
+      /* The pspace could have been created but not have a cache yet.  */
+      if (cache != NULL)
+	resize_symbol_cache (cache, new_size);
+    }
+}
+
+/* Called when symbol-cache-size is set.  */
+
+static void
+set_symbol_cache_size_handler (char *args, int from_tty,
+			       struct cmd_list_element *c)
+{
+  if (new_symbol_cache_size > MAX_SYMBOL_CACHE_SIZE)
+    {
+      /* Restore the previous value.
+	 This is the value the "show" command prints.  */
+      new_symbol_cache_size = symbol_cache_size;
+
+      error (_("Symbol cache size is too large, max is %u."),
+	     MAX_SYMBOL_CACHE_SIZE);
+    }
+  symbol_cache_size = new_symbol_cache_size;
+
+  set_symbol_cache_size (symbol_cache_size);
+}
+
+/* Lookup symbol NAME,DOMAIN in BLOCK in the symbol cache of PSPACE.
+   OBJFILE_CONTEXT is the current objfile, which may be NULL.
+   The result is the symbol if found, SYMBOL_LOOKUP_FAILED if a previous lookup
+   failed (and thus this one will too), or NULL if the symbol is not present
+   in the cache.
+   *BSC_PTR, *SLOT_PTR are set to the cache and slot of the symbol, whether
+   found or not found.  */
+
+static struct symbol *
+symbol_cache_lookup (struct symbol_cache *cache,
+		     struct objfile *objfile_context, int block,
+		     const char *name, domain_enum domain,
+		     struct block_symbol_cache **bsc_ptr,
+		     struct symbol_cache_slot **slot_ptr)
+{
+  struct block_symbol_cache *bsc;
+  unsigned int hash;
+  struct symbol_cache_slot *slot;
+
+  if (block == GLOBAL_BLOCK)
+    bsc = cache->global_symbols;
+  else
+    bsc = cache->static_symbols;
+  if (bsc == NULL)
+    {
+      *bsc_ptr = NULL;
+      *slot_ptr = NULL;
+      return NULL;
+    }
+
+  hash = hash_symbol_entry (objfile_context, name, domain);
+  slot = bsc->symbols + hash % bsc->size;
+  *bsc_ptr = bsc;
+  *slot_ptr = slot;
+
+  if (eq_symbol_entry (slot, objfile_context, name, domain))
+    {
+      if (symbol_lookup_debug)
+	fprintf_unfiltered (gdb_stdlog,
+			    "%s block symbol cache hit%s for %s, %s\n",
+			    block == GLOBAL_BLOCK ? "Global" : "Static",
+			    slot->state == SYMBOL_SLOT_NOT_FOUND
+			    ? " (not found)" : "",
+			    name, domain_name (domain));
+      ++bsc->hits;
+      if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+	return SYMBOL_LOOKUP_FAILED;
+      return slot->value.found;
+    }
+
+  if (symbol_lookup_debug)
+    {
+      fprintf_unfiltered (gdb_stdlog,
+			  "%s block symbol cache miss for %s, %s\n",
+			  block == GLOBAL_BLOCK ? "Global" : "Static",
+			  name, domain_name (domain));
+    }
+  ++bsc->misses;
+  return NULL;
+}
+
+/* Clear out SLOT.  */
+
+static void
+symbol_cache_clear_slot (struct symbol_cache_slot *slot)
+{
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    xfree (slot->value.not_found.name);
+  slot->state = SYMBOL_SLOT_UNUSED;
+}
+
+/* Mark SYMBOL as found in SLOT.  */
+
+static void
+symbol_cache_mark_found (struct block_symbol_cache *bsc,
+			 struct symbol_cache_slot *slot,
+			 struct objfile *objfile, struct symbol *symbol)
+{
+  if (bsc == NULL)
+    return;
+  if (slot->state != SYMBOL_SLOT_UNUSED)
+    {
+      ++bsc->collisions;
+      symbol_cache_clear_slot (slot);
+    }
+  slot->state = SYMBOL_SLOT_FOUND;
+  slot->objfile_context = objfile;
+  slot->value.found = symbol;
+}
+
+/* Mark symbol NAME, DOMAIN as not found in SLOT.  */
+
+static void
+symbol_cache_mark_not_found (struct block_symbol_cache *bsc,
+			     struct symbol_cache_slot *slot,
+			     struct objfile *objfile_context,
+			     const char *name, domain_enum domain)
+{
+  if (bsc == NULL)
+    return;
+  if (slot->state != SYMBOL_SLOT_UNUSED)
+    {
+      ++bsc->collisions;
+      symbol_cache_clear_slot (slot);
+    }
+  slot->state = SYMBOL_SLOT_NOT_FOUND;
+  slot->objfile_context = objfile_context;
+  slot->value.not_found.name = xstrdup (name);
+  slot->value.not_found.domain = domain;
+}
+
+/* Flush the symbol cache of PSPACE.  */
+
+static void
+symbol_cache_flush (struct program_space *pspace)
+{
+  struct symbol_cache *cache = program_space_data (pspace, symbol_cache_key);
+  int pass;
+  size_t total_size;
+
+  if (cache == NULL)
+    return;
+  if (cache->global_symbols == NULL)
+    {
+      gdb_assert (symbol_cache_size == 0);
+      gdb_assert (cache->static_symbols == NULL);
+      return;
+    }
+
+  /* If the cache is untouched since the last flush, early exit.
+     This is important for performance during the startup of a program linked
+     with 100s (or 1000s) of shared libraries.  */
+  if (cache->global_symbols->misses == 0
+      && cache->static_symbols->misses == 0)
+    return;
+
+  gdb_assert (cache->global_symbols->size == symbol_cache_size);
+  gdb_assert (cache->static_symbols->size == symbol_cache_size);
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      struct block_symbol_cache *bsc
+	= pass == 0 ? cache->global_symbols : cache->static_symbols;
+      unsigned int i;
+
+      for (i = 0; i < bsc->size; ++i)
+	symbol_cache_clear_slot (&bsc->symbols[i]);
+    }
+
+  cache->global_symbols->hits = 0;
+  cache->global_symbols->misses = 0;
+  cache->global_symbols->collisions = 0;
+  cache->static_symbols->hits = 0;
+  cache->static_symbols->misses = 0;
+  cache->static_symbols->collisions = 0;
+}
+
+/* Dump CACHE.  */
+
+static void
+symbol_cache_dump (const struct symbol_cache *cache)
+{
+  int pass;
+
+  if (cache->global_symbols == NULL)
+    {
+      printf_filtered ("  <disabled>\n");
+      return;
+    }
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? cache->global_symbols : cache->static_symbols;
+      unsigned int i;
+
+      if (pass == 0)
+	printf_filtered ("Global symbols:\n");
+      else
+	printf_filtered ("Static symbols:\n");
+
+      for (i = 0; i < bsc->size; ++i)
+	{
+	  const struct symbol_cache_slot *slot = &bsc->symbols[i];
+
+	  QUIT;
+
+	  switch (slot->state)
+	    {
+	    case SYMBOL_SLOT_UNUSED:
+	      break;
+	    case SYMBOL_SLOT_NOT_FOUND:
+	      printf_filtered ("  [%-4u] = %s, %s (not found)\n", i,
+			       host_address_to_string (slot->objfile_context),
+			       slot->value.not_found.name);
+	      break;
+	    case SYMBOL_SLOT_FOUND:
+	      printf_filtered ("  [%-4u] = %s, %s\n", i,
+			       host_address_to_string (slot->objfile_context),
+			       SYMBOL_PRINT_NAME (slot->value.found));
+	      break;
+	    }
+	}
+    }
+}
+
+/* The "mt print symbol-cache" command.  */
+
+static void
+maintenance_print_symbol_cache (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache;
+
+      printf_filtered (_("Symbol cache for pspace %d\n%s:\n"),
+		       pspace->num,
+		       pspace->symfile_object_file != NULL
+		       ? objfile_name (pspace->symfile_object_file)
+		       : "(no object file)");
+
+      /* If the cache hasn't been created yet, avoid creating one.  */
+      cache = program_space_data (pspace, symbol_cache_key);
+      if (cache == NULL)
+	printf_filtered ("  <empty>\n");
+      else
+	symbol_cache_dump (cache);
+    }
+}
+
+/* The "mt flush-symbol-cache" command.  */
+
+static void
+maintenance_flush_symbol_cache (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      symbol_cache_flush (pspace);
+    }
+}
+
+/* Print usage statistics of CACHE.  */
+
+static void
+symbol_cache_stats (struct symbol_cache *cache)
+{
+  int pass;
+
+  if (cache->global_symbols == NULL)
+    {
+      printf_filtered ("  <disabled>\n");
+      return;
+    }
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? cache->global_symbols : cache->static_symbols;
+
+      QUIT;
+
+      if (pass == 0)
+	printf_filtered ("Global block cache stats:\n");
+      else
+	printf_filtered ("Static block cache stats:\n");
+
+      printf_filtered ("  size:       %u\n", bsc->size);
+      printf_filtered ("  hits:       %u\n", bsc->hits);
+      printf_filtered ("  misses:     %u\n", bsc->misses);
+      printf_filtered ("  collisions: %u\n", bsc->collisions);
+    }
+}
+
+/* The "mt print symbol-cache-statistics" command.  */
+
+static void
+maintenance_print_symbol_cache_statistics (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache;
+
+      printf_filtered (_("Symbol cache statistics for pspace %d\n%s:\n"),
+		       pspace->num,
+		       pspace->symfile_object_file != NULL
+		       ? objfile_name (pspace->symfile_object_file)
+		       : "(no object file)");
+
+      /* If the cache hasn't been created yet, avoid creating one.  */
+      cache = program_space_data (pspace, symbol_cache_key);
+      if (cache == NULL)
+ 	printf_filtered ("  empty, no stats available\n");
+      else
+	symbol_cache_stats (cache);
+    }
+}
+
+/* This module's 'new_objfile' observer.  */
+
+static void
+symtab_new_objfile_observer (struct objfile *objfile)
+{
+  /* Ideally we'd use OBJFILE->pspace, but OBJFILE may be NULL.  */
+  symbol_cache_flush (current_program_space);
+}
+
+/* This module's 'free_objfile' observer.  */
+
+static void
+symtab_free_objfile_observer (struct objfile *objfile)
+{
+  symbol_cache_flush (objfile->pspace);
+}
+\f
 /* Debug symbols usually don't have section information.  We need to dig that
    out of the minimal symbols and stash that in the debug symbol.  */
 
@@ -1970,16 +2587,36 @@ lookup_symbol_in_objfile (struct objfile *objfile, int block_index,
 struct symbol *
 lookup_static_symbol (const char *name, const domain_enum domain)
 {
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
   struct objfile *objfile;
   struct symbol *result;
+  struct block_symbol_cache *bsc;
+  struct symbol_cache_slot *slot;
+
+  /* Lookup in STATIC_BLOCK is not current-objfile-dependent, so just pass
+     NULL for OBJFILE_CONTEXT.  */
+  result = symbol_cache_lookup (cache, NULL, STATIC_BLOCK, name, domain,
+				&bsc, &slot);
+  if (result != NULL)
+    {
+      if (result == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return result;
+    }
 
   ALL_OBJFILES (objfile)
     {
       result = lookup_symbol_in_objfile (objfile, STATIC_BLOCK, name, domain);
       if (result != NULL)
-	return result;
+	{
+	  /* Still pass NULL for OBJFILE_CONTEXT here.  */
+	  symbol_cache_mark_found (bsc, slot, NULL, result);
+	  return result;
+	}
     }
 
+  /* Still pass NULL for OBJFILE_CONTEXT here.  */
+  symbol_cache_mark_not_found (bsc, slot, NULL, name, domain);
   return NULL;
 }
 
@@ -2027,25 +2664,48 @@ lookup_global_symbol (const char *name,
 		      const struct block *block,
 		      const domain_enum domain)
 {
-  struct symbol *sym = NULL;
-  struct objfile *objfile = NULL;
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
+  struct symbol *sym;
+  struct objfile *objfile;
   struct global_sym_lookup_data lookup_data;
+  struct block_symbol_cache *bsc;
+  struct symbol_cache_slot *slot;
 
-  /* Call library-specific lookup procedure.  */
   objfile = lookup_objfile_from_block (block);
+
+  /* First see if we can find the symbol in the cache.
+     This works because we use the current objfile to qualify the lookup.  */
+  sym = symbol_cache_lookup (cache, objfile, GLOBAL_BLOCK, name, domain,
+			     &bsc, &slot);
+  if (sym != NULL)
+    {
+      if (sym == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return sym;
+    }
+
+  /* Call library-specific lookup procedure.  */
   if (objfile != NULL)
     sym = solib_global_lookup (objfile, name, domain);
-  if (sym != NULL)
-    return sym;
 
-  memset (&lookup_data, 0, sizeof (lookup_data));
-  lookup_data.name = name;
-  lookup_data.domain = domain;
-  gdbarch_iterate_over_objfiles_in_search_order
-    (objfile != NULL ? get_objfile_arch (objfile) : target_gdbarch (),
-     lookup_symbol_global_iterator_cb, &lookup_data, objfile);
+  /* If that didn't work go a global search (of global blocks, heh).  */
+  if (sym == NULL)
+    {
+      memset (&lookup_data, 0, sizeof (lookup_data));
+      lookup_data.name = name;
+      lookup_data.domain = domain;
+      gdbarch_iterate_over_objfiles_in_search_order
+	(objfile != NULL ? get_objfile_arch (objfile) : target_gdbarch (),
+	 lookup_symbol_global_iterator_cb, &lookup_data, objfile);
+      sym = lookup_data.result;
+    }
 
-  return lookup_data.result;
+  if (sym != NULL)
+    symbol_cache_mark_found (bsc, slot, objfile, sym);
+  else
+    symbol_cache_mark_not_found (bsc, slot, objfile, name, domain);
+
+  return sym;
 }
 
 int
@@ -5436,6 +6096,9 @@ _initialize_symtab (void)
   main_progspace_key
     = register_program_space_data_with_cleanup (NULL, main_info_cleanup);
 
+  symbol_cache_key
+    = register_program_space_data_with_cleanup (NULL, symbol_cache_cleanup);
+
   add_info ("variables", variables_info, _("\
 All global and static variable names, or those matching REGEXP."));
   if (dbx_commands)
@@ -5511,5 +6174,31 @@ When enabled (non-zero), symbol lookups are logged."),
 			   NULL, NULL,
 			   &setdebuglist, &showdebuglist);
 
+  add_setshow_zuinteger_cmd ("symbol-cache-size", no_class,
+			     &new_symbol_cache_size,
+			     _("Set the size of the symbol cache."),
+			     _("Show the size of the symbol cache."), _("\
+The size of the symbol cache.\n\
+If zero then the symbol cache is disabled."),
+			     set_symbol_cache_size_handler, NULL,
+			     &maintenance_set_cmdlist,
+			     &maintenance_show_cmdlist);
+
+  add_cmd ("symbol-cache", class_maintenance, maintenance_print_symbol_cache,
+	   _("Dump the symbol cache for each program space."),
+	   &maintenanceprintlist);
+
+  add_cmd ("symbol-cache-statistics", class_maintenance,
+	   maintenance_print_symbol_cache_statistics,
+	   _("Print symbol cache statistics for each program space."),
+	   &maintenanceprintlist);
+
+  add_cmd ("flush-symbol-cache", class_maintenance,
+	   maintenance_flush_symbol_cache,
+	   _("Flush the symbol cache for each program space."),
+	   &maintenancelist);
+
   observer_attach_executable_changed (symtab_observer_executable_changed);
+  observer_attach_new_objfile (symtab_new_objfile_observer);
+  observer_attach_free_objfile (symtab_free_objfile_observer);
 }

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2015-01-11 19:11 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-01  8:23 [RFC] symbol lookup cache Doug Evans
2014-12-03  0:50 ` Doug Evans
2014-12-20  7:57   ` [PATCH] " Doug Evans
2014-12-20  8:29     ` Doug Evans
2014-12-20 10:43     ` Eli Zaretskii
2014-12-20 19:14       ` Doug Evans
2014-12-20 19:55         ` Eli Zaretskii
2014-12-20 21:04           ` Doug Evans
2014-12-21  3:34             ` Eli Zaretskii
2014-12-21 18:51               ` Doug Evans
2014-12-21 21:01                 ` Joel Brobecker
2014-12-22  2:04                   ` Doug Evans
2014-12-22 12:48                     ` Joel Brobecker
2015-01-11 19:11                       ` Doug Evans
2014-12-03  2:46 ` [RFC] " Joel Brobecker
2014-12-04 16:38 ` Pedro Alves
2014-12-04 19:17   ` Doug Evans
2014-12-04 19:16 ` Jan Kratochvil
2014-12-04 19:24   ` Doug Evans
2014-12-04 19:28     ` Jan Kratochvil

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).