This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] symbol lookup cache


Hi.

Holidays are nice for having the time to take on
projects one is interested in but doesn't have
time to otherwise do.

I looked at a couple of areas for improvement
in gdb's symbol handling over the last couple
of days.  One is .gdb_index support for tab
completion, which is the topic for another mail.
This email is about a symbol lookup cache.

..gdb_index helps speed things up for one objfile,
but when there are 100s (or 1000s) of shared
libraries, symbol lookup can still take awhile,
even with .gdb_index.
[I'm setting aside a planned change to basic
symbol lookup to use the index better.
We'll still, I think, want a cache even
with that change.]

This patch still needs more testing
and I need to collect more perf data,
but it's a start at caching symbol lookups.
This first pass is just for the case
where we iterate over all objfiles for
lookups in the global or static blocks.
Low hanging fruit.
At the moment it's just RFC.

Joel: I noticed ada-lang.c has its own cache.
I haven't studied it too closely (yet).
I can imagine wanting to keep some variant of both.
This is just a "heads up".

btw, another area that needs caching
is pc lookups (e.g., find_pc_sect_compunit_symtab).
That's the topic of yet another email.

diff --git a/gdb/symtab.c b/gdb/symtab.c
index 345c20d..f9ef72f 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -104,9 +104,78 @@ struct main_info
   enum language language_of_main;
 };
 
+/* Program space key for finding its symbol cache.  */
+
+static const struct program_space_data *symbol_cache_key;
+
+/* The symbol cache size.
+   TODO(dje): One can imagine making this dynamic based on the size of the
+   program, but OTOH this cache consumes minimal space, and there is no extra
+   cpu cost for large N (except when flushing the cache, which is rare).
+   The value here is just a first attempt.  */
+#define SYMBOL_CACHE_SIZE 127
+
+/* symbol_cache_lookup returns this if a previous lookup failed to find the
+   symbol in any objfile.  */
+#define SYMBOL_LOOKUP_FAILED ((struct symbol *) 1)
+
+/* Recording lookups that don't find the symbol is just as important, if not
+   more so, than recording found symbols.  */
+
+enum symbol_cache_slot_state
+{
+  SYMBOL_SLOT_UNUSED,
+  SYMBOL_SLOT_NOT_FOUND,
+  SYMBOL_SLOT_FOUND
+};
+
+/* Symbols don't specify global vs static block.
+   So keep them in separate caches.  */
+
+struct block_symbol_cache
+{
+  unsigned int hits;
+  unsigned int misses;
+
+  struct symbol_cache_slot
+  {
+    enum symbol_cache_slot_state state;
+    union
+    {
+      struct symbol *found;
+      struct
+      {
+	char *name;
+	domain_enum domain;
+      } not_found;
+    } value;
+  } symbols[SYMBOL_CACHE_SIZE];
+};
+
+/* The symbol cache.
+
+   Searching for symbols in the static and global blocks over multiple objfiles
+   again and again can be slow, as can searching very big objfiles.  This is a
+   simple cache to improve symbol lookup performance, which is critical to
+   overall gdb performance.
+
+   Symbols are hashed on the name, its domain, and block.
+   They are also hashed on their objfile for objfile-specific lookups.
+
+   TODO(dje): Collect some numbers to guide the choices here.  */
+
+struct symbol_cache
+{
+  struct block_symbol_cache global_symbols;
+  struct block_symbol_cache static_symbols;
+};
+
 /* When non-zero, print debugging messages related to symtab creation.  */
 unsigned int symtab_create_debug = 0;
 
+/* When non-zero, print debugging messages related to symbol cache lookup.  */
+static unsigned int symbol_cache_debug = 0;
+
 /* Non-zero if a file may be known by two different basenames.
    This is the uncommon case, and significantly slows down gdb.
    Default set to "off" to not slow down the common case.  */
@@ -1089,6 +1158,309 @@ expand_symtab_containing_pc (CORE_ADDR pc, struct obj_section *section)
   }
 }
 
+/* Hash function for the symbol cache.  */
+
+static unsigned int
+hash_symbol_entry (const char *name, domain_enum domain)
+{
+  unsigned int hash = 0;
+
+  if (name != NULL)
+    hash += htab_hash_string (name);
+
+  /* TODO(dje): Too simplistic?  */
+  hash += domain;
+
+  return hash;
+}
+
+/* Equality function for the symbol cache.  */
+
+static int
+eq_symbol_entry (const struct symbol_cache_slot *slot,
+		 const char *name, domain_enum domain)
+{
+  const char *slot_name;
+  domain_enum slot_domain;
+
+  if (slot->state == SYMBOL_SLOT_UNUSED)
+    return 0;
+
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    {
+      slot_name = slot->value.not_found.name;
+      slot_domain = slot->value.not_found.domain;
+    }
+  else
+    {
+      slot_name = SYMBOL_LINKAGE_NAME (slot->value.found);
+      slot_domain = SYMBOL_DOMAIN (slot->value.found);
+    }
+
+  /* NULL names match.  */
+  if (slot_name == NULL && name == NULL)
+    ;
+  else if (slot_name != NULL && name != NULL)
+    {
+      if (strcmp (slot_name, name) != 0)
+	return 0;
+    }
+  else
+    {
+      /* Only one name is NULL.  */
+      return 0;
+    }
+
+  if (slot_domain != domain)
+    return 0;
+
+  return 1;
+}
+
+/* Return the symbol cache of PSPACE.
+   Create one if it doesn't exist yet.  */
+
+static struct symbol_cache *
+get_symbol_cache (struct program_space *pspace)
+{
+  struct symbol_cache *cache = program_space_data (pspace, symbol_cache_key);
+
+  if (cache == NULL)
+    {
+      cache = XCNEW (struct symbol_cache);
+      set_program_space_data (pspace, symbol_cache_key, cache);
+    }
+
+  return cache;
+}
+
+/* Delete the symbol cache of PSPACE.
+   Called when PSPACE is destroyed.  */
+
+static void
+symbol_cache_cleanup (struct program_space *pspace, void *data)
+{
+  struct symbol_cache *cache = data;
+
+  xfree (cache);
+}
+
+/* Lookup symbol NAME,DOMAIN in BLOCK in the symbol cache of PSPACE.
+   The result is the symbol if found, SYMBOL_LOOKUP_FAILED if a previous lookup
+   failed (and thus this one will too), or NULL if the symbol is not present
+   in the cache.
+   *SLOT is set to the cache slot of the symbol, whether found or not
+   found.  */
+
+static struct symbol *
+symbol_cache_lookup (struct symbol_cache *cache, int block,
+		     const char *name, domain_enum domain,
+		     struct symbol_cache_slot **slot_ptr)
+{
+  struct block_symbol_cache *bsc;
+  unsigned int hash;
+  struct symbol_cache_slot *slot;
+
+  if (block == GLOBAL_BLOCK)
+    bsc = &cache->global_symbols;
+  else
+    bsc = &cache->static_symbols;
+  hash = hash_symbol_entry (name, domain);
+  slot = bsc->symbols + hash % SYMBOL_CACHE_SIZE;
+  *slot_ptr = slot;
+  if (eq_symbol_entry (slot, name, domain))
+    {
+      if (symbol_cache_debug)
+	fprintf_unfiltered (gdb_stdlog,
+			    "%s block symbol cache hit%s for %s, %d\n",
+			    block == GLOBAL_BLOCK ? "Global" : "Static",
+			    slot->state == SYMBOL_SLOT_NOT_FOUND
+			    ? " (not found)" : "",
+			    name, domain);
+      ++bsc->hits;
+      if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+	return SYMBOL_LOOKUP_FAILED;
+      return slot->value.found;
+    }
+  if (symbol_cache_debug)
+    {
+      fprintf_unfiltered (gdb_stdlog,
+			  "%s block symbol cache miss for %s, %d\n",
+			  block == GLOBAL_BLOCK ? "Global" : "Static",
+			  name, domain);
+    }
+  ++bsc->misses;
+  return NULL;
+}
+
+/* Clear out SLOT.  */
+
+static void
+symbol_cache_clear_slot (struct symbol_cache_slot *slot)
+{
+  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+    xfree (slot->value.not_found.name);
+  slot->state = SYMBOL_SLOT_UNUSED;
+}
+
+/* Mark SYMBOL as found in SLOT.  */
+
+static void
+symbol_cache_mark_found (struct symbol_cache_slot *slot, struct symbol *symbol)
+{
+  symbol_cache_clear_slot (slot);
+  slot->state = SYMBOL_SLOT_FOUND;
+  slot->value.found = symbol;
+}
+
+/* Mark symbol NAME, DOMAIN as not found in SLOT.  */
+
+static void
+symbol_cache_mark_not_found (struct symbol_cache_slot *slot,
+			     const char *name, domain_enum domain)
+{
+  symbol_cache_clear_slot (slot);
+  slot->state = SYMBOL_SLOT_NOT_FOUND;
+  slot->value.not_found.name = xstrdup (name);
+  slot->value.not_found.domain = domain;
+}
+
+/* Flush the symbol cache of PSPACE.  */
+
+static void
+symbol_cache_flush (struct program_space *pspace)
+{
+  struct symbol_cache *cache = get_symbol_cache (pspace);
+  int pass;
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      struct block_symbol_cache *bsc
+	= pass == 0 ? &cache->global_symbols : &cache->static_symbols;
+      int i;
+
+      for (i = 0; i < SYMBOL_CACHE_SIZE; ++i)
+	symbol_cache_clear_slot (&bsc->symbols[i]);
+    }
+
+  memset (&cache->global_symbols, 0, sizeof (cache->global_symbols));
+  memset (&cache->static_symbols, 0, sizeof (cache->static_symbols));
+}
+
+/* Dump CACHE.  */
+
+static void
+symbol_cache_dump (const struct symbol_cache *cache)
+{
+  int pass;
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? &cache->global_symbols : &cache->static_symbols;
+      int i;
+
+      if (pass == 0)
+	printf_filtered ("Global symbols:\n");
+      else
+	printf_filtered ("Static symbols:\n");
+
+      for (i = 0; i < SYMBOL_CACHE_SIZE; ++i)
+	{
+	  switch (bsc->symbols[i].state)
+	    {
+	    case SYMBOL_SLOT_UNUSED:
+	      break;
+	    case SYMBOL_SLOT_NOT_FOUND:
+	      printf_filtered ("  [%-2d] = %s\n", i, /* TODO(dje): "not found" */
+			       bsc->symbols[i].value.not_found.name);
+	      break;
+	    case SYMBOL_SLOT_FOUND:
+	      printf_filtered ("  [%-2d] = %s\n", i,
+			       SYMBOL_PRINT_NAME (bsc->symbols[i].value.found));
+	      break;
+	    }
+	}
+    }
+}
+
+/* The "mt print symbol-cache" command.  */
+
+static void
+maintenance_print_symbol_cache (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache = get_symbol_cache (pspace);
+
+      QUIT;
+      symbol_cache_dump (cache);
+    }
+}
+
+/* Print usage statistics of CACHE.  */
+
+static void
+symbol_cache_stats (struct symbol_cache *cache)
+{
+  int pass;
+
+  for (pass = 0; pass < 2; ++pass)
+    {
+      const struct block_symbol_cache *bsc
+	= pass == 0 ? &cache->global_symbols : &cache->static_symbols;
+
+      if (pass == 0)
+	printf_filtered ("Global block cache stats:\n");
+      else
+	printf_filtered ("Static block cache stats:\n");
+
+      printf_filtered ("  size:   %u\n", SYMBOL_CACHE_SIZE);
+      printf_filtered ("  hits:   %u\n", bsc->hits);
+      printf_filtered ("  misses: %u\n", bsc->misses);
+    }
+}
+
+/* The "mt print symbol-cache-statistics" command.  */
+
+static void
+maintenance_print_symbol_cache_statistics (char *args, int from_tty)
+{
+  struct program_space *pspace;
+
+  ALL_PSPACES (pspace)
+    {
+      struct symbol_cache *cache = get_symbol_cache (pspace);
+
+      QUIT;
+      printf_filtered (_("Symbol cache statistics for pspace %d, '%s':\n"),
+		       pspace->num,
+		       pspace->symfile_object_file != NULL
+		       ? objfile_name (pspace->symfile_object_file)
+		       : "NULL");
+      symbol_cache_stats (cache);
+    }
+}
+
+/* This module's 'new_objfile' observer.  */
+
+static void
+symtab_new_objfile_observer (struct objfile *objfile)
+{
+  /* Ideally we'd use OBJFILE->pspace, but OBJFILE may be NULL.  */
+  symbol_cache_flush (current_program_space);
+}
+
+/* This module's 'free_objfile' observer.  */
+
+static void
+symtab_free_objfile_observer (struct objfile *objfile)
+{
+  symbol_cache_flush (objfile->pspace);
+}
+
 /* Debug symbols usually don't have section information.  We need to dig that
    out of the minimal symbols and stash that in the debug symbol.  */
 
@@ -1818,16 +2190,30 @@ lookup_symbol_in_objfile (struct objfile *objfile, int block_index,
 struct symbol *
 lookup_static_symbol (const char *name, const domain_enum domain)
 {
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
   struct objfile *objfile;
   struct symbol *result;
+  struct symbol_cache_slot *slot;
+
+  result = symbol_cache_lookup (cache, STATIC_BLOCK, name, domain, &slot);
+  if (result != NULL)
+    {
+      if (result == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return result;
+    }
 
   ALL_OBJFILES (objfile)
     {
       result = lookup_symbol_in_objfile (objfile, STATIC_BLOCK, name, domain);
       if (result != NULL)
-	return result;
+	{
+	  symbol_cache_mark_found (slot, result);
+	  return result;
+	}
     }
 
+  symbol_cache_mark_not_found (slot, name, domain);
   return NULL;
 }
 
@@ -1875,9 +2261,11 @@ lookup_global_symbol (const char *name,
 		      const struct block *block,
 		      const domain_enum domain)
 {
+  struct symbol_cache *cache = get_symbol_cache (current_program_space);
   struct symbol *sym = NULL;
   struct objfile *objfile = NULL;
   struct global_sym_lookup_data lookup_data;
+  struct symbol_cache_slot *slot;
 
   /* Call library-specific lookup procedure.  */
   objfile = lookup_objfile_from_block (block);
@@ -1886,6 +2274,14 @@ lookup_global_symbol (const char *name,
   if (sym != NULL)
     return sym;
 
+  sym = symbol_cache_lookup (cache, GLOBAL_BLOCK, name, domain, &slot);
+  if (sym != NULL)
+    {
+      if (sym == SYMBOL_LOOKUP_FAILED)
+	return NULL;
+      return sym;
+    }
+
   memset (&lookup_data, 0, sizeof (lookup_data));
   lookup_data.name = name;
   lookup_data.domain = domain;
@@ -1893,6 +2289,11 @@ lookup_global_symbol (const char *name,
     (objfile != NULL ? get_objfile_arch (objfile) : target_gdbarch (),
      lookup_symbol_global_iterator_cb, &lookup_data, objfile);
 
+  if (lookup_data.result != NULL)
+    symbol_cache_mark_found (slot, lookup_data.result);
+  else
+    symbol_cache_mark_not_found (slot, name, domain);
+
   return lookup_data.result;
 }
 
@@ -5235,6 +5636,9 @@ _initialize_symtab (void)
   main_progspace_key
     = register_program_space_data_with_cleanup (NULL, main_info_cleanup);
 
+  symbol_cache_key
+    = register_program_space_data_with_cleanup (NULL, symbol_cache_cleanup);
+
   add_info ("variables", variables_info, _("\
 All global and static variable names, or those matching REGEXP."));
   if (dbx_commands)
@@ -5302,5 +5706,25 @@ A value greater than 1 provides more verbose information."),
 			     NULL,
 			     &setdebuglist, &showdebuglist);
 
+  add_setshow_zuinteger_cmd ("symbol-cache", no_class, &symbol_cache_debug,
+			     _("Set debugging of symbol cache usage."),
+			     _("Show debugging of symbol cache usage."), _("\
+When enabled (non-zero), debugging messages are printed when looking up\n\
+symbols in the symbol cache."),
+			     NULL,
+			     NULL,
+			     &setdebuglist, &showdebuglist);
+
+  add_cmd ("symbol-cache", class_maintenance, maintenance_print_symbol_cache,
+	   _("Dump the symbol cache for each program space."),
+	   &maintenanceprintlist);
+
+  add_cmd ("symbol-cache-statistics", class_maintenance,
+	   maintenance_print_symbol_cache_statistics,
+	   _("Print symbol cache statistics for each program space."),
+	   &maintenanceprintlist);
+
   observer_attach_executable_changed (symtab_observer_executable_changed);
+  observer_attach_new_objfile (symtab_new_objfile_observer);
+  observer_attach_free_objfile (symtab_free_objfile_observer);
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]