This is the mail archive of the gdb-patches@sources.redhat.com mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[rfa] Symbol hashing (for the last time?)


The patch should be adequately reduced for review now.  The diff includes part of:
  http://sources.redhat.com/ml/gdb-patches/2001-10/msg00320.html
which is still unreviewed and was not worth the effort for me to mangle out
of this patch; it's just a couple of lines in symtab.c.

As before, the only parts I have the slightest doubt left about are:
 - I reverse the direction of a search in coffread.c which appeared
  to go top-to-bottom deliberately, but I can't follow the chaining,
  and there's already another hash table involved somehow.
 - The result of several search functions and dump_symtab are no longer
  in sorted order.  As far as I can tell this is purely a user interface
  change.  If anyone cares I can sort them after searching.
 - The way mdebugread builds symtabs is kind of dodgy.  I don't hash them.
  It should be updated to use a pending list the way everyone else does.
  Or taken out and shot.  I like taken out and shot.

I'd appreciate comments, as I've been working on this patch for over a month
without any substantive changes.

-- 
Daniel Jacobowitz                           Carnegie Mellon University
MontaVista Software                         Debian GNU/Linux Developer

2001-10-30  Daniel Jacobowitz  <drow@mvista.com>

	Based on patch from Daniel Berlin <dan@cgsoftware.com>.
	* buildsym.c: Include "demangle.h" for SYMBOL_INIT_DEMANGLED_NAME.
	(finish_block) For non-function blocks, hash the symbol table.  For
	function blocks, mark the symbol table as unhashed.

	* minsyms.c (msymbol_hash): Return hash value without taking modulus.
	(msymbol_hash_iw): Likewise.
	(add_minsym_to_hash_table): Take modulus of msymbol_hash's return
	value.
	(add_minsym_to_demangled_hash_table): Likewise for msymbol_hash_iw.
	(lookup_minimal_symbol): Likewise for both.

	* symtab.h (struct block): Add `hashtable' flag.
	(BLOCK_HASHTABLE): New macro.
	(BLOCK_BUCKETS): New macro.
	(BLOCK_BUCKET): New macro.
	(ALL_BLOCK_SYMBOLS): New macro.
	(BLOCK_SHOULD_SORT): Do not sort hashed blocks.
	(struct symbol): Add `hash_next' pointer.

	* symtab.c (lookup_block_symbol): Search using the hash table when
	possible.
	(find_pc_sect_symtab): Use ALL_BLOCK_SYMBOLS.
	(search_symbols): Do not attempt to sort hashed blocks or function
	blocks.  Use ALL_BLOCK_SYMBOLS.

	* dstread.c (process_dst_block): Clear hashtable bit for new block.
	(read_dst_symtab): Likewise.
	* jv-lang.c (get_java_class_symtab): Likewise.
	* mdebugread.c: Include "gdb_assert.h".
	(shrink_block): Assert that the block being modified is not hashed.

	* coffread.c (patch_opaque_types): Use ALL_BLOCK_SYMBOLS.

	* symmisc.c (free_symtab_block): Walk the hash table when freeing
	symbols.
	(dump_symtab): Recognize hashed blocks.

	* printcmd.c (print_frame_args):  Assert that function blocks do not
	have hashed symbol tables.

Index: gdb/buildsym.c
===================================================================
RCS file: /cvs/src/src/gdb/buildsym.c,v
retrieving revision 1.12
diff -u -p -r1.12 buildsym.c
--- buildsym.c	2001/10/12 23:51:28	1.12
+++ buildsym.c	2001/10/30 15:41:17
@@ -39,6 +39,7 @@
 #include "language.h"		/* For "longest_local_hex_string_custom" */
 #include "bcache.h"
 #include "filenames.h"		/* For DOSish file names */
+#include "demangle.h"		/* Needed by SYMBOL_INIT_DEMANGLED_NAME */
 /* Ask buildsym.h to define the vars it normally declares `extern'.  */
 #define	EXTERN
 /**/
@@ -239,17 +240,45 @@ finish_block (struct symbol *symbol, str
       /* EMPTY */ ;
     }
 
-  block = (struct block *) obstack_alloc (&objfile->symbol_obstack,
-	    (sizeof (struct block) + ((i - 1) * sizeof (struct symbol *))));
-
   /* Copy the symbols into the block.  */
 
-  BLOCK_NSYMS (block) = i;
-  for (next = *listhead; next; next = next->next)
+  if (symbol)
+    {
+      block = (struct block *) 
+	obstack_alloc (&objfile->symbol_obstack,
+		       (sizeof (struct block) + 
+			((i - 1) * sizeof (struct symbol *))));
+      BLOCK_NSYMS (block) = i;
+      for (next = *listhead; next; next = next->next)
+	for (j = next->nsyms - 1; j >= 0; j--)
+	  {
+	    BLOCK_SYM (block, --i) = next->symbol[j];
+	  }
+    }
+  else
     {
-      for (j = next->nsyms - 1; j >= 0; j--)
+      block = (struct block *) 
+	obstack_alloc (&objfile->symbol_obstack,
+		       (sizeof (struct block) + 
+			((i / 5) * sizeof (struct symbol *))));
+      for (j = 0; j < ((i / 5) + 1); j++)
 	{
-	  BLOCK_SYM (block, --i) = next->symbol[j];
+	  BLOCK_BUCKET (block, j) = 0;
+	}
+      BLOCK_BUCKETS (block) = (i / 5) + 1;
+      for (next = *listhead; next; next = next->next)
+	{
+	  for (j = next->nsyms - 1; j >= 0; j--)
+	    {
+	      struct symbol *sym;
+	      unsigned int hash_index;
+	      SYMBOL_INIT_DEMANGLED_NAME (next->symbol[j], &objfile->symbol_obstack);
+	      hash_index = msymbol_hash_iw (SYMBOL_SOURCE_NAME (next->symbol[j]));
+	      hash_index = hash_index % BLOCK_BUCKETS (block);
+	      sym = BLOCK_BUCKET (block, hash_index);
+	      BLOCK_BUCKET (block, hash_index) = next->symbol[j];
+	      next->symbol[j]->hash_next = sym;
+	    }
 	}
     }
 
@@ -267,6 +296,7 @@ finish_block (struct symbol *symbol, str
       struct type *ftype = SYMBOL_TYPE (symbol);
       SYMBOL_BLOCK_VALUE (symbol) = block;
       BLOCK_FUNCTION (block) = symbol;
+      BLOCK_HASHTABLE (block) = 0;
 
       if (TYPE_NFIELDS (ftype) <= 0)
 	{
@@ -347,6 +377,7 @@ finish_block (struct symbol *symbol, str
   else
     {
       BLOCK_FUNCTION (block) = NULL;
+      BLOCK_HASHTABLE (block) = 1;
     }
 
   /* Now "free" the links of the list, and empty the list.  */
Index: gdb/coffread.c
===================================================================
RCS file: /cvs/src/src/gdb/coffread.c,v
retrieving revision 1.20
diff -u -p -r1.20 coffread.c
--- coffread.c	2001/09/20 03:03:39	1.20
+++ coffread.c	2001/10/30 15:41:17
@@ -1446,13 +1446,13 @@ patch_opaque_types (struct symtab *s)
 
   /* Go through the per-file symbols only */
   b = BLOCKVECTOR_BLOCK (BLOCKVECTOR (s), STATIC_BLOCK);
-  for (i = BLOCK_NSYMS (b) - 1; i >= 0; i--)
+  /* FIXMED: Originally all symbols, in reverse order, before they are sorted.  */
+  ALL_BLOCK_SYMBOLS (b, i, real_sym)
     {
       /* Find completed typedefs to use to fix opaque ones.
          Remove syms from the chain when their types are stored,
          but search the whole chain, as there may be several syms
          from different files with the same name.  */
-      real_sym = BLOCK_SYM (b, i);
       if (SYMBOL_CLASS (real_sym) == LOC_TYPEDEF &&
 	  SYMBOL_NAMESPACE (real_sym) == VAR_NAMESPACE &&
 	  TYPE_CODE (SYMBOL_TYPE (real_sym)) == TYPE_CODE_PTR &&
Index: gdb/dstread.c
===================================================================
RCS file: /cvs/src/src/gdb/dstread.c,v
retrieving revision 1.7
diff -u -p -r1.7 dstread.c
--- dstread.c	2001/03/06 08:21:06	1.7
+++ dstread.c	2001/10/30 15:41:17
@@ -1407,6 +1407,7 @@ process_dst_block (struct objfile *objfi
       symnum++;
     }
   BLOCK_NSYMS (block) = total_symbols;
+  BLOCK_HASHTABLE (block) = 0;
   BLOCK_START (block) = address;
   BLOCK_END (block) = address + size;
   BLOCK_SUPERBLOCK (block) = 0;
@@ -1491,6 +1492,7 @@ read_dst_symtab (struct objfile *objfile
 			     (total_globals - 1) *
 			   sizeof (struct symbol *));
 	  BLOCK_NSYMS (global_block) = total_globals;
+	  BLOCK_HASHTABLE (global_block) = 0;
 	  for (symnum = 0; symnum < total_globals; symnum++)
 	    {
 	      nextsym = dst_global_symbols->next;
Index: gdb/jv-lang.c
===================================================================
RCS file: /cvs/src/src/gdb/jv-lang.c,v
retrieving revision 1.9
diff -u -p -r1.9 jv-lang.c
--- jv-lang.c	2001/10/21 01:57:42	1.9
+++ jv-lang.c	2001/10/30 15:41:17
@@ -106,6 +106,7 @@ get_java_class_symtab (void)
       bl = (struct block *)
 	obstack_alloc (&objfile->symbol_obstack, sizeof (struct block));
       BLOCK_NSYMS (bl) = 0;
+      BLOCK_HASHTABLE (bl) = 0;
       BLOCK_START (bl) = 0;
       BLOCK_END (bl) = 0;
       BLOCK_FUNCTION (bl) = NULL;
Index: gdb/mdebugread.c
===================================================================
RCS file: /cvs/src/src/gdb/mdebugread.c,v
retrieving revision 1.17
diff -u -p -r1.17 mdebugread.c
--- mdebugread.c	2001/10/24 17:10:18	1.17
+++ mdebugread.c	2001/10/30 15:41:17
@@ -52,6 +52,7 @@
 #include "stabsread.h"
 #include "complaints.h"
 #include "demangle.h"
+#include "gdb_assert.h"
 
 /* These are needed if the tm.h file does not contain the necessary
    mips specific definitions.  */
@@ -4173,6 +4174,11 @@ shrink_block (struct block *b, struct sy
 				   (sizeof (struct block)
 				    + ((BLOCK_NSYMS (b) - 1)
 				       * sizeof (struct symbol *))));
+
+  /* FIXME: Not worth hashing this block as it's built.  */
+  /* All callers should have created the block with new_block (), which
+     would mean it was not previously hashed.  Make sure.  */
+  gdb_assert (BLOCK_HASHTABLE (new) == 0);
 
   /* Should chase pointers to old one.  Fortunately, that`s just
      the block`s function and inferior blocks */
Index: gdb/minsyms.c
===================================================================
RCS file: /cvs/src/src/gdb/minsyms.c,v
retrieving revision 1.18
diff -u -p -r1.18 minsyms.c
--- minsyms.c	2001/10/12 19:07:07	1.18
+++ minsyms.c	2001/10/30 15:41:17
@@ -101,7 +101,7 @@ msymbol_hash_iw (const char *string)
 	  ++string;
 	}
     }
-  return hash % MINIMAL_SYMBOL_HASH_SIZE;
+  return hash;
 }
 
 /* Compute a hash code for a string.  */
@@ -112,7 +112,7 @@ msymbol_hash (const char *string)
   unsigned int hash = 0;
   for (; *string; ++string)
     hash = hash * 67 + *string - 113;
-  return hash % MINIMAL_SYMBOL_HASH_SIZE;
+  return hash;
 }
 
 /* Add the minimal symbol SYM to an objfile's minsym hash table, TABLE.  */
@@ -122,7 +122,7 @@ add_minsym_to_hash_table (struct minimal
 {
   if (sym->hash_next == NULL)
     {
-      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym));
+      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
       sym->hash_next = table[hash];
       table[hash] = sym;
     }
@@ -136,7 +136,7 @@ add_minsym_to_demangled_hash_table (stru
 {
   if (sym->demangled_hash_next == NULL)
     {
-      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (sym));
+      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
       sym->demangled_hash_next = table[hash];
       table[hash] = sym;
     }
@@ -164,8 +164,8 @@ lookup_minimal_symbol (register const ch
   struct minimal_symbol *found_file_symbol = NULL;
   struct minimal_symbol *trampoline_symbol = NULL;
 
-  unsigned int hash = msymbol_hash (name);
-  unsigned int dem_hash = msymbol_hash_iw (name);
+  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
+  unsigned int dem_hash = msymbol_hash_iw (name) % MINIMAL_SYMBOL_HASH_SIZE;
 
 #ifdef SOFUN_ADDRESS_MAYBE_MISSING
   if (sfile != NULL)
Index: gdb/printcmd.c
===================================================================
RCS file: /cvs/src/src/gdb/printcmd.c,v
retrieving revision 1.30
diff -u -p -r1.30 printcmd.c
--- printcmd.c	2001/10/16 01:58:07	1.30
+++ printcmd.c	2001/10/30 15:41:17
@@ -38,6 +38,7 @@
 #include "symfile.h"		/* for overlay functions */
 #include "objfiles.h"		/* ditto */
 #include "completer.h"		/* for completion functions */
+#include "gdb_assert.h"
 #ifdef UI_OUT
 #include "ui-out.h"
 #endif
@@ -1805,6 +1806,11 @@ print_frame_args (struct symbol *func, s
   if (func)
     {
       b = SYMBOL_BLOCK_VALUE (func);
+      /* Function blocks are order sensitive, and thus should not be
+	 hashed.  */
+      gdb_assert (BLOCK_HASHTABLE (b) == 0);
+      nsyms = BLOCK_NSYMS (b);
+
       ALL_BLOCK_SYMBOLS (b, i, sym)
         {
 	  QUIT;
Index: gdb/symmisc.c
===================================================================
RCS file: /cvs/src/src/gdb/symmisc.c,v
retrieving revision 1.6
diff -u -p -r1.6 symmisc.c
--- symmisc.c	2001/10/12 23:51:29	1.6
+++ symmisc.c	2001/10/30 15:41:17
@@ -86,11 +86,17 @@ static void
 free_symtab_block (struct objfile *objfile, struct block *b)
 {
   register int i, n;
-  n = BLOCK_NSYMS (b);
+  struct symbol *sym, *next_sym;
+
+  n = BLOCK_BUCKETS (b);
   for (i = 0; i < n; i++)
     {
-      mfree (objfile->md, SYMBOL_NAME (BLOCK_SYM (b, i)));
-      mfree (objfile->md, (PTR) BLOCK_SYM (b, i));
+      for (sym = BLOCK_BUCKET (b, i); sym; sym = next_sym)
+	{
+	  next_sym = sym->hash_next;
+	  mfree (objfile->md, SYMBOL_NAME (sym));
+	  mfree (objfile->md, (PTR) sym);
+	}
     }
   mfree (objfile->md, (PTR) b);
 }
@@ -455,8 +461,12 @@ dump_symtab (struct objfile *objfile, st
 	      fprintf_filtered (outfile, " under ");
 	      gdb_print_host_address (BLOCK_SUPERBLOCK (b), outfile);
 	    }
-	  blen = BLOCK_NSYMS (b);
-	  fprintf_filtered (outfile, ", %d syms in ", blen);
+	  /* FIXMED: Save total syms count even if hashed?  */
+	  blen = BLOCK_BUCKETS (b);
+	  if (BLOCK_HASHTABLE (b))
+	    fprintf_filtered (outfile, ", %d buckets in ", blen);
+	  else
+	    fprintf_filtered (outfile, ", %d syms in ", blen);
 	  print_address_numeric (BLOCK_START (b), 1, outfile);
 	  fprintf_filtered (outfile, "..");
 	  print_address_numeric (BLOCK_END (b), 1, outfile);
Index: gdb/symtab.c
===================================================================
RCS file: /cvs/src/src/gdb/symtab.c,v
retrieving revision 1.46
diff -u -p -r1.46 symtab.c
--- symtab.c	2001/10/17 07:12:57	1.46
+++ symtab.c	2001/10/30 15:41:17
@@ -1148,6 +1148,20 @@ lookup_block_symbol (register const stru
   register struct symbol *sym_found = NULL;
   register int do_linear_search = 1;
 
+  if (BLOCK_HASHTABLE (block))
+    {
+      unsigned int hash_index;
+      hash_index = msymbol_hash_iw (name);
+      hash_index = hash_index % BLOCK_BUCKETS (block);
+      for (sym = BLOCK_BUCKET (block, hash_index); sym; sym = sym->hash_next)
+	{
+	  if (SYMBOL_NAMESPACE (sym) == namespace 
+	      && SYMBOL_MATCHES_NAME (sym, name))
+	    return sym;
+	}
+      return NULL;
+    }
+
   /* If the blocks's symbols were sorted, start with a binary search.  */
 
   if (BLOCK_SHOULD_SORT (block))
@@ -1380,14 +1394,15 @@ find_pc_sect_symtab (CORE_ADDR pc, asect
 	if (section != 0)
 	  {
 	    int i;
+	    struct symbol *sym = NULL;
 
-	    for (i = 0; i < b->nsyms; i++)
+	    ALL_BLOCK_SYMBOLS (b, i, sym)
 	      {
-		fixup_symbol_section (b->sym[i], objfile);
-		if (section == SYMBOL_BFD_SECTION (b->sym[i]))
+		fixup_symbol_section (sym, objfile);
+		if (section == SYMBOL_BFD_SECTION (sym))
 		  break;
 	      }
-	    if (i >= b->nsyms)
+	    if ((i >= BLOCK_BUCKETS (b)) && (sym == NULL))
 	      continue;		/* no symbol in this symtab matches section */
 	  }
 	distance = BLOCK_END (b) - BLOCK_START (b);
@@ -2479,13 +2494,18 @@ search_symbols (char *regexp, namespace_
       for (i = GLOBAL_BLOCK; i <= STATIC_BLOCK; i++)
 	{
 	  b = BLOCKVECTOR_BLOCK (bv, i);
+	  /* FIXMED: The #if 0'd code seems *very* wrong. It'll sort blocks 
+	     that we specifically are supposed to avoid sorting.  */
+#if 0
+	  
 	  /* Skip the sort if this block is always sorted.  */
 	  if (!BLOCK_SHOULD_SORT (b))
 	    sort_block_syms (b);
-	  for (j = 0; j < BLOCK_NSYMS (b); j++)
+#endif
+	  ALL_BLOCK_SYMBOLS (b, j, sym)
 	    {
 	      QUIT;
-	      sym = BLOCK_SYM (b, j);
+	      /* FIXME: Hoist file_matches out of this loop.  */
 	      if (file_matches (s->filename, files, nfiles)
 		  && ((regexp == NULL || SYMBOL_MATCHES_REGEXP (sym))
 		      && ((kind == VARIABLES_NAMESPACE && SYMBOL_CLASS (sym) != LOC_TYPEDEF
@@ -2512,6 +2532,7 @@ search_symbols (char *regexp, namespace_
 		  tail = psr;
 		}
 	    }
+	  /* FIXMED: Sort the results?  */
 	}
     prev_bv = bv;
   }
Index: gdb/symtab.h
===================================================================
RCS file: /cvs/src/src/gdb/symtab.h,v
retrieving revision 1.25
diff -u -p -r1.25 symtab.h
--- symtab.h	2001/10/12 23:51:29	1.25
+++ symtab.h	2001/10/30 15:41:17
@@ -449,6 +449,10 @@ struct block
 
     unsigned char gcc_compile_flag;
 
+    /* If this is really a hashtable of the symbols, this flag is 1.  */
+
+    unsigned char hashtable;
+
     /* Number of local symbols.  */
 
     int nsyms;
@@ -461,26 +465,34 @@ struct block
 
 #define BLOCK_START(bl)		(bl)->startaddr
 #define BLOCK_END(bl)		(bl)->endaddr
-#define BLOCK_NSYMS(bl)		(bl)->nsyms
-#define BLOCK_SYM(bl, n)	(bl)->sym[n]
 #define BLOCK_FUNCTION(bl)	(bl)->function
 #define BLOCK_SUPERBLOCK(bl)	(bl)->superblock
 #define BLOCK_GCC_COMPILED(bl)	(bl)->gcc_compile_flag
+#define BLOCK_HASHTABLE(bl)     (bl)->hashtable
 
-/* Macro to loop through all symbols in a block BL.
-   i counts which symbol we are looking at, and sym points to the current
-   symbol.  */
-#define ALL_BLOCK_SYMBOLS(bl, i, sym)			\
-	for ((i) = 0, (sym) = BLOCK_SYM ((bl), (i));	\
-	     (i) < BLOCK_NSYMS ((bl));			\
-	     ++(i), (sym) = BLOCK_SYM ((bl), (i)))
+/* For BLOCK_HASHTABLE (bl) == 0 blocks only.  */
+#define BLOCK_NSYMS(bl)		(bl)->nsyms
+#define BLOCK_SYM(bl, n)	(bl)->sym[n]
 
+/* For BLOCK_HASHTABLE (bl) == 1 blocks, but these are valid
+   for non-hashed blocks as well - each symbol will appear to be
+   one bucket by itself.  */
+#define BLOCK_BUCKETS(bl)	(bl)->nsyms
+#define BLOCK_BUCKET(bl, n)	(bl)->sym[n]
+
+/* Macro to loop through all symbols in a block BL, in no particular order.
+   i counts which bucket we are in, and sym points to the current symbol.  */
+#define ALL_BLOCK_SYMBOLS(bl, i, sym)   \
+	for ((i) = 0; (i) < BLOCK_BUCKETS ((bl)); (i)++)        \
+	  for ((sym) = BLOCK_BUCKET ((bl), (i)); (sym);         \
+	       (sym) = (sym)->hash_next)
+
 /* Nonzero if symbols of block BL should be sorted alphabetically.
    Don't sort a block which corresponds to a function.  If we did the
    sorting would have to preserve the order of the symbols for the
    arguments.  */
 
-#define BLOCK_SHOULD_SORT(bl) ((bl)->nsyms >= 40 && BLOCK_FUNCTION (bl) == NULL)
+#define BLOCK_SHOULD_SORT(bl) (!BLOCK_HASHTABLE (bl) && BLOCK_FUNCTION (bl) == NULL)
 
 
 /* Represent one symbol name; a variable, constant, function or typedef.  */
@@ -730,6 +742,8 @@ struct symbol
     /* List of ranges where this symbol is active.  This is only
        used by alias symbols at the current time.  */
     struct range_list *ranges;
+
+    struct symbol *hash_next;
   };
 
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]