This is the mail archive of the gdb-patches@sourceware.cygnus.com mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[RFA] Reduce memory usage for dwarf2 dramatically, unify hashfunctions


This patch dramatically reduces memory usage when using DWARF2. It
should also make debugging with DWARF2 quicker because of what it
is doing. Before, we would allocate a new type every time we saw a type,
or close to it.
With this patch, we cache the last 4096 types we saw by the hash of their
name, and use that, rather than allocating a new type every time. (The
4096 is controlled by a define; it is a completely unscientific number,
and you could probably do with a smaller one, but it won't make much
difference, seeing as how it's 16k worth of pointers.)
The results are dramatic.
Just on GDB, compiled with dwarf2, for instance, we go from 123,739 types
to 87,004, roughly a 30% reduction. We also drop from needing 60 meg, with
everything read in (maint check symtabs was used to force readin), to 46
meg.

Since there are now really only "distinct" symbols, and "distinct" types,
rather than tons of duplicates, things should go a bit faster as well in
other lookups.

In doing all of this, I also changed the hash function in the bcache to a
much better hash function (the old one was stolen from SDBM; this one uses
what db 3.0.55 uses. The commentary in the db 3.0.55 source says it's just
as good on strings, and much better on numbers), made the hash function
available through bcache.h, and changed buildsym.c's hashname to use it as
well.

I was going to change the minsym hash to use the hash function as well,
but haven't finished regression tests on that change, so I left it out.

I'm just starting to clear out my backlog of patches; expect more memory
reductions for dwarf2 soon.
--Dan

Index: ChangeLog
===================================================================
RCS file: /cvs/src/src/gdb/ChangeLog,v
retrieving revision 1.420
diff -c -3 -p -w -B -b -r1.420 ChangeLog
*** ChangeLog	2000/05/30 08:52:57	1.420
--- ChangeLog	2000/05/31 00:22:24
***************
*** 1,3 ****
--- 1,16 ----
+ 2000-05-30  Daniel Berlin  <dan@cgsoftware.com>
+ 
+ 	* buildsym.c (hashname): Change to use hash function from bcache.c/.h
+ 
+ 	* bcache.c (hash): Change to newer hash function.
+ 
+ 	* bcache.h (hash): Prototype for hash function
+ 
+ 	* dwarf2read.c (TYPE_HASH_SIZE): New define for controlling size
+ 	of type hash.
+ 	(dwarf2_cached_types): New variable that is the cached types.
+ 	(tag_type_to_type): Do the actual caching of types here.
+ 
  Tue May 30 18:48:33 2000  Andrew Cagney  <cagney@b1.cygnus.com>
  
  	* TODO: Re-organize.
Index: buildsym.c
===================================================================
RCS file: /cvs/src/src/gdb/buildsym.c,v
retrieving revision 1.4
diff -c -3 -p -w -B -b -r1.4 buildsym.c
*** buildsym.c	2000/02/21 03:04:19	1.4
--- buildsym.c	2000/05/31 00:22:25
***************
*** 36,42 ****
  #include "gdb_string.h"
  #include "expression.h"		/* For "enum exp_opcode" used by... */
  #include "language.h"		/* For "longest_local_hex_string_custom" */
! 
  /* Ask buildsym.h to define the vars it normally declares `extern'.  */
  #define	EXTERN
  /**/
--- 36,42 ----
  #include "gdb_string.h"
  #include "expression.h"		/* For "enum exp_opcode" used by... */
  #include "language.h"		/* For "longest_local_hex_string_custom" */
! #include "bcache.h"
  /* Ask buildsym.h to define the vars it normally declares `extern'.  */
  #define	EXTERN
  /**/
*************** push_context (int desc, CORE_ADDR valu)
*** 1059,1087 ****
  
  int
  hashname (char *name)
- {
-   register char *p = name;
-   register int total = p[0];
-   register int c;
- 
-   c = p[1];
-   total += c << 2;
-   if (c)
-     {
-       c = p[2];
-       total += c << 4;
-       if (c)
  	{
! 	  total += p[3] << 6;
! 	}
!     }
! 
!   /* Ensure result is positive.  */
!   if (total < 0)
!     {
!       total += (1000 << 6);
!     }
!   return (total % HASHSIZE);
  }
  
  
--- 1060,1067 ----
  
  int
  hashname (char *name)
  {
!     return (hash(name,strlen(name)) % HASHSIZE);
  }
  
  
Index: bcache.c
===================================================================
RCS file: /cvs/src/src/gdb/bcache.c,v
retrieving revision 1.3
diff -c -3 -p -w -B -b -r1.3 bcache.c
*** bcache.c	2000/04/19 07:08:35	1.3
--- bcache.c	2000/05/31 00:22:26
***************
*** 28,69 ****
  #include "bcache.h"
  #include "gdb_string.h"		/* For memcpy declaration */
  
  
- 
- /* The hash function.  */
- 
  unsigned long
  hash (void *addr, int length)
- {
-   /* If it's a short string, hash on every character.  Otherwise, sample
-      characters from throughout the string.  */
-   if (length <= 64)
-     {
-       char *byte = addr;
-       unsigned long h = 0;
-       int i;
- 
-       for (i = 0; i < length; i++)
- 	h = h * 65793 ^ (h >> (sizeof (h) * 8 - 6)) ^ byte[i];
- 
-       return h;
-     }
-   else
      {
!       char *byte = addr;
!       int n, i;
!       unsigned long h = 0;
  
!       for (n = i = 0; n < 64; n++)
  	{
! 	  h = h * 65793 + (h >> (sizeof (h) * 8 - 6)) + byte[i];
! 	  i = h % length;
  	}
! 
!       return h;
      }
- }
- 
  
  /* Growing the bcache's hash table.  */
  
--- 28,52 ----
  #include "bcache.h"
  #include "gdb_string.h"		/* For memcpy declaration */
  
+ /* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
+  * and is better than the old one. 
+  */
  
  unsigned long
  hash(void *addr, int length)
  {
! 		const unsigned char *k, *e;
! 		unsigned long h;
  		
! 		k = (const unsigned char *)addr;
! 		e = k+length;
! 		for (h=0; k< e;++k)
  		{
! 				h *=16777619;
! 				h ^= *k;
  		}
! 		return (h);
  }
  
  /* Growing the bcache's hash table.  */
  
Index: bcache.h
===================================================================
RCS file: /cvs/src/src/gdb/bcache.h,v
retrieving revision 1.2
diff -c -3 -p -w -B -b -r1.2 bcache.h
*** bcache.h	2000/02/08 04:39:01	1.2
--- bcache.h	2000/05/31 00:22:26
*************** extern void free_bcache (struct bcache *
*** 125,129 ****
     kind of data BCACHE holds.  Statistics are printed using
     `printf_filtered' and its ilk.  */
  extern void print_bcache_statistics (struct bcache *bcache, char *type);
! 
  #endif /* BCACHE_H */
--- 125,130 ----
     kind of data BCACHE holds.  Statistics are printed using
     `printf_filtered' and its ilk.  */
  extern void print_bcache_statistics (struct bcache *bcache, char *type);
! /* The hash function */
! extern unsigned long hash(void *addr, int length);
  #endif /* BCACHE_H */
Index: dwarf2read.c
===================================================================
RCS file: /cvs/src/src/gdb/dwarf2read.c,v
retrieving revision 1.8
diff -c -3 -p -w -B -b -r1.8 dwarf2read.c
*** dwarf2read.c	2000/05/28 01:12:26	1.8
--- dwarf2read.c	2000/05/31 00:22:31
***************
*** 35,43 ****
  #include "buildsym.h"
  #include "demangle.h"
  #include "expression.h"
  #include "language.h"
  #include "complaints.h"
! 
  #include <fcntl.h>
  #include "gdb_string.h"
  #include <sys/types.h>
--- 35,44 ----
  #include "buildsym.h"
  #include "demangle.h"
  #include "expression.h"
+ 
  #include "language.h"
  #include "complaints.h"
! #include "bcache.h"
  #include <fcntl.h>
  #include "gdb_string.h"
  #include <sys/types.h>
*************** static struct abbrev_info *dwarf2_abbrev
*** 267,272 ****
--- 268,278 ----
  
  static struct die_info *die_ref_table[REF_HASH_SIZE];
  
+ #ifndef TYPE_HASH_SIZE
+ #define TYPE_HASH_SIZE 4096
+ #endif
+ static struct type *dwarf2_cached_types[TYPE_HASH_SIZE];
+ 
  /* Obstack for allocating temporary storage used during symbol reading.  */
  static struct obstack dwarf2_tmp_obstack;
  
*************** tag_type_to_type (die, objfile)
*** 4449,4455 ****
--- 4455,4492 ----
      }
    else
      {
+       struct attribute *attr;
+       attr = dwarf_attr (die, DW_AT_name);
+       if (attr && DW_STRING (attr))
+ 	{
+ 	  char *attrname=DW_STRING (attr);
+ 	  unsigned long hashval=hash(attrname, strlen(attrname)) % TYPE_HASH_SIZE;
+ 
+ 	  if (dwarf2_cached_types[hashval] != NULL)
+ 	    {
+ 	      const char *nameoftype;
+ 	      nameoftype = TYPE_NAME(dwarf2_cached_types[hashval]) == NULL ? TYPE_TAG_NAME(dwarf2_cached_types[hashval]) : TYPE_NAME(dwarf2_cached_types[hashval]);
+ 	      if (strcmp(attrname, nameoftype) == 0)
+ 		{
+ 		  die->type=dwarf2_cached_types[hashval];
+ 		}
+ 	      else
+ 		{
  		  read_type_die (die, objfile);
+ 		  dwarf2_cached_types[hashval] = die->type;
+ 		}
+ 	    }
+ 	  else
+ 	    {
+ 	      read_type_die (die, objfile);
+ 	      dwarf2_cached_types[hashval] = die->type;
+ 	    }
+ 	}
+       else
+ 	{
+ 	  read_type_die (die, objfile);
+ 	}
+ 
        if (!die->type)
  	{
  	  dump_die (die);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]