This is the mail archive of the
gdb-patches@sourceware.cygnus.com
mailing list for the GDB project.
[RFA] Reduce memory usage for dwarf2 dramatically, unify hashfunctions
- To: gdb-patches at sourceware dot cygnus dot com
- Subject: [RFA] Reduce memory usage for dwarf2 dramatically, unify hashfunctions
- From: Daniel Berlin <dan at cgsoftware dot com>
- Date: Tue, 30 May 2000 17:23:42 -0700 (PDT)
This patch dramatically reduces memory usage when using DWARF2. It
should also make debugging with DWARF2 quicker because of what it
is doing. Before, we would allocate a new type every time we saw a type,
or close to it.
With this patch, we cache the last 4096 types we saw, keyed by the hash of
their name, and reuse the cached type rather than allocating a new type
every time. (The size is controlled by a define; 4096 is a completely
unscientific number. You could probably do with a smaller one, but it won't
make much difference, seeing as how it's 16k worth of pointers.)
The results are dramatic.
Just on GDB, compiled with dwarf2, for instance, we go from 123,739 types
to 87,004, roughly a 30% reduction. We also drop from needing 60 meg, with
everything read in (maint check symtabs was used to force readin), to 46
meg.
Since there are now really only "distinct" symbols, and "distinct" types,
rather than tons of duplicates, things should go a bit faster as well in
other lookups.
In doing all of this, I also changed the hash function in the bcache to a
much better hash function (the old one was stolen from SDBM; this one is
what db 3.0.55 uses, and the commentary in the db 3.0.55 source says it's
just as good on strings, and much better on numbers), made the hash function
available through bcache.h, and changed buildsym.c's hashname to use it as
well.
I was going to change the minsym hash to use the hash function as well,
but haven't finished regression tests on that change, so I left it out.
I'm just starting to clear out my backlog of patches; expect more memory
reductions for dwarf2 soon.
--Dan
Index: ChangeLog
===================================================================
RCS file: /cvs/src/src/gdb/ChangeLog,v
retrieving revision 1.420
diff -c -3 -p -w -B -b -r1.420 ChangeLog
*** ChangeLog 2000/05/30 08:52:57 1.420
--- ChangeLog 2000/05/31 00:22:24
***************
*** 1,3 ****
--- 1,16 ----
+ 2000-05-30 Daniel Berlin <dan@cgsoftware.com>
+
+ * buildsym.c (hashname): Change to use hash function from bcache.c/.h
+
+ * bcache.c (hash): Change to newer hash function.
+
+ * bcache.h (hash): Prototype for hash function
+
+ * dwarf2read.c (TYPE_HASH_SIZE): New define for controlling size
+ of type hash.
+ (dwarf2_cached_types): New variable that is the cached types.
+ (tag_type_to_type): Do the actual caching of types here.
+
Tue May 30 18:48:33 2000 Andrew Cagney <cagney@b1.cygnus.com>
* TODO: Re-organize.
Index: buildsym.c
===================================================================
RCS file: /cvs/src/src/gdb/buildsym.c,v
retrieving revision 1.4
diff -c -3 -p -w -B -b -r1.4 buildsym.c
*** buildsym.c 2000/02/21 03:04:19 1.4
--- buildsym.c 2000/05/31 00:22:25
***************
*** 36,42 ****
#include "gdb_string.h"
#include "expression.h" /* For "enum exp_opcode" used by... */
#include "language.h" /* For "longest_local_hex_string_custom" */
!
/* Ask buildsym.h to define the vars it normally declares `extern'. */
#define EXTERN
/**/
--- 36,42 ----
#include "gdb_string.h"
#include "expression.h" /* For "enum exp_opcode" used by... */
#include "language.h" /* For "longest_local_hex_string_custom" */
! #include "bcache.h"
/* Ask buildsym.h to define the vars it normally declares `extern'. */
#define EXTERN
/**/
*************** push_context (int desc, CORE_ADDR valu)
*** 1059,1087 ****
int
hashname (char *name)
- {
- register char *p = name;
- register int total = p[0];
- register int c;
-
- c = p[1];
- total += c << 2;
- if (c)
- {
- c = p[2];
- total += c << 4;
- if (c)
{
! total += p[3] << 6;
! }
! }
!
! /* Ensure result is positive. */
! if (total < 0)
! {
! total += (1000 << 6);
! }
! return (total % HASHSIZE);
}
--- 1060,1067 ----
int
hashname (char *name)
{
! return (hash(name,strlen(name)) % HASHSIZE);
}
Index: bcache.c
===================================================================
RCS file: /cvs/src/src/gdb/bcache.c,v
retrieving revision 1.3
diff -c -3 -p -w -B -b -r1.3 bcache.c
*** bcache.c 2000/04/19 07:08:35 1.3
--- bcache.c 2000/05/31 00:22:26
***************
*** 28,69 ****
#include "bcache.h"
#include "gdb_string.h" /* For memcpy declaration */
-
- /* The hash function. */
-
unsigned long
hash (void *addr, int length)
- {
- /* If it's a short string, hash on every character. Otherwise, sample
- characters from throughout the string. */
- if (length <= 64)
- {
- char *byte = addr;
- unsigned long h = 0;
- int i;
-
- for (i = 0; i < length; i++)
- h = h * 65793 ^ (h >> (sizeof (h) * 8 - 6)) ^ byte[i];
-
- return h;
- }
- else
{
! char *byte = addr;
! int n, i;
! unsigned long h = 0;
! for (n = i = 0; n < 64; n++)
{
! h = h * 65793 + (h >> (sizeof (h) * 8 - 6)) + byte[i];
! i = h % length;
}
!
! return h;
}
- }
-
/* Growing the bcache's hash table. */
--- 28,52 ----
#include "bcache.h"
#include "gdb_string.h" /* For memcpy declaration */
+ /* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
+ * and is better than the old one.
+ */
unsigned long
hash(void *addr, int length)
{
! const unsigned char *k, *e;
! unsigned long h;
! k = (const unsigned char *)addr;
! e = k+length;
! for (h=0; k< e;++k)
{
! h *=16777619;
! h ^= *k;
}
! return (h);
}
/* Growing the bcache's hash table. */
Index: bcache.h
===================================================================
RCS file: /cvs/src/src/gdb/bcache.h,v
retrieving revision 1.2
diff -c -3 -p -w -B -b -r1.2 bcache.h
*** bcache.h 2000/02/08 04:39:01 1.2
--- bcache.h 2000/05/31 00:22:26
*************** extern void free_bcache (struct bcache *
*** 125,129 ****
kind of data BCACHE holds. Statistics are printed using
`printf_filtered' and its ilk. */
extern void print_bcache_statistics (struct bcache *bcache, char *type);
!
#endif /* BCACHE_H */
--- 125,130 ----
kind of data BCACHE holds. Statistics are printed using
`printf_filtered' and its ilk. */
extern void print_bcache_statistics (struct bcache *bcache, char *type);
! /* The hash function */
! extern unsigned long hash(void *addr, int length);
#endif /* BCACHE_H */
Index: dwarf2read.c
===================================================================
RCS file: /cvs/src/src/gdb/dwarf2read.c,v
retrieving revision 1.8
diff -c -3 -p -w -B -b -r1.8 dwarf2read.c
*** dwarf2read.c 2000/05/28 01:12:26 1.8
--- dwarf2read.c 2000/05/31 00:22:31
***************
*** 35,43 ****
#include "buildsym.h"
#include "demangle.h"
#include "expression.h"
#include "language.h"
#include "complaints.h"
!
#include <fcntl.h>
#include "gdb_string.h"
#include <sys/types.h>
--- 35,44 ----
#include "buildsym.h"
#include "demangle.h"
#include "expression.h"
+
#include "language.h"
#include "complaints.h"
! #include "bcache.h"
#include <fcntl.h>
#include "gdb_string.h"
#include <sys/types.h>
*************** static struct abbrev_info *dwarf2_abbrev
*** 267,272 ****
--- 268,278 ----
static struct die_info *die_ref_table[REF_HASH_SIZE];
+ #ifndef TYPE_HASH_SIZE
+ #define TYPE_HASH_SIZE 4096
+ #endif
+ static struct type *dwarf2_cached_types[TYPE_HASH_SIZE];
+
/* Obstack for allocating temporary storage used during symbol reading. */
static struct obstack dwarf2_tmp_obstack;
*************** tag_type_to_type (die, objfile)
*** 4449,4455 ****
--- 4455,4492 ----
}
else
{
+ struct attribute *attr;
+ attr = dwarf_attr (die, DW_AT_name);
+ if (attr && DW_STRING (attr))
+ {
+ char *attrname=DW_STRING (attr);
+ unsigned long hashval=hash(attrname, strlen(attrname)) % TYPE_HASH_SIZE;
+
+ if (dwarf2_cached_types[hashval] != NULL)
+ {
+ const char *nameoftype;
+ nameoftype = TYPE_NAME(dwarf2_cached_types[hashval]) == NULL ? TYPE_TAG_NAME(dwarf2_cached_types[hashval]) : TYPE_NAME(dwarf2_cached_types[hashval]);
+ if (strcmp(attrname, nameoftype) == 0)
+ {
+ die->type=dwarf2_cached_types[hashval];
+ }
+ else
+ {
read_type_die (die, objfile);
+ dwarf2_cached_types[hashval] = die->type;
+ }
+ }
+ else
+ {
+ read_type_die (die, objfile);
+ dwarf2_cached_types[hashval] = die->type;
+ }
+ }
+ else
+ {
+ read_type_die (die, objfile);
+ }
+
if (!die->type)
{
dump_die (die);