This is the mail archive of the
gdb@sourceware.org
mailing list for the GDB project.
Re: Note on choosing string hash functions
On 11/28/2017 03:00 PM, Pedro Alves wrote:
Anyway, this is just a brain dump from a little investigation I did this past
weekend, and I think that this area has a lot of scope for improvements, but
I won't be able to follow up on any of this myself in the next following
weeks at least, with several gdb 8.1 issues on my platе.
Just for the record, I have a brain dump around this area too :-). Instead of
optimizing htab_hash_string itself, we can try to reduce number of calls (and
drop a few calls to strcmp as well if lucky):
---
gdb/symtab.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/gdb/symtab.c b/gdb/symtab.c
index ebb7fbe1e0..c9dab4d575 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -673,6 +673,7 @@ symbol_set_language (struct general_symbol_info *gsymbol,
struct demangled_name_entry
{
const char *mangled;
+ hashval_t hashval;
char demangled[1];
};
@@ -684,7 +685,7 @@ hash_demangled_name_entry (const void *data)
const struct demangled_name_entry *e
= (const struct demangled_name_entry *) data;
- return htab_hash_string (e->mangled);
+ return e->hashval;
}
/* Equality function for the demangled name hash. */
@@ -697,7 +698,8 @@ eq_demangled_name_entry (const void *a, const void *b)
const struct demangled_name_entry *db
= (const struct demangled_name_entry *) b;
- return strcmp (da->mangled, db->mangled) == 0;
+ return (da->hashval != db->hashval) ? 0
+ : (strcmp (da->mangled, db->mangled) == 0);
}
/* Create the hash table used for demangled names. Each hash entry is
@@ -816,6 +818,8 @@ symbol_set_names (struct general_symbol_info *gsymbol,
linkage_name_copy = linkage_name;
entry.mangled = linkage_name_copy;
+ entry.hashval = htab_hash_string (linkage_name_copy);
+
slot = ((struct demangled_name_entry **)
htab_find_slot (per_bfd->demangled_names_hash,
&entry, INSERT));
@@ -848,6 +852,7 @@ symbol_set_names (struct general_symbol_info *gsymbol,
offsetof (struct demangled_name_entry, demangled)
+ demangled_len + 1));
(*slot)->mangled = linkage_name;
+ (*slot)->hashval = entry.hashval;
}
else
{
@@ -864,6 +869,7 @@ symbol_set_names (struct general_symbol_info *gsymbol,
mangled_ptr = &((*slot)->demangled[demangled_len + 1]);
strcpy (mangled_ptr, linkage_name_copy);
(*slot)->mangled = mangled_ptr;
+ (*slot)->hashval = entry.hashval;
}
if (demangled_name != NULL)
--
This immediately moves htab_hash_string down in reports, from:
9.67% gdb gdb [.] find_pc_sect_psymtab
6.62% gdb gdb [.] bcache_full
5.41% gdb libc-2.25.so [.] tolower
4.44% gdb gdb [.] htab_hash_string ; 4th place
4.14% gdb gdb [.] htab_find_slot_with_hash
3.86% gdb gdb [.] load_partial_dies
3.56% gdb gdb [.] strcmp_iw_ordered
3.48% gdb gdb [.] read_indirect_string_at_offset_from
3.26% gdb gdb [.] lookup_minimal_symbol_by_pc_name
3.12% gdb gdb [.] cpname_parse
2.77% gdb gdb [.] read_attribute_value
2.72% gdb gdb [.] cpname_lex
2.71% gdb libc-2.25.so [.] __strcmp_sse2_unaligned
2.49% gdb gdb [.] peek_die_abbrev
2.30% gdb libc-2.25.so [.] _int_malloc
2.16% gdb gdb [.] d_print_comp_inner
to:
9.78% gdb gdb [.] find_pc_sect_psymtab
6.63% gdb gdb [.] bcache_full
5.00% gdb libc-2.25.so [.] tolower
4.20% gdb gdb [.] htab_find_slot_with_hash
3.79% gdb gdb [.] load_partial_dies
3.68% gdb gdb [.] lookup_minimal_symbol_by_pc_name
3.54% gdb gdb [.] read_indirect_string_at_offset_from
3.47% gdb gdb [.] strcmp_iw_ordered
3.39% gdb gdb [.] cpname_parse
2.71% gdb gdb [.] read_attribute_value
2.53% gdb gdb [.] cpname_lex
2.46% gdb gdb [.] peek_die_abbrev
2.43% gdb gdb [.] d_print_comp_inner
2.43% gdb libc-2.25.so [.] _int_malloc
2.38% gdb gdb [.] htab_hash_string ; 15th place
at the cost of having per_bfd->storage_obstack ~3.7% larger for libxul.so with ~2.3M symbols.
Dmitry