This is the mail archive of the gdb@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Note on choosing string hash functions


On 11/28/2017 03:00 PM, Pedro Alves wrote:

Anyway, this is just a brain dump from a little investigation I did this past
weekend, and I think that this area has a lot of scope for improvements, but
I won't be able to follow up on any of this myself in the next following
weeks at least, with several gdb 8.1 issues on my platе.

Just for the record, I have a brain dump around this area too :-). Instead of
optimizing htab_hash_string itself, we can try to reduce number of calls (and
drop a few calls to strcmp as well if lucky):

---
 gdb/symtab.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/gdb/symtab.c b/gdb/symtab.c
index ebb7fbe1e0..c9dab4d575 100644
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -673,6 +673,7 @@ symbol_set_language (struct general_symbol_info *gsymbol,
 struct demangled_name_entry
 {
   const char *mangled;
+  hashval_t hashval;
   char demangled[1];
 };

@@ -684,7 +685,7 @@ hash_demangled_name_entry (const void *data)
   const struct demangled_name_entry *e
     = (const struct demangled_name_entry *) data;

-  return htab_hash_string (e->mangled);
+  return e->hashval;
 }

 /* Equality function for the demangled name hash.  */
@@ -697,7 +698,8 @@ eq_demangled_name_entry (const void *a, const void *b)
   const struct demangled_name_entry *db
     = (const struct demangled_name_entry *) b;

-  return strcmp (da->mangled, db->mangled) == 0;
+  return (da->hashval != db->hashval) ? 0
+    : (strcmp (da->mangled, db->mangled) == 0);
 }

 /* Create the hash table used for demangled names.  Each hash entry is
@@ -816,6 +818,8 @@ symbol_set_names (struct general_symbol_info *gsymbol,
     linkage_name_copy = linkage_name;

   entry.mangled = linkage_name_copy;
+  entry.hashval = htab_hash_string (linkage_name_copy);
+
   slot = ((struct demangled_name_entry **)
          htab_find_slot (per_bfd->demangled_names_hash,
                          &entry, INSERT));
@@ -848,6 +852,7 @@ symbol_set_names (struct general_symbol_info *gsymbol,
                              offsetof (struct demangled_name_entry, demangled)
                              + demangled_len + 1));
          (*slot)->mangled = linkage_name;
+         (*slot)->hashval = entry.hashval;
        }
       else
        {
@@ -864,6 +869,7 @@ symbol_set_names (struct general_symbol_info *gsymbol,
          mangled_ptr = &((*slot)->demangled[demangled_len + 1]);
          strcpy (mangled_ptr, linkage_name_copy);
          (*slot)->mangled = mangled_ptr;
+         (*slot)->hashval = entry.hashval;
        }

       if (demangled_name != NULL)
--

This immediately moves htab_hash_string down in reports, from:

     9.67%  gdb      gdb                       [.] find_pc_sect_psymtab
     6.62%  gdb      gdb                       [.] bcache_full
     5.41%  gdb      libc-2.25.so              [.] tolower
     4.44%  gdb      gdb                       [.] htab_hash_string                    ; 4th place
     4.14%  gdb      gdb                       [.] htab_find_slot_with_hash
     3.86%  gdb      gdb                       [.] load_partial_dies
     3.56%  gdb      gdb                       [.] strcmp_iw_ordered
     3.48%  gdb      gdb                       [.] read_indirect_string_at_offset_from
     3.26%  gdb      gdb                       [.] lookup_minimal_symbol_by_pc_name
     3.12%  gdb      gdb                       [.] cpname_parse
     2.77%  gdb      gdb                       [.] read_attribute_value
     2.72%  gdb      gdb                       [.] cpname_lex
     2.71%  gdb      libc-2.25.so              [.] __strcmp_sse2_unaligned
     2.49%  gdb      gdb                       [.] peek_die_abbrev
     2.30%  gdb      libc-2.25.so              [.] _int_malloc
     2.16%  gdb      gdb                       [.] d_print_comp_inner

to:

     9.78%  gdb      gdb                       [.] find_pc_sect_psymtab
     6.63%  gdb      gdb                       [.] bcache_full
     5.00%  gdb      libc-2.25.so              [.] tolower
     4.20%  gdb      gdb                       [.] htab_find_slot_with_hash
     3.79%  gdb      gdb                       [.] load_partial_dies
     3.68%  gdb      gdb                       [.] lookup_minimal_symbol_by_pc_name
     3.54%  gdb      gdb                       [.] read_indirect_string_at_offset_from
     3.47%  gdb      gdb                       [.] strcmp_iw_ordered
     3.39%  gdb      gdb                       [.] cpname_parse
     2.71%  gdb      gdb                       [.] read_attribute_value
     2.53%  gdb      gdb                       [.] cpname_lex
     2.46%  gdb      gdb                       [.] peek_die_abbrev
     2.43%  gdb      gdb                       [.] d_print_comp_inner
     2.43%  gdb      libc-2.25.so              [.] _int_malloc
     2.38%  gdb      gdb                       [.] htab_hash_string                    ; 15th place

at the cost of having per_bfd->storage_obstack ~3.7% larger for libxul.so with ~2.3M symbols.

Dmitry


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]