From bae885367490e78c95719104b4f9bbbe9901930a Mon Sep 17 00:00:00 2001 From: Igor Tsimbalist Date: Mon, 17 Oct 2016 17:20:23 +0700 Subject: [PATCH] Speedup objdump in processing of source line information. The issue can show up on an object file with a lot of functions and a lot of functions are inlined. Each function has 1) a very big linked list of address ranges and 2) a code sequence has a very big linked list of lines associated with the code. When objdump needs to comment a line of code with source line information (-Sdl options) it has to traverse these big linked lists each time (time consuming functions are functions lookup_address_in_function_table and lookup_address_in_line_info_table). The issue is resolved by introducing a lookup tables for functions and its address ranges and a lookup table for line info. The lists traverse is replaced with a binary search in the lookup tables. The processing time reduced from hours to several minutes. bfd/ * dwarf2.c (comp_unit): Add new fields 'lookup_funcinfo_table' and 'number_of_functions' to keep lookup table and number of entries in the table. (line_sequence): Add new fields 'line_info_lookup' and 'num_lines' to keep lookup table and number of entries in the table. (lookup_funcinfo): New structure for lookup table for function references. (build_line_info_table): New function to create and build the lookup table for line information. (lookup_address_in_line_info_table): Use the lookup table instead of traverse a linked list. (compare_lookup_funcinfos): New compare fuction used in sorting of lookup table for function references. (build_lookup_funcinfo_table): New function to create, build and sort the lookup table for functions references. (lookup_address_in_function_table): Use the table instead of traverse a linked list. (_bfd_dwarf2_cleanup_debug_info): Free memory from function references lookup table. --- bfd/dwarf2.c | 286 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 249 insertions(+), 37 deletions(-) diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c index 51f3de1..488d85d 100644 --- a/bfd/dwarf2.c +++ b/bfd/dwarf2.c @@ -256,6 +256,12 @@ struct comp_unit /* A list of the functions found in this comp. unit. */ struct funcinfo *function_table; + /* A table of function information references searchable by address. */ + struct lookup_funcinfo *lookup_funcinfo_table; + + /* Number of functions in the function_table and sorted_function_table. */ + bfd_size_type number_of_functions; + /* A list of the variables found in this comp. unit. */ struct varinfo *variable_table; @@ -1236,6 +1242,8 @@ struct line_sequence bfd_vma low_pc; struct line_sequence* prev_sequence; struct line_info* last_line; /* Largest VMA. */ + struct line_info** line_info_lookup; + bfd_size_type num_lines; }; struct line_info_table @@ -1278,6 +1286,20 @@ struct funcinfo asection *sec; }; +struct lookup_funcinfo +{ + /* Function information corresponding to this lookup table entry. */ + struct funcinfo *funcinfo; + + /* The lowest address for this specific function. */ + bfd_vma low_addr; + + /* The highest address of this function before the lookup table is sorted. + The highest address of all prior functions after the lookup table is sorted, + which is used for binary search. */ + bfd_vma high_addr; +}; + struct varinfo { /* Pointer to previous variable in list of all variables */ @@ -1578,6 +1600,49 @@ compare_sequences (const void* a, const void* b) return 0; } +/* Construct the line information table for quick lookup. */ + +static bfd_boolean +build_line_info_table (struct line_info_table* table, struct line_sequence *seq) +{ + bfd_size_type amt; + struct line_info** line_info_lookup; + struct line_info* each_line; + unsigned int num_lines; + unsigned int index; + + if (seq->line_info_lookup != NULL) + return TRUE; + + /* Count the number of line information entries. We could do this while + scanning the debug information, but some entries may be added via lcl_head + without having a sequence handy to increment the number of lines. */ + num_lines = 0; + for (each_line = seq->last_line; each_line; each_line = each_line->prev_line) + num_lines++; + + if (num_lines == 0) + return TRUE; + + /* Allocate space for the line information lookup table. */ + amt = sizeof (struct line_info*) * num_lines; + line_info_lookup = (struct line_info**) bfd_alloc (table->abfd, amt); + if (line_info_lookup == NULL) + return FALSE; + + /* Create the line information lookup table. */ + index = num_lines; + for (each_line = seq->last_line; each_line; each_line = each_line->prev_line) + line_info_lookup[--index] = each_line; + + BFD_ASSERT (index == 0); + + seq->num_lines = num_lines; + seq->line_info_lookup = line_info_lookup; + + return TRUE; +} + /* Sort the line sequences for quick lookup. */ static bfd_boolean @@ -1609,6 +1674,8 @@ sort_line_sequences (struct line_info_table* table) sequences[n].low_pc = seq->low_pc; sequences[n].prev_sequence = NULL; sequences[n].last_line = seq->last_line; + sequences[n].line_info_lookup = NULL; + sequences[n].num_lines = 0; seq = seq->prev_sequence; free (last_seq); } @@ -2089,7 +2156,7 @@ lookup_address_in_line_info_table (struct line_info_table *table, unsigned int *discriminator_ptr) { struct line_sequence *seq = NULL; - struct line_info *each_line; + struct line_info *info; int low, high, mid; /* Binary search the array of sequences. */ @@ -2107,26 +2174,43 @@ lookup_address_in_line_info_table (struct line_info_table *table, break; } - if (seq && addr >= seq->low_pc && addr < seq->last_line->address) + /* Check for a valid sequence. */ + if (!seq || addr < seq->low_pc || addr >= seq->last_line->address) + goto fail; + + if (!build_line_info_table (table, seq)) + goto fail; + + /* Binary search the array of line information. */ + low = 0; + high = seq->num_lines; + info = NULL; + while (low < high) { - /* Note: seq->last_line should be a descendingly sorted list. */ - for (each_line = seq->last_line; - each_line; - each_line = each_line->prev_line) - if (addr >= each_line->address) - break; + mid = (low + high) / 2; + info = seq->line_info_lookup[mid]; + if (addr < info->address) + high = mid; + else if (addr >= seq->line_info_lookup[mid + 1]->address) + low = mid + 1; + else + break; + } - if (each_line - && !(each_line->end_sequence || each_line == seq->last_line)) - { - *filename_ptr = each_line->filename; - *linenumber_ptr = each_line->line; - if (discriminator_ptr) - *discriminator_ptr = each_line->discriminator; - return seq->last_line->address - seq->low_pc; - } + /* Check for a valid line information entry. */ + if (info + && addr >= info->address + && addr < seq->line_info_lookup[mid + 1]->address + && !(info->end_sequence || info == seq->last_line)) + { + *filename_ptr = info->filename; + *linenumber_ptr = info->line; + if (discriminator_ptr) + *discriminator_ptr = info->discriminator; + return seq->last_line->address - seq->low_pc; } +fail: *filename_ptr = NULL; return 0; } @@ -2144,6 +2228,93 @@ read_debug_ranges (struct comp_unit *unit) /* Function table functions. */ +static int +compare_lookup_funcinfos (const void* a, const void* b) +{ + const struct lookup_funcinfo *lookup1 = a; + const struct lookup_funcinfo *lookup2 = b; + int result; + + if (lookup1->low_addr < lookup2->low_addr) + result = -1; + else if (lookup1->low_addr > lookup2->low_addr) + result = 1; + else if (lookup1->high_addr < lookup2->high_addr) + result = -1; + else if (lookup1->high_addr > lookup1->high_addr) + result = 1; + else + result = 0; + + return result; +} + +static bfd_boolean +build_lookup_funcinfo_table (struct comp_unit *unit) +{ + struct lookup_funcinfo *lookup_funcinfo_table = unit->lookup_funcinfo_table; + unsigned int number_of_functions = unit->number_of_functions; + struct funcinfo *each; + struct lookup_funcinfo *entry; + size_t index; + struct arange *range; + bfd_vma low_addr, high_addr; + + if (lookup_funcinfo_table || number_of_functions == 0) + return TRUE; + + /* Create the function info lookup table. */ + lookup_funcinfo_table = (struct lookup_funcinfo *) + bfd_malloc (number_of_functions * sizeof (struct lookup_funcinfo)); + if (lookup_funcinfo_table == NULL) + return FALSE; + + /* Populate the function info lookup table. */ + index = number_of_functions; + for (each = unit->function_table; each; each = each->prev_func) + { + entry = &lookup_funcinfo_table[--index]; + entry->funcinfo = each; + + /* Calculate the lowest and highest address for this function entry. */ + low_addr = entry->funcinfo->arange.low; + high_addr = entry->funcinfo->arange.high; + + for (range = entry->funcinfo->arange.next; range; range = range->next) + { + if (range->low < low_addr) + low_addr = range->low; + if (range->high > high_addr) + high_addr = range->high; + } + + entry->low_addr = low_addr; + entry->high_addr = high_addr; + } + + BFD_ASSERT (index == 0); + + /* Sort the function by address. */ + qsort (lookup_funcinfo_table, + number_of_functions, + sizeof (struct lookup_funcinfo), + compare_lookup_funcinfos); + + /* Calculate the high watermark for each function in the lookup table. */ + high_addr = lookup_funcinfo_table[0].high_addr; + for (index = 1; index < number_of_functions; index++) + { + entry = &lookup_funcinfo_table[index]; + if (entry->high_addr > high_addr) + high_addr = entry->high_addr; + else + entry->high_addr = high_addr; + } + + unit->lookup_funcinfo_table = lookup_funcinfo_table; + return TRUE; +} + /* If ADDR is within UNIT's function tables, set FUNCTION_PTR, and return TRUE. Note that we need to find the function that has the smallest range that contains ADDR, to handle inlined functions without depending upon @@ -2154,37 +2325,71 @@ lookup_address_in_function_table (struct comp_unit *unit, bfd_vma addr, struct funcinfo **function_ptr) { - struct funcinfo* each_func; + unsigned int number_of_functions = unit->number_of_functions; + struct lookup_funcinfo* lookup_funcinfo = NULL; + struct funcinfo* funcinfo = NULL; struct funcinfo* best_fit = NULL; bfd_vma best_fit_len = 0; + bfd_size_type low, high, mid, first; struct arange *arange; - for (each_func = unit->function_table; - each_func; - each_func = each_func->prev_func) + if (!build_lookup_funcinfo_table (unit)) + return FALSE; + + /* Find the first function in the lookup table which may contain the + specified address. */ + low = 0; + high = number_of_functions; + first = high; + while (low < high) { - for (arange = &each_func->arange; - arange; - arange = arange->next) + mid = (low + high) / 2; + lookup_funcinfo = &unit->lookup_funcinfo_table[mid]; + if (addr < lookup_funcinfo->low_addr) + high = mid; + else if (addr >= lookup_funcinfo->high_addr) + low = mid + 1; + else + high = first = mid; + } + + /* Find the 'best' match for the address. The prior algorithm defined the + best match as the function with the smallest address range containing + the specified address. This definition should probably be changed to the + innermost inline routine containing the address, but right now we want + to get the same results we did before. */ + while (first < number_of_functions) + { + if (addr < unit->lookup_funcinfo_table[first].low_addr) + break; + funcinfo = unit->lookup_funcinfo_table[first].funcinfo; + + for (arange = &funcinfo->arange; arange; arange = arange->next) { - if (addr >= arange->low && addr < arange->high) + if (addr < arange->low || addr >= arange->high) + continue; + + if (!best_fit + || arange->high - arange->low < best_fit_len + /* The following comparison is designed to return the same + match as the previous algorithm for routines which have the + same best fit length. */ + || (arange->high - arange->low == best_fit_len + && funcinfo > best_fit)) { - if (!best_fit - || arange->high - arange->low < best_fit_len) - { - best_fit = each_func; - best_fit_len = arange->high - arange->low; - } + best_fit = funcinfo; + best_fit_len = arange->high - arange->low; } } - } - if (best_fit) - { - *function_ptr = best_fit; - return TRUE; + first++; } - return FALSE; + + if (!best_fit) + return FALSE; + + *function_ptr = best_fit; + return TRUE; } /* If SYM at ADDR is within function table of UNIT, set FILENAME_PTR @@ -2513,6 +2718,7 @@ scan_unit_for_symbols (struct comp_unit *unit) func->tag = abbrev->tag; func->prev_func = unit->function_table; unit->function_table = func; + unit->number_of_functions++; BFD_ASSERT (!unit->cached); if (func->tag == DW_TAG_inlined_subroutine) @@ -4241,6 +4447,12 @@ _bfd_dwarf2_cleanup_debug_info (bfd *abfd, void **pinfo) function_table = function_table->prev_func; } + if (each->lookup_funcinfo_table) + { + free (each->lookup_funcinfo_table); + each->lookup_funcinfo_table = NULL; + } + while (variable_table) { if (variable_table->file) -- 1.8.3.1