This is the mail archive of the elfutils-devel@sourceware.org mailing list for the elfutils project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi, I'm poking into .debug_loc. Currently the validation is done only on portions referenced from .debug_info. I rewrote the code to do it the way it's done in other cases: .debug_info references are remembered, and used to cross-check connectivity later, during sequential scan of .debug_loc. But when validating "holes", i.e. portions of .debug_loc that are not referenced from .debug_info, I'm getting a lot of weird errors: negative and empty ranges, invalid location expressions (opcode 0 is a norm). It looks like the unreferenced portions are filled with garbage, and that the section as a whole is not supposed to be scanned sequentially. But if it's garbage, it screws the stream of location lists surprisingly seldom. One example that I've found is /usr/lib/debug/usr/lib64/openoffice.org/program/deployment680lx.uno.so.debug, where the stream actually ends up screwed royally after offset 70dd, and the file ends up with dozens of unresolved location list references. readelf -wloc shows implausible location expressions after 70dd, too. Many debuginfo files from openoffice.org suite show similar problems. So... am I missing some arcane insider info (or just misreading the spec), or are these actual errors in our DWARF files? In the mean time, I switched back to the old way of validation, but I'm attaching the patch (formed against 8aef81) in case anyone wants to see on their own eyes. PM diff --git a/src/dwarflint.c b/src/dwarflint.c index dc71805..5060994 100644 --- a/src/dwarflint.c +++ b/src/dwarflint.c @@ -170,8 +170,8 @@ enum message_category mc_pubtables = 0x100000, // table of public names/types mc_pubtypes = 0x200000, // .debug_pubtypes presence mc_loc = 0x400000, // messages related to .debug_loc - mc_ranges = 0x800000, // messages related to .debug_ranges - mc_other = 0x1000000, // messages unrelated to any of the above + mc_ranges = 0x1000000, // messages related to .debug_ranges + mc_other = 0x2000000, // messages unrelated to any of the above mc_all = 0xffffff00, // all areas }; @@ -190,7 +190,7 @@ accept_message (struct message_criteria *crit, enum message_category cat) } static struct message_criteria warning_criteria - = {mc_all & ~(mc_strings | mc_loc), + = {mc_all & ~(mc_strings), mc_pubtypes}; static struct message_criteria error_criteria = {mc_impact_4 | mc_error, @@ -321,7 +321,7 @@ main (int argc, char *argv[]) warning_criteria.reject |= mc_acc_bloat | mc_pubtypes; if (be_strict) { - warning_criteria.accept |= mc_strings | mc_loc; + warning_criteria.accept |= mc_strings; if (!be_gnu) warning_criteria.reject &= ~mc_pubtypes; } @@ -586,7 +586,6 @@ struct hole_info static void coverage_init (struct coverage *ar, uint64_t size); static void coverage_add (struct coverage *ar, uint64_t begin, uint64_t end); -static bool coverage_is_covered (struct coverage *ar, uint64_t address); static void coverage_find_holes (struct coverage *ar, void (*cb)(uint64_t begin, uint64_t end, void *data), void *data); @@ -604,6 +603,7 @@ struct cu uint64_t base; // DW_AT_low_pc value of CU DIE, 0 if not present. struct addr_record die_addrs; // Addresses where DIEs begin in this CU. struct ref_record die_refs; // DIE references into other CUs from this CU. + struct ref_record loc_refs; // DIE references into .debug_loc struct where where; // Where was this section defined. bool has_arange; // Whether we saw arange section pointing to this CU. bool has_pubnames; // Likewise for pubnames. @@ -619,24 +619,23 @@ static struct cu *cu_find_cu (struct cu *cu_chain, uint64_t offset); static struct cu *check_debug_info_structural (struct read_ctx *ctx, struct abbrev_table *abbrev_chain, - Elf_Data *strings, - Elf_Data *loc); + Elf_Data *strings); static bool check_cu_structural (struct read_ctx *ctx, struct cu *const cu, struct abbrev_table *abbrev_chain, Elf_Data *strings, - Elf_Data *loc, bool dwarf_64, struct ref_record *die_refs, - struct addr_record *loc_addrs, - struct coverage *strings_coverage, - struct coverage *loc_coverage); + struct coverage *strings_coverage); static bool check_aranges_structural (struct read_ctx *ctx, struct cu *cu_chain); static bool check_pub_structural (struct read_ctx *ctx, struct cu *cu_chain, enum section_id sec); - +static bool check_loc_structural (struct read_ctx *ctx, + struct cu *cu_chain, + bool addr_64, + struct addr_record *addrs); static const char *where_fmt (struct where *wh, char *ptr) { @@ -841,7 +840,7 @@ process_file (int fd __attribute__((unused)), { read_ctx_init (&ctx, dwarf, info_data); cu_chain = check_debug_info_structural (&ctx, abbrev_chain, - str_data, loc_data); + str_data); } else if (!tolerate_nodebug) /* Hard error, not a message. We can't debug without this. */ @@ -872,6 +871,57 @@ process_file (int fd __attribute__((unused)), wr_message (mc_impact_4 | mc_acc_suboptimal | mc_elf | mc_pubtypes, &WHERE (sec_pubtypes, NULL), ": data not found.\n"); + if (loc_data != NULL && cu_chain != NULL) + { + /* We can only do exhaustive .debug_loc validation, if all CUs + have the same address size. Sanity of address size is + checked when building the cu_chain. */ + uint8_t address_size = 0; + for (struct cu *it = cu_chain; it != NULL; it = it->next) + if (address_size == 0) + address_size = it->address_size; + else if (address_size != it->address_size) + { + wr_message (mc_loc | mc_info, &it->where, + ": has different address size from the previous CU.\n"); + address_size = 0; + break; + } + + if (address_size != 0) + { + read_ctx_init (&ctx, dwarf, loc_data); + + struct addr_record loclist_addrs; + memset (&loclist_addrs, 0, sizeof (loclist_addrs)); + + if (check_loc_structural (&ctx, cu_chain, address_size == 8, + &loclist_addrs)) + { + bool have_unresolved = false; + + /* Check that all references are resolved. */ + for (struct cu *it = cu_chain; it != NULL; it = it->next) + for (size_t i = 0; i < it->loc_refs.size; ++i) + { + struct ref *ref = it->loc_refs.refs + i; + if (!addr_record_has_addr (&loclist_addrs, ref->addr)) + { + if (!have_unresolved) + { + wr_error (&WHERE (sec_info, NULL), + ": unresolved location expression references:\n"); + have_unresolved = true; + } + wr_error (&ref->who, + ": references 0x%" PRIx64 ".\n", ref->addr); + } + } + } + } + } + /* XXX check that all cu->loc_refs are satisfied. */ + cu_free (cu_chain); abbrev_table_free (abbrev_chain); } @@ -1620,27 +1670,6 @@ coverage_add (struct coverage *ar, uint64_t begin, uint64_t end) } } -static bool -coverage_is_covered (struct coverage *ar, uint64_t address) -{ - assert (ar); - assert (address <= ar->size); - - uint64_t bi = address / coverage_emt_bits; - uint8_t bb = address % coverage_emt_bits; - coverage_emt_type bm = (coverage_emt_type)1 << (coverage_emt_bits - 1 - bb); - return !!(ar->buf[bi] & bm); -} - -static bool -coverage_pristine (struct coverage *ar, uint64_t begin, uint64_t length) -{ - for (uint64_t i = 0; i < length; ++i) - if (coverage_is_covered (ar, begin + i)) - return false; - return true; -} - static void coverage_find_holes (struct coverage *ar, void (*cb)(uint64_t begin, uint64_t end, void *user), @@ -1844,6 +1873,7 @@ wr_check_zero_padding (struct read_ctx *ctx, return true; } +/* static bool check_x_location_expression (struct read_ctx *ctx, struct cu *cu, @@ -1851,12 +1881,12 @@ check_x_location_expression (struct read_ctx *ctx, struct addr_record *loc_addrs, uint64_t addr, bool addr_64, struct where *wh); +*/ static struct cu * check_debug_info_structural (struct read_ctx *ctx, struct abbrev_table *abbrev_chain, - Elf_Data *strings, - Elf_Data *loc) + Elf_Data *strings) { struct ref_record die_refs; memset (&die_refs, 0, sizeof (die_refs)); @@ -1872,16 +1902,6 @@ check_debug_info_structural (struct read_ctx *ctx, strings_coverage = &strings_coverage_mem; } - struct coverage loc_coverage_mem, *loc_coverage = NULL; - struct addr_record loc_addrs_mem, *loc_addrs = NULL; - if (loc != NULL && check_category (mc_loc)) - { - coverage_init (&loc_coverage_mem, loc->d_size); - loc_coverage = &loc_coverage_mem; - memset (&loc_addrs_mem, 0, sizeof (loc_addrs_mem)); - loc_addrs = &loc_addrs_mem; - } - while (!read_ctx_eof (ctx)) { const unsigned char *cu_begin = ctx->ptr; @@ -1955,9 +1975,8 @@ check_debug_info_structural (struct read_ctx *ctx, } cu_ctx.ptr = ctx->ptr; - if (!check_cu_structural (&cu_ctx, cur, abbrev_chain, strings, loc, - dwarf_64, &die_refs, loc_addrs, - strings_coverage, loc_coverage)) + if (!check_cu_structural (&cu_ctx, cur, abbrev_chain, strings, + dwarf_64, &die_refs, strings_coverage)) { success = false; break; @@ -2008,24 +2027,26 @@ check_debug_info_structural (struct read_ctx *ctx, coverage_free (strings_coverage); } - if (loc_coverage != NULL) - { - if (success) - coverage_find_holes (loc_coverage, found_hole, - &((struct hole_info) - {sec_loc, mc_loc, loc->d_buf})); - coverage_free (loc_coverage); - } - - if (loc_addrs != NULL) - addr_record_free (loc_addrs); - if (!success || !references_sound) { cu_free (cu_chain); cu_chain = NULL; } + /* Reverse the chain, so that it's organized "naturally". Has + significant impact on performance when building set of all loc + references in CUs, as the addr_record_add doesn't have to add to + the beginning. */ + struct cu *last = NULL; + for (struct cu *it = cu_chain; it != NULL; ) + { + struct cu *next = it->next; + it->next = last; + last = it; + it = next; + } + cu_chain = last; + return cu_chain; } @@ -2051,6 +2072,7 @@ get_location_opcode_operands (uint8_t opcode, uint8_t *op1, uint8_t *op2) }; } +/* CAT is message category. */ static void check_location_expression (struct read_ctx *ctx, struct where *wh, bool addr_64) { @@ -2068,8 +2090,9 @@ check_location_expression (struct read_ctx *ctx, struct where *wh, bool addr_64) uint8_t op1, op2; if (!get_location_opcode_operands (opcode, &op1, &op2)) { - wr_error (&where, ": can't decode opcode \"%s\".\n", - dwarf_locexpr_opcode_string (opcode)); + wr_message (mc_loc | mc_error, &where, + ": can't decode opcode \"%s\".\n", + dwarf_locexpr_opcode_string (opcode)); return; } @@ -2093,6 +2116,7 @@ check_location_expression (struct read_ctx *ctx, struct where *wh, bool addr_64) } } +#if 0 static bool check_x_location_expression (struct read_ctx *ctx, struct cu *cu, @@ -2228,6 +2252,7 @@ check_x_location_expression (struct read_ctx *ctx, return retval; } +#endif /* @@ -2242,13 +2267,10 @@ read_die_chain (struct read_ctx *ctx, struct cu *cu, struct abbrev_table *abbrevs, Elf_Data *strings, - Elf_Data *loc, bool dwarf_64, bool addr_64, struct ref_record *die_refs, - struct ref_record *die_loc_refs, - struct addr_record *loc_addrs, - struct coverage *strings_coverage, - struct coverage *loc_coverage) + struct ref_record *local_die_refs, + struct coverage *strings_coverage) { bool got_die = false; uint64_t sibling_addr = 0; @@ -2320,7 +2342,7 @@ read_die_chain (struct read_ctx *ctx, { where.ref = &it->where; - void record_ref (uint64_t addr, struct where *who, bool local) + void record_ref (uint64_t addr, bool local) { struct ref_record *record = &cu->die_refs; if (local) @@ -2337,11 +2359,11 @@ read_die_chain (struct read_ctx *ctx, /* Address holds a CU-local reference, so add CU offset to turn it into section offset. */ addr += cu->offset; - record = die_loc_refs; + record = local_die_refs; } if (record != NULL) - ref_record_add (record, addr, who); + ref_record_add (record, addr, &where); } uint8_t form = it->form; @@ -2377,7 +2399,6 @@ read_die_chain (struct read_ctx *ctx, } bool check_locptr = false; - bool locptr_64 = addr_64; if (is_location_attrib (it->name)) { switch (check_CU_location_form (form, dwarf_64)) @@ -2395,7 +2416,6 @@ read_die_chain (struct read_ctx *ctx, wr_error (&where, ": location attribute with form \"%s\" in %d-bit CU.\n", dwarf_form_string (form), (dwarf_64 ? 64 : 32)); - locptr_64 = !locptr_64; /* fall-through */ case 1: /* locptr */ @@ -2455,7 +2475,7 @@ read_die_chain (struct read_ctx *ctx, goto cant_read; if (it->form == DW_FORM_ref_addr) - record_ref (addr, &where, false); + record_ref (addr, false); else if (abbrev->tag == DW_TAG_compile_unit || abbrev->tag == DW_TAG_partial_unit) cu->base = addr; @@ -2474,7 +2494,7 @@ read_die_chain (struct read_ctx *ctx, if (it->name == DW_AT_sibling) sibling_addr = value; else if (it->form == DW_FORM_ref_udata) - record_ref (value, &where, true); + record_ref (value, true); break; } @@ -2489,7 +2509,7 @@ read_die_chain (struct read_ctx *ctx, if (it->name == DW_AT_sibling) sibling_addr = value; else if (it->form == DW_FORM_ref1) - record_ref (value, &where, true); + record_ref (value, true); break; } @@ -2503,14 +2523,14 @@ read_die_chain (struct read_ctx *ctx, if (it->name == DW_AT_sibling) sibling_addr = value; else if (it->form == DW_FORM_ref2) - record_ref (value, &where, true); + record_ref (value, true); break; } case DW_FORM_data4: case DW_FORM_ref4: { - uint32_t value; + uint32_t value = -1; if (!read_ctx_read_4ubyte (ctx, &value)) goto cant_read; @@ -2518,14 +2538,11 @@ read_die_chain (struct read_ctx *ctx, sibling_addr = value; else if (check_locptr) { - struct read_ctx sub_ctx; - read_ctx_init (&sub_ctx, ctx->dbg, loc); - check_x_location_expression (&sub_ctx, cu, loc_coverage, - loc_addrs, value, locptr_64, - &where); + //printf ("loclist reference 32 %#" PRIx32 "\n", value); + ref_record_add (&cu->loc_refs, value, &where); } else if (it->form == DW_FORM_ref4) - record_ref (value, &where, true); + record_ref (value, true); break; } @@ -2540,14 +2557,11 @@ read_die_chain (struct read_ctx *ctx, sibling_addr = value; else if (check_locptr) { - struct read_ctx sub_ctx; - read_ctx_init (&sub_ctx, ctx->dbg, loc); - check_x_location_expression (&sub_ctx, cu, loc_coverage, - loc_addrs, value, locptr_64, - &where); + //printf ("loclist reference 64 %#" PRIx64 "\n", value); + ref_record_add (&cu->loc_refs, value, &where); } else if (it->form == DW_FORM_ref8) - record_ref (value, &where, true); + record_ref (value, true); break; } @@ -2618,10 +2632,10 @@ read_die_chain (struct read_ctx *ctx, if (abbrev->has_children) { - int st = read_die_chain (ctx, cu, abbrevs, strings, loc, + int st = read_die_chain (ctx, cu, abbrevs, strings, dwarf_64, addr_64, - die_refs, die_loc_refs, loc_addrs, - strings_coverage, loc_coverage); + die_refs, local_die_refs, + strings_coverage); if (st == -1) return -1; else if (st == 0) @@ -2672,12 +2686,9 @@ check_cu_structural (struct read_ctx *ctx, struct cu *const cu, struct abbrev_table *abbrev_chain, Elf_Data *strings, - Elf_Data *loc, bool dwarf_64, struct ref_record *die_refs, - struct addr_record *loc_addrs, - struct coverage *strings_coverage, - struct coverage *loc_coverage) + struct coverage *strings_coverage) { uint16_t version; uint64_t abbrev_offset; @@ -2722,15 +2733,14 @@ check_cu_structural (struct read_ctx *ctx, return false; } - struct ref_record die_loc_refs; - memset (&die_loc_refs, 0, sizeof (die_loc_refs)); + struct ref_record local_die_refs; + memset (&local_die_refs, 0, sizeof (local_die_refs)); bool retval = true; - if (read_die_chain (ctx, cu, abbrevs, strings, loc, + if (read_die_chain (ctx, cu, abbrevs, strings, dwarf_64, address_size == 8, - die_refs, &die_loc_refs, - loc_addrs, strings_coverage, - loc_coverage) >= 0) + die_refs, &local_die_refs, + strings_coverage) >= 0) { for (size_t i = 0; i < abbrevs->size; ++i) if (!abbrevs->abbr[i].used) @@ -2738,13 +2748,13 @@ check_cu_structural (struct read_ctx *ctx, ": Abbreviation with code %" PRIu64 " is never used.\n", abbrevs->abbr[i].code); - if (!check_die_references (cu, &die_loc_refs)) + if (!check_die_references (cu, &local_die_refs)) retval = false; } else retval = false; - ref_record_free (&die_loc_refs); + ref_record_free (&local_die_refs); return retval; } @@ -3063,3 +3073,116 @@ check_pub_structural (struct read_ctx *ctx, struct cu *cu_chain, return retval; } + + +static bool +check_loc_structural (struct read_ctx *ctx, + struct cu *cu_chain, + bool addr_64, + struct addr_record *addrs) +{ + /* A record of all references for fast look-up of + unreferenced .debug_loc portions. */ + struct addr_record refs; + memset (&refs, 0, sizeof (refs)); + for (struct cu *it = cu_chain; it != NULL; it = it->next) + for (size_t i = 0; i < it->loc_refs.size; ++i) + addr_record_add (&refs, it->loc_refs.refs[i].addr); + + uint64_t escape = addr_64 ? (uint64_t)-1 : (uint64_t)(uint32_t)-1; + while (!read_ctx_eof (ctx)) + { + struct where where = WHERE (sec_loc, NULL); + uint64_t loclist_addr = read_ctx_get_offset (ctx); + where_reset_1 (&where, loclist_addr); + addr_record_add (addrs, loclist_addr); + + if (!addr_record_has_addr (&refs, loclist_addr)) + wr_message (mc_loc | mc_acc_bloat | mc_impact_2, + &where, ": unreferenced loclist.\n"); + + uint64_t base = -1; + while (!read_ctx_eof (ctx)) + { + where_reset_2 (&where, read_ctx_get_offset (ctx)); + /* + for (size_t i = 0; i < 32; ++i) + printf (" %02x", ctx->ptr[i]); + printf ("\n"); + */ + /* 7.7.3: the two addresses are the same size as used by + DW_FORM_addr on the target machine. */ + + /* begin address */ + uint64_t begin_addr; + if (!read_ctx_read_offset (ctx, addr_64, &begin_addr)) + { + wr_error (&where, ": can't read address range beginning.\n"); + return false; + } + + /* end address */ + uint64_t end_addr; + if (!read_ctx_read_offset (ctx, addr_64, &end_addr)) + { + wr_error (&where, ": can't read address range ending.\n"); + return false; + } + + if (begin_addr == 0 && end_addr == 0) + /* 2.6.6: A location list containing only an end of list + entry describes an object that exists in the source + code but not in the executable program. */ + break; + else if (begin_addr == escape) + { + if (end_addr == base) + wr_message (mc_acc_bloat | mc_impact_3 | mc_loc, &where, + ": base address selection entry doesn't change base address.\n"); + base = end_addr; + } + else + { + /* location expression length */ + uint16_t len; + if (!read_ctx_read_2ubyte (ctx, &len)) + { + wr_error (&where, ": can't read length of location expression.\n"); + return false; + } + /* DWARF expression containing no operations + [i.e. length == 0] [...] represents a piece of an + object that is present in the source code but not in + the object code (perhaps due to optimization). */ + + if (end_addr < begin_addr) + wr_message (mc_loc | mc_error, &where, + ": has negative range 0x%" PRIx64 "..0x%" PRIx64 ".\n", + begin_addr, end_addr); + else if (begin_addr == end_addr && len > 0) + /* 2.6.6: A location list entry [...] whose beginning + and ending addresses are equal has no effect. */ + wr_message (mc_acc_bloat | mc_impact_3 | mc_loc, &where, + ": location list entry covers no range.\n"); + + /* location expression itself */ + struct read_ctx expr_ctx; + if (!read_ctx_init_sub (&expr_ctx, ctx, ctx->ptr, ctx->ptr + len)) + { + not_enough: + wr_error (&where, + ": not enough data for location expression of length %d.\n", + len); + return false; + } + + check_location_expression (&expr_ctx, &where, addr_64); + if (!read_ctx_skip (ctx, len)) + /* "can't happen" */ + goto not_enough; + } + } + } + + return true; +}
Attachment:
signature.asc
Description: PGP signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |