This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
Re: [PATCH] Mach-O: Follow Apple's dSYM files
On Jan 2, 2012, at 7:45 PM, shinichiro hamaji wrote:
> Hi,
>
> Thanks for the review! I feel my patch is much better than before.
> Here is the updated patch: http://shinh.skr.jp/t/dsym3.patch
>
> Some replies for comments will be inlined.
Ok for me. You still need to get the approval for the change in non mach-o files.
The latest step is to implement find_nearest_line when there is no dsym file !
Thanks!
Tristan.
>
>> I think you should omit the _macosx_ part of the name.
>> Also, you may find bfd_mac_o_fat_extract useful.
>
> Ah, nice! I could eliminate bfd_mach_o_find_dsym_in_fat with this function.
>
>>> + if (bfd_check_format (debug_bfd, bfd_archive))
>>> + {
>>> + bfd *r = bfd_mach_o_find_macosx_dsym_in_fat (debug_bfd, uuid_cmd);
>>> + if (r)
>>> + {
>>> + mdata->debug_filename = debug_filename;
>>> + mdata->debug_bfd = r;
>>> + mdata->debug_fat_bfd = debug_bfd;
>>> + }
>>> + return r;
>>> + }
>>> +
>>> + if (bfd_mach_o_dsym_p (debug_bfd, uuid_cmd))
>>> + {
>>> + mdata->debug_filename = debug_filename;
>>> + mdata->debug_bfd = debug_bfd;
>>> + return debug_bfd;
>>> + }
>>
>> It would be nice not to store debug_filename and debug_bfd here, but in a caller. That would make it easier to reuse bfd_mach_o_follow_dsym by gdb.
>
> Now find_nearest_line is storing dsym_bfd. This function is not the
> direct caller of the code you commented, but I guessed this is what
> you meant.
>
>>> + /* Filename of .dSYM file. */
>>> + char *debug_filename;
>>> + /* BFD of .dSYM file. */
>>> + bfd *debug_bfd;
>>> + /* BFD of a fat binary which contains debug_bfd. */
>>> + bfd *debug_fat_bfd;
>>
>> I'd prefer to use the 'dsym_' prefix instead of 'debug_'.
>> There is no need to save debug_filename, just free it when the BFD is closed.
>> Same for debug_fat_bfd.
>
> Now bfd_mach_o_close_and_cleanup frees my_archive and filename of
> dsym_bfd. Hmm I'm not 100% sure if this is what you meant. Could you
> elaborate if the new code looks bad?
>
> Thanks!
>
> bfd/
> 2012-01-02 Shinichiro Hamaji <shinichiro.hamaji@gmail.com>
>
> * dwarf2.c (_bfd_dwarf2_slurp_debug_info): Factor out the part
> which reads DWARF2 and stores in stash from find_line.
> (find_line) Call _bfd_dwarf2_slurp_debug_info.
> * libbfd-in.h (_bfd_dwarf2_slurp_debug_info): Add declaration.
> * libbfd.h (_bfd_dwarf2_slurp_debug_info): Regenerate.
> * mach-o.c (dsym_subdir): The name of subdir where debug
> information may be stored.
> (bfd_mach_o_lookup_uuid_command): New. Lookup a load command whose
> type is UUID.
> (bfd_mach_o_dsym_for_uuid_p): New. Check if the specified BFD is
> corresponding to the executable.
> (bfd_mach_o_find_dsym): New. Find a debug information BFD in the
> specified binary file.
> (bfd_mach_o_follow_dsym): New. Find a debug information BFD for
> the original BFD.
> (bfd_mach_o_find_nearest_line): Check dSYM files for Mach-O
> executables, dylibs, and bundles.
> (bfd_mach_o_close_and_cleanup): Clean up BFDs for the dSYM file.
> * mach-o.h (dsym_bfd): The BFD of the dSYM file.
>
> diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c
> index 767fa52..66fd16f 100644
> --- a/bfd/dwarf2.c
> +++ b/bfd/dwarf2.c
> @@ -3117,6 +3117,122 @@ stash_find_line_fast (struct dwarf2_debug *stash,
> filename_ptr, linenumber_ptr);
> }
>
> +/* Read debug information from DEBUG_BFD when DEBUG_BFD is specified.
> + If DEBUG_BFD is not specified, we read debug information from ABFD
> + or its gnu_debuglink. The results will be stored in PINFO.
> + The function returns TRUE iff debug information is ready. */
> +
> +bfd_boolean
> +_bfd_dwarf2_slurp_debug_info (bfd *abfd, bfd *debug_bfd,
> + const struct dwarf_debug_section *debug_sections,
> + asymbol **symbols,
> + void **pinfo)
> +{
> + bfd_size_type amt = sizeof (struct dwarf2_debug);
> + bfd_size_type total_size;
> + asection *msec;
> + struct dwarf2_debug *stash = (struct dwarf2_debug *) *pinfo;
> +
> + if (stash != NULL)
> + return TRUE;
> +
> + stash = (struct dwarf2_debug *) bfd_zalloc (abfd, amt);
> + if (! stash)
> + return FALSE;
> + stash->debug_sections = debug_sections;
> +
> + *pinfo = stash;
> +
> + if (debug_bfd == NULL)
> + debug_bfd = abfd;
> +
> + msec = find_debug_info (debug_bfd, debug_sections, NULL);
> + if (msec == NULL && abfd == debug_bfd)
> + {
> + char * debug_filename = bfd_follow_gnu_debuglink (abfd, DEBUGDIR);
> +
> + if (debug_filename == NULL)
> + /* No dwarf2 info, and no gnu_debuglink to follow.
> + Note that at this point the stash has been allocated, but
> + contains zeros. This lets future calls to this function
> + fail more quickly. */
> + return FALSE;
> +
> + if ((debug_bfd = bfd_openr (debug_filename, NULL)) == NULL
> + || ! bfd_check_format (debug_bfd, bfd_object)
> + || (msec = find_debug_info (debug_bfd,
> + debug_sections, NULL)) == NULL)
> + {
> + if (debug_bfd)
> + bfd_close (debug_bfd);
> + /* FIXME: Should we report our failure to follow the debuglink ? */
> + free (debug_filename);
> + return FALSE;
> + }
> + }
> +
> + /* There can be more than one DWARF2 info section in a BFD these
> + days. First handle the easy case when there's only one. If
> + there's more than one, try case two: none of the sections is
> + compressed. In that case, read them all in and produce one
> + large stash. We do this in two passes - in the first pass we
> + just accumulate the section sizes, and in the second pass we
> + read in the section's contents. (The allows us to avoid
> + reallocing the data as we add sections to the stash.) If
> + some or all sections are compressed, then do things the slow
> + way, with a bunch of reallocs. */
> +
> + if (! find_debug_info (debug_bfd, debug_sections, msec))
> + {
> + /* Case 1: only one info section. */
> + total_size = msec->size;
> + if (! read_section (debug_bfd, &stash->debug_sections[debug_info],
> + symbols, 0,
> + &stash->info_ptr_memory, &total_size))
> + return FALSE;
> + }
> + else
> + {
> + /* Case 2: multiple sections. */
> + for (total_size = 0;
> + msec;
> + msec = find_debug_info (debug_bfd, debug_sections, msec))
> + total_size += msec->size;
> +
> + stash->info_ptr_memory = (bfd_byte *) bfd_malloc (total_size);
> + if (stash->info_ptr_memory == NULL)
> + return FALSE;
> +
> + total_size = 0;
> + for (msec = find_debug_info (debug_bfd, debug_sections, NULL);
> + msec;
> + msec = find_debug_info (debug_bfd, debug_sections, msec))
> + {
> + bfd_size_type size;
> +
> + size = msec->size;
> + if (size == 0)
> + continue;
> +
> + if (!(bfd_simple_get_relocated_section_contents
> + (debug_bfd, msec, stash->info_ptr_memory + total_size,
> + symbols)))
> + return FALSE;
> +
> + total_size += size;
> + }
> + }
> +
> + stash->info_ptr = stash->info_ptr_memory;
> + stash->info_ptr_end = stash->info_ptr + total_size;
> + stash->sec = find_debug_info (debug_bfd, debug_sections, NULL);
> + stash->sec_info_ptr = stash->info_ptr;
> + stash->syms = symbols;
> + stash->bfd_ptr = debug_bfd;
> +
> + return TRUE;
> +}
> +
> /* Find the source code location of SYMBOL. If SYMBOL is NULL
> then find the nearest source code location corresponding to
> the address SECTION + OFFSET.
> @@ -3157,17 +3273,16 @@ find_line (bfd *abfd,
> bfd_vma found = FALSE;
> bfd_boolean do_line;
>
> - stash = (struct dwarf2_debug *) *pinfo;
> + *filename_ptr = NULL;
> + if (functionname_ptr != NULL)
> + *functionname_ptr = NULL;
> + *linenumber_ptr = 0;
>
> - if (! stash)
> - {
> - bfd_size_type amt = sizeof (struct dwarf2_debug);
> + if (! _bfd_dwarf2_slurp_debug_info (abfd, NULL,
> + debug_sections, symbols, pinfo))
> + return FALSE;
>
> - stash = (struct dwarf2_debug *) bfd_zalloc (abfd, amt);
> - if (! stash)
> - return FALSE;
> - stash->debug_sections = debug_sections;
> - }
> + stash = (struct dwarf2_debug *) *pinfo;
>
> /* In a relocatable file, 2 functions may have the same address.
> We change the section vma so that they won't overlap. */
> @@ -3197,110 +3312,11 @@ find_line (bfd *abfd,
> addr += section->output_section->vma + section->output_offset;
> else
> addr += section->vma;
> - *filename_ptr = NULL;
> - if (! do_line)
> - *functionname_ptr = NULL;
> - *linenumber_ptr = 0;
> -
> - if (! *pinfo)
> - {
> - bfd *debug_bfd;
> - bfd_size_type total_size;
> - asection *msec;
> -
> - *pinfo = stash;
> -
> - msec = find_debug_info (abfd, debug_sections, NULL);
> - if (msec == NULL)
> - {
> - char * debug_filename = bfd_follow_gnu_debuglink (abfd, DEBUGDIR);
> -
> - if (debug_filename == NULL)
> - /* No dwarf2 info, and no gnu_debuglink to follow.
> - Note that at this point the stash has been allocated, but
> - contains zeros. This lets future calls to this function
> - fail more quickly. */
> - goto done;
> -
> - if ((debug_bfd = bfd_openr (debug_filename, NULL)) == NULL
> - || ! bfd_check_format (debug_bfd, bfd_object)
> - || (msec = find_debug_info (debug_bfd,
> - debug_sections, NULL)) == NULL)
> - {
> - if (debug_bfd)
> - bfd_close (debug_bfd);
> - /* FIXME: Should we report our failure to follow the debuglink ? */
> - free (debug_filename);
> - goto done;
> - }
> - }
> - else
> - debug_bfd = abfd;
> -
> - /* There can be more than one DWARF2 info section in a BFD these
> - days. First handle the easy case when there's only one. If
> - there's more than one, try case two: none of the sections is
> - compressed. In that case, read them all in and produce one
> - large stash. We do this in two passes - in the first pass we
> - just accumulate the section sizes, and in the second pass we
> - read in the section's contents. (The allows us to avoid
> - reallocing the data as we add sections to the stash.) If
> - some or all sections are compressed, then do things the slow
> - way, with a bunch of reallocs. */
> -
> - if (! find_debug_info (debug_bfd, debug_sections, msec))
> - {
> - /* Case 1: only one info section. */
> - total_size = msec->size;
> - if (! read_section (debug_bfd, &stash->debug_sections[debug_info],
> - symbols, 0,
> - &stash->info_ptr_memory, &total_size))
> - goto done;
> - }
> - else
> - {
> - /* Case 2: multiple sections. */
> - for (total_size = 0;
> - msec;
> - msec = find_debug_info (debug_bfd, debug_sections, msec))
> - total_size += msec->size;
> -
> - stash->info_ptr_memory = (bfd_byte *) bfd_malloc (total_size);
> - if (stash->info_ptr_memory == NULL)
> - goto done;
> -
> - total_size = 0;
> - for (msec = find_debug_info (debug_bfd, debug_sections, NULL);
> - msec;
> - msec = find_debug_info (debug_bfd, debug_sections, msec))
> - {
> - bfd_size_type size;
> -
> - size = msec->size;
> - if (size == 0)
> - continue;
> -
> - if (!(bfd_simple_get_relocated_section_contents
> - (debug_bfd, msec, stash->info_ptr_memory + total_size,
> - symbols)))
> - goto done;
> -
> - total_size += size;
> - }
> - }
> -
> - stash->info_ptr = stash->info_ptr_memory;
> - stash->info_ptr_end = stash->info_ptr + total_size;
> - stash->sec = find_debug_info (debug_bfd, debug_sections, NULL);
> - stash->sec_info_ptr = stash->info_ptr;
> - stash->syms = symbols;
> - stash->bfd_ptr = debug_bfd;
> - }
>
> /* A null info_ptr indicates that there is no dwarf2 info
> (or that an error occured while setting up the stash). */
> if (! stash->info_ptr)
> - goto done;
> + return FALSE;
>
> stash->inliner_chain = NULL;
>
> diff --git a/bfd/libbfd-in.h b/bfd/libbfd-in.h
> index 7db09e4..f7a9e21 100644
> --- a/bfd/libbfd-in.h
> +++ b/bfd/libbfd-in.h
> @@ -549,6 +549,10 @@ bfd_boolean _bfd_generic_find_line
> extern bfd_boolean _bfd_dwarf2_find_inliner_info
> (bfd *, const char **, const char **, unsigned int *, void **);
>
> +/* Read DWARF 2 debugging information. */
> +extern bfd_boolean _bfd_dwarf2_slurp_debug_info
> + (bfd *, bfd *, const struct dwarf_debug_section *, asymbol **, void **);
> +
> /* Clean up the data used to handle DWARF 2 debugging information. */
> extern void _bfd_dwarf2_cleanup_debug_info
> (bfd *, void **);
> diff --git a/bfd/libbfd.h b/bfd/libbfd.h
> index 0beddb6..a10a651 100644
> --- a/bfd/libbfd.h
> +++ b/bfd/libbfd.h
> @@ -554,6 +554,10 @@ bfd_boolean _bfd_generic_find_line
> extern bfd_boolean _bfd_dwarf2_find_inliner_info
> (bfd *, const char **, const char **, unsigned int *, void **);
>
> +/* Read DWARF 2 debugging information. */
> +extern bfd_boolean _bfd_dwarf2_slurp_debug_info
> + (bfd *, bfd *, const struct dwarf_debug_section *, asymbol **, void **);
> +
> /* Clean up the data used to handle DWARF 2 debugging information. */
> extern void _bfd_dwarf2_cleanup_debug_info
> (bfd *, void **);
> diff --git a/bfd/mach-o.c b/bfd/mach-o.c
> index cc68d89..ceddc8d 100644
> --- a/bfd/mach-o.c
> +++ b/bfd/mach-o.c
> @@ -277,6 +277,8 @@ static const mach_o_segment_name_xlat segsec_names_xlat[] =
> { NULL, NULL }
> };
>
> +static const char dsym_subdir[] = ".dSYM/Contents/Resources/DWARF";
> +
> /* For both cases bfd-name => mach-o name and vice versa, the specific target
> is checked before the generic. This allows a target (e.g. ppc for cstring)
> to override the generic definition with a more specific one. */
> @@ -3738,6 +3740,120 @@ bfd_mach_o_core_file_failing_signal (bfd *abfd
> ATTRIBUTE_UNUSED)
> return 0;
> }
>
> +static bfd_mach_o_uuid_command *
> +bfd_mach_o_lookup_uuid_command (bfd *abfd)
> +{
> + bfd_mach_o_load_command *uuid_cmd;
> + int ncmd = bfd_mach_o_lookup_command (abfd, BFD_MACH_O_LC_UUID, &uuid_cmd);
> + if (ncmd != 1)
> + return FALSE;
> + return &uuid_cmd->command.uuid;
> +}
> +
> +/* Return true if ABFD is a dSYM file and its UUID matches UUID_CMD. */
> +
> +static bfd_boolean
> +bfd_mach_o_dsym_for_uuid_p (bfd *abfd, const bfd_mach_o_uuid_command *uuid_cmd)
> +{
> + bfd_mach_o_uuid_command *dsym_uuid_cmd;
> +
> + BFD_ASSERT (abfd);
> + BFD_ASSERT (uuid_cmd);
> +
> + if (!bfd_check_format (abfd, bfd_object))
> + return FALSE;
> +
> + if (bfd_get_flavour (abfd) != bfd_target_mach_o_flavour
> + || bfd_mach_o_get_data (abfd) == NULL
> + || bfd_mach_o_get_data (abfd)->header.filetype != BFD_MACH_O_MH_DSYM)
> + return FALSE;
> +
> + dsym_uuid_cmd = bfd_mach_o_lookup_uuid_command (abfd);
> + if (dsym_uuid_cmd == NULL)
> + return FALSE;
> +
> + if (memcmp (uuid_cmd->uuid, dsym_uuid_cmd->uuid,
> + sizeof (uuid_cmd->uuid)) != 0)
> + return FALSE;
> +
> + return TRUE;
> +}
> +
> +/* Find a BFD in DSYM_FILENAME which matches ARCH and UUID_CMD.
> + The caller is responsible for closing the returned BFD object and
> + its my_archive if the returned BFD is in a fat dSYM. */
> +
> +static bfd *
> +bfd_mach_o_find_dsym (const char *dsym_filename,
> + const bfd_mach_o_uuid_command *uuid_cmd,
> + const bfd_arch_info_type *arch)
> +{
> + bfd *base_dsym_bfd, *dsym_bfd;
> +
> + BFD_ASSERT (uuid_cmd);
> +
> + base_dsym_bfd = bfd_openr (dsym_filename, NULL);
> + if (base_dsym_bfd == NULL)
> + return NULL;
> +
> + dsym_bfd = bfd_mach_o_fat_extract (base_dsym_bfd, bfd_object, arch);
> + if (bfd_mach_o_dsym_for_uuid_p (dsym_bfd, uuid_cmd))
> + return dsym_bfd;
> +
> + bfd_close (dsym_bfd);
> + if (base_dsym_bfd != dsym_bfd)
> + bfd_close (base_dsym_bfd);
> +
> + return NULL;
> +}
> +
> +/* Return a BFD created from a dSYM file for ABFD.
> + The caller is responsible for closing the returned BFD object, its
> + filename, and its my_archive if the returned BFD is in a fat dSYM. */
> +
> +static bfd *
> +bfd_mach_o_follow_dsym (bfd *abfd)
> +{
> + char *dsym_filename;
> + bfd_mach_o_uuid_command *uuid_cmd;
> + bfd *dsym_bfd, *base_bfd = abfd;
> + const char *base_basename;
> +
> + if (abfd == NULL || bfd_get_flavour (abfd) != bfd_target_mach_o_flavour)
> + return NULL;
> +
> + if (abfd->my_archive)
> + base_bfd = abfd->my_archive;
> + /* BFD may have been opened from a stream. */
> + if (base_bfd->filename == NULL)
> + {
> + bfd_set_error (bfd_error_invalid_operation);
> + return NULL;
> + }
> + base_basename = lbasename (base_bfd->filename);
> +
> + uuid_cmd = bfd_mach_o_lookup_uuid_command (abfd);
> + if (uuid_cmd == NULL)
> + return NULL;
> +
> + /* TODO: We assume the DWARF file has the same as the binary's.
> + It seems apple's GDB checks all files in the dSYM bundle directory.
> + http://opensource.apple.com/source/gdb/gdb-1708/src/gdb/macosx/macosx-tdep.c
> + */
> + dsym_filename = (char *)bfd_malloc (strlen (base_bfd->filename)
> + + strlen (dsym_subdir) + 1
> + + strlen (base_basename) + 1);
> + sprintf (dsym_filename, "%s%s/%s",
> + base_bfd->filename, dsym_subdir, base_basename);
> +
> + dsym_bfd = bfd_mach_o_find_dsym (dsym_filename, uuid_cmd,
> + bfd_get_arch_info (abfd));
> + if (dsym_bfd == NULL)
> + free (dsym_filename);
> +
> + return dsym_bfd;
> +}
> +
> bfd_boolean
> bfd_mach_o_find_nearest_line (bfd *abfd,
> asection *section,
> @@ -3748,9 +3864,34 @@ bfd_mach_o_find_nearest_line (bfd *abfd,
> unsigned int *line_ptr)
> {
> bfd_mach_o_data_struct *mdata = bfd_mach_o_get_data (abfd);
> - /* TODO: Handle executables and dylibs by using dSYMs. */
> - if (mdata->header.filetype != BFD_MACH_O_MH_OBJECT)
> + if (mdata == NULL)
> return FALSE;
> + switch (mdata->header.filetype)
> + {
> + case BFD_MACH_O_MH_OBJECT:
> + break;
> + case BFD_MACH_O_MH_EXECUTE:
> + case BFD_MACH_O_MH_DYLIB:
> + case BFD_MACH_O_MH_BUNDLE:
> + case BFD_MACH_O_MH_KEXT_BUNDLE:
> + if (mdata->dwarf2_find_line_info == NULL)
> + {
> + mdata->dsym_bfd = bfd_mach_o_follow_dsym (abfd);
> + /* When we couldn't find dSYM for this binary, we look for
> + the debug information in the binary itself. In this way,
> + we won't try finding separated dSYM again because
> + mdata->dwarf2_find_line_info will be filled. */
> + if (! mdata->dsym_bfd)
> + break;
> + if (! _bfd_dwarf2_slurp_debug_info (abfd, mdata->dsym_bfd,
> + dwarf_debug_sections, symbols,
> + &mdata->dwarf2_find_line_info))
> + return FALSE;
> + }
> + break;
> + default:
> + return FALSE;
> + }
> if (_bfd_dwarf2_find_nearest_line (abfd, dwarf_debug_sections,
> section, symbols, offset,
> filename_ptr, functionname_ptr,
> @@ -3768,6 +3909,18 @@ bfd_mach_o_close_and_cleanup (bfd *abfd)
> {
> _bfd_dwarf2_cleanup_debug_info (abfd, &mdata->dwarf2_find_line_info);
> bfd_mach_o_free_cached_info (abfd);
> + if (mdata->dsym_bfd != NULL)
> + {
> + bfd *fat_bfd = mdata->dsym_bfd->my_archive;
> + char *dsym_filename = (char *)(fat_bfd
> + ? fat_bfd->filename
> + : mdata->dsym_bfd->filename);
> + bfd_close (mdata->dsym_bfd);
> + mdata->dsym_bfd = NULL;
> + if (fat_bfd)
> + bfd_close (fat_bfd);
> + free (dsym_filename);
> + }
> }
>
> return _bfd_generic_close_and_cleanup (abfd);
> diff --git a/bfd/mach-o.h b/bfd/mach-o.h
> index 89dce1a..cbd4451 100644
> --- a/bfd/mach-o.h
> +++ b/bfd/mach-o.h
> @@ -520,6 +520,9 @@ typedef struct mach_o_data_struct
> /* A place to stash dwarf2 info for this bfd. */
> void *dwarf2_find_line_info;
>
> + /* BFD of .dSYM file. */
> + bfd *dsym_bfd;
> +
> /* Cache of dynamic relocs. */
> arelent *dyn_reloc_cache;
> }