This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[PATCH] Mach-O: Follow Apple's dSYM files
- From: shinichiro hamaji <shinichiro dot hamaji at gmail dot com>
- To: binutils Development <binutils at sourceware dot org>
- Date: Tue, 27 Dec 2011 02:17:59 +0900
- Subject: [PATCH] Mach-O: Follow Apple's dSYM files
Hi,
Here is a patch which reads debug information from .dSYM files in the
same directory as the executable, dylib, or bundle binary:
http://shinh.skr.jp/t/dsym.patch
To let find_line use the debug BFD (in the dSYM bundle) instead of the
original executable's BFD, I used a slightly different approach from
gnu_debuglink. I factored out the part of find_line which reads debug
info, and I call it from mach-o.c.
If we take an approach like gnu_debuglink, we may need to add a
function pointer to bfd_target, say _bfd_follow_debug_bfd. I think
this is kinda overkill considering this function just returns NULL for
all other formats.
When I factor out _bfd_dwarf2_slurp_debug_info, I modified the order
of operations in find_line. I hope this change is OK.
It seems Apple's GDB checks all files under
"foo.dSYM/Contents/Resources/DWARF" directory for an executable "foo".
This patch checks only "foo.dSYM/Contents/Resources/DWARF/foo" and
leaves checking the other files as a TODO. This is because 1) Apple's
dsymutil (a linker for debug information) uses this filename by default
and 2) dirent.h is not a standard header, so I would need a few autoconf
changes. Even if we need it, I'd like to make that change in a separate
patch, as this patch is already getting large.
I ran "objdump -S" for a bunch of mach-o/elf files and "make check". The
results look OK. I'm not 100% sure if my decisions are the best. Any
kind of suggestions will be really appreciated.
bfd/
2011-12-27 Shinichiro Hamaji <shinichiro.hamaji@gmail.com>
* dwarf2.c (_bfd_dwarf2_slurp_debug_info): Factor out the part
which reads DWARF2 and stores in stash from find_line.
(find_line): Call _bfd_dwarf2_slurp_debug_info.
* libbfd-in.h (_bfd_dwarf2_slurp_debug_info): Add declaration.
* libbfd.h (_bfd_dwarf2_slurp_debug_info): Regenerate.
* mach-o.c (dsym_subdir): The name of subdir where debug
information may be stored.
(dsym_subdir_len): The length of dsym_subdir.
(bfd_mach_o_lookup_uuid_command): New. Lookup a load command whose
type is UUID.
(bfd_mach_o_dsym_p): New. Check if the specified BFD
corresponds to the executable.
(bfd_mach_o_find_macosx_dsym_in_fat): New. Find a debug information
BFD in a FAT binary.
(bfd_mach_o_find_macosx_dsym): New. Find a debug information BFD in
the specified binary file.
(bfd_mach_o_follow_macosx_dsym): New. Find a debug information BFD
for the original BFD.
(bfd_mach_o_find_nearest_line): Check dSYM files for Mach-O
executables, dylibs, and bundles.
(bfd_mach_o_close_and_cleanup): Clean up BFDs for the dSYM file.
* mach-o.h (debug_filename): The filename of the dSYM file.
(debug_bfd): The BFD of the dSYM file.
(debug_fat_bfd): The BFD of the fat binary containing debug_bfd.
diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c
index 767fa52..66fd16f 100644
--- a/bfd/dwarf2.c
+++ b/bfd/dwarf2.c
@@ -3117,6 +3117,122 @@ stash_find_line_fast (struct dwarf2_debug *stash,
filename_ptr, linenumber_ptr);
}
+/* Read debug information from DEBUG_BFD when DEBUG_BFD is specified.
+ If DEBUG_BFD is not specified, we read debug information from ABFD
+ or its gnu_debuglink. The gnu_debuglink is only followed when the
+ BFD being read is ABFD itself and it has no debug info section.
+ The results will be stored in PINFO.
+ If *PINFO is already set on entry, nothing is read and TRUE is
+ returned immediately. Because *PINFO is set as soon as the stash
+ has been allocated, a call made after an earlier failed attempt
+ also returns TRUE; callers should still check that the stash's
+ info_ptr is non-null before using it.
+ Otherwise the function returns TRUE iff debug information is ready. */
+
+bfd_boolean
+_bfd_dwarf2_slurp_debug_info (bfd *abfd, bfd *debug_bfd,
+ const struct dwarf_debug_section *debug_sections,
+ asymbol **symbols,
+ void **pinfo)
+{
+ bfd_size_type amt = sizeof (struct dwarf2_debug);
+ bfd_size_type total_size;
+ asection *msec;
+ struct dwarf2_debug *stash = (struct dwarf2_debug *) *pinfo;
+
+ if (stash != NULL)
+ return TRUE;
+
+ stash = (struct dwarf2_debug *) bfd_zalloc (abfd, amt);
+ if (! stash)
+ return FALSE;
+ stash->debug_sections = debug_sections;
+
+ *pinfo = stash;
+
+ if (debug_bfd == NULL)
+ debug_bfd = abfd;
+
+ msec = find_debug_info (debug_bfd, debug_sections, NULL);
+ if (msec == NULL && abfd == debug_bfd)
+ {
+ char * debug_filename = bfd_follow_gnu_debuglink (abfd, DEBUGDIR);
+
+ if (debug_filename == NULL)
+ /* No dwarf2 info, and no gnu_debuglink to follow.
+ Note that at this point the stash has been allocated, but
+ contains zeros. This lets future calls to this function
+ fail more quickly. */
+ return FALSE;
+
+ if ((debug_bfd = bfd_openr (debug_filename, NULL)) == NULL
+ || ! bfd_check_format (debug_bfd, bfd_object)
+ || (msec = find_debug_info (debug_bfd,
+ debug_sections, NULL)) == NULL)
+ {
+ if (debug_bfd)
+ bfd_close (debug_bfd);
+ /* FIXME: Should we report our failure to follow the debuglink ? */
+ free (debug_filename);
+ return FALSE;
+ }
+ }
+
+ /* There can be more than one DWARF2 info section in a BFD these
+ days. First handle the easy case when there's only one. If
+ there's more than one, try case two: none of the sections is
+ compressed. In that case, read them all in and produce one
+ large stash. We do this in two passes - in the first pass we
+ just accumulate the section sizes, and in the second pass we
+ read in the section's contents. (This allows us to avoid
+ reallocing the data as we add sections to the stash.) If
+ some or all sections are compressed, then do things the slow
+ way, with a bunch of reallocs. */
+
+ if (! find_debug_info (debug_bfd, debug_sections, msec))
+ {
+ /* Case 1: only one info section. */
+ total_size = msec->size;
+ if (! read_section (debug_bfd, &stash->debug_sections[debug_info],
+ symbols, 0,
+ &stash->info_ptr_memory, &total_size))
+ return FALSE;
+ }
+ else
+ {
+ /* Case 2: multiple sections. */
+ for (total_size = 0;
+ msec;
+ msec = find_debug_info (debug_bfd, debug_sections, msec))
+ total_size += msec->size;
+
+ stash->info_ptr_memory = (bfd_byte *) bfd_malloc (total_size);
+ if (stash->info_ptr_memory == NULL)
+ return FALSE;
+
+ total_size = 0;
+ for (msec = find_debug_info (debug_bfd, debug_sections, NULL);
+ msec;
+ msec = find_debug_info (debug_bfd, debug_sections, msec))
+ {
+ bfd_size_type size;
+
+ size = msec->size;
+ if (size == 0)
+ continue;
+
+ if (!(bfd_simple_get_relocated_section_contents
+ (debug_bfd, msec, stash->info_ptr_memory + total_size,
+ symbols)))
+ return FALSE;
+
+ total_size += size;
+ }
+ }
+
+ stash->info_ptr = stash->info_ptr_memory;
+ stash->info_ptr_end = stash->info_ptr + total_size;
+ stash->sec = find_debug_info (debug_bfd, debug_sections, NULL);
+ stash->sec_info_ptr = stash->info_ptr;
+ stash->syms = symbols;
+ stash->bfd_ptr = debug_bfd;
+
+ return TRUE;
+}
+
/* Find the source code location of SYMBOL. If SYMBOL is NULL
then find the nearest source code location corresponding to
the address SECTION + OFFSET.
@@ -3157,17 +3273,16 @@ find_line (bfd *abfd,
bfd_vma found = FALSE;
bfd_boolean do_line;
- stash = (struct dwarf2_debug *) *pinfo;
+ *filename_ptr = NULL;
+ if (functionname_ptr != NULL)
+ *functionname_ptr = NULL;
+ *linenumber_ptr = 0;
- if (! stash)
- {
- bfd_size_type amt = sizeof (struct dwarf2_debug);
+ if (! _bfd_dwarf2_slurp_debug_info (abfd, NULL,
+ debug_sections, symbols, pinfo))
+ return FALSE;
- stash = (struct dwarf2_debug *) bfd_zalloc (abfd, amt);
- if (! stash)
- return FALSE;
- stash->debug_sections = debug_sections;
- }
+ stash = (struct dwarf2_debug *) *pinfo;
/* In a relocatable file, 2 functions may have the same address.
We change the section vma so that they won't overlap. */
@@ -3197,110 +3312,11 @@ find_line (bfd *abfd,
addr += section->output_section->vma + section->output_offset;
else
addr += section->vma;
- *filename_ptr = NULL;
- if (! do_line)
- *functionname_ptr = NULL;
- *linenumber_ptr = 0;
-
- if (! *pinfo)
- {
- bfd *debug_bfd;
- bfd_size_type total_size;
- asection *msec;
-
- *pinfo = stash;
-
- msec = find_debug_info (abfd, debug_sections, NULL);
- if (msec == NULL)
- {
- char * debug_filename = bfd_follow_gnu_debuglink (abfd, DEBUGDIR);
-
- if (debug_filename == NULL)
- /* No dwarf2 info, and no gnu_debuglink to follow.
- Note that at this point the stash has been allocated, but
- contains zeros. This lets future calls to this function
- fail more quickly. */
- goto done;
-
- if ((debug_bfd = bfd_openr (debug_filename, NULL)) == NULL
- || ! bfd_check_format (debug_bfd, bfd_object)
- || (msec = find_debug_info (debug_bfd,
- debug_sections, NULL)) == NULL)
- {
- if (debug_bfd)
- bfd_close (debug_bfd);
- /* FIXME: Should we report our failure to follow the debuglink ? */
- free (debug_filename);
- goto done;
- }
- }
- else
- debug_bfd = abfd;
-
- /* There can be more than one DWARF2 info section in a BFD these
- days. First handle the easy case when there's only one. If
- there's more than one, try case two: none of the sections is
- compressed. In that case, read them all in and produce one
- large stash. We do this in two passes - in the first pass we
- just accumulate the section sizes, and in the second pass we
- read in the section's contents. (The allows us to avoid
- reallocing the data as we add sections to the stash.) If
- some or all sections are compressed, then do things the slow
- way, with a bunch of reallocs. */
-
- if (! find_debug_info (debug_bfd, debug_sections, msec))
- {
- /* Case 1: only one info section. */
- total_size = msec->size;
- if (! read_section (debug_bfd, &stash->debug_sections[debug_info],
- symbols, 0,
- &stash->info_ptr_memory, &total_size))
- goto done;
- }
- else
- {
- /* Case 2: multiple sections. */
- for (total_size = 0;
- msec;
- msec = find_debug_info (debug_bfd, debug_sections, msec))
- total_size += msec->size;
-
- stash->info_ptr_memory = (bfd_byte *) bfd_malloc (total_size);
- if (stash->info_ptr_memory == NULL)
- goto done;
-
- total_size = 0;
- for (msec = find_debug_info (debug_bfd, debug_sections, NULL);
- msec;
- msec = find_debug_info (debug_bfd, debug_sections, msec))
- {
- bfd_size_type size;
-
- size = msec->size;
- if (size == 0)
- continue;
-
- if (!(bfd_simple_get_relocated_section_contents
- (debug_bfd, msec, stash->info_ptr_memory + total_size,
- symbols)))
- goto done;
-
- total_size += size;
- }
- }
-
- stash->info_ptr = stash->info_ptr_memory;
- stash->info_ptr_end = stash->info_ptr + total_size;
- stash->sec = find_debug_info (debug_bfd, debug_sections, NULL);
- stash->sec_info_ptr = stash->info_ptr;
- stash->syms = symbols;
- stash->bfd_ptr = debug_bfd;
- }
/* A null info_ptr indicates that there is no dwarf2 info
(or that an error occured while setting up the stash). */
if (! stash->info_ptr)
- goto done;
+ return FALSE;
stash->inliner_chain = NULL;
diff --git a/bfd/libbfd-in.h b/bfd/libbfd-in.h
index 7db09e4..f7a9e21 100644
--- a/bfd/libbfd-in.h
+++ b/bfd/libbfd-in.h
@@ -549,6 +549,10 @@ bfd_boolean _bfd_generic_find_line
extern bfd_boolean _bfd_dwarf2_find_inliner_info
(bfd *, const char **, const char **, unsigned int *, void **);
+/* Read DWARF 2 debugging information. */
+extern bfd_boolean _bfd_dwarf2_slurp_debug_info
+ (bfd *, bfd *, const struct dwarf_debug_section *, asymbol **, void **);
+
/* Clean up the data used to handle DWARF 2 debugging information. */
extern void _bfd_dwarf2_cleanup_debug_info
(bfd *, void **);
diff --git a/bfd/libbfd.h b/bfd/libbfd.h
index 0beddb6..a10a651 100644
--- a/bfd/libbfd.h
+++ b/bfd/libbfd.h
@@ -554,6 +554,10 @@ bfd_boolean _bfd_generic_find_line
extern bfd_boolean _bfd_dwarf2_find_inliner_info
(bfd *, const char **, const char **, unsigned int *, void **);
+/* Read DWARF 2 debugging information. */
+extern bfd_boolean _bfd_dwarf2_slurp_debug_info
+ (bfd *, bfd *, const struct dwarf_debug_section *, asymbol **, void **);
+
/* Clean up the data used to handle DWARF 2 debugging information. */
extern void _bfd_dwarf2_cleanup_debug_info
(bfd *, void **);
diff --git a/bfd/mach-o.c b/bfd/mach-o.c
index cc68d89..f46f94d 100644
--- a/bfd/mach-o.c
+++ b/bfd/mach-o.c
@@ -277,6 +277,9 @@ static const mach_o_segment_name_xlat segsec_names_xlat[] =
{ NULL, NULL }
};
+/* Suffix appended to a binary's filename to form the directory, inside
+   its dSYM bundle, where the DWARF file is looked up.  */
+static const char dsym_subdir[] = ".dSYM/Contents/Resources/DWARF";
+/* Length of dsym_subdir, not counting the terminating NUL (sizeof alone
+   would include it and make this a size rather than a length).  */
+static const int dsym_subdir_len = sizeof (dsym_subdir) - 1;
+
/* For both cases bfd-name => mach-o name and vice versa, the specific target
is checked before the generic. This allows a target (e.g. ppc for cstring)
to override the generic definition with a more specific one. */
@@ -3738,6 +3741,152 @@ bfd_mach_o_core_file_failing_signal (bfd *abfd
ATTRIBUTE_UNUSED)
return 0;
}
+/* Return the LC_UUID load command of ABFD.  NULL is returned unless
+   the lookup reports exactly one such command.  */
+
+static bfd_mach_o_uuid_command *
+bfd_mach_o_lookup_uuid_command (bfd *abfd)
+{
+  bfd_mach_o_load_command *uuid_cmd;
+  int ncmd = bfd_mach_o_lookup_command (abfd, BFD_MACH_O_LC_UUID, &uuid_cmd);
+
+  /* This function returns a pointer, so failure is NULL, not the
+     boolean FALSE.  */
+  if (ncmd != 1)
+    return NULL;
+  return &uuid_cmd->command.uuid;
+}
+
+/* Return TRUE if DEBUG_BFD is a Mach-O object whose LC_UUID command
+   carries the same UUID as UUID_CMD, FALSE otherwise.  */
+
+static bfd_boolean
+bfd_mach_o_dsym_p (bfd *debug_bfd, bfd_mach_o_uuid_command *uuid_cmd)
+{
+  bfd_mach_o_uuid_command *candidate_uuid;
+
+  BFD_ASSERT (debug_bfd);
+  BFD_ASSERT (uuid_cmd);
+
+  /* The format check must come first; the flavour is only tested once
+     the BFD has been recognized as an object.  */
+  if (!bfd_check_format (debug_bfd, bfd_object)
+      || bfd_get_flavour (debug_bfd) != bfd_target_mach_o_flavour)
+    return FALSE;
+
+  candidate_uuid = bfd_mach_o_lookup_uuid_command (debug_bfd);
+  if (candidate_uuid == NULL)
+    return FALSE;
+
+  /* The two files belong together only if their UUIDs are identical.  */
+  if (memcmp (uuid_cmd->uuid, candidate_uuid->uuid,
+              sizeof (uuid_cmd->uuid)) == 0)
+    return TRUE;
+
+  return FALSE;
+}
+
+/* Walk the members of the fat binary FAT_BFD and return the first one
+   accepted by bfd_mach_o_dsym_p for UUID_CMD, or NULL if there is
+   none.  Members that were examined and rejected are closed.  */
+
+static bfd *
+bfd_mach_o_find_macosx_dsym_in_fat (bfd *fat_bfd,
+                                    bfd_mach_o_uuid_command *uuid_cmd)
+{
+  bfd *candidate = NULL;
+  bfd *previous = NULL;
+
+  BFD_ASSERT (fat_bfd);
+  BFD_ASSERT (uuid_cmd);
+
+  while ((candidate = bfd_mach_o_openr_next_archived_file (fat_bfd,
+                                                          candidate)) != NULL
+         && !bfd_mach_o_dsym_p (candidate, uuid_cmd))
+    {
+      /* The member before this one has already served as the cursor
+         for the call above, so it can be released now.  */
+      if (previous != NULL)
+        bfd_close (previous);
+      previous = candidate;
+    }
+
+  /* Release the last rejected member, if any.  CANDIDATE is either the
+     match or NULL at this point.  */
+  if (previous != NULL)
+    bfd_close (previous);
+
+  return candidate;
+}
+
+/* Open DEBUG_FILENAME and look in it for debug information matching
+   UUID_CMD.  The file may be a single object or a fat binary; in the
+   latter case its members are searched.
+
+   On success the matching BFD is returned and recorded in ABFD's
+   Mach-O data together with DEBUG_FILENAME (and the enclosing fat
+   BFD, if any).  On failure NULL is returned, the BFD opened for
+   DEBUG_FILENAME has been closed, and DEBUG_FILENAME has not been
+   stored anywhere, so it remains the caller's to free.  */
+
+static bfd *
+bfd_mach_o_find_macosx_dsym (bfd *abfd,
+                             bfd_mach_o_uuid_command *uuid_cmd,
+                             char *debug_filename)
+{
+  bfd *debug_bfd;
+  bfd_mach_o_data_struct *mdata;
+
+  BFD_ASSERT (abfd);
+  BFD_ASSERT (uuid_cmd);
+
+  debug_bfd = bfd_openr (debug_filename, NULL);
+  if (debug_bfd == NULL)
+    return NULL;
+
+  mdata = bfd_mach_o_get_data (abfd);
+
+  if (bfd_check_format (debug_bfd, bfd_archive))
+    {
+      bfd *r = bfd_mach_o_find_macosx_dsym_in_fat (debug_bfd, uuid_cmd);
+      if (r != NULL)
+        {
+          mdata->debug_filename = debug_filename;
+          mdata->debug_bfd = r;
+          mdata->debug_fat_bfd = debug_bfd;
+          return r;
+        }
+      /* No member matched.  Fall through so that the fat BFD opened
+         above is closed instead of being leaked.  */
+    }
+  else if (bfd_mach_o_dsym_p (debug_bfd, uuid_cmd))
+    {
+      mdata->debug_filename = debug_filename;
+      mdata->debug_bfd = debug_bfd;
+      return debug_bfd;
+    }
+
+  bfd_close (debug_bfd);
+
+  return NULL;
+}
+
+/* Find the debug information BFD for ABFD in the dSYM bundle that sits
+   next to the file ABFD was opened from (the enclosing archive's file
+   when ABFD is an archive member).  Returns NULL if ABFD is not a
+   Mach-O BFD, has no filename, has no usable LC_UUID command, if
+   memory runs out, or if no matching dSYM file is found.  */
+
+static bfd *
+bfd_mach_o_follow_macosx_dsym (bfd *abfd)
+{
+  char *debug_filename;
+  bfd_mach_o_uuid_command *uuid_cmd;
+  bfd *debug_bfd, *base_bfd = abfd;
+  const char *base_basename;
+
+  if (abfd == NULL || bfd_get_flavour (abfd) != bfd_target_mach_o_flavour)
+    return NULL;
+
+  if (abfd->my_archive)
+    base_bfd = abfd->my_archive;
+  /* BFD may have been opened from a stream.  */
+  if (base_bfd->filename == NULL)
+    {
+      bfd_set_error (bfd_error_invalid_operation);
+      return NULL;
+    }
+  base_basename = lbasename (base_bfd->filename);
+
+  uuid_cmd = bfd_mach_o_lookup_uuid_command (abfd);
+  if (uuid_cmd == NULL)
+    return NULL;
+
+  /* TODO: We assume the DWARF file has the same name as the binary's.
+     It seems apple's GDB checks all files in the dSYM bundle directory.
+     http://opensource.apple.com/source/gdb/gdb-1708/src/gdb/macosx/macosx-tdep.c
+  */
+  /* Room for "<filename><dsym_subdir>/<basename>" and the final NUL.  */
+  debug_filename = (char *) bfd_malloc (strlen (base_bfd->filename)
+                                        + dsym_subdir_len + 1
+                                        + strlen (base_basename) + 1);
+  /* Do not hand a null buffer to sprintf when allocation fails.  */
+  if (debug_filename == NULL)
+    return NULL;
+  sprintf (debug_filename, "%s%s/%s",
+           base_bfd->filename, dsym_subdir, base_basename);
+
+  /* On success the filename is kept in ABFD's Mach-O data; otherwise it
+     is still ours to release.  */
+  debug_bfd = bfd_mach_o_find_macosx_dsym (abfd, uuid_cmd, debug_filename);
+  if (debug_bfd == NULL)
+    free (debug_filename);
+
+  return debug_bfd;
+}
+
bfd_boolean
bfd_mach_o_find_nearest_line (bfd *abfd,
asection *section,
@@ -3748,9 +3897,30 @@ bfd_mach_o_find_nearest_line (bfd *abfd,
unsigned int *line_ptr)
{
bfd_mach_o_data_struct *mdata = bfd_mach_o_get_data (abfd);
- /* TODO: Handle executables and dylibs by using dSYMs. */
- if (mdata->header.filetype != BFD_MACH_O_MH_OBJECT)
+ if (mdata == NULL)
return FALSE;
+ switch (mdata->header.filetype)
+ {
+ case BFD_MACH_O_MH_OBJECT:
+ break;
+ case BFD_MACH_O_MH_EXECUTE:
+ case BFD_MACH_O_MH_DYLIB:
+ case BFD_MACH_O_MH_BUNDLE:
+ case BFD_MACH_O_MH_KEXT_BUNDLE:
+ if (mdata->dwarf2_find_line_info == NULL)
+ {
+ bfd *debug_bfd = bfd_mach_o_follow_macosx_dsym (abfd);
+ if (! debug_bfd)
+ return FALSE;
+ if (! _bfd_dwarf2_slurp_debug_info (abfd, debug_bfd,
+ dwarf_debug_sections, symbols,
+ &mdata->dwarf2_find_line_info))
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
if (_bfd_dwarf2_find_nearest_line (abfd, dwarf_debug_sections,
section, symbols, offset,
filename_ptr, functionname_ptr,
@@ -3768,6 +3938,21 @@ bfd_mach_o_close_and_cleanup (bfd *abfd)
{
_bfd_dwarf2_cleanup_debug_info (abfd, &mdata->dwarf2_find_line_info);
bfd_mach_o_free_cached_info (abfd);
+ if (mdata->debug_bfd != NULL)
+ {
+ bfd_close (mdata->debug_bfd);
+ mdata->debug_bfd = NULL;
+ }
+ if (mdata->debug_fat_bfd != NULL)
+ {
+ bfd_close (mdata->debug_fat_bfd);
+ mdata->debug_fat_bfd = NULL;
+ }
+ if (mdata->debug_filename != NULL)
+ {
+ free (mdata->debug_filename);
+ mdata->debug_filename = NULL;
+ }
}
return _bfd_generic_close_and_cleanup (abfd);
diff --git a/bfd/mach-o.h b/bfd/mach-o.h
index 89dce1a..c94dd55 100644
--- a/bfd/mach-o.h
+++ b/bfd/mach-o.h
@@ -520,6 +520,13 @@ typedef struct mach_o_data_struct
/* A place to stash dwarf2 info for this bfd. */
void *dwarf2_find_line_info;
+ /* Filename of .dSYM file. */
+ char *debug_filename;
+ /* BFD of .dSYM file. */
+ bfd *debug_bfd;
+ /* BFD of a fat binary which contains debug_bfd. */
+ bfd *debug_fat_bfd;
+
/* Cache of dynamic relocs. */
arelent *dyn_reloc_cache;
}