This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
Re: [patch 6/6] gdbserver build-id attribute generator
- From: Jan Kratochvil <jan dot kratochvil at redhat dot com>
- To: Aleksandar Ristovski <aristovski at qnx dot com>
- Cc: "gdb-patches at sourceware dot org" <gdb-patches at sourceware dot org>
- Date: Tue, 26 Mar 2013 21:41:57 +0100
- Subject: Re: [patch 6/6] gdbserver build-id attribute generator
- References: <51278984 dot 3070208 at qnx dot com> <20130310210843 dot GG21130 at host2 dot jankratochvil dot net> <514C56D4 dot 1060906 at qnx dot com>
On Fri, 22 Mar 2013 14:04:20 +0100, Aleksandar Ristovski wrote:
> diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
> index 4ac28bb..5ff9e95 100644
> --- a/gdb/doc/gdb.texinfo
> +++ b/gdb/doc/gdb.texinfo
> @@ -40404,6 +40404,9 @@ memory address. It is a displacement of absolute memory address against
> address the file was prelinked to during the library load.
> @item
> @code{l_ld}, which is memory address of the @code{PT_DYNAMIC} segment
> +@item
> +@code{build-id}, hex encoded @code{.note.gnu.build-id} section, if such
> +section exists.
@code{build-id}, hex encoded @code{NT_GNU_BUILD_ID} note, if such
@code{PT_NOTE} exists.
This describes what gdbserver sends, and gdbserver may even have a binary with
no section table therein. segments == runtime, sections == link time.
> @end itemize
>
> Additionally the single @code{main-lm} attribute specifies address of
> @@ -40421,7 +40424,7 @@ looks like this:
> <library name="/lib/ld-linux.so.2" lm="0xe4f51c" l_addr="0xe2d000"
> l_ld="0xe4eefc"/>
> <library name="/lib/libc.so.6" lm="0xe4fbe8" l_addr="0x154000"
> - l_ld="0x152350"/>
> + l_ld="0x152350" build-id="9afccf7cc41e6293476223fe72480854"/>
> </library-list-svr>
> @end smallexample
>
> @@ -40437,6 +40440,7 @@ The format of an SVR4 library list is described by this DTD:
> <!ATTLIST library lm CDATA #REQUIRED>
> <!ATTLIST library l_addr CDATA #REQUIRED>
> <!ATTLIST library l_ld CDATA #REQUIRED>
Please reindent these entries to match their columns like:
<!ATTLIST library l_addr CDATA #REQUIRED>
<!ATTLIST library l_ld CDATA #REQUIRED>
> +<!ATTLIST library build-id CDATA #IMPLIED>
> @end smallexample
>
> @node Memory Map Format
> diff --git a/gdb/features/library-list-svr4.dtd b/gdb/features/library-list-svr4.dtd
> index cae7fd8..e4409ba 100644
> --- a/gdb/features/library-list-svr4.dtd
> +++ b/gdb/features/library-list-svr4.dtd
> @@ -14,3 +14,4 @@
> <!ATTLIST library lm CDATA #REQUIRED>
> <!ATTLIST library l_addr CDATA #REQUIRED>
> <!ATTLIST library l_ld CDATA #REQUIRED>
Again the reindentation.
> +<!ATTLIST library build-id CDATA #IMPLIED>
> diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
> index 523926d..8bbb5ba 100644
> --- a/gdb/gdbserver/linux-low.c
> +++ b/gdb/gdbserver/linux-low.c
> @@ -20,6 +20,7 @@
> #include "linux-low.h"
> #include "linux-osdata.h"
> #include "agent.h"
> +#include "linux-maps.h"
>
> #include "gdb_wait.h"
> #include <stdio.h>
> @@ -43,6 +44,7 @@
> #include "gdb_stat.h"
> #include <sys/vfs.h>
> #include <sys/uio.h>
> +#include <search.h>
> #ifndef ELFMAG0
> /* Don't include <linux/elf.h> here. If it got included by gdb_proc_service.h
> then ELFMAG0 will have been defined. If it didn't get included by
> @@ -5643,6 +5645,265 @@ struct link_map_offsets
> int l_prev_offset;
> };
>
> +
> +/* Structure for holding all mappings. Only mapping
s/all mappings/one mapping/.
> + containing l_ld can have hex_build_id set. */
> +
> +struct build_id_list
Calling it "*_list" is confusing, it is only one entry. VEC makes it a list.
Moreover I would find more appropriate something like "mapping_entry", there
will be many such structs with no relevant build-id.
> +{
Each field should have a comment. So you could remove the empty lines and say
they are all stored from the parameters of linux_find_memory_region_ftype.
> + ULONGEST vaddr;
> +
> + ULONGEST size;
> +
> + ULONGEST offset;
offset is not used so it could be removed - but in fact it should be used,
described later.
> +
> + ULONGEST inode;
> +
> + const char *filename;
It is not used anywhere, you do not need to store it at all.
> + int read:1;
It should be 'unsigned read : 1;', otherwise true value gets read as -1 (which
may not hurt but it is just not great). Also see the ' : 1' spacing (I do not
see it described anywhere but it is so in GDB)..
> + int write:1;
> + int exec:1;
> + int modified:1;
3x likewise.
But now I see any of read/write/exec/modified is not used so they should be
removed.
> +
> + /* build_id is hex encoded string allocated using malloc, and
> + needs to be freed. */
s/build_id/hex_build_id/ although one does not need to repeat the name here.
s/malloc/xmalloc/
Add: It may be NULL.
> +
> + char *hex_build_id;
> +};
> +
> +typedef struct build_id_list build_id_list_s;
> +
> +DEF_VEC_O(build_id_list_s);
> +
> +static void
> +free_build_id_list (VEC (build_id_list_s) *lst)
> +{
> + if (VEC_length (build_id_list_s, lst))
This condition can be removed, VEC_iterate works even if VEC_length == 0.
> + {
> + int ix;
> + build_id_list_s *p;
> +
> + for (ix = 0; VEC_iterate (build_id_list_s, lst, ix, p); ++ix)
> + xfree (p->hex_build_id);
> + }
> +
> + VEC_free (build_id_list_s, lst);
> +}
> +
> +/* Used for qsort-ing list by vaddr. */
> +
> +static int
> +compare_build_id_list (const void *const b1,
> + const void *const b2)
Formatting - it can be:
compare_build_id_list (const void *const b1, const void *const b2)
> +{
> + const build_id_list_s *const p1 = b1;
> + const build_id_list_s *const p2 = b2;
> +
> + if (p1->vaddr > p2->vaddr)
> + return 1;
> + if (p1->vaddr < p2->vaddr)
> + return -1;
> + return 0;
> +}
> +
> +/* Used for finding a mapping containing the given
> + l_ld passed in K. */
> +
> +static int
> +compare_build_id_list_range (const void *const k,
> + const void *const b)
Formatting:
compare_build_id_list_range (const void *const k, const void *const b)
> +{
> + const ULONGEST key = *(CORE_ADDR*) k;
> + const build_id_list_s *const p = b;
> +
> + if (key < p->vaddr)
> + return -1;
> +
> + if (key < p->vaddr + p->size)
> + return 0;
> +
> + return 1;
> +}
> +
> +/* Used for linear search of the lowest vaddr for the given
> + inode. */
> +
> +static int
> +compare_build_id_list_inode (const void *const k, const void *const b)
> +{
> + const ULONGEST key = *(ULONGEST*)k;
GNU Coding Standards formatting:
const ULONGEST key = *(ULONGEST *) k;
> + const build_id_list_s *const p = b;
> +
> + return !(key == p->inode);
> +}
> +
> +struct find_memory_region_callback_data {
Formatting:
struct find_memory_region_callback_data
{
> + unsigned is_elf64;
> +
> + /* Return. Ordered list of all object mappings sorted in
> + ascending order by VADDR. Must be freed with free_build_id_list. */
> + VEC (build_id_list_s) *list;
> +};
> +
> +static linux_find_memory_region_ftype find_memory_region_callback;
> +
> +/* Read .note.gnu.build-id from PT_NOTE. */
> +
> +static void
> +read_build_id (struct find_memory_region_callback_data *const p,
> + build_id_list_s *const bil, const CORE_ADDR load_addr,
> + const CORE_ADDR l_addr)
> +{
> + union ElfXX_Ehdr
> + {
> + Elf32_Ehdr _32;
> + Elf64_Ehdr _64;
> + } ehdr;
> + union ElfXX_Phdr
> + {
> + Elf32_Phdr _32;
> + Elf64_Phdr _64;
> + } phdr;
> + union ElfXX_Nhdr
> + {
> + Elf32_Nhdr _32;
> + Elf64_Nhdr _64;
> + } *nhdr;
get_dynamic already parses/scans PHDRs, you introduce new PHDR parser, they
should be merged.
> +#define HDR(hdr, fld) (((p)->is_elf64)? (hdr)._64.fld : (hdr)._32.fld)
> +#define SIZEOFHDR(hdr) (((p)->is_elf64)?sizeof((hdr)._64):sizeof((hdr)._32))
> + if (linux_read_memory (load_addr, (unsigned char *) &ehdr, SIZEOFHDR (ehdr))
> + == 0
> + && HDR (ehdr, e_ident[EI_MAG0]) == ELFMAG0
> + && HDR (ehdr, e_ident[EI_MAG1]) == ELFMAG1
> + && HDR (ehdr, e_ident[EI_MAG2]) == ELFMAG2
> + && HDR (ehdr, e_ident[EI_MAG3]) == ELFMAG3)
> + {
> + unsigned i;
> +
> + for (i = 0; i < HDR (ehdr, e_phnum); ++i)
> + {
> + if (linux_read_memory (load_addr + HDR (ehdr, e_phoff)
> + + HDR (ehdr, e_phentsize) * i,
> + (unsigned char *) &phdr,
> + HDR (ehdr, e_phentsize)) != 0)
> + {
> + warning ("Could not read program header.");
> + break;
> + }
> + if (HDR (phdr, p_type) == PT_NOTE)
> + {
> + void *const pt_note = xmalloc (HDR (phdr, p_memsz));
> + const void *const pt_end
> + = (char*)pt_note + HDR (phdr, p_memsz);
GNU Coding Standard formatting:
= (char *) pt_note + HDR (phdr, p_memsz);
> +
> + if (linux_read_memory (HDR (phdr, p_vaddr) + l_addr,
> + pt_note, HDR (phdr, p_memsz)) != 0)
> + {
> + xfree (pt_note);
> + warning ("Could not read note.");
> + break;
> + }
> +
> + for (nhdr = pt_note; (void*)nhdr < pt_end;)
GNU Coding Standard formatting:
for (nhdr = pt_note; (void *) nhdr < pt_end;)
Although I would simplify it as:
for (nhdr = pt_note; (void *) nhdr < pt_end;
nhdr = (void *) nhdr + note_sz)
> + {
> + const size_t note_sz
> + = HDR (*nhdr, n_namesz) + HDR (*nhdr, n_descsz)
> + + SIZEOFHDR (*nhdr);
> +
> + if ((char*)nhdr + note_sz > (char*)pt_end)
GNU Coding Standard formatting:
if ((char *) nhdr + note_sz > (char *) pt_end)
Check also for NOTE_SZ == 0 as bin2hex could crash in such case.
> + {
> + warning ("Malformed PT_NOTE\n");
> + break;
> + }
> + if (HDR (*nhdr, n_type) == NT_GNU_BUILD_ID)
> + {
> + /* .note.gnu.build-id. */
GNU Coding Standard formatting:
> + /* .note.gnu.build-id. */
But this is not ".note.gnu.build-id", that would be a section name. In fact
just remove the comment, there is already NT_GNU_BUILD_ID above.
> + bil->hex_build_id = xmalloc (note_sz * 2 + 1);
> + bin2hex ((gdb_byte*)nhdr, bil->hex_build_id, note_sz);
GNU Coding Standard formatting:
bin2hex ((gdb_byte *) nhdr, bil->hex_build_id, note_sz);
> + xfree (pt_note);
> + return;
> + }
> + nhdr = (void*)((char *)nhdr + note_sz);
GNU Coding Standard formatting + simplification:
nhdr = (void *) nhdr + note_sz;
> + }
> + xfree (pt_note);
> + }
> + }
> + }
> + else
> + warning ("Reading build-id failed.");
> +#undef HDR
> +#undef SIZEOFHDR
> +}
> +
> +
> +/* Create list of build_id_list. */
Update build_id_list struct name.
> +
> +static int
> +find_memory_region_callback (ULONGEST vaddr, ULONGEST size, ULONGEST offset,
> + ULONGEST inode, int read, int write, int exec,
> + int modified, const char *filename, void *data)
> +{
> + if (inode != 0)
> + {
> + struct find_memory_region_callback_data *const p = data;
> + build_id_list_s bil;
> +
> + bil.vaddr = vaddr;
> + bil.size = size;
> + bil.inode = inode;
> + bil.read = !(read == 0);
> + bil.write = !(write == 0);
> + bil.exec = !(write == 0);
> + bil.modified = !(write == 0);
> + bil.filename = filename;
> + bil.hex_build_id = NULL;
> +
> + VEC_safe_push (build_id_list_s, p->list, &bil);
> + }
> +
/* Continue the traversal. */
> + return 0;
> +}
> +
> +/* Get build-id for the given L_LD. DATA must point to
> + already filled list of build_id_list elements.
Update build_id_list struct name.
> +
> + Return build_id as stored in the list element corresponding
> + to L_LD.
> +
> + NULL may be returned if build-id could not be fetched.
> +
> + Returned string must not be freed explicitly.
> +*/
GNU Coding Standards formatting:
Returned string must not be freed explicitly. */
> +
> +static const char *
> +get_hex_build_id (const CORE_ADDR l_addr, const CORE_ADDR l_ld,
> + struct find_memory_region_callback_data *const data)
> +{
> + build_id_list_s *const bil
> + = bsearch (&l_ld, VEC_address (build_id_list_s, data->list),
> + VEC_length (build_id_list_s, data->list),
> + sizeof (build_id_list_s), compare_build_id_list_range);
> +
> + if (bil == NULL)
> + return NULL;
> +
> + if (bil->hex_build_id == NULL)
> + {
> + CORE_ADDR load_addr;
> + size_t len = VEC_length (build_id_list_s, data->list);
> +
> + /* Must do linear search for the first mapping of this inode. */
You do not have to. One could search either for an entry containing:
searched->vaddr == bil->vaddr - bil->offset
or in practice it would be faster to do a linear search backwards from BIL
down as the offset == 0 entry will be at most the fourth one.
One can stop when the vaddr gets lower than bil->vaddr - bil->offset.
The way you implemented it the time complexity is quadratic. I do not insist
on a fix as it will be pretty fast but it would be nice to fix it.
> + build_id_list_s *const bil_min
> + = lfind (&bil->inode, VEC_address (build_id_list_s, data->list), &len,
> + sizeof (build_id_list_s), compare_build_id_list_inode);
> + gdb_assert (bil_min != NULL && "This should never happen.");
It can happen. Application can munmap first page of its shared library,
gdbserver should not crash on such application. Just do not read the build-id
in such case.
Here you should also verify (and ignore build-id for such entry if it fails
the verification) ->offset matches, that is:
bil_min->offset == 0
bil_min->vaddr + bil->offset == bil->vaddr
That is also more reliable than inode, inode should be compared with device
major/minor numbers but those are currently not passed to
find_memory_region_ftype. (Not asking for changing it.)
> + load_addr = bil_min->vaddr;
> + read_build_id (data, bil, load_addr, l_addr);
If the read fails it will be retried next time; but get_hex_build_id should
not be called for the same library twice anyway. Not asking for a change.
> + }
> +
> + return bil->hex_build_id;
> +}
> +
> /* Construct qXfer:libraries-svr4:read reply. */
>
> static int
> @@ -5655,6 +5916,8 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
> struct process_info_private *const priv = current_process ()->private;
> char filename[PATH_MAX];
> int pid, is_elf64;
> + struct find_memory_region_callback_data data;
> + int build_id_list_p;
>
> static const struct link_map_offsets lmo_32bit_offsets =
> {
> @@ -5690,6 +5953,19 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
> is_elf64 = elf_64_file_p (filename, &machine);
> lmo = is_elf64 ? &lmo_64bit_offsets : &lmo_32bit_offsets;
>
> + data.is_elf64 = is_elf64;
> + data.list = NULL;
> + VEC_reserve (build_id_list_s, data.list, 16);
> + build_id_list_p
I do not see why build_id_list_p exists at all. data.list just remains empty
and all the functions already cope with it OK.
> + = linux_find_memory_regions_full (
> + lwpid_of (get_thread_lwp (current_inferior)),
> + find_memory_region_callback, &data, NULL) >= 0;
You could rather just add a warning if linux_find_memory_regions_full
returns -1.
> +
> + if (build_id_list_p)
> + qsort (VEC_address (build_id_list_s, data.list),
> + VEC_length (build_id_list_s, data.list),
> + sizeof (build_id_list_s), compare_build_id_list);
It is always already sorted by Linux kernel, rather a for cycle to verify it
really is sorted.
> +
> if (priv->r_debug == 0)
> priv->r_debug = get_r_debug (pid, is_elf64);
>
> @@ -5764,6 +6040,7 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
> /* 6x the size for xml_escape_text below. */
> size_t len = 6 * strlen ((char *) libname);
> char *name;
> + const char *hex_enc_build_id = NULL;
>
> if (!header_done)
> {
> @@ -5772,21 +6049,29 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
> header_done = 1;
> }
>
> - while (allocated < p - document + len + 200)
> + name = xml_escape_text ((char *) libname);
> + if (build_id_list_p)
> + hex_enc_build_id = get_hex_build_id (l_addr, l_ld, &data);
> +
> + while (allocated < (p - document + len + 200
> + + (hex_enc_build_id != NULL
> + ? strlen (hex_enc_build_id) : 0)))
> {
> /* Expand to guarantee sufficient storage. */
> - uintptr_t document_len = p - document;
> + const ptrdiff_t document_len = p - document;
>
> - document = xrealloc (document, 2 * allocated);
> allocated *= 2;
> + document = xrealloc (document, allocated);
> p = document + document_len;
> }
>
> - name = xml_escape_text ((char *) libname);
> p += sprintf (p, "<library name=\"%s\" lm=\"0x%lx\" "
> - "l_addr=\"0x%lx\" l_ld=\"0x%lx\"/>",
> + "l_addr=\"0x%lx\" l_ld=\"0x%lx\"",
> name, (unsigned long) lm_addr,
> (unsigned long) l_addr, (unsigned long) l_ld);
> + if (hex_enc_build_id != NULL)
> + p += sprintf (p, " build-id=\"%s\"", hex_enc_build_id);
> + p += sprintf(p, "/>");
> free (name);
> }
> else if (lm_prev == 0)
> @@ -5821,6 +6106,7 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
>
> memcpy (readbuf, document + offset, len);
> xfree (document);
> + free_build_id_list (data.list);
>
> return len;
> }
> --
> 1.7.10.4
>
>
Thanks,
Jan