This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch 6/6] gdbserver build-id attribute generator


On Fri, 22 Mar 2013 14:04:20 +0100, Aleksandar Ristovski wrote:
> diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
> index 4ac28bb..5ff9e95 100644
> --- a/gdb/doc/gdb.texinfo
> +++ b/gdb/doc/gdb.texinfo
> @@ -40404,6 +40404,9 @@ memory address.  It is a displacement of absolute memory address against
>  address the file was prelinked to during the library load.
>  @item
>  @code{l_ld}, which is memory address of the @code{PT_DYNAMIC} segment
> +@item

> +@code{build-id}, hex encoded @code{.note.gnu.build-id} section, if such
> +section exists.

@code{build-id}, hex encoded @code{NT_GNU_BUILD_ID} note, if such
@code{PT_NOTE} exists.

This describes what gdbserver sends, and gdbserver may even have a binary with
no section table therein.  segments == runtime, sections == link time.


>  @end itemize
>  
>  Additionally the single @code{main-lm} attribute specifies address of
> @@ -40421,7 +40424,7 @@ looks like this:
>    <library name="/lib/ld-linux.so.2" lm="0xe4f51c" l_addr="0xe2d000"
>             l_ld="0xe4eefc"/>
>    <library name="/lib/libc.so.6" lm="0xe4fbe8" l_addr="0x154000"
> -           l_ld="0x152350"/>
> +           l_ld="0x152350" build-id="9afccf7cc41e6293476223fe72480854"/>
>  </library-list-svr>
>  @end smallexample
>  
> @@ -40437,6 +40440,7 @@ The format of an SVR4 library list is described by this DTD:
>  <!ATTLIST library            lm      CDATA   #REQUIRED>
>  <!ATTLIST library            l_addr  CDATA   #REQUIRED>
>  <!ATTLIST library            l_ld    CDATA   #REQUIRED>

Please reindent these entries to match their columns like:

   <!ATTLIST library            l_addr   CDATA  #REQUIRED>
   <!ATTLIST library            l_ld     CDATA  #REQUIRED>
> +<!ATTLIST library            build-id CDATA  #IMPLIED>

>  @end smallexample
>  
>  @node Memory Map Format
> diff --git a/gdb/features/library-list-svr4.dtd b/gdb/features/library-list-svr4.dtd
> index cae7fd8..e4409ba 100644
> --- a/gdb/features/library-list-svr4.dtd
> +++ b/gdb/features/library-list-svr4.dtd
> @@ -14,3 +14,4 @@
>  <!ATTLIST library            lm      CDATA   #REQUIRED>
>  <!ATTLIST library            l_addr  CDATA   #REQUIRED>
>  <!ATTLIST library            l_ld    CDATA   #REQUIRED>

Again the reindentation.

> +<!ATTLIST library            build-id CDATA  #IMPLIED>
> diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
> index 523926d..8bbb5ba 100644
> --- a/gdb/gdbserver/linux-low.c
> +++ b/gdb/gdbserver/linux-low.c
> @@ -20,6 +20,7 @@
>  #include "linux-low.h"
>  #include "linux-osdata.h"
>  #include "agent.h"
> +#include "linux-maps.h"
>  
>  #include "gdb_wait.h"
>  #include <stdio.h>
> @@ -43,6 +44,7 @@
>  #include "gdb_stat.h"
>  #include <sys/vfs.h>
>  #include <sys/uio.h>
> +#include <search.h>
>  #ifndef ELFMAG0
>  /* Don't include <linux/elf.h> here.  If it got included by gdb_proc_service.h
>     then ELFMAG0 will have been defined.  If it didn't get included by
> @@ -5643,6 +5645,265 @@ struct link_map_offsets
>      int l_prev_offset;
>    };
>  
> +
> +/* Structure for holding all mappings.  Only mapping

s/all mappings/one mapping/.

> +   containing l_ld can have hex_build_id set.  */
> +
> +struct build_id_list

Calling it "*_list" is confusing, it is only one entry.  VEC makes it a list.
Moreover I would find more appropriate something like "mapping_entry", there
will be many such structs with no relevant build-id.


> +{

Each field should have a comment.  So you could remove the empty lines and say
they are all stored from the parameters of linux_find_memory_region_ftype.


> +  ULONGEST vaddr;
> +
> +  ULONGEST size;
> +
> +  ULONGEST offset;

offset is not used so it could be removed - but in fact it should be used,
described later.


> +
> +  ULONGEST inode;
> +
> +  const char *filename;

It is not used anywhere, you do not need to store it at all.


> +  int read:1;

It should be 'unsigned read : 1;', otherwise true value gets read as -1 (which
may not hurt but it is just not great).  Also see the ' : 1' spacing (I do not
see it described anywhere but it is so in GDB)..


> +  int write:1;
> +  int exec:1;
> +  int modified:1;

3x likewise.

But now I see any of read/write/exec/modified is not used so they should be
removed.


> +
> +  /* build_id is hex encoded string allocated using malloc, and
> +     needs to be freed.  */

s/build_id/hex_build_id/ although one does not need to repeat the name here.
s/malloc/xmalloc/
Add: It may be NULL.

> +
> +  char *hex_build_id;
> +};
> +
> +typedef struct build_id_list build_id_list_s;
> +
> +DEF_VEC_O(build_id_list_s);
> +
> +static void
> +free_build_id_list (VEC (build_id_list_s) *lst)
> +{
> +  if (VEC_length (build_id_list_s, lst))

This condition can be removed, VEC_iterate works even if VEC_length == 0.


> +    {
> +      int ix;
> +      build_id_list_s *p;
> +
> +      for (ix = 0; VEC_iterate (build_id_list_s, lst, ix, p); ++ix)
> +	xfree (p->hex_build_id);
> +    }
> +
> +  VEC_free (build_id_list_s, lst);
> +}
> +
> +/* Used for qsort-ing list by vaddr.  */
> +
> +static int
> +compare_build_id_list (const void *const b1,
> +		       const void *const b2)

Formatting - it can be:

compare_build_id_list (const void *const b1, const void *const b2)


> +{
> +  const build_id_list_s *const p1 = b1;
> +  const build_id_list_s *const p2 = b2;
> +
> +  if (p1->vaddr > p2->vaddr)
> +    return 1;
> +  if (p1->vaddr < p2->vaddr)
> +    return -1;
> +  return 0;
> +}
> +
> +/* Used for finding a mapping containing the given
> +   l_ld passed in K.  */
> +
> +static int
> +compare_build_id_list_range (const void *const k,
> +			     const void *const b)

Formatting:

compare_build_id_list_range (const void *const k, const void *const b)


> +{
> +  const ULONGEST key = *(CORE_ADDR*) k;
> +  const build_id_list_s *const p = b;
> +
> +  if (key < p->vaddr)
> +    return -1;
> +
> +  if (key < p->vaddr + p->size)
> +    return 0;
> +
> +  return 1;
> +}
> +
> +/* Used for linear search of the lowest vaddr for the given
> +   inode.  */
> +
> +static int
> +compare_build_id_list_inode (const void *const k, const void *const b)
> +{
> +  const ULONGEST key = *(ULONGEST*)k;

GNU Coding Standards formatting:

const ULONGEST key = *(ULONGEST *) k;


> +  const build_id_list_s *const p = b;
> +
> +  return !(key == p->inode);
> +}
> +
> +struct find_memory_region_callback_data {

Formatting:

struct find_memory_region_callback_data
{

> +  unsigned is_elf64;
> +
> +  /* Return.  Ordered list of all object mappings sorted in
> +     ascending order by VADDR.  Must be freed with free_build_id_list.  */
> +  VEC (build_id_list_s) *list;
> +};
> +
> +static linux_find_memory_region_ftype find_memory_region_callback;
> +
> +/* Read .note.gnu.build-id from PT_NOTE.  */
> +
> +static void
> +read_build_id (struct find_memory_region_callback_data *const p,
> +	       build_id_list_s *const bil, const CORE_ADDR load_addr,
> +	       const CORE_ADDR l_addr)
> +{
> +  union ElfXX_Ehdr
> +    {
> +      Elf32_Ehdr _32;
> +      Elf64_Ehdr _64;
> +    } ehdr;
> +  union ElfXX_Phdr
> +    {
> +      Elf32_Phdr _32;
> +      Elf64_Phdr _64;
> +    } phdr;
> +  union ElfXX_Nhdr
> +    {
> +      Elf32_Nhdr _32;
> +      Elf64_Nhdr _64;
> +    } *nhdr;

get_dynamic already parses/scans PHDRs, you introduce new PHDR parser, they
should be merged.


> +#define HDR(hdr, fld) (((p)->is_elf64)? (hdr)._64.fld : (hdr)._32.fld)
> +#define SIZEOFHDR(hdr) (((p)->is_elf64)?sizeof((hdr)._64):sizeof((hdr)._32))
> +  if (linux_read_memory (load_addr, (unsigned char *) &ehdr, SIZEOFHDR (ehdr))
> +      == 0
> +      && HDR (ehdr, e_ident[EI_MAG0]) == ELFMAG0
> +      && HDR (ehdr, e_ident[EI_MAG1]) == ELFMAG1
> +      && HDR (ehdr, e_ident[EI_MAG2]) == ELFMAG2
> +      && HDR (ehdr, e_ident[EI_MAG3]) == ELFMAG3)
> +    {
> +      unsigned i;
> +
> +      for (i = 0; i < HDR (ehdr, e_phnum); ++i)
> +	{
> +	  if (linux_read_memory (load_addr + HDR (ehdr, e_phoff)
> +				 + HDR (ehdr, e_phentsize) * i,
> +				 (unsigned char *) &phdr,
> +				 HDR (ehdr, e_phentsize)) != 0)
> +	    {
> +	      warning ("Could not read program header.");
> +	      break;
> +	    }
> +	  if (HDR (phdr, p_type) == PT_NOTE)
> +	    {
> +	      void *const pt_note = xmalloc (HDR (phdr, p_memsz));
> +	      const void *const pt_end

> +		= (char*)pt_note + HDR (phdr, p_memsz);

GNU Coding Standard formatting:
   		= (char *) pt_note + HDR (phdr, p_memsz);


> +
> +	      if (linux_read_memory (HDR (phdr, p_vaddr) + l_addr,
> +				     pt_note, HDR (phdr, p_memsz)) != 0)
> +		{
> +		  xfree (pt_note);
> +		  warning ("Could not read note.");
> +		  break;
> +		}
> +

> +	      for (nhdr = pt_note; (void*)nhdr < pt_end;)
GNU Coding Standard formatting:
   	      for (nhdr = pt_note; (void *) nhdr < pt_end;)

Although I would simplify it as:
   	      for (nhdr = pt_note; (void *) nhdr < pt_end;
	           nhdr = (void *) nhdr + note_sz)

> +		{
> +		  const size_t note_sz
> +		    = HDR (*nhdr, n_namesz) + HDR (*nhdr, n_descsz)
> +			   + SIZEOFHDR (*nhdr);
> +

> +		  if ((char*)nhdr + note_sz > (char*)pt_end)

GNU Coding Standard formatting:
   		  if ((char *) nhdr + note_sz > (char *) pt_end)

Check also for NOTE_SZ == 0 as bin2hex could crash in such case.


> +		    {
> +		      warning ("Malformed PT_NOTE\n");
> +		      break;
> +		    }
> +		  if (HDR (*nhdr, n_type) == NT_GNU_BUILD_ID)
> +		    {
> +		      /* .note.gnu.build-id. */

GNU Coding Standard formatting:
> +		      /* .note.gnu.build-id.  */

But this is not ".note.gnu.build-id", that would be a section name.  In fact
just remove the comment, there is already NT_GNU_BUILD_ID above.


> +		      bil->hex_build_id = xmalloc (note_sz * 2 + 1);

> +		      bin2hex ((gdb_byte*)nhdr, bil->hex_build_id, note_sz);
GNU Coding Standard formatting:
   		      bin2hex ((gdb_byte *) nhdr, bil->hex_build_id, note_sz);


> +		      xfree (pt_note);
> +		      return;
> +		    }

> +		  nhdr = (void*)((char *)nhdr + note_sz);
GNU Coding Standard formatting + simplification:
   		  nhdr = (void *) nhdr + note_sz;

> +		}
> +	      xfree (pt_note);
> +	    }
> +	}
> +    }
> +  else
> +    warning ("Reading build-id failed.");
> +#undef HDR
> +#undef SIZEOFHDR
> +}
> +
> +
> +/* Create list of build_id_list.  */

Update build_id_list struct name.



> +
> +static int
> +find_memory_region_callback (ULONGEST vaddr, ULONGEST size, ULONGEST offset,
> +			     ULONGEST inode, int read, int write, int exec,
> +			     int modified, const char *filename, void *data)
> +{
> +  if (inode != 0)
> +    {
> +      struct find_memory_region_callback_data *const p = data;
> +      build_id_list_s bil;
> +
> +      bil.vaddr = vaddr;
> +      bil.size = size;
> +      bil.inode = inode;
> +      bil.read = !(read == 0);
> +      bil.write = !(write == 0);
> +      bil.exec = !(write == 0);
> +      bil.modified = !(write == 0);
> +      bil.filename = filename;
> +      bil.hex_build_id = NULL;
> +
> +      VEC_safe_push (build_id_list_s, p->list, &bil);
> +    }
> +

/* Continue the traversal.  */

> +  return 0;
> +}
> +
> +/* Get build-id for the given L_LD.  DATA must point to
> +   already filled list of build_id_list elements.

Update build_id_list struct name.


> +
> +   Return build_id as stored in the list element corresponding
> +   to L_LD.
> +
> +   NULL may be returned if build-id could not be fetched.
> +

> +   Returned string must not be freed explicitly.
> +*/

GNU Coding Standards formatting:
      Returned string must not be freed explicitly.  */


> +
> +static const char *
> +get_hex_build_id (const CORE_ADDR l_addr, const CORE_ADDR l_ld,
> +		  struct find_memory_region_callback_data *const data)
> +{
> +  build_id_list_s *const bil
> +    = bsearch (&l_ld, VEC_address (build_id_list_s, data->list),
> +	       VEC_length (build_id_list_s, data->list),
> +	       sizeof (build_id_list_s), compare_build_id_list_range);
> +
> +  if (bil == NULL)
> +    return NULL;
> +
> +  if (bil->hex_build_id == NULL)
> +    {
> +      CORE_ADDR load_addr;
> +      size_t len = VEC_length (build_id_list_s, data->list);
> +
> +      /* Must do linear search for the first mapping of this inode.  */

You do not have to.  One could search either for an entry containing:
	searched->vaddr == bil->vaddr - bil->offset
or in practice it would be faster to do a linear search backwards from BIL
down as the offset == 0 entry will be at most the fourth one.
One can stop when the vaddr gets lower than bil->vaddr - bil->offset.

The way you implemented it the time complexity is quadratic.  I do not insist
on a fix as it will be pretty fast but it would be nice to fix it.


> +      build_id_list_s *const bil_min
> +	= lfind (&bil->inode, VEC_address (build_id_list_s, data->list), &len,
> +		 sizeof (build_id_list_s), compare_build_id_list_inode);
> +      gdb_assert (bil_min != NULL && "This should never happen.");

It can happen.  Application can munmap first page of its shared library,
gdbserver should not crash on such application.  Just do not read the build-id
in such case.


Here you should also verify (and ignore build-id for such entry if it fails
the verification) ->offset matches, that is:
	bil_min->offset == 0
	bil_min->vaddr + bil->offset == bil->vaddr

That is also more reliable than inode, inode should be compared with device
major/minor numbers but those are currently not passed to
find_memory_region_ftype.  (Not asking for changing it.)


> +      load_addr = bil_min->vaddr;
> +      read_build_id (data, bil, load_addr, l_addr);

If the read fails it will be retried next time; but get_hex_build_id should
not be called for the same library twice anyway.  Not asking for a change.


> +    }
> +
> +  return bil->hex_build_id;
> +}
> +
>  /* Construct qXfer:libraries-svr4:read reply.  */
>  
>  static int
> @@ -5655,6 +5916,8 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
>    struct process_info_private *const priv = current_process ()->private;
>    char filename[PATH_MAX];
>    int pid, is_elf64;
> +  struct find_memory_region_callback_data data;
> +  int build_id_list_p;
>  
>    static const struct link_map_offsets lmo_32bit_offsets =
>      {
> @@ -5690,6 +5953,19 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
>    is_elf64 = elf_64_file_p (filename, &machine);
>    lmo = is_elf64 ? &lmo_64bit_offsets : &lmo_32bit_offsets;
>  
> +  data.is_elf64 = is_elf64;
> +  data.list = NULL;
> +  VEC_reserve (build_id_list_s, data.list, 16);
> +  build_id_list_p

I do not see why build_id_list_p exists at all.  data.list just remains empty
and all the functions already cope with it OK.


> +    = linux_find_memory_regions_full (
> +	      lwpid_of (get_thread_lwp (current_inferior)),
> +	      find_memory_region_callback, &data, NULL) >= 0;

You could rather just add a warning if linux_find_memory_regions_full
returns -1.


> +
> +  if (build_id_list_p)
> +    qsort (VEC_address (build_id_list_s, data.list),
> +	   VEC_length (build_id_list_s, data.list),
> +	   sizeof (build_id_list_s), compare_build_id_list);

It is always already sorted by Linux kernel, rather a for cycle to verify it
really is sorted.


> +
>    if (priv->r_debug == 0)
>      priv->r_debug = get_r_debug (pid, is_elf64);
>  
> @@ -5764,6 +6040,7 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
>  	      /* 6x the size for xml_escape_text below.  */
>  	      size_t len = 6 * strlen ((char *) libname);
>  	      char *name;
> +	      const char *hex_enc_build_id = NULL;
>  
>  	      if (!header_done)
>  		{
> @@ -5772,21 +6049,29 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
>  		  header_done = 1;
>  		}
>  
> -	      while (allocated < p - document + len + 200)
> +	      name = xml_escape_text ((char *) libname);
> +	      if (build_id_list_p)
> +		hex_enc_build_id = get_hex_build_id (l_addr, l_ld, &data);
> +
> +	      while (allocated < (p - document + len + 200
> +				  + (hex_enc_build_id != NULL
> +				     ? strlen (hex_enc_build_id) : 0)))
>  		{
>  		  /* Expand to guarantee sufficient storage.  */
> -		  uintptr_t document_len = p - document;
> +		  const ptrdiff_t document_len = p - document;
>  
> -		  document = xrealloc (document, 2 * allocated);
>  		  allocated *= 2;
> +		  document = xrealloc (document, allocated);
>  		  p = document + document_len;
>  		}
>  
> -	      name = xml_escape_text ((char *) libname);
>  	      p += sprintf (p, "<library name=\"%s\" lm=\"0x%lx\" "
> -			       "l_addr=\"0x%lx\" l_ld=\"0x%lx\"/>",
> +			       "l_addr=\"0x%lx\" l_ld=\"0x%lx\"",
>  			    name, (unsigned long) lm_addr,
>  			    (unsigned long) l_addr, (unsigned long) l_ld);
> +	      if (hex_enc_build_id != NULL)
> +		p += sprintf (p, " build-id=\"%s\"", hex_enc_build_id);
> +	      p += sprintf(p, "/>");
>  	      free (name);
>  	    }
>  	  else if (lm_prev == 0)
> @@ -5821,6 +6106,7 @@ linux_qxfer_libraries_svr4 (const char *annex, unsigned char *readbuf,
>  
>    memcpy (readbuf, document + offset, len);
>    xfree (document);
> +  free_build_id_list (data.list);
>  
>    return len;
>  }
> -- 
> 1.7.10.4
> 
> 


Thanks,
Jan


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]