This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[rft/symtab] Check the sizes of minimal symbols


This is something I've been meaning to do for years, on general principle.
Much to my surprise it did very good things (~ 15% fails fixed) for my
native amd64 test run, too, so I have a good example to illustrate with.

In ELF, one of the properties of a symbol is its size.  If left unspecified,
the size is treated as zero.  Some hand-written code may lack .size, but
most compiled code will have it.

One place where this information is very useful is in stripped libraries.
Here's a sample of an unstripped libc.so.6, as available on Debian/amd64
when GDB is configured to search for .gnu_debuglink files in the right
place (/usr/lib/debug):

0x2fe70 <killpg>:       test   %edi,%edi
0x2fe72 <killpg+2>:     js     0x2fe7b <killpg+11>
0x2fe74 <killpg+4>:     neg    %edi
0x2fe76 <killpg+6>:     jmpq   0x30180 <kill>
0x2fe7b <killpg+11>:    mov    2133958(%rip),%rax        # 0x238e48 <svcauthsw+936>
0x2fe82 <killpg+18>:    movl   $0x16,%fs:(%rax)
0x2fe89 <killpg+25>:    mov    $0xffffffff,%eax
0x2fe8e <killpg+30>:    retq   
0x2fe8f <killpg+31>:    nop    
0x2fe90 <__restore_rt>: mov    $0xf,%rax
0x2fe97 <__restore_rt+7>:       syscall 
0x2fe99 <__restore_rt+9>:       data16
0x2fe9a <__restore_rt+10>:      data16
0x2fe9b <__restore_rt+11>:      data16
0x2fe9c <__restore_rt+12>:      nop    
0x2fe9d <__restore_rt+13>:      data16
0x2fe9e <__restore_rt+14>:      data16
0x2fe9f <__restore_rt+15>:      nop    
0x2fea0 <__libc_sigaction>:     push   %r12

Here's without the separate debug info:

0x2fe70 <killpg>:       test   %edi,%edi
0x2fe72 <killpg+2>:     js     0x2fe7b <killpg+11>
0x2fe74 <killpg+4>:     neg    %edi
0x2fe76 <killpg+6>:     jmpq   0x30180 <kill>
0x2fe7b <killpg+11>:    mov    2133958(%rip),%rax        # 0x238e48 <_IO_file_jumps+1576>
0x2fe82 <killpg+18>:    movl   $0x16,%fs:(%rax)
0x2fe89 <killpg+25>:    mov    $0xffffffff,%eax
0x2fe8e <killpg+30>:    retq   
0x2fe8f <killpg+31>:    nop    
0x2fe90 <killpg+32>:    mov    $0xf,%rax
0x2fe97 <killpg+39>:    syscall 
0x2fe99 <killpg+41>:    data16
0x2fe9a <killpg+42>:    data16
0x2fe9b <killpg+43>:    data16
0x2fe9c <killpg+44>:    nop    
0x2fe9d <killpg+45>:    data16
0x2fe9e <killpg+46>:    data16
0x2fe9f <killpg+47>:    nop    
0x2fea0 <killpg+48>:    push   %r12

Oops!  GDB already knew to look for __restore_rt when it appeared to be part
of sigaction, because they're in the same file; but recent versions of GCC
reorder sigaction and __restore_rt within that file.  So it appears to be
part of the previous function, and we fail to recognize it as a trampoline.
Not to mention we might try to do prologue analysis on it based on the start
of killpg.  But here's with my current patch applied:

0x2fe70 <killpg>:       test   %edi,%edi
0x2fe72 <killpg+2>:     js     0x2fe7b <killpg+11>
0x2fe74 <killpg+4>:     neg    %edi
0x2fe76 <killpg+6>:     jmpq   0x30180 <kill>
0x2fe7b <killpg+11>:    mov    2133958(%rip),%rax        # 0x238e48
0x2fe82 <killpg+18>:    movl   $0x16,%fs:(%rax)
0x2fe89 <killpg+25>:    mov    $0xffffffff,%eax
0x2fe8e <killpg+30>:    retq   
0x2fe8f:        nop    
0x2fe90:        mov    $0xf,%rax
0x2fe97:        syscall 
0x2fe99:        xchg   %ax,%ax
0x2fe9d:        xchg   %ax,%ax
0x2fea0:        push   %r12

Now we know where the end of killpg is.  We don't appear to have the
bogus symbol, so the signal trampoline sniffer kicks in and does its thing.

This patch took me four tries to get right, so I'd appreciate both review
and testing on other platforms.  I tried both i686-pc-linux-gnu and
x86_64-pc-linux-gnu.  Any comments?

-- 
Daniel Jacobowitz
CodeSourcery

2006-07-06  Daniel Jacobowitz  <dan@codesourcery.com>

	* blockframe.c (find_pc_partial_function): Use the minimal symbol
	size to control the cache entry, if available.
	* minsyms.c (lookup_minimal_symbol_by_pc_section): Handle minimal
	symbols with zero and non-zero sizes differently.

2006-07-06  Daniel Jacobowitz  <dan@codesourcery.com>

	* gdb.arch/i386-size.c, gdb.arch/i386-size.exp: New files.

Index: blockframe.c
===================================================================
RCS file: /cvs/src/src/gdb/blockframe.c,v
retrieving revision 1.109
diff -u -p -r1.109 blockframe.c
--- blockframe.c	17 Dec 2005 22:33:59 -0000	1.109
+++ blockframe.c	7 Jul 2006 03:40:43 -0000
@@ -280,27 +280,34 @@ find_pc_partial_function (CORE_ADDR pc, 
   cache_pc_function_name = DEPRECATED_SYMBOL_NAME (msymbol);
   cache_pc_function_section = section;
 
-  /* Use the lesser of the next minimal symbol in the same section, or
-     the end of the section, as the end of the function.  */
+  /* If the minimal symbol has a size, use it for the cache.
+     Otherwise use the lesser of the next minimal symbol in the same
+     section, or the end of the section, as the end of the
+     function.  */
 
-  /* Step over other symbols at this same address, and symbols in
-     other sections, to find the next symbol in this section with
-     a different address.  */
-
-  for (i = 1; DEPRECATED_SYMBOL_NAME (msymbol + i) != NULL; i++)
+  if (MSYMBOL_SIZE (msymbol) != 0)
+    cache_pc_function_high = cache_pc_function_low + MSYMBOL_SIZE (msymbol);
+  else
     {
-      if (SYMBOL_VALUE_ADDRESS (msymbol + i) != SYMBOL_VALUE_ADDRESS (msymbol)
-	  && SYMBOL_BFD_SECTION (msymbol + i) == SYMBOL_BFD_SECTION (msymbol))
-	break;
-    }
+      /* Step over other symbols at this same address, and symbols in
+	 other sections, to find the next symbol in this section with
+	 a different address.  */
 
-  if (DEPRECATED_SYMBOL_NAME (msymbol + i) != NULL
-      && SYMBOL_VALUE_ADDRESS (msymbol + i) < osect->endaddr)
-    cache_pc_function_high = SYMBOL_VALUE_ADDRESS (msymbol + i);
-  else
-    /* We got the start address from the last msymbol in the objfile.
-       So the end address is the end of the section.  */
-    cache_pc_function_high = osect->endaddr;
+      for (i = 1; DEPRECATED_SYMBOL_NAME (msymbol + i) != NULL; i++)
+	{
+	  if (SYMBOL_VALUE_ADDRESS (msymbol + i) != SYMBOL_VALUE_ADDRESS (msymbol)
+	      && SYMBOL_BFD_SECTION (msymbol + i) == SYMBOL_BFD_SECTION (msymbol))
+	    break;
+	}
+
+      if (DEPRECATED_SYMBOL_NAME (msymbol + i) != NULL
+	  && SYMBOL_VALUE_ADDRESS (msymbol + i) < osect->endaddr)
+	cache_pc_function_high = SYMBOL_VALUE_ADDRESS (msymbol + i);
+      else
+	/* We got the start address from the last msymbol in the objfile.
+	   So the end address is the end of the section.  */
+	cache_pc_function_high = osect->endaddr;
+    }
 
  return_cached_value:
 
Index: minsyms.c
===================================================================
RCS file: /cvs/src/src/gdb/minsyms.c,v
retrieving revision 1.45
diff -u -p -r1.45 minsyms.c
--- minsyms.c	17 Dec 2005 22:34:01 -0000	1.45
+++ minsyms.c	7 Jul 2006 03:40:44 -0000
@@ -412,6 +412,8 @@ lookup_minimal_symbol_by_pc_section (COR
 
       if (objfile->minimal_symbol_count > 0)
 	{
+	  int best_zero_sized = -1;
+
           msymbol = objfile->msymbols;
 	  lo = 0;
 	  hi = objfile->minimal_symbol_count - 1;
@@ -461,34 +463,90 @@ lookup_minimal_symbol_by_pc_section (COR
 			 == SYMBOL_VALUE_ADDRESS (&msymbol[hi + 1])))
 		hi++;
 
+	      /* Skip various undesirable symbols.  */
+	      while (hi >= 0)
+		{
+		  /* Skip any absolute symbols.  This is apparently
+		     what adb and dbx do, and is needed for the CM-5.
+		     There are two known possible problems: (1) on
+		     ELF, apparently end, edata, etc. are absolute.
+		     Not sure ignoring them here is a big deal, but if
+		     we want to use them, the fix would go in
+		     elfread.c.  (2) I think shared library entry
+		     points on the NeXT are absolute.  If we want
+		     special handling for this it probably should be
+		     triggered by a special mst_abs_or_lib or some
+		     such.  */
+
+		  if (msymbol[hi].type == mst_abs)
+		    {
+		      hi--;
+		      continue;
+		    }
+
+		  /* If SECTION was specified, skip any symbol from
+		     wrong section.  */
+		  if (section
+		      /* Some types of debug info, such as COFF,
+			 don't fill the bfd_section member, so don't
+			 throw away symbols on those platforms.  */
+		      && SYMBOL_BFD_SECTION (&msymbol[hi]) != NULL
+		      && SYMBOL_BFD_SECTION (&msymbol[hi]) != section)
+		    {
+		      hi--;
+		      continue;
+		    }
+
+		  /* If the minimal symbol has a zero size, save it
+		     but keep scanning backwards looking for one with
+		     a non-zero size.  A zero size may mean that the
+		     symbol isn't an object or function (e.g. a
+		     label), or it may just mean that the size was not
+		     specified.  */
+		  if (MSYMBOL_SIZE (&msymbol[hi]) == 0
+		      && best_zero_sized == -1)
+		    {
+		      best_zero_sized = hi;
+		      hi--;
+		      continue;
+		    }
+
+		  /* Otherwise, this symbol must be as good as we're going
+		     to get.  */
+		  break;
+		}
+
+	      /* If HI has a zero size, and best_zero_sized is set,
+		 then we had two or more zero-sized symbols; prefer
+		 the first one we found (which may have a higher
+		 address).  Also, if we ran off the end, be sure
+		 to back up.  */
+	      if (best_zero_sized != -1
+		  && (hi < 0 || MSYMBOL_SIZE (&msymbol[hi]) == 0))
+		hi = best_zero_sized;
+
+	      /* If the minimal symbol has a non-zero size, and this
+		 PC appears to be outside the symbol's contents, then
+		 refuse to use this symbol.  If we found a zero-sized
+		 symbol with an address greater than this symbol's,
+		 use that instead.  We assume that if symbols have
+		 specified sizes, they do not overlap.  */
+
+	      if (MSYMBOL_SIZE (&msymbol[hi]) != 0
+		  && pc >= (SYMBOL_VALUE_ADDRESS (&msymbol[hi])
+			    + MSYMBOL_SIZE (&msymbol[hi])))
+		{
+		  if (best_zero_sized != -1)
+		    hi = best_zero_sized;
+		  else
+		    /* Go on to the next object file.  */
+		    continue;
+		}
+
 	      /* The minimal symbol indexed by hi now is the best one in this
 	         objfile's minimal symbol table.  See if it is the best one
 	         overall. */
 
-	      /* Skip any absolute symbols.  This is apparently what adb
-	         and dbx do, and is needed for the CM-5.  There are two
-	         known possible problems: (1) on ELF, apparently end, edata,
-	         etc. are absolute.  Not sure ignoring them here is a big
-	         deal, but if we want to use them, the fix would go in
-	         elfread.c.  (2) I think shared library entry points on the
-	         NeXT are absolute.  If we want special handling for this
-	         it probably should be triggered by a special
-	         mst_abs_or_lib or some such.  */
-	      while (hi >= 0
-		     && msymbol[hi].type == mst_abs)
-		--hi;
-
-	      /* If "section" specified, skip any symbol from wrong section */
-	      /* This is the new code that distinguishes it from the old function */
-	      if (section)
-		while (hi >= 0
-		       /* Some types of debug info, such as COFF,
-			  don't fill the bfd_section member, so don't
-			  throw away symbols on those platforms.  */
-		       && SYMBOL_BFD_SECTION (&msymbol[hi]) != NULL
-		       && SYMBOL_BFD_SECTION (&msymbol[hi]) != section)
-		  --hi;
-
 	      if (hi >= 0
 		  && ((best_symbol == NULL) ||
 		      (SYMBOL_VALUE_ADDRESS (best_symbol) <
Index: testsuite/gdb.arch/i386-size.c
===================================================================
RCS file: testsuite/gdb.arch/i386-size.c
diff -N testsuite/gdb.arch/i386-size.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gdb.arch/i386-size.c	7 Jul 2006 03:40:44 -0000
@@ -0,0 +1,50 @@
+/* Symbol size test program.
+
+   Copyright 2006 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifdef SYMBOL_PREFIX
+#define SYMBOL(str)	SYMBOL_PREFIX #str
+#else
+#define SYMBOL(str)	#str
+#endif
+
+void
+trap (void)
+{
+  asm ("int $0x03");
+}
+
+/* Jump from a function with its symbol size set, to a function
+   named by a local label.  If GDB does not look at the sizes of
+   symbols, we will still appear to be in the first function.  */
+
+asm(".text\n"
+    "    .align 8\n"
+    "    .globl " SYMBOL (main) "\n"
+    SYMBOL (main) ":\n"
+    "    pushl %ebp\n"
+    "    mov   %esp, %ebp\n"
+    "    call  .Lfunc\n"
+    "    ret\n"
+    "    .size " SYMBOL (main) ", .-" SYMBOL (main) "\n"
+    ".Lfunc:\n"
+    "    pushl %ebp\n"
+    "    mov   %esp, %ebp\n"
+    "    call  " SYMBOL (trap) "\n");
Index: testsuite/gdb.arch/i386-size.exp
===================================================================
RCS file: testsuite/gdb.arch/i386-size.exp
diff -N testsuite/gdb.arch/i386-size.exp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gdb.arch/i386-size.exp	7 Jul 2006 03:40:44 -0000
@@ -0,0 +1,89 @@
+# Copyright 2006 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
+
+# Please email any bugs, comments, and/or additions to this file to:
+# bug-gdb@gnu.org
+
+# This file is part of the gdb testsuite.
+
+if $tracelevel {
+    strace $tracelevel
+}
+
+# Test that GDB can see the sizes of symbols.
+
+if ![istarget "i?86-*-*"] then {
+    verbose "Skipping i386 unwinder tests."
+    return
+}
+
+set testfile "i386-size"
+set srcfile ${testfile}.c
+set binfile ${objdir}/${subdir}/${testfile}
+
+# some targets have leading underscores on assembly symbols.
+# TODO: detect this automatically
+set additional_flags ""
+if [istarget "i?86-*-cygwin*"] then {
+  set additional_flags "additional_flags=-DSYMBOL_PREFIX=\"_\""
+}
+
+if { [gdb_compile "${srcdir}/${subdir}/${srcfile}" "${binfile}" \
+	  executable [list debug $additional_flags]] != "" } {
+    untested "i386-size"
+    return -1
+}
+
+gdb_exit
+gdb_start
+gdb_reinitialize_dir $srcdir/$subdir
+gdb_load ${binfile}
+
+# We use gdb_run_cmd so this stands a chance to work for remote
+# targets too.
+gdb_run_cmd
+
+gdb_expect {
+    -re "Program received signal SIGTRAP.*$gdb_prompt $" {
+	pass "run past main"
+    }
+    -re ".*$gdb_prompt $" {
+	fail "run past main"
+    }
+    timeout {
+	fail "run past main (timeout)"
+    }
+}
+
+set message "backtrace shows no function"
+gdb_test_multiple "backtrace 10" $message {
+    -re "#1\[ \t]*$hex in main.*$gdb_prompt $" {
+	fail $message
+    }
+    -re "#1\[ \t]*$hex in \\?\\? \\(\\).*$gdb_prompt $" {
+	pass $message
+    }
+}
+
+set message "disassemble stops at end of main"
+gdb_test_multiple "disassemble main" $message {
+    -re "call.*<trap>.*$gdb_prompt $" {
+	fail $message
+    }
+    -re "<main\\+8>:\[ \t\]+ret\[ \t\r\n\]+End of.*$gdb_prompt $" {
+	pass $message
+    }
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]