This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch 2/2] iFort compat.: case insensitive symbols (PR 11313)


On Mon, 08 Nov 2010 19:31:33 +0100, Joel Brobecker wrote:
> trying to see if we can evaluate the work that would be required to make
> lookups case-insensitive, rather than lowercasing the entity names.

Maybe some Ada code could be cleaned up on top of that?


On Fri, 12 Nov 2010 19:50:38 +0100, Tom Tromey wrote:
> >>>>> "Jan" == Jan Kratochvil <jan.kratochvil@redhat.com> writes:
> 
> Jan> I have prepared a second patch now to address Joel's suggestion to
> Jan> rather look up symbols case insensitively instead of lowercasing
> Jan> them on read-in.
> 
> I'll be interested to see that.


Here is a draft, not yet for review.

I guess .gdb_index's find_slot_in_mapped_hash should already use strcmp_iw.
It may currently break C++ psymtabs expansion on .gdb_index'ed files.

But I cannot test it as GDB already is broken just by the physname:
	[testcase patch] 7.2 regression on expand psymtabs
	http://sourceware.org/ml/gdb-patches/2010-11/msg00300.html


Unrelated talk:

Currently strcmp_iw does ugly skipping of whitespaces.  Thanks to the physname
canonicalization of all the symbols they should be unique enough so that no
longer should be needed.

Just one would have to canonicalize even the user input string (such as
`method(long, char *)').  As dwarf2_physname works from the internal parsed
representation the expression would need to be parsed first.  But for
tab-completion an incomplete string cannot be parsed into exp_element-s, can
it?

It is now even reproducible as a bug, GDB cannot distinguish those two:

#include <stdio.h>
class C
  {
  public:
    static void operator delete (void *p)
    { printf ("operator %p\n", operator delete); }
    static void operatordelete (void *p)
    { printf ("method %p\n", operatordelete); }
  };
int
main ()
{
  C *x = new C;
  x->operatordelete(NULL);
  delete x;
}

gdb -q -nx ./1 -ex r -ex 'p C::operator delete(void*)' -ex 'p C::operatordelete(void*)' -ex q
method 0x4005d9
operator 0x4005b7
Program exited normally.
$1 = {void (void *)} 0x4005b7 <C::operator delete(void*)>
$2 = {void (void *)} 0x4005b7 <C::operator delete(void*)>



Thanks,
Jan


--- a/gdb/dictionary.c
+++ b/gdb/dictionary.c
@@ -836,7 +836,7 @@ dict_hash (const char *string0)
 	    }
 	  /* FALL THROUGH */
 	default:
-	  hash = hash * 67 + *string - 113;
+	  hash = SYMBOL_HASH_NEXT (hash, *string);
 	  string += 1;
 	  break;
 	}
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -71,6 +71,7 @@
 #define MAP_FAILED ((void *) -1)
 #endif
 #endif
+#include <ctype.h>
 
 typedef struct symbol *symbolp;
 DEF_VEC_P (symbolp);
@@ -148,6 +149,8 @@ DEF_VEC_I (offset_type);
    a comment by the code that writes the index.  */
 struct mapped_index
 {
+  /* Index data format version.  */
+  int version;
   /* The total length of the buffer.  */
   off_t total_size;
   /* A pointer to the address table data.  */
@@ -1853,21 +1856,26 @@ create_addrmap_from_index (struct objfile *objfile, struct mapped_index *index)
   do_cleanups (cleanup);
 }
 
-/* The hash function for strings in the mapped index.  This is the
-   same as the hashtab.c hash function, but we keep a separate copy to
-   maintain control over the implementation.  This is necessary
-   because the hash function is tied to the format of the mapped index
-   file.  */
+/* The hash function for strings in the mapped index.  This is the same as
+   SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the
+   implementation.  This is necessary because the hash function is tied to the
+   format of the mapped index file.
+   
+   Use INT_MAX for INDEX_VERSION if you generate the current index format.  */
 
 static hashval_t
-mapped_index_string_hash (const void *p)
+mapped_index_string_hash (int index_version, const void *p)
 {
   const unsigned char *str = (const unsigned char *) p;
   hashval_t r = 0;
   unsigned char c;
 
   while ((c = *str++) != 0)
-    r = r * 67 + c - 113;
+    {
+      if (index_version >= 4)
+	c = tolower (c);
+      r = r * 67 + c - 113;
+    }
 
   return r;
 }
@@ -1880,7 +1888,7 @@ static int
 find_slot_in_mapped_hash (struct mapped_index *index, const char *name,
 			  offset_type **vec_out)
 {
-  offset_type hash = mapped_index_string_hash (name);
+  offset_type hash = mapped_index_string_hash (index->version, name);
   offset_type slot, step;
 
   slot = hash & (index->symbol_table_slots - 1);
@@ -1895,7 +1903,7 @@ find_slot_in_mapped_hash (struct mapped_index *index, const char *name,
 	return 0;
 
       str = index->constant_pool + MAYBE_SWAP (index->symbol_table[i]);
-      if (!strcmp (name, str))
+      if (!strcmp_iw (str, name))
 	{
 	  *vec_out = (offset_type *) (index->constant_pool
 				      + MAYBE_SWAP (index->symbol_table[i + 1]));
@@ -1941,8 +1949,13 @@ dwarf2_read_index (struct objfile *objfile)
      it seems better to just ignore such indices.  */
   if (version < 3)
     return 0;
+  /* Indexes with higher version than the one supported by GDB may be no
+     longer backward compatible.  */
+  if (version > 4)
+    return 0;
 
   map = OBSTACK_ZALLOC (&objfile->objfile_obstack, struct mapped_index);
+  map->version = version;
   map->total_size = dwarf2_per_objfile->gdb_index.size;
 
   metadata = (offset_type *) (addr + sizeof (offset_type));
@@ -14800,13 +14813,16 @@ struct strtab_entry
   const char *str;
 };
 
-/* Hash function for a strtab_entry.  */
+/* Hash function for a strtab_entry.
+
+   Function is used only during write_hash_table so no index format backward
+   compatibility is needed.  */
 
 static hashval_t
 hash_strtab_entry (const void *e)
 {
   const struct strtab_entry *entry = e;
-  return mapped_index_string_hash (entry->str);
+  return mapped_index_string_hash (INT_MAX, entry->str);
 }
 
 /* Equality function for a strtab_entry.  */
@@ -14944,12 +14960,15 @@ cleanup_mapped_symtab (void *p)
 }
 
 /* Find a slot in SYMTAB for the symbol NAME.  Returns a pointer to
-   the slot.  */
+   the slot.
+   
+   Function is used only during write_hash_table so no index format backward
+   compatibility is needed.  */
 
 static struct symtab_index_entry **
 find_slot (struct mapped_symtab *symtab, const char *name)
 {
-  offset_type index, step, hash = mapped_index_string_hash (name);
+  offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
 
   index = hash & (symtab->size - 1);
   step = ((hash * 17) & (symtab->size - 1)) | 1;
@@ -15349,7 +15368,7 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
   total_len = size_of_contents;
 
   /* The version number.  */
-  val = MAYBE_SWAP (3);
+  val = MAYBE_SWAP (4);
   obstack_grow (&contents, &val, sizeof (val));
 
   /* The offset of the CU list from the start of the file.  */
@@ -15407,8 +15426,8 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
    1. The file header.  This is a sequence of values, of offset_type
    unless otherwise noted:
 
-   [0] The version number, currently 3.  Versions 1 and 2 are
-   obsolete.
+   [0] The version number, currently 4.  Versions 1 and 2 are
+   obsolete.  Version 3 differs by its hashing function.
    [1] The offset, from the start of the file, of the CU list.
    [2] The offset, from the start of the file, of the types CU list.
    Note that this section can be empty, in which case this offset will
@@ -15440,7 +15459,8 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
 
    5. The symbol table.  This is a hash table.  The size of the hash
    table is always a power of 2.  The initial hash and the step are
-   currently defined by the `find_slot' function.
+   currently defined by the `find_slot' function.  Index version 3 uses
+   a different hash function than index version 4 and later.
 
    Each slot in the hash table consists of a pair of offset_type
    values.  The first value is the offset of the symbol's name in the
--- a/gdb/minsyms.c
+++ b/gdb/minsyms.c
@@ -89,7 +89,7 @@ msymbol_hash_iw (const char *string)
 	++string;
       if (*string && *string != '(')
 	{
-	  hash = hash * 67 + *string - 113;
+	  hash = SYMBOL_HASH_NEXT (hash, *string);
 	  ++string;
 	}
     }
@@ -104,7 +104,7 @@ msymbol_hash (const char *string)
   unsigned int hash = 0;
 
   for (; *string; ++string)
-    hash = hash * 67 + *string - 113;
+    hash = SYMBOL_HASH_NEXT (hash, *string);
   return hash;
 }
 
@@ -244,6 +244,9 @@ lookup_minimal_symbol (const char *name, const char *sfile,
 		    {
 		      match = strcmp (SYMBOL_LINKAGE_NAME (msymbol),
 				      modified_name) == 0;
+		      if (!match && case_sensitivity == case_sensitive_off)
+			match = strcasecmp (SYMBOL_LINKAGE_NAME (msymbol),
+					    modified_name) == 0;
 		    }
 		  else
 		    {
--- a/gdb/psymtab.c
+++ b/gdb/psymtab.c
@@ -577,8 +577,15 @@ lookup_partial_symbol (struct partial_symtab *pst, const char *name,
       if (!(top == bottom))
 	internal_error (__FILE__, __LINE__, _("failed internal consistency check"));
 
-      while (top <= real_top
-	     && SYMBOL_MATCHES_SEARCH_NAME (*top, name))
+      /* For `case_sensitivity == case_sensitive_off' strcmp_iw_ordered will
+	 search more exactly than what matches SYMBOL_MATCHES_SEARCH_NAME.  */
+      while (top >= start && SYMBOL_MATCHES_SEARCH_NAME (*top, name))
+	top--;
+
+      /* Fixup to have a symbol which matches SYMBOL_MATCHES_SEARCH_NAME.  */
+      top++;
+
+      while (top <= real_top && SYMBOL_MATCHES_SEARCH_NAME (*top, name))
 	{
 	  if (symbol_matches_domain (SYMBOL_LANGUAGE (*top),
 				     SYMBOL_DOMAIN (*top), domain))
--- a/gdb/symtab.c
+++ b/gdb/symtab.c
@@ -1061,19 +1061,6 @@ lookup_symbol_in_language (const char *name, const struct block *block,
 	}
     }
 
-  if (case_sensitivity == case_sensitive_off)
-    {
-      char *copy;
-      int len, i;
-
-      len = strlen (name);
-      copy = (char *) alloca (len + 1);
-      for (i= 0; i < len; i++)
-        copy[i] = tolower (name[i]);
-      copy[len] = 0;
-      modified_name = copy;
-    }
-
   returnval = lookup_symbol_aux (modified_name, block, domain, lang,
 				 is_a_field_of_this);
   do_cleanups (cleanup);
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -1012,6 +1012,13 @@ extern unsigned int msymbol_hash_iw (const char *);
 
 extern unsigned int msymbol_hash (const char *);
 
+/* This is only an internally computed value with no external files
+   compatibility requirements.  Compute next hash value from string
+   character C.  */
+
+#define SYMBOL_HASH_NEXT(hash, c) \
+  ((hash) * 67 + tolower ((unsigned char) (c)) - 113)
+
 extern struct objfile * msymbol_objfile (struct minimal_symbol *sym);
 
 extern void
--- /dev/null
+++ b/gdb/testsuite/gdb.base/fortran-sym-case.c
@@ -0,0 +1,22 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+int
+main (int argc, char **aRGv)
+{
+  return 0;
+}
--- /dev/null
+++ b/gdb/testsuite/gdb.base/fortran-sym-case.exp
@@ -0,0 +1,27 @@
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+set testfile fortran-sym-case
+if { [prepare_for_testing ${testfile}.exp ${testfile}] } {
+    return -1
+}
+
+if ![runto_main] {
+    return -1
+}
+
+gdb_test "set language fortran" {Warning: the current language does not match this frame\.}
+
+gdb_test "frame" "aRGv=0x.*"
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/dw2-case-insensitive-debug.S
@@ -0,0 +1,78 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+	.section .debug_info
+.Lcu1_begin:
+	/* CU header */
+	.4byte	.Lcu1_end - .Lcu1_start		/* Length of Compilation Unit */
+.Lcu1_start:
+	.2byte	2				/* DWARF Version */
+	.4byte	.Labbrev1_begin			/* Offset into abbrev section */
+	.byte	4				/* Pointer size */
+
+	/* CU die */
+	.uleb128 1				/* Abbrev: DW_TAG_compile_unit */
+	.ascii	"file1.txt\0"			/* DW_AT_name */
+	.ascii	"GNU C 3.3.3\0"			/* DW_AT_producer */
+	.byte	8				/* DW_AT_language (DW_LANG_Fortran90) */
+	.4byte		FUNC_lang		/* DW_AT_low_pc */
+	.4byte		main			/* DW_AT_high_pc */
+
+	.uleb128	3			/* Abbrev: DW_TAG_subprogram */
+	.byte		1			/* DW_AT_external */
+	.ascii		"FUNC_lang\0"		/* DW_AT_name */
+	.4byte		FUNC_lang		/* DW_AT_low_pc */
+	.4byte		main			/* DW_AT_high_pc */
+
+	.byte		0			/* End of children of CU */
+.Lcu1_end:
+
+/* Abbrev table */
+	.section .debug_abbrev
+.Labbrev1_begin:
+	.uleb128	1			/* Abbrev code */
+	.uleb128	0x11			/* DW_TAG_compile_unit */
+	.byte		1			/* has_children */
+	.uleb128	0x3			/* DW_AT_name */
+	.uleb128	0x8			/* DW_FORM_string */
+	.uleb128	0x25			/* DW_AT_producer */
+	.uleb128	0x8			/* DW_FORM_string */
+	.uleb128	0x13			/* DW_AT_language */
+	.uleb128	0xb			/* DW_FORM_data1 */
+	.uleb128	0x11			/* DW_AT_low_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.uleb128	0x12			/* DW_AT_high_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.byte		0x0			/* Terminator */
+	.byte		0x0			/* Terminator */
+
+	.uleb128	3			/* Abbrev code */
+	.uleb128	0x2e			/* DW_TAG_subprogram */
+	.byte		0			/* has_children */
+	.uleb128	0x3f			/* DW_AT_external */
+	.uleb128	0xc			/* DW_FORM_flag */
+	.uleb128	0x3			/* DW_AT_name */
+	.uleb128	0x8			/* DW_FORM_string */
+	.uleb128	0x11			/* DW_AT_low_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.uleb128	0x12			/* DW_AT_high_pc */
+	.uleb128	0x1			/* DW_FORM_addr */
+	.byte		0x0			/* Terminator */
+	.byte		0x0			/* Terminator */
+
+	.byte		0x0			/* Terminator */
+	.byte		0x0			/* Terminator */
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/dw2-case-insensitive.c
@@ -0,0 +1,38 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Use DW_LANG_Fortran90 for case insensitive DWARF.  */
+
+void
+FUNC_lang (void)
+{
+}
+
+/* Symbol is present only in ELF .symtab.  */
+
+void
+FUNC_symtab (void)
+{
+}
+
+int
+main()
+{
+  FUNC_lang ();
+  FUNC_symtab ();
+  return 0;
+}
--- /dev/null
+++ b/gdb/testsuite/gdb.dwarf2/dw2-case-insensitive.exp
@@ -0,0 +1,41 @@
+# Copyright 2010 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This test can only be run on targets which support DWARF-2 and use gas.
+# For now pick a sampling of likely targets.
+if {![istarget *-*-linux*]
+    && ![istarget *-*-gnu*]
+    && ![istarget *-*-elf*]
+    && ![istarget *-*-openbsd*]
+    && ![istarget arm-*-eabi*]
+    && ![istarget powerpc-*-eabi*]} {
+    return 0
+}
+
+set testfile "dw2-case-insensitive"
+
+if { [prepare_for_testing ${testfile}.exp ${testfile} [list ${testfile}.c ${testfile}-debug.S] {nodebug}] } {
+    return -1
+}
+
+gdb_test_no_output "set language fortran"
+
+if {[gdb_breakpoint "fuNC_lang"] == 1} {
+    pass "setting breakpoint at fuNC_lang"
+}
+
+if {[gdb_breakpoint "fuNC_symtab"] == 1} {
+    pass "setting breakpoint at fuNC_symtab"
+}
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -2927,10 +2927,12 @@ strcmp_iw (const char *string1, const char *string2)
 	{
 	  string2++;
 	}
-      if (*string1 != *string2)
-	{
-	  break;
-	}
+      if (case_sensitivity == case_sensitive_on && *string1 != *string2)
+	break;
+      if (case_sensitivity == case_sensitive_off
+	  && (tolower ((unsigned char) *string1)
+	      != tolower ((unsigned char) *string2)))
+	break;
       if (*string1 != '\0')
 	{
 	  string1++;
@@ -2986,10 +2988,12 @@ strcmp_iw_ordered (const char *string1, const char *string2)
 	{
 	  string2++;
 	}
-      if (*string1 != *string2)
-	{
-	  break;
-	}
+      if (case_sensitivity == case_sensitive_on && *string1 != *string2)
+	break;
+      if (case_sensitivity == case_sensitive_off
+	  && (tolower ((unsigned char) *string1)
+	      != tolower ((unsigned char) *string2)))
+	break;
       if (*string1 != '\0')
 	{
 	  string1++;
@@ -3015,8 +3019,11 @@ strcmp_iw_ordered (const char *string1, const char *string2)
     default:
       if (*string2 == '(')
 	return 1;
-      else
+      else if (case_sensitivity == case_sensitive_on)
 	return *string1 - *string2;
+      else
+	return (tolower ((unsigned char) *string1)
+		- tolower ((unsigned char) *string2));
     }
 }
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]