This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/2] Fix PR c++/21323: GDB thinks char16_t and char32_t are signed in C++


While the C++ standard says that char16_t and char32_t are unsigned types:

 Types char16_t and char32_t denote distinct types with the same size,
 signedness, and alignment as uint_least16_t and uint_least32_t,
 respectively, in <cstdint>, called the underlying types.

... gdb treats them as signed currently:

 (gdb) p (char16_t)-1
 $1 = -1 u'\xffff'

There are actually two places in gdb that hardcode these types:

- gdbtypes.c:gdbtypes_post_init, when creating the built-in types,
  seemingly used by the "x /s" command (judging from commit 9a22f0d0).

- dwarf2read.c, when reading base types with DW_ATE_UTF encoding
  (which is what is used for these types, when compiling for C++11 and
  up).  Despite the comment, the type created does end up used.

Both places need fixing.  But since I couldn't tell why dwarf2read.c
needs to create a new type, I've made it use the per-arch built-in
types instead, so that the types are only created once per arch
instead of once per objfile.  That seems to work fine.

While writing the test, I noticed that the C++ language parser isn't
actually aware of these built-in types, so if you try to use them
without a program that uses them, you get:

 (gdb) set language c++
 (gdb) ptype char16_t
 No symbol table is loaded.  Use the "file" command.
 (gdb) ptype u"hello"
 No type named char16_t.
 (gdb) p u"hello"
 No type named char16_t.

That's fixed by simply adding a couple entries to C++'s built-in types
array in c-lang.c.  With that, we get the expected:

 (gdb) ptype char16_t
 type = char16_t
 (gdb) ptype u"hello"
 type = char16_t [6]
 (gdb) p u"hello"
 $1 = u"hello"

gdb/ChangeLog:
yyyy-mm-dd  Pedro Alves  <palves@redhat.com>

	PR c++/21323
	* c-lang.c (cplus_primitive_types) <cplus_primitive_type_char16_t,
	cplus_primitive_type_char32_t>: New enum values.
	(cplus_language_arch_info): Register cplus_primitive_type_char16_t
	and cplus_primitive_type_char32_t.
	* dwarf2read.c (read_base_type) <DW_ATE_UTF>: If bit size is 16 or
	32, use the architecture's built-in type for char16_t and char32_t,
	respectively.  Otherwise, fall back to init_integer_type as before,
	but make the type unsigned, and issue a complaint.
	* gdbtypes.c (gdbtypes_post_init): Make char16_t and char32_t unsigned.

gdb/testsuite/ChangeLog:
yyyy-mm-dd  Pedro Alves  <palves@redhat.com>

	PR c++/21323
	* gdb.cp/wide_char_types.c: New file.
	* gdb.cp/wide_char_types.exp: New file.
---
 gdb/c-lang.c                             |   6 ++
 gdb/dwarf2read.c                         |  19 +++-
 gdb/gdbtypes.c                           |   5 +-
 gdb/testsuite/gdb.cp/wide_char_types.c   |  28 ++++++
 gdb/testsuite/gdb.cp/wide_char_types.exp | 143 +++++++++++++++++++++++++++++++
 5 files changed, 195 insertions(+), 6 deletions(-)
 create mode 100644 gdb/testsuite/gdb.cp/wide_char_types.c
 create mode 100644 gdb/testsuite/gdb.cp/wide_char_types.exp

diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index a100199..616aa26 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -895,6 +895,8 @@ enum cplus_primitive_types {
   cplus_primitive_type_decfloat,
   cplus_primitive_type_decdouble,
   cplus_primitive_type_declong,
+  cplus_primitive_type_char16_t,
+  cplus_primitive_type_char32_t,
   nr_cplus_primitive_types
 };
 
@@ -950,6 +952,10 @@ cplus_language_arch_info (struct gdbarch *gdbarch,
     = builtin->builtin_decdouble;
   lai->primitive_type_vector [cplus_primitive_type_declong]
     = builtin->builtin_declong;
+  lai->primitive_type_vector [cplus_primitive_type_char16_t]
+    = builtin->builtin_char16;
+  lai->primitive_type_vector [cplus_primitive_type_char32_t]
+    = builtin->builtin_char32;
 
   lai->bool_type_symbol = "bool";
   lai->bool_type_default = builtin->builtin_bool;
diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index f1a10c4..1a06f7b 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -15089,9 +15089,22 @@ read_base_type (struct die_info *die, struct dwarf2_cu *cu)
 	  type = init_integer_type (objfile, bits, 1, name);
 	break;
       case DW_ATE_UTF:
-	/* We just treat this as an integer and then recognize the
-	   type by name elsewhere.  */
-	type = init_integer_type (objfile, bits, 0, name);
+	{
+	  gdbarch *arch = get_objfile_arch (objfile);
+
+	  if (bits == 16)
+	    type = builtin_type (arch)->builtin_char16;
+	  else if (bits == 32)
+	    type = builtin_type (arch)->builtin_char32;
+	  else
+	    {
+	      complaint (&symfile_complaints,
+			 _("unsupported DW_ATE_UTF bit size: '%d'"),
+			 bits);
+	      type = init_integer_type (objfile, bits, 1, name);
+	    }
+	  return set_die_type (die, type, cu);
+	}
 	break;
 
       default:
diff --git a/gdb/gdbtypes.c b/gdb/gdbtypes.c
index 6f3aeab..c1f76fb 100644
--- a/gdb/gdbtypes.c
+++ b/gdb/gdbtypes.c
@@ -5204,10 +5204,9 @@ gdbtypes_post_init (struct gdbarch *gdbarch)
 
   /* Wide character types.  */
   builtin_type->builtin_char16
-    = arch_integer_type (gdbarch, 16, 0, "char16_t");
+    = arch_integer_type (gdbarch, 16, 1, "char16_t");
   builtin_type->builtin_char32
-    = arch_integer_type (gdbarch, 32, 0, "char32_t");
-	
+    = arch_integer_type (gdbarch, 32, 1, "char32_t");
 
   /* Default data/code pointer types.  */
   builtin_type->builtin_data_ptr
diff --git a/gdb/testsuite/gdb.cp/wide_char_types.c b/gdb/testsuite/gdb.cp/wide_char_types.c
new file mode 100644
index 0000000..8337cd4
--- /dev/null
+++ b/gdb/testsuite/gdb.cp/wide_char_types.c
@@ -0,0 +1,28 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <uchar.h>
+
+char16_t u16 = -1;
+char32_t u32 = -1;
+
+int
+main ()
+{
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.cp/wide_char_types.exp b/gdb/testsuite/gdb.cp/wide_char_types.exp
new file mode 100644
index 0000000..dccb623
--- /dev/null
+++ b/gdb/testsuite/gdb.cp/wide_char_types.exp
@@ -0,0 +1,143 @@
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2017 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test GDB's awareness of the char16_t, char32_t (C++11+) built-in
+# types.  We also run most tests here in C mode, and check whether the
+# built-ins are disabled (gdb uses the typedefs in the debug info
+# instead.)
+
+standard_testfile
+
+# Test char16_t/char32_t in language LANG, against symbols in
+# a program.  LANG can be "c", "c++03" or "c++11".  In C++11,
+# char16_t/char32_t are built-in types, and the debug information
+# reflects that (see
+# http://wiki.dwarfstd.org/index.php?title=C%2B%2B0x:_New_string_literals).
+
+proc wide_char_types_program {lang} {
+    global srcfile testfile
+
+    set options {debug}
+    if {$lang == "c++03"} {
+	lappend options c++ additional_flags=-std=c++03
+	set out $testfile-cxx03
+    } elseif {$lang == "c++11"} {
+	lappend options c++ additional_flags=-std=c++11
+	set out $testfile-cxx11
+    } else {
+	set out $testfile-c
+    }
+
+    if { [prepare_for_testing "failed to prepare" \
+	      ${out} [list $srcfile] $options] } {
+	return -1
+    }
+
+    if ![runto_main] then {
+	fail "can't run to main"
+	return 0
+    }
+    do_test_wide_char $lang "u16" "u32"
+}
+
+# Test char16_t/char32_t in language LANG.  Use CHAR16_EXP and
+# CHAR32_EXP as the expression for each of the corresponding types.
+# (E.g., CHAR16_EXP will be u16 when testing against the program, and
+# "(char16_t)-1" when testing the built-in types without a program
+# loaded.)
+
+proc do_test_wide_char {lang char16_exp char32_exp} {
+    global gdb_prompt
+
+    # Check that the fixed-width wide types are distinct built-in
+    # types in C++11+.  In other modes, they're instead typedefs,
+    # found in the debug info.
+    if {$lang == "c++11"} {
+	gdb_test "ptype $char16_exp" "type = char16_t" \
+	    "char16_t is distinct"
+	gdb_test "ptype $char32_exp" "type = char32_t" \
+	    "char32_t is distinct"
+    } else {
+	gdb_test "ptype $char16_exp" "type = unsigned short" \
+	    "char16_t is typedef"
+	gdb_test "ptype $char32_exp" "type = unsigned int" \
+	    "char32_t is typedef"
+    }
+
+    # Check that the fixed-width wide char types are unsigned.
+    gdb_test "p $char16_exp" " = 65535 u'\\\\xffff'" \
+	"char16_t is unsigned"
+    gdb_test "p $char32_exp" " = 4294967295 U'\\\\xffffffff'" \
+	"char32_t is unsigned"
+
+    # Check sizeof.  These are fixed-width.
+    gdb_test "p sizeof($char16_exp)" "= 2" \
+	"sizeof($char16_exp) == 2"
+    gdb_test "p sizeof($char32_exp)" "= 4" \
+	"sizeof($char32_exp) == 4"
+
+    # Test printing wide literal strings.  Note that when testing with
+    # no program started, this relies on GDB's awareness of the
+    # built-in wide char types.
+    gdb_test {p U"hello"} {= U"hello"}
+    gdb_test {p u"hello"} {= u"hello"}
+}
+
+# Make sure that the char16_t/char32_t types are recognized as
+# distinct built-in types in C++ mode, even with no program loaded.
+# Check that in C mode, the types are not recognized.
+
+proc wide_char_types_no_program {} {
+    global srcfile testfile
+
+    gdb_exit
+    gdb_start
+
+    # These types are not built-in in C.
+    with_test_prefix "c" {
+	gdb_test "set language c"
+
+	gdb_test "p (char16_t) -1" "No symbol table is loaded.*" \
+	    "char16_t is not built-in"
+	gdb_test "p (char32_t) -1" "No symbol table is loaded.*" \
+	    "char32_t is not built-in"
+
+	gdb_test {p U"hello"} "No type named char32_t\\\."
+	gdb_test {p u"hello"} "No type named char16_t\\\."
+    }
+
+    # Note GDB does not distinguish C++ dialects, so the fixed-width
+    # types are always available in C++ mode, even if they were not
+    # built-in types before C++11.
+    with_test_prefix "c++" {
+	gdb_test "set language c++"
+
+	do_test_wide_char "c++11" "(char16_t) -1" "(char32_t) -1"
+    }
+}
+
+# Check wide char types with no program loaded.
+with_test_prefix "no program" {
+    wide_char_types_no_program
+}
+
+# Check types when a program is loaded.
+with_test_prefix "with program" {
+    foreach_with_prefix lang {"c" "c++03" "c++11"} {
+	wide_char_types_program $lang
+    }
+}
-- 
2.5.5


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]