This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
RE: [RFC 2/6] Avoid missing char before incomplete sequence in wchar_iterate.
- From: "Pierre Muller" <pierre dot muller at ics-cnrs dot unistra dot fr>
- To: "'Keith Seitz'" <keiths at redhat dot com>
- Cc: "'gdb-patches'" <gdb-patches at sourceware dot org>
- Date: Tue, 1 Oct 2013 14:47:49 +0200
- Subject: RE: [RFC 2/6] Avoid missing char before incomplete sequence in wchar_iterate.
- Authentication-results: sourceware.org; auth=none
- References: <002901cebaf2$35ec65a0$a1c530e0$ at muller@ics-cnrs.unistra.fr> <002f01cebaf2$aac80410$00580c30$ at muller@ics-cnrs.unistra.fr> <524A2316 dot 8020607 at redhat dot com>
> -----Message d'origine-----
> De : gdb-patches-owner@sourceware.org [mailto:gdb-patches-
> owner@sourceware.org] De la part de Keith Seitz
> Envoyé : mardi 1 octobre 2013 03:19
> À : Pierre Muller
> Cc : 'gdb-patches'
> Objet : Re: [RFC 2/6] Avoid missing char before incomplete sequence in
> wchar_iterate.
>
> On 09/26/2013 12:57 PM, Pierre Muller wrote:
> > If charset is set to UTF-8
> > p "ABCD\340"
> > will output
> > "ABC" <incomplete sequence \340>
> >
> > Note the missing character 'D'.
> >
> > This patch solves the issue by also checking, in the EINVAL case,
> > whether characters have already been converted.
>
> This looks okay to me, but it needs a test (and a maintainer to approve).
>
> Keith
Here is a test; maybe this is overkill...
On a Linux machine with the charset set to UTF-8,
I do get failures, but not all of them are related to the fix in the patch...
Some are due to the fact that "<incomplete sequence ...>" is printed out.
Nevertheless, incomplete_LSE, incomplete_RSE and incomplete_SSE
do show one missing character.
FAIL: gdb.base/printcmds.exp: print incomplete_LLE
FAIL: gdb.base/printcmds.exp: print incomplete_RLE
FAIL: gdb.base/printcmds.exp: print incomplete_SLE
FAIL: gdb.base/printcmds.exp: print incomplete_LRE
FAIL: gdb.base/printcmds.exp: print incomplete_RRE
FAIL: gdb.base/printcmds.exp: print incomplete_SRE
FAIL: gdb.base/printcmds.exp: print incomplete_LSE
FAIL: gdb.base/printcmds.exp: print incomplete_RSE
FAIL: gdb.base/printcmds.exp: print incomplete_SSE
Pierre Muller
PS: One thing still missing is the re-indentation of the .exp
file for the additional foreach loop...
Using vim in visual mode, the '>' command
added 8 spaces at once instead of 4...
Doing all that by hand is really frustrating...
Should .exp files be treated differently from other Tcl files?
gdb Changelog entry:
2013-10-01 Pierre Muller <muller@sourceware.org>
* charset.c (wchar_iterate): Also handle converted characters
when EINVAL is returned by the iconv call.
testsuite Changelog entry:
2013-10-01 Pierre Muller <muller@sourceware.org>
* gdb.base/printcmds.c: Add strings with incomplete sequence.
* gdb.base/printcmds.exp (test_repeat_bytes): Check output of
strings containing incomplete sequence.
---
gdb/charset.c | 7 ++-
gdb/testsuite/gdb.base/printcmds.c | 82 ++++++++++++++++++++++++++++++++++
gdb/testsuite/gdb.base/printcmds.exp | 19 ++++++--
3 files changed, 101 insertions(+), 7 deletions(-)
diff --git a/gdb/charset.c b/gdb/charset.c
index 5835fd4..f0e258c 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -659,7 +659,7 @@ wchar_iterate (struct wchar_iterator *iter,
converted a character; if so, return it. */
if (out_avail < out_request * sizeof (gdb_wchar_t))
break;
-
+
/* Otherwise skip the first invalid character, and let
the caller know about it. */
*out_result = wchar_iterate_invalid;
@@ -687,7 +687,10 @@ wchar_iterate (struct wchar_iterator *iter,
case EINVAL:
/* Incomplete input sequence. Let the caller know, and
- arrange for future calls to see EOF. */
+ arrange for future calls to see EOF.
+ Here also we might have converted something. */
+ if (out_avail < out_request * sizeof (gdb_wchar_t))
+ break;
*out_result = wchar_iterate_incomplete;
*ptr = iter->input;
*len = iter->bytes;
diff --git a/gdb/testsuite/gdb.base/printcmds.c b/gdb/testsuite/gdb.base/printcmds.c
index d80c13d..4a645d1 100644
--- a/gdb/testsuite/gdb.base/printcmds.c
+++ b/gdb/testsuite/gdb.base/printcmds.c
@@ -214,6 +214,88 @@ char invalid_RRR[] = "aaaaaaaaaaaaaaaaaaaa"
"\240\240\240\240\240\240\240\240\240\240"
"\240\240\240\240\240\240\240\240\240\240cccccccccccccccccccc";
+/* Same with incomplete \340 prefix. */
+char incomplete_ESE[] = "\340";
+char incomplete_SSE[] = "a\340";
+char incomplete_LSE[] = "abaabbaaabbb\340";
+char incomplete_RSE[] = "aaaaaaaaaaaaaaaaaaaa\340";
+char incomplete_ESS[] = "\340c";
+char incomplete_SSS[] = "a\340c";
+char incomplete_LSS[] = "abaabbaaabbb\340c";
+char incomplete_RSS[] = "aaaaaaaaaaaaaaaaaaaa\340c";
+char incomplete_ESL[] = "\340cdccddcccddd";
+char incomplete_SSL[] = "a\340cdccddcccddd";
+char incomplete_LSL[] = "abaabbaaabbb\340cdccddcccddd";
+char incomplete_RSL[] = "aaaaaaaaaaaaaaaaaaaa\340cdccddcccddd";
+char incomplete_ESR[] = "\340cccccccccccccccccccc";
+char incomplete_SSR[] = "a\340cccccccccccccccccccc";
+char incomplete_LSR[] = "abaabbaaabbb\340cccccccccccccccccccc";
+char incomplete_RSR[] = "aaaaaaaaaaaaaaaaaaaa\340cccccccccccccccccccc";
+char incomplete_ELE[] = "\340\340\340\340";
+char incomplete_SLE[] = "a\340\340\340\340";
+char incomplete_LLE[] = "abaabbaaabbb\340\340\340\340";
+char incomplete_RLE[] = "aaaaaaaaaaaaaaaaaaaa\340\340\340\340";
+char incomplete_ELS[] = "\340\340\340\340c";
+char incomplete_SLS[] = "a\340\340\340\340c";
+char incomplete_LLS[] = "abaabbaaabbb\340\340\340\340c";
+char incomplete_RLS[] = "aaaaaaaaaaaaaaaaaaaa\340\340\340\340c";
+char incomplete_ELL[] = "\340\340\340\340cdccddcccddd";
+char incomplete_SLL[] = "a\340\340\340\340cdccddcccddd";
+char incomplete_LLL[] = "abaabbaaabbb\340\340\340\340cdccddcccddd";
+char incomplete_RLL[] = "aaaaaaaaaaaaaaaaaaaa\340\340\340\340cdccddcccddd";
+char incomplete_ELR[] = "\340\340\340\340cccccccccccccccccccc";
+char incomplete_SLR[] = "a\340\340\340\340cccccccccccccccccccc";
+char incomplete_LLR[] = "abaabbaaabbb\340\340\340\340cccccccccccccccccccc";
+char incomplete_RLR[] = "aaaaaaaaaaaaaaaaaaaa\340\340\340\340cccccccccccccccccccc";
+char incomplete_ERE[] = ""
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340";
+char incomplete_LRE[] = "abaabbaaabbb"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340";
+char incomplete_RRE[] = "aaaaaaaaaaaaaaaaaaaa"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340";
+char incomplete_ERS[] = ""
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340c";
+char incomplete_ERL[] = ""
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cdccddcccddd";
+char incomplete_ERR[] = ""
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cccccccccccccccccccc";
+char incomplete_SRE[] = "a"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340";
+char incomplete_SRS[] = "a"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340c";
+char incomplete_SRL[] = "a"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cdccddcccddd";
+char incomplete_SRR[] = "a"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cccccccccccccccccccc";
+char incomplete_LRS[] = "abaabbaaabbb"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340c";
+char incomplete_LRL[] = "abaabbaaabbb"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cdccddcccddd";
+char incomplete_LRR[] = "abaabbaaabbb"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cccccccccccccccccccc";
+char incomplete_RRS[] = "aaaaaaaaaaaaaaaaaaaa"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340c";
+char incomplete_RRL[] = "aaaaaaaaaaaaaaaaaaaa"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cdccddcccddd";
+char incomplete_RRR[] = "aaaaaaaaaaaaaaaaaaaa"
+ "\340\340\340\340\340\340\340\340\340\340"
+ "\340\340\340\340\340\340\340\340\340\340cccccccccccccccccccc";
+
/* -- */
int main ()
diff --git a/gdb/testsuite/gdb.base/printcmds.exp b/gdb/testsuite/gdb.base/printcmds.exp
index 60e4a7f..36149bc 100644
--- a/gdb/testsuite/gdb.base/printcmds.exp
+++ b/gdb/testsuite/gdb.base/printcmds.exp
@@ -827,6 +827,10 @@ proc test_repeat_bytes {} {
set invalid(S) {\\240}
set invalid(L) {\\240\\240\\240\\240}
set invalid(R) {'\\240' <repeats 20 times>}
+ set incomplete(S) {\\340}
+ set incomplete(L) {\\340\\340\\340\\340}
+ set incomplete(R) {'\\340' <repeats 20 times>}
+
set fmt(SSS) "\"%s%s%s\""
set fmt(SSR) "\"%s%s\", %s"
@@ -843,12 +847,16 @@ proc test_repeat_bytes {} {
set fmt(SS) "\"%s%s\""
# Test the various permutations of invalid characters
- foreach i [array names invalid] {
+ foreach prefix { invalid incomplete } {
+ foreach i [array names $prefix] {
set I $i
if {$i == "L"} {
set i "S"
}
+ set prefname "${prefix}($I)"
+ set pref [subst "$$prefname"]
+ verbose "prefname=$prefname, pref=$pref"
foreach s [array names start] {
set S $s
@@ -870,19 +878,20 @@ proc test_repeat_bytes {} {
# Special cases...
if {$s == "E"} {
- set result [format $fmt($i$e) $invalid($I) $end($E)]
+ set result [format $fmt($i$e) $pref $end($E)]
} elseif {$e == "E"} {
- set result [format $fmt($s$i) $start($S) $invalid($I)]
+ set result [format $fmt($s$i) $start($S) $pref]
} else {
set result [format $fmt($s$i$e) \
- $start($S) $invalid($I) $end($E)]
+ $start($S) $pref $end($E)]
}
send_log "expecting: = $result\n"
- gdb_test "print invalid_$S$I$E" "= $result"
+ gdb_test "print ${prefix}_$S$I$E" "= $result"
}
}
}
+ }
}
# Start with a fresh gdb.
--
1.7.9