This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
bugs in CP1258 converter (2)
- To: libc-alpha at sources dot redhat dot com
- Subject: bugs in CP1258 converter (2)
- From: Bruno Haible <haible at ilog dot fr>
- Date: Tue, 15 May 2001 15:25:19 +0200 (CEST)
What's currently wrong for stateful converters which are to be used in the
first step of a conversion sequence (FROM_DIRECTION):
- Such a converter cannot distinguish a "flush" operation (emit shift to
initial state) from a "reset" operation, because __gconv() sets the
__outbuf of the last step only. The previous steps are not informed.
To fix this, I pass do_flush = 2 to indicate the "reset" operation.
- When such a converter is written in the usual way, i.e. it will increment
data->__outbuf to indicate that it has produced output, then - because
this step's __outbuf is a private buffer not known to the user - after a
few thousand calls, when __outbuf has become equal to __outbufend, the
iconv descriptor will return E2BIG for any request and thus become
unusable.
To fix this, I introduce local variables 'outbuf', 'outend', which the
EMIT_SHIFT_TO_INIT macro must use instead of incrementing data->__outbuf.
- When EMIT_SHIFT_TO_INIT produces output, it must be passed down to
the following steps.
To fix this, I add the appropriate DL_CALL_FCT (fct, ...) calls, and
protect the operation through SAVE_RESET_STATE (1) and SAVE_RESET_STATE (0).
- The EMIT_SHIFT_TO_INIT cannot perform a "reset": when called with
outbuf = NULL and outend = NULL, it will usually think there is no room
for output, and return E2BIG - without clearing the state!
To fix this, I don't call EMIT_SHIFT_TO_INIT in "reset" mode.
2001-05-14 Bruno Haible <haible@clisp.cons.org>
* iconv/gconv.c (__gconv): For flush without output, pass do_flush = 2.
* iconv/skeleton.c: Distinguish do_flush = 1 and do_flush = 2. In the
first case, set outbuf, outstart, outend, and call PREPARE_LOOP before
EMIT_SHIFT_TO_INIT; then pass the output produced by this step down to
the next step. In the second case, clear the state without calling
EMIT_SHIFT_TO_INIT.
* iconvdata/ibm930.c (EMIT_SHIFT_TO_INIT): Use outbuf instead of
data->__outbuf, and outend instead of data->__outbufend.
* iconvdata/ibm933.c (EMIT_SHIFT_TO_INIT): Likewise.
* iconvdata/ibm935.c (EMIT_SHIFT_TO_INIT): Likewise.
* iconvdata/ibm937.c (EMIT_SHIFT_TO_INIT): Likewise.
* iconvdata/ibm939.c (EMIT_SHIFT_TO_INIT): Likewise.
* iso-2022-cn.c (EMIT_SHIFT_TO_INIT): Likewise.
* iso-2022-cn-ext.c (EMIT_SHIFT_TO_INIT): Likewise.
* iso-2022-jp.c (EMIT_SHIFT_TO_INIT): Likewise.
* iso-2022-kr.c (EMIT_SHIFT_TO_INIT): Likewise.
* utf-7.c (EMIT_SHIFT_TO_INIT): Likewise.
*** glibc-20010430/iconv/gconv.c.bak Tue Apr 10 23:52:15 2001
--- glibc-20010430/iconv/gconv.c Tue May 15 02:01:00 2001
***************
*** 49,55 ****
/* We just flush. */
result = DL_CALL_FCT (cd->__steps->__fct,
(cd->__steps, cd->__data, NULL, NULL, NULL,
! irreversible, 1, 0));
else
{
const unsigned char *last_start;
--- 49,56 ----
/* We just flush. */
result = DL_CALL_FCT (cd->__steps->__fct,
(cd->__steps, cd->__data, NULL, NULL, NULL,
! irreversible,
! cd->__data[last_step].__outbuf == NULL ? 2 : 1, 0));
else
{
const unsigned char *last_start;
*** glibc-20010430/iconv/skeleton.c.bak Thu Jan 11 22:00:23 2001
--- glibc-20010430/iconv/skeleton.c Tue May 15 02:05:04 2001
***************
*** 298,325 ****
dropped. */
if (__builtin_expect (do_flush, 0))
{
- status = __GCONV_OK;
-
/* This should never happen during error handling. */
assert (outbufstart == NULL);
#ifdef EMIT_SHIFT_TO_INIT
! /* Emit the escape sequence to reset the state. */
! EMIT_SHIFT_TO_INIT;
#else
! /* Clear the state object. There might be bytes in there from
! previous calls with CONSUME_INCOMPLETE == 1. */
! memset (data->__statep, '\0', sizeof (*data->__statep));
! #endif
! /* Call the steps down the chain if there are any but only if we
! successfully emitted the escape sequence. This should only
! fail if the output buffer is full. If the input is invalid
! it should be discarded since the user wants to start from a
! clean slate. */
! if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST))
! status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
! NULL, irreversible, 1,
! consume_incomplete));
}
else
{
--- 298,393 ----
dropped. */
if (__builtin_expect (do_flush, 0))
{
/* This should never happen during error handling. */
assert (outbufstart == NULL);
+ status = __GCONV_OK;
+
+ if (do_flush >= 2)
+ {
+ /* Clear the state object. There might be bytes in there from
+ previous calls with CONSUME_INCOMPLETE == 1. But don't emit
+ escape sequences. */
+ memset (data->__statep, '\0', sizeof (*data->__statep));
+
+ if (! (data->__flags & __GCONV_IS_LAST))
+ status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
+ NULL, irreversible, 2,
+ consume_incomplete));
+ }
+ else
+ {
+ /* We preserve the initial values of the pointer variables. */
+ unsigned char *outbuf = data->__outbuf;
+ unsigned char *outstart = outbuf;
+ #ifdef EMIT_SHIFT_TO_INIT
+ unsigned char *outend = data->__outbufend;
+
+ # ifdef PREPARE_LOOP
+ PREPARE_LOOP
+ # endif
+ #endif
+
+ #ifdef SAVE_RESET_STATE
+ SAVE_RESET_STATE (1);
+ #endif
+
#ifdef EMIT_SHIFT_TO_INIT
! /* Emit the escape sequence to reset the state. */
! EMIT_SHIFT_TO_INIT;
#else
! /* Clear the state object. There might be bytes in there from
! previous calls with CONSUME_INCOMPLETE == 1. */
! memset (data->__statep, '\0', sizeof (*data->__statep));
! #endif
! /* Call the steps down the chain if there are any but only if we
! successfully emitted the escape sequence. This should only
! fail if the output buffer is full. If the input is invalid
! it should be discarded since the user wants to start from a
! clean slate. */
! if (status == __GCONV_OK)
! {
! if (data->__flags & __GCONV_IS_LAST)
! /* Store information about how many bytes are available. */
! data->__outbuf = outbuf;
! else
! {
! /* Write out all output which was produced. */
! if (outbuf > outstart)
! {
! const unsigned char *outerr = outstart;
! int result;
!
! result = DL_CALL_FCT (fct, (next_step, next_data,
! &outerr, outbuf, NULL,
! irreversible, 0,
! consume_incomplete));
!
! if (result != __GCONV_EMPTY_INPUT)
! {
! if (__builtin_expect (outerr != outbuf, 0))
! {
! /* We have a problem. Undo the conversion. */
! outbuf = outstart;
!
! /* Restore the state. */
! #ifdef SAVE_RESET_STATE
! SAVE_RESET_STATE (0);
! #endif
! }
!
! /* Change the status. */
! status = result;
! }
! }
!
! if (status == __GCONV_OK)
! status = DL_CALL_FCT (fct, (next_step, next_data, NULL,
! NULL, NULL, irreversible, 1,
! consume_incomplete));
! }
! }
! }
}
else
{
***************
*** 499,505 ****
*inptrp = inptr;
outbuf = outstart;
! /* Reset the state. */
# ifdef SAVE_RESET_STATE
SAVE_RESET_STATE (0);
# endif
--- 567,573 ----
*inptrp = inptr;
outbuf = outstart;
! /* Restore the state. */
# ifdef SAVE_RESET_STATE
SAVE_RESET_STATE (0);
# endif
*** glibc-20010430/iconvdata/ibm930.c.bak Thu Jan 11 22:00:23 2001
--- glibc-20010430/iconvdata/ibm930.c Mon May 14 13:05:08 2001
***************
*** 56,73 ****
data->__statep->__count &= 7; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count &= 7; \
} \
} \
--- 56,70 ----
data->__statep->__count &= 7; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count &= 7; \
} \
} \
*** glibc-20010430/iconvdata/ibm933.c.bak Thu Jan 11 22:00:23 2001
--- glibc-20010430/iconvdata/ibm933.c Mon May 14 13:05:35 2001
***************
*** 56,73 ****
data->__statep->__count &= 7; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count &= 7; \
} \
} \
--- 56,70 ----
data->__statep->__count &= 7; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count &= 7; \
} \
} \
*** glibc-20010430/iconvdata/ibm935.c.bak Thu Jan 11 22:00:24 2001
--- glibc-20010430/iconvdata/ibm935.c Mon May 14 13:05:59 2001
***************
*** 56,73 ****
data->__statep->__count &= 7; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count &= 7; \
} \
} \
--- 56,70 ----
data->__statep->__count &= 7; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count &= 7; \
} \
} \
*** glibc-20010430/iconvdata/ibm937.c.bak Thu Jan 11 22:00:24 2001
--- glibc-20010430/iconvdata/ibm937.c Mon May 14 13:06:23 2001
***************
*** 56,73 ****
data->__statep->__count &= 7; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count &= 7; \
} \
} \
--- 56,70 ----
data->__statep->__count &= 7; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count &= 7; \
} \
} \
*** glibc-20010430/iconvdata/ibm939.c.bak Thu Jan 11 22:00:24 2001
--- glibc-20010430/iconvdata/ibm939.c Mon May 14 13:06:43 2001
***************
*** 56,73 ****
data->__statep->__count &= 7; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count &= 7; \
} \
} \
--- 56,70 ----
data->__statep->__count &= 7; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf >= outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count &= 7; \
} \
} \
*** glibc-20010430/iconvdata/iso-2022-cn.c.bak Mon Sep 18 23:15:36 2000
--- glibc-20010430/iconvdata/iso-2022-cn.c Mon May 14 13:07:10 2001
***************
*** 83,100 ****
data->__statep->__count = ASCII_set; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf == data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count = ASCII_set; \
} \
} \
--- 83,97 ----
data->__statep->__count = ASCII_set; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf == outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count = ASCII_set; \
} \
} \
*** glibc-20010430/iconvdata/iso-2022-cn-ext.c.bak Mon Dec 4 19:53:43 2000
--- glibc-20010430/iconvdata/iso-2022-cn-ext.c Tue May 15 01:04:29 2001
***************
*** 131,141 ****
data->__statep->__count = ASCII_set << 3; \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (outbuf == data->__outbufend) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
--- 131,139 ----
data->__statep->__count = ASCII_set << 3; \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf == outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
***************
*** 144,150 ****
*outbuf++ = SI; \
if (data->__flags & __GCONV_IS_LAST) \
*irreversible += 1; \
- data->__outbuf = outbuf; \
data->__statep->__count = ASCII_set << 3; \
} \
} \
--- 142,147 ----
*** glibc-20010430/iconvdata/iso-2022-jp.c.bak Mon Nov 27 23:28:38 2000
--- glibc-20010430/iconvdata/iso-2022-jp.c Mon May 14 13:59:53 2001
***************
*** 193,202 ****
the output state to the initial state. This has to be done during the
flushing. */
#define EMIT_SHIFT_TO_INIT \
if ((data->__statep->__count & ~7) != ASCII_set) \
{ \
- enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
- \
if (dir == from_iso2022jp) \
{ \
/* It's easy, we don't have to emit anything, we just reset the \
--- 193,203 ----
the output state to the initial state. This has to be done during the
flushing. */
#define EMIT_SHIFT_TO_INIT \
+ /* Avoid warning about unused variable 'var'. */ \
+ (void) var; \
+ \
if ((data->__statep->__count & ~7) != ASCII_set) \
{ \
if (dir == from_iso2022jp) \
{ \
/* It's easy, we don't have to emit anything, we just reset the \
***************
*** 207,217 ****
} \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit the sequence `Esc ( B'. */ \
! if (__builtin_expect (outbuf + 3 > data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
--- 208,216 ----
} \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit the sequence `Esc ( B'. */ \
! if (__builtin_expect (outbuf + 3 > outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
***************
*** 220,226 ****
*outbuf++ = ESC; \
*outbuf++ = '('; \
*outbuf++ = 'B'; \
- data->__outbuf = outbuf; \
/* Note that this also clears the G2 designation. */ \
data->__statep->__count &= ~7; \
data->__statep->__count |= ASCII_set; \
--- 219,224 ----
*** glibc-20010430/iconvdata/iso-2022-kr.c.bak Wed Aug 30 23:43:54 2000
--- glibc-20010430/iconvdata/iso-2022-kr.c Mon May 14 13:08:44 2001
***************
*** 85,102 ****
} \
else \
{ \
- unsigned char *outbuf = data->__outbuf; \
- \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf == data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
- data->__outbuf = outbuf; \
data->__statep->__count = ASCII_set; \
} \
} \
--- 85,99 ----
} \
else \
{ \
/* We are not in the initial state. To switch back we have \
to emit `SI'. */ \
! if (__builtin_expect (outbuf == outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = SI; \
data->__statep->__count = ASCII_set; \
} \
} \
*** glibc-20010430/iconvdata/utf-7.c.bak Mon Sep 11 21:52:53 2000
--- glibc-20010430/iconvdata/utf-7.c Mon May 14 13:09:40 2001
***************
*** 533,542 ****
if (state & 0x18) \
{ \
/* Deactivate base64 encoding. */ \
- unsigned char *outbuf = data->__outbuf; \
size_t count = ((state & 0x18) >= 0x10) + 1; \
\
! if (__builtin_expect (outbuf + count > data->__outbufend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
--- 533,541 ----
if (state & 0x18) \
{ \
/* Deactivate base64 encoding. */ \
size_t count = ((state & 0x18) >= 0x10) + 1; \
\
! if (__builtin_expect (outbuf + count > outend, 0)) \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
else \
***************
*** 546,552 ****
*outbuf++ = base64 ((state >> 3) & ~3); \
*outbuf++ = '-'; \
\
- data->__outbuf = outbuf; \
data->__statep->__count = 0; \
} \
} \
--- 545,550 ----