This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
transliteration produces wrong output
- To: libc-alpha at sources dot redhat dot com
- Subject: transliteration produces wrong output
- From: Bruno Haible <haible at ilog dot fr>
- Date: Mon, 5 Feb 2001 19:30:40 +0100 (CET)
In some locales, transliteration produces too much output, for example for
the German O-umlaut:
$ localedef -v -c -i de_DE -f UTF-8 de_DE.UTF-8
$ printf '\xd6\n' | LC_ALL=de_DE.UTF-8 iconv -f ISO-8859-1 -t ASCII//translit
OOE
What happens? The rule
<U00D6> "<U004F><U0308>";"<U004F><U0045>"
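specifies two transliterations. The converter attempts the first one and must
stop after the "O", because <U0308> cannot be converted to ASCII. It then
attempts the second one and outputs "OE" - without backing up the output
pointer, so the "O" left over from the failed first attempt stays in the
output and the result is "OOE". Here is a fix.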
2001-02-04 Bruno Haible <haible@clisp.cons.org>
* iconv/gconv_trans.c (__gconv_transliterate): Use a temporary output
pointer, to avoid accumulating output from incomplete (unsuccessful)
transliteration attempts.
*** glibc-20010106/iconv/gconv_trans.c.bak Mon Dec 4 19:53:41 2000
--- glibc-20010106/iconv/gconv_trans.c Mon Feb 5 01:14:35 2001
***************
*** 111,127 ****
uint_fast32_t len = 0;
int res;
const unsigned char *toinptr;
while (to_tbl[idx2 + len] != L'\0')
++len;
/* Try this input text. */
toinptr = (const unsigned char *) &to_tbl[idx2];
res = DL_CALL_FCT (step->__fct,
(step, step_data, &toinptr,
(const unsigned char *) &to_tbl[idx2 + len],
! (unsigned char **) outbufstart,
! NULL, 0, 0));
if (res != __GCONV_ILLEGAL_INPUT)
{
/* If the conversion succeeds we have to increment the
--- 111,128 ----
uint_fast32_t len = 0;
int res;
const unsigned char *toinptr;
+ unsigned char *outptr;
while (to_tbl[idx2 + len] != L'\0')
++len;
/* Try this input text. */
toinptr = (const unsigned char *) &to_tbl[idx2];
+ outptr = *outbufstart;
res = DL_CALL_FCT (step->__fct,
(step, step_data, &toinptr,
(const unsigned char *) &to_tbl[idx2 + len],
! &outptr, NULL, 0, 0));
if (res != __GCONV_ILLEGAL_INPUT)
{
/* If the conversion succeeds we have to increment the
***************
*** 132,137 ****
--- 133,139 ----
++*irreversible;
res = __GCONV_OK;
}
+ *outbufstart = outptr;
return res;
}
***************
*** 193,198 ****
--- 195,201 ----
const unsigned char *toinptr = (const unsigned char *) default_missing;
uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
+ unsigned char *outptr;
int res;
/* Test whether there is enough input. */
***************
*** 200,210 ****
return (winbuf == winbufend
? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
res = DL_CALL_FCT (step->__fct,
(step, step_data, &toinptr,
(const unsigned char *) (default_missing + len),
! (unsigned char **) outbufstart,
! NULL, 0, 0));
if (res != __GCONV_ILLEGAL_INPUT)
{
--- 203,213 ----
return (winbuf == winbufend
? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
+ outptr = *outbufstart;
res = DL_CALL_FCT (step->__fct,
(step, step_data, &toinptr,
(const unsigned char *) (default_missing + len),
! &outptr, NULL, 0, 0));
if (res != __GCONV_ILLEGAL_INPUT)
{
***************
*** 217,222 ****
--- 220,226 ----
*inbufp += 4;
res = __GCONV_OK;
}
+ *outbufstart = outptr;
return res;
}