This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

transliteration produces wrong output



In some locales, transliteration produces too much output, for example for
the German O-umlaut:

$ localedef -v -c -i de_DE -f UTF-8 de_DE.UTF-8
$ printf '\xd6\n' | LC_ALL=de_DE.UTF-8 iconv -f ISO-8859-1 -t ASCII//translit
OOE

What happens? The rule

<U00D6> "<U004F><U0308>";"<U004F><U0045>"

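specifies two transliterations. iconv attempts the first one, must stop
after the "O" (the combining diaeresis <U0308> cannot be converted to ASCII),
then tries the second one and outputs "OE" - without backing up the output
pointer, so the "O" left over from the failed first attempt stays in the
output and the result is "OOE". Here is a fix.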


2001-02-04  Bruno Haible  <haible@clisp.cons.org>

	* iconv/gconv_trans.c (__gconv_transliterate): Use a temporary output
	pointer, to avoid accumulating output from incomplete (unsuccessful)
	transliteration attempts.

*** glibc-20010106/iconv/gconv_trans.c.bak	Mon Dec  4 19:53:41 2000
--- glibc-20010106/iconv/gconv_trans.c	Mon Feb  5 01:14:35 2001
***************
*** 111,127 ****
  	      uint_fast32_t len = 0;
  	      int res;
  	      const unsigned char *toinptr;
  
  	      while (to_tbl[idx2 + len] != L'\0')
  		++len;
  
  	      /* Try this input text.  */
  	      toinptr = (const unsigned char *) &to_tbl[idx2];
  	      res = DL_CALL_FCT (step->__fct,
  				 (step, step_data, &toinptr,
  				  (const unsigned char *) &to_tbl[idx2 + len],
! 				  (unsigned char **) outbufstart,
! 				  NULL, 0, 0));
  	      if (res != __GCONV_ILLEGAL_INPUT)
  		{
  		  /* If the conversion succeeds we have to increment the
--- 111,128 ----
  	      uint_fast32_t len = 0;
  	      int res;
  	      const unsigned char *toinptr;
+ 	      unsigned char *outptr;
  
  	      while (to_tbl[idx2 + len] != L'\0')
  		++len;
  
  	      /* Try this input text.  */
  	      toinptr = (const unsigned char *) &to_tbl[idx2];
+ 	      outptr = *outbufstart;
  	      res = DL_CALL_FCT (step->__fct,
  				 (step, step_data, &toinptr,
  				  (const unsigned char *) &to_tbl[idx2 + len],
! 				  &outptr, NULL, 0, 0));
  	      if (res != __GCONV_ILLEGAL_INPUT)
  		{
  		  /* If the conversion succeeds we have to increment the
***************
*** 132,137 ****
--- 133,139 ----
  		      ++*irreversible;
  		      res = __GCONV_OK;
  		    }
+ 		  *outbufstart = outptr;
  
  		  return res;
  		}
***************
*** 193,198 ****
--- 195,201 ----
        const unsigned char *toinptr = (const unsigned char *) default_missing;
        uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
  				       _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
+       unsigned char *outptr;
        int res;
  
        /* Test whether there is enough input.  */
***************
*** 200,210 ****
  	return (winbuf == winbufend
  		? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
  
        res = DL_CALL_FCT (step->__fct,
  			 (step, step_data, &toinptr,
  			  (const unsigned char *) (default_missing + len),
! 			  (unsigned char **) outbufstart,
! 			  NULL, 0, 0));
  
        if (res != __GCONV_ILLEGAL_INPUT)
  	{
--- 203,213 ----
  	return (winbuf == winbufend
  		? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
  
+       outptr = *outbufstart;
        res = DL_CALL_FCT (step->__fct,
  			 (step, step_data, &toinptr,
  			  (const unsigned char *) (default_missing + len),
! 			  &outptr, NULL, 0, 0));
  
        if (res != __GCONV_ILLEGAL_INPUT)
  	{
***************
*** 217,222 ****
--- 220,226 ----
  	      *inbufp += 4;
  	      res = __GCONV_OK;
  	    }
+ 	  *outbufstart = outptr;
  
  	  return res;
  	}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]