This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]

bugs in ISO-2022-CN converter

To: libc-alpha at sources dot redhat dot com
Subject: bugs in ISO-2022-CN converter
From: Bruno Haible <haible at ilog dot fr>
Date: Mon, 18 Sep 2000 15:06:11 +0200 (CEST)


The patch below fixes the following bugs in the ISO-2022-CN converter:

1) In the FROM direction, it may read more bytes than the buffer contains.

2) In the TO direction, the assertion "used >= 1 && used <= 3" is violated,
because the value stored in `used' is shifted left by 3 bits and therefore
never lies in the range 1..3. For example:

/glibc22/bin/iconv -f UTF-8 -t ISO-2022-CN < ISO-2022-CN-snippet.UTF-8 > x
conv: ../iconv/loop.c:267: to_iso2022cn_loop: Zusicherung »used >= 1 && used <= 3« nicht erfüllt.

3) In the TO direction, in the initial state, CNS11643-1 characters will not
be recognized, because ucs4_to_gb2312 is called twice but ucs4_to_cns11643l1
is not called at all.

4) In the TO direction, SO will be output again and again, once before every
character, because the variable "set" is never assigned to.


2000-09-17  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/iso-2022-cn.c (BODY for FROM_LOOP): Fix buffer overrun.
	(BODY for TO_LOOP): Fix usage of `set' vs. `used'.

*** glibc-20000914/iconvdata/iso-2022-cn.c.bak	Wed Aug 30 23:43:54 2000
--- glibc-20000914/iconvdata/iso-2022-cn.c	Mon Sep 18 10:36:09 2000
***************
*** 141,155 ****
  	     line; we can simply ignore them				      \
  	   - the initial byte of the SS2 sequence.			      \
  	*/								      \
! 	if (__builtin_expect (inptr + 1 > inend, 0)			      \
  	    || (inptr[1] == '$'						      \
! 		&& (__builtin_expect (inptr + 2 > inend, 0)		      \
  		    || (inptr[2] == ')'					      \
! 			&& __builtin_expect (inptr + 3 > inend, 0))	      \
  		    || (inptr[2] == '*'					      \
! 			&& __builtin_expect (inptr + 3 > inend, 0))))	      \
  	    || (inptr[1] == SS2_1					      \
! 		&& __builtin_expect (inptr + 3 > inend, 0)))		      \
  	  {								      \
  	    result = __GCONV_INCOMPLETE_INPUT;				      \
  	    break;							      \
--- 141,155 ----
  	     line; we can simply ignore them				      \
  	   - the initial byte of the SS2 sequence.			      \
  	*/								      \
! 	if (__builtin_expect (inptr + 2 > inend, 0)			      \
  	    || (inptr[1] == '$'						      \
! 		&& (__builtin_expect (inptr + 3 > inend, 0)		      \
  		    || (inptr[2] == ')'					      \
! 			&& __builtin_expect (inptr + 4 > inend, 0))	      \
  		    || (inptr[2] == '*'					      \
! 			&& __builtin_expect (inptr + 4 > inend, 0))))	      \
  	    || (inptr[1] == SS2_1					      \
! 		&& __builtin_expect (inptr + 4 > inend, 0)))		      \
  	  {								      \
  	    result = __GCONV_INCOMPLETE_INPUT;				      \
  	    break;							      \
***************
*** 313,326 ****
  	    else							      \
  	      {								      \
  		/* Well, see whether we have to change the SO set.  */	      \
! 		if (set == GB2312_set)					      \
  		  written = ucs4_to_cns11643l1 (ch, buf, 2);		      \
  		else							      \
  		  written = ucs4_to_gb2312 (ch, buf, 2);		      \
  									      \
  		if (__builtin_expect (written, 0) != __UNKNOWN_10646_CHAR)    \
  		  /* Oh well, then switch SO.  */			      \
! 		  used = GB2312_set + CNS11643_1_set - set;		      \
  		else							      \
  		  {							      \
  		    /* Even this does not work.  Error.  */		      \
--- 313,326 ----
  	    else							      \
  	      {								      \
  		/* Well, see whether we have to change the SO set.  */	      \
! 		if (used == GB2312_set)					      \
  		  written = ucs4_to_cns11643l1 (ch, buf, 2);		      \
  		else							      \
  		  written = ucs4_to_gb2312 (ch, buf, 2);		      \
  									      \
  		if (__builtin_expect (written, 0) != __UNKNOWN_10646_CHAR)    \
  		  /* Oh well, then switch SO.  */			      \
! 		  used = GB2312_set + CNS11643_1_set - used;		      \
  		else							      \
  		  {							      \
  		    /* Even this does not work.  Error.  */		      \
***************
*** 335,341 ****
  	  {								      \
  	    /* First see whether we announced that we use this		      \
  	       character set.  */					      \
! 	    if ((ann & (2 << used)) == 0)				      \
  	      {								      \
  		const char *escseq;					      \
  									      \
--- 335,341 ----
  	  {								      \
  	    /* First see whether we announced that we use this		      \
  	       character set.  */					      \
! 	    if ((ann & (16 << (used >> 3))) == 0)			      \
  	      {								      \
  		const char *escseq;					      \
  									      \
***************
*** 345,354 ****
  		    break;						      \
  		  }							      \
  									      \
! 		assert (used >= 1 && used <= 3);			      \
! 		escseq = "\e$)A\e$)G\e$*H" + (used - 1) * 4;		      \
! 		*outptr++ = *escseq++;					      \
! 		*outptr++ = *escseq++;					      \
  		*outptr++ = *escseq++;					      \
  		*outptr++ = *escseq++;					      \
  									      \
--- 345,354 ----
  		    break;						      \
  		  }							      \
  									      \
! 		assert ((used >> 3) >= 1 && (used >> 3) <= 3);		      \
! 		escseq = ")A)G*H" + ((used >> 3) - 1) * 2;		      \
! 		*outptr++ = ESC;					      \
! 		*outptr++ = '$';					      \
  		*outptr++ = *escseq++;					      \
  		*outptr++ = *escseq++;					      \
  									      \
***************
*** 402,407 ****
--- 402,408 ----
  									      \
  	*outptr++ = buf[0];						      \
  	*outptr++ = buf[1];						      \
+ 	set = used;							      \
        }									      \
  									      \
      /* Now that we wrote the output increment the input pointer.  */	      \

Follow-Ups:
- Re: bugs in ISO-2022-CN converter
  - From: Ulrich Drepper

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]