This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

bugs in JOHAB converter



Hi Ulrich,

Now that you have added a charmap for JOHAB, a few more small bugs in the
JOHAB converter have become apparent.

In the charmap:
  - mapping for 0x7f missing. (Some Unicode.org tables omit the control
    characters 0x00..0x1f and 0x7f.)

In the JOHAB -> Unicode direction:
  - mapping for 0x5c wrong
  - mapping for 0x7f missing
  - mapping for 0x844c wrong
  - mapping for 0x8454 swapped with 0x8455
  - extra mappings for 0xd9e6..0xd9e7
  - extra mappings for 0xdef9..0xdefe (out-of-range array access)

In the Unicode -> JOHAB direction:
  - mapping for 0x5c wrong
  - mapping for 0x7f missing
  - mappings for U+313E, U+313F, U+314D wrong
  - extra mappings for 0xd9e6..0xd9e7

Here is a fix.


2000-09-23  Bruno Haible  <haible@clisp.cons.org>

	* charmaps/JOHAB: Add identity mapping for 0x7f.

2000-09-23  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/johab.c (final_to_ucs): Fix typos.
	(jamo_from_ucs_table): Likewise.
	(BODY for FROM_LOOP): Map 0x5c to U+20A9. Reject ranges
	0xD9E6..0xD9FE and 0xDEF2..0xDEFE.
	(BODY for TO_LOOP): Map U+20A9 to 0x5c. Don't produce values in
	the range 0xD9E6..0xD9FE.

*** glibc-20000914/localedata/charmaps/JOHAB.bak	Thu Sep 14 22:36:46 2000
--- glibc-20000914/localedata/charmaps/JOHAB	Sat Sep 23 18:05:42 2000
***************
*** 133,138 ****
--- 133,139 ----
  <U007C>     /x7c         VERTICAL LINE
  <U007D>     /x7d         RIGHT CURLY BRACKET
  <U007E>     /x7e         TILDE
+ <U007F>     /x7f         DELETE (DEL)
  <U3133>     /x84/x44     HANGUL LETTER KIYEOK-SIOS
  <U3135>     /x84/x46     HANGUL LETTER NIEUN-CIEUC
  <U3136>     /x84/x47     HANGUL LETTER NIEUN-HIEUH
*** glibc-20000914/iconvdata/johab.c.bak	Tue Aug  1 13:52:59 2000
--- glibc-20000914/iconvdata/johab.c	Sat Sep 23 19:08:36 2000
***************
*** 68,75 ****
  static const uint32_t final_to_ucs[31] =
  {
    L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0',
!   0x313a, 0x313b, 0x314c, 0x313d, 0x313e, 0x313f,
!   0x3140, L'\0', L'\0', L'\0', 0x3144, L'\0', L'\0', L'\0',
    L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'
  };
  
--- 68,75 ----
  static const uint32_t final_to_ucs[31] =
  {
    L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0',
!   0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
!   0x3140, L'\0', L'\0', 0x3144, L'\0', L'\0', L'\0', L'\0',
    L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'
  };
  
***************
*** 120,130 ****
    0x9041,
    0x8446, 0x8447,
    0x9441, 0x9841, 0x9c41,
!   0x844a, 0x844b, 0x844c, 0x844d, 0x884e, 0x884f, 0x8450,
    0xa041, 0xa441, 0xa841,
    0x8454,
    0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
!   0xc041, 0xc441, 0xc841, 0xca41, 0xd041,
    0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
    0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
    0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
--- 120,130 ----
    0x9041,
    0x8446, 0x8447,
    0x9441, 0x9841, 0x9c41,
!   0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
    0xa041, 0xa441, 0xa841,
    0x8454,
    0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
!   0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
    0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
    0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
    0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
***************
*** 162,176 ****
    {									      \
      uint32_t ch = *inptr;						      \
  									      \
!     /* half-width Korean Currency WON sign				      \
!        if (ch == 0x5c)							      \
! 	 ch =  0x20a9;							      \
!        else if (ch < 0x7f)						      \
! 	 ch = (uint32_t) ch;						      \
!     */									      \
!     if (ch < 0x7f)							      \
!       /* Plain ASCII.  */						      \
!       ++inptr;								      \
      /* Johab : 1. Hangul						      \
         1st byte : 0x84-0xd3						      \
         2nd byte : 0x41-0x7e, 0x81-0xfe					      \
--- 162,174 ----
    {									      \
      uint32_t ch = *inptr;						      \
  									      \
!     if (ch <= 0x7f)							      \
!       {									      \
! 	/* Plain ISO646-KR.  */						      \
! 	if (ch == 0x5c)							      \
! 	  ch = 0x20a9; /* half-width Korean Currency WON sign */	      \
! 	++inptr;							      \
!       }									      \
      /* Johab : 1. Hangul						      \
         1st byte : 0x84-0xd3						      \
         2nd byte : 0x41-0x7e, 0x81-0xfe					      \
***************
*** 268,275 ****
  		if (__builtin_expect (ch2, 0x31) < 0x31			      \
  		    || (__builtin_expect (ch2, 0x7e) > 0x7e && ch2 < 0x91)    \
  		    || __builtin_expect (ch2, 0) == 0xff		      \
  		    || (__builtin_expect (ch, 0) == 0xda		      \
! 			&& ch2 > 0xa0 && ch2 < 0xd4))			      \
  		  {							      \
  		    /* This is illegal.  */				      \
  		    if (! ignore_errors_p ())				      \
--- 266,275 ----
  		if (__builtin_expect (ch2, 0x31) < 0x31			      \
  		    || (__builtin_expect (ch2, 0x7e) > 0x7e && ch2 < 0x91)    \
  		    || __builtin_expect (ch2, 0) == 0xff		      \
+ 		    || (__builtin_expect (ch, 0) == 0xd9 && ch2 > 0xe5)	      \
  		    || (__builtin_expect (ch, 0) == 0xda		      \
! 			&& ch2 > 0xa0 && ch2 < 0xd4)			      \
! 		    || (__builtin_expect (ch, 0) == 0xde && ch2 > 0xf1))      \
  		  {							      \
  		    /* This is illegal.  */				      \
  		    if (! ignore_errors_p ())				      \
***************
*** 346,352 ****
  	 cp = from_ucs4_lat1[ch];					      \
      */									      \
  									      \
!     if (ch < 0x7f)							      \
        *outptr++ = ch;							      \
      else								      \
        {									      \
--- 346,352 ----
  	 cp = from_ucs4_lat1[ch];					      \
      */									      \
  									      \
!     if (ch <= 0x7f && ch != 0x5c)					      \
        *outptr++ = ch;							      \
      else								      \
        {									      \
***************
*** 410,415 ****
--- 410,417 ----
  									      \
  	    outptr += 2;						      \
  	  }								      \
+ 	else if (ch == 0x20a9)						      \
+ 	  *outptr++ = 0x5c;						      \
  	else								      \
  	  {								      \
  	    size_t written;						      \
***************
*** 421,427 ****
  		result = __GCONV_FULL_OUTPUT;				      \
  		break;							      \
  	      }								      \
! 	    if (__builtin_expect (written, 1) == __UNKNOWN_10646_CHAR)	      \
  	      {								      \
  		STANDARD_ERR_HANDLER (4);				      \
  	      }								      \
--- 423,430 ----
  		result = __GCONV_FULL_OUTPUT;				      \
  		break;							      \
  	      }								      \
! 	    if (__builtin_expect (written, 1) == __UNKNOWN_10646_CHAR	      \
! 		|| (outptr[0] == 0x22 && outptr[1] > 0x65))		      \
  	      {								      \
  		STANDARD_ERR_HANDLER (4);				      \
  	      }								      \
*** glibc-20000914/iconvdata/tst-tables.sh.bak	Thu Sep 14 17:23:16 2000
--- glibc-20000914/iconvdata/tst-tables.sh	Sat Sep 23 18:19:47 2000
***************
*** 186,192 ****
    SJIS
    EUC-KR
    CP949
!   #JOHAB                                No charmap exists
    BIG5
    BIG5HKSCS
    EUC-JP
--- 186,192 ----
    SJIS
    EUC-KR
    CP949
!   JOHAB
    BIG5
    BIG5HKSCS
    EUC-JP

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]