This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch, master, updated. glibc-2.15-30-g9954432

From: drepper at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 7 Jan 2012 15:53:30 -0000
Subject: GNU C Library master sources branch, master, updated. glibc-2.15-30-g9954432
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  9954432e309c8fddaec2fe53e601702a5c981624 (commit)
      from  c3a87236702cb73be1dada3438bbd3c3934e83f8 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9954432e309c8fddaec2fe53e601702a5c981624

commit 9954432e309c8fddaec2fe53e601702a5c981624
Author: Ulrich Drepper <drepper@gmail.com>
Date:   Sat Jan 7 10:52:53 2012 -0500

    More char16_t and char32_t support
    
    It works now for UTF-8 locales

diff --git a/ChangeLog b/ChangeLog
index f089e19..2c0b0f8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
 2012-01-07  Ulrich Drepper  <drepper@gmail.com>
 
+	* iconv/gconv_builtin.h: Use CHAR16 for the char16_t conversions.
+	* iconv/gconv_simple.c: Rename char16_t routines.  Add char16_t<->utf8
+	routines.
+	* iconv/gconv_int.h: Adjust prototypes for char16_t routines.
+	* iconv/iconv_prog.c: Recognize CHAR16 as internal name.
+	* wcsmbs/c16rtomb.c: Fix a few problems.  Disable all the code and
+	fall back to using wcrtomb.
+	* wcsmbs/mbrtoc16.c: Fix implementation to handle real conversions.
+	* wcsmbs/wcsmbsload.c: Make char16 routines optional.  Adjust for
+	renaming.
+	* wcsmbs/Makefile (tests): Add tst-c16c32-1.
+	* wcsmbs/tst-c16c32-1.c: New file.
+
+	* wcsmbs/wcrtomb.c: Use MB_LEN_MAX instead of MB_CUR_MAX for sizing
+	local variable.
+
 	* libio/stdio.h: Do not declare gets at all for _GNU_SOURCE.
 
 	* elf/tst-unique3.cc: Add explicit declaration of gets.
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index fd736a4..6820f82 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -1,5 +1,5 @@
 /* Builtin transformations.
-   Copyright (C) 1997-1999, 2000-2002, 2006, 2011 Free Software Foundation, Inc.
+   Copyright (C) 1997-1999, 2000-2002, 2006, 2011, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -124,22 +124,15 @@ BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1,
 #endif
 
 
-BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16//", 1, "=ascii->UTF-16",
-			__gconv_transform_ascii_utf16, NULL, 2, 2, 1, 1)
+BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "CHAR16", 1, "=ascii->CHAR16",
+			__gconv_transform_ascii_char16, NULL, 1, 1, 2, 4)
 
-BUILTIN_TRANSFORMATION ("UTF-16//", "ANSI_X3.4-1968//", 1, "=UTF-16->ascii",
-			__gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1)
+BUILTIN_TRANSFORMATION ("CHAR16", "ANSI_X3.4-1968//", 1, "=CHAR16->ascii",
+			__gconv_transform_char16_ascii, NULL, 2, 4, 1, 1)
 
-#if BYTE_ORDER == BIG_ENDIAN
-BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16BE//", 1, "=ascii->UTF-16BE",
-			__gconv_transform_ascii_utf16, NULL, 2, 2, 1, 1)
 
-BUILTIN_TRANSFORMATION ("UTF-16BE//", "ANSI_X3.4-1968//", 1, "=UTF-16BE->ascii",
-			__gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1)
-#else
-BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16LE//", 1, "=ascii->UTF-16LE",
-			__gconv_transform_ascii_utf16, NULL, 2, 2, 1, 1)
+BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "CHAR16", 1, "=utf8->CHAR16",
+			__gconv_transform_utf8_char16, NULL, 1, 6, 2, 4)
 
-BUILTIN_TRANSFORMATION ("UTF-16LE//", "ANSI_X3.4-1968//", 1, "=UTF-16LE->ascii",
-			__gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1)
-#endif
+BUILTIN_TRANSFORMATION ("CHAR16", "ISO-10646/UTF8/", 1, "=CHAR16->utf8",
+			__gconv_transform_char16_utf8, NULL, 2, 4, 1, 6)
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 80253dd..79de975 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997-2005, 2006, 2007, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 1997-2005, 2006, 2007, 2011, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -303,8 +303,10 @@ __BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le);
 __BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal);
 __BUILTIN_TRANSFORM (__gconv_transform_internal_utf16);
 __BUILTIN_TRANSFORM (__gconv_transform_utf16_internal);
-__BUILTIN_TRANSFORM (__gconv_transform_ascii_utf16);
-__BUILTIN_TRANSFORM (__gconv_transform_utf16_ascii);
+__BUILTIN_TRANSFORM (__gconv_transform_ascii_char16);
+__BUILTIN_TRANSFORM (__gconv_transform_char16_ascii);
+__BUILTIN_TRANSFORM (__gconv_transform_utf8_char16);
+__BUILTIN_TRANSFORM (__gconv_transform_char16_utf8);
 # undef __BUITLIN_TRANSFORM
 
 /* Specialized conversion function for a single byte to INTERNAL, recognizing
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index b0ef3e6..d145a3e 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -1,5 +1,5 @@
 /* Simple transformations functions.
-   Copyright (C) 1997-2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+   Copyright (C) 1997-2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -1065,6 +1065,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 									      \
     state->__count = inend - *inptrp;					      \
 									      \
+    assert (ch != 0xc0 && ch != 0xc1);					      \
     if (ch >= 0xc2 && ch < 0xe0)					      \
       {									      \
 	/* We expect two bytes.  The first byte cannot be 0xc0 or	      \
@@ -1322,15 +1323,15 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #include <iconv/skeleton.c>
 
 
-/* Convert from ISO 646-IRV to UTF-16.  */
+/* Convert from ISO 646-IRV to the char16_t format.  */
 #define DEFINE_INIT		0
 #define DEFINE_FINI		0
 #define MIN_NEEDED_FROM		1
 #define MIN_NEEDED_TO		2
 #define FROM_DIRECTION		1
-#define FROM_LOOP		ascii_utf16_loop
-#define TO_LOOP			ascii_utf16_loop /* This is not used.  */
-#define FUNCTION_NAME		__gconv_transform_ascii_utf16
+#define FROM_LOOP		ascii_char16_loop
+#define TO_LOOP			ascii_char16_loop /* This is not used.  */
+#define FUNCTION_NAME		__gconv_transform_ascii_char16
 #define ONE_DIRECTION		1
 
 #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
@@ -1358,15 +1359,15 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #include <iconv/skeleton.c>
 
 
-/* Convert from UTF-16 to ISO 646-IRV.  */
+/* Convert from the char16_t format to ISO 646-IRV.  */
 #define DEFINE_INIT		0
 #define DEFINE_FINI		0
 #define MIN_NEEDED_FROM		2
 #define MIN_NEEDED_TO		1
 #define FROM_DIRECTION		1
-#define FROM_LOOP		utf16_ascii_loop
-#define TO_LOOP			utf16_ascii_loop /* This is not used.  */
-#define FUNCTION_NAME		__gconv_transform_utf16_ascii
+#define FROM_LOOP		char16_ascii_loop
+#define TO_LOOP			char16_ascii_loop /* This is not used.  */
+#define FUNCTION_NAME		__gconv_transform_char16_ascii
 #define ONE_DIRECTION		1
 
 #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
@@ -1383,9 +1384,328 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
       {									      \
 	/* It's an one byte sequence.  */				      \
 	*outptr++ = *((const uint16_t *) inptr);			      \
-	inptr += sizeof (uint16_t);					      \
+	inptr += 2;							      \
+      }									      \
+  }
+#define LOOP_NEED_FLAGS
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from the char16_t format to UTF-8.  */
+#define DEFINE_INIT		0
+#define DEFINE_FINI		0
+#define MIN_NEEDED_FROM		2
+#define MAX_NEEDED_FROM		4
+#define MIN_NEEDED_TO		1
+#define MAX_NEEDED_TO		6
+#define FROM_DIRECTION		1
+#define FROM_LOOP		char16_utf8_loop
+#define TO_LOOP			char16_utf8_loop /* This is not used.  */
+#define FUNCTION_NAME		__gconv_transform_char16_utf8
+#define ONE_DIRECTION		1
+
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
+#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
+#define LOOPFCT			FROM_LOOP
+#define BODY \
+  {									      \
+    /* Yes, reading a 16-bit number and storing it as 32-bit is correct.  */  \
+    uint32_t wc = *((const uint16_t *) inptr);				      \
+    inptr += 2;								      \
+									      \
+    if (__builtin_expect (wc < 0x80, 1))				      \
+      /* It's a one-byte sequence.  */					      \
+      *outptr++ = (unsigned char) wc;					      \
+    else								      \
+      {									      \
+	size_t step;							      \
+									      \
+	if (__builtin_expect (wc < 0xd800 || wc > 0xdfff, 1))		      \
+	  step = wc < 0x800 ? 2 : 3;					      \
+	else								      \
+	  {								      \
+	    if (__builtin_expect (inptr + 2 > inend, 0))		      \
+	      {                                                               \
+		/* We don't have enough input for another complete input      \
+		   character.  */                                             \
+		inptr -= 2;						      \
+		result = __GCONV_INCOMPLETE_INPUT;                            \
+		break;                                                        \
+	      }								      \
+									      \
+	    uint32_t sec = *((const uint16_t *) inptr);			      \
+	    if (__builtin_expect (sec < 0xdc00, 0)			      \
+		|| __builtin_expect (sec > 0xdfff, 0))			      \
+	      {								      \
+		/* This is not a valid second word for a surrogate.  */	      \
+		STANDARD_FROM_LOOP_ERR_HANDLER (2);			      \
+	      }								      \
+	    inptr += 2;							      \
+	    wc = ((wc - 0xd7c0) << 10) + (sec - 0xdc00);		      \
+									      \
+	    step = wc < 0x200000 ? 4 : 5;				      \
+	  }								      \
+									      \
+	if (__builtin_expect (outptr + step > outend, 0))		      \
+	  {								      \
+	    /* Too long.  */						      \
+	    result = __GCONV_FULL_OUTPUT;				      \
+	    inptr -= step >= 4 ? 4 : 2;					      \
+	    break;							      \
+	  }								      \
+									      \
+	unsigned char *start = outptr;					      \
+	*outptr = (unsigned char) (~0xff >> step);			      \
+	outptr += step;							      \
+	do								      \
+	  {								      \
+	    start[--step] = 0x80 | (wc & 0x3f);				      \
+	    wc >>= 6;							      \
+	  }								      \
+	while (step > 1);						      \
+	start[0] |= wc;							      \
       }									      \
   }
 #define LOOP_NEED_FLAGS
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
+
+
+/* Convert from UTF-8 to the char16_t format.  */
+#define DEFINE_INIT		0
+#define DEFINE_FINI		0
+#define MIN_NEEDED_FROM		1
+#define MAX_NEEDED_FROM		6
+#define MIN_NEEDED_TO		2
+#define MAX_NEEDED_TO		4
+#define FROM_DIRECTION		1
+#define FROM_LOOP		utf8_char16_loop
+#define TO_LOOP			utf8_char16_loop /* This is not used.  */
+#define FUNCTION_NAME		__gconv_transform_utf8_char16
+#define ONE_DIRECTION		1
+
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
+#define LOOPFCT			FROM_LOOP
+#define BODY \
+  {									      \
+    /* Next input byte.  */						      \
+    uint32_t ch = *inptr;						      \
+									      \
+    if (__builtin_expect (ch < 0x80, 1))				      \
+      {									      \
+	/* One byte sequence.  */					      \
+	*((uint16_t *) outptr) = ch;					      \
+	outptr += 2;							      \
+	++inptr;							      \
+      }									      \
+    else								      \
+      {									      \
+	uint_fast32_t cnt;						      \
+	uint_fast32_t i;						      \
+									      \
+	if (ch >= 0xc2 && ch < 0xe0)					      \
+	  {								      \
+	    /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
+	       otherwise the wide character could have been represented	      \
+	       using a single byte.  */					      \
+	    cnt = 2;							      \
+	    ch &= 0x1f;							      \
+	  }								      \
+	else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))		      \
+	  {								      \
+	    /* We expect three bytes.  */				      \
+	    cnt = 3;							      \
+	    ch &= 0x0f;							      \
+	  }								      \
+	else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))		      \
+	  {								      \
+	    /* We expect four bytes.  */				      \
+	    cnt = 4;							      \
+	    ch &= 0x07;							      \
+	  }								      \
+	else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))		      \
+	  {								      \
+	    /* We expect five bytes.  */				      \
+	    cnt = 5;							      \
+	    ch &= 0x03;							      \
+	  }								      \
+	else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))		      \
+	  {								      \
+	    /* We expect six bytes.  */					      \
+	    cnt = 6;							      \
+	    ch &= 0x01;							      \
+	  }								      \
+	else								      \
+	  {								      \
+	    /* Search the end of this ill-formed UTF-8 character.  This	      \
+	       is the next byte with (x & 0xc0) != 0x80.  */		      \
+	    i = 0;							      \
+	    do								      \
+	      ++i;							      \
+	    while (inptr + i < inend					      \
+		   && (*(inptr + i) & 0xc0) == 0x80			      \
+		   && i < 5);						      \
+									      \
+	  errout:							      \
+	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				      \
+	  }								      \
+									      \
+	if (__builtin_expect (inptr + cnt > inend, 0))			      \
+	  {								      \
+	    /* We don't have enough input.  But before we report that check   \
+	       that all the bytes are correct.  */			      \
+	    for (i = 1; inptr + i < inend; ++i)				      \
+	      if ((inptr[i] & 0xc0) != 0x80)				      \
+		break;							      \
+									      \
+	    if (__builtin_expect (inptr + i == inend, 1))		      \
+	      {								      \
+		result = __GCONV_INCOMPLETE_INPUT;			      \
+		break;							      \
+	      }								      \
+									      \
+	    goto errout;						      \
+	  }								      \
+									      \
+	/* Read the possible remaining bytes.  */			      \
+	for (i = 1; i < cnt; ++i)					      \
+	  {								      \
+	    uint32_t byte = inptr[i];					      \
+									      \
+	    if ((byte & 0xc0) != 0x80)					      \
+	      /* This is an illegal encoding.  */			      \
+	      break;							      \
+									      \
+	    ch <<= 6;							      \
+	    ch |= byte & 0x3f;						      \
+	  }								      \
+									      \
+	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.		      \
+	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could	      \
+	   have been represented with fewer than cnt bytes.  */		      \
+	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)		      \
+	    /* Do not accept UTF-16 surrogates.  */			      \
+	    || (ch >= 0xd800 && ch <= 0xdfff))				      \
+	  {								      \
+	    /* This is an illegal encoding.  */				      \
+	    goto errout;						      \
+	  }								      \
+									      \
+	/* Now adjust the pointers and store the result.  */		      \
+	if (ch < 0x10000)						      \
+	  *((uint16_t *) outptr) = ch;					      \
+	else								      \
+	  {								      \
+	    if (__builtin_expect (outptr + 4 > outend, 0))		      \
+	      {								      \
+		result = __GCONV_FULL_OUTPUT;				      \
+		break;							      \
+	      }								      \
+									      \
+	    *((uint16_t *) outptr) = 0xd7c0 + (ch >> 10);		      \
+	    outptr += 2;						      \
+	    *((uint16_t *) outptr) = 0xdc00 + (ch & 0x3ff);		      \
+	  }								      \
+									      \
+	outptr += 2;							      \
+	inptr += cnt;							      \
+      }									      \
+  }
+#define LOOP_NEED_FLAGS
+
+#define STORE_REST \
+  {									      \
+    /* We store the remaining bytes while converting them into the UCS4	      \
+       format.  We can assume that the first byte in the buffer is	      \
+       correct and that it requires a larger number of bytes than there	      \
+       are in the input buffer.  */					      \
+    wint_t ch = **inptrp;						      \
+    size_t cnt, r;							      \
+									      \
+    state->__count = inend - *inptrp;					      \
+									      \
+    assert (ch != 0xc0 && ch != 0xc1);					      \
+    if (ch >= 0xc2 && ch < 0xe0)					      \
+      {									      \
+	/* We expect two bytes.  The first byte cannot be 0xc0 or	      \
+	   0xc1, otherwise the wide character could have been		      \
+	   represented using a single byte.  */				      \
+	cnt = 2;							      \
+	ch &= 0x1f;							      \
+      }									      \
+    else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))			      \
+      {									      \
+	/* We expect three bytes.  */					      \
+	cnt = 3;							      \
+	ch &= 0x0f;							      \
+      }									      \
+    else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))			      \
+      {									      \
+	/* We expect four bytes.  */					      \
+	cnt = 4;							      \
+	ch &= 0x07;							      \
+      }									      \
+    else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))			      \
+      {									      \
+	/* We expect five bytes.  */					      \
+	cnt = 5;							      \
+	ch &= 0x03;							      \
+      }									      \
+    else								      \
+      {									      \
+	/* We expect six bytes.  */					      \
+	cnt = 6;							      \
+	ch &= 0x01;							      \
+      }									      \
+									      \
+    /* The first byte is already consumed.  */				      \
+    r = cnt - 1;							      \
+    while (++(*inptrp) < inend)						      \
+      {									      \
+	ch <<= 6;							      \
+	ch |= **inptrp & 0x3f;						      \
+	--r;								      \
+      }									      \
+									      \
+    /* Shift for the so far missing bytes.  */				      \
+    ch <<= r * 6;							      \
+									      \
+    /* Store the number of bytes expected for the entire sequence.  */	      \
+    state->__count |= cnt << 8;						      \
+									      \
+    /* Store the value.  */						      \
+    state->__value.__wch = ch;						      \
+  }
+
+#define UNPACK_BYTES \
+  {									      \
+    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };  \
+    wint_t wch = state->__value.__wch;					      \
+    size_t ntotal = state->__count >> 8;				      \
+									      \
+    inlen = state->__count & 255;					      \
+									      \
+    bytebuf[0] = inmask[ntotal - 2];					      \
+									      \
+    do									      \
+      {									      \
+	if (--ntotal < inlen)						      \
+	  bytebuf[ntotal] = 0x80 | (wch & 0x3f);			      \
+	wch >>= 6;							      \
+      }									      \
+    while (ntotal > 1);							      \
+									      \
+    bytebuf[0] |= wch;							      \
+  }
+
+#define CLEAR_STATE \
+  state->__count = 0
+
+
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 59c6402..13facc8 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -719,10 +719,12 @@ add_known_names (struct gconv_module *node)
     add_known_names (node->right);
   do
     {
-      if (strcmp (node->from_string, "INTERNAL"))
+      if (strcmp (node->from_string, "INTERNAL") != 0
+	  && strcmp (node->from_string, "CHAR16") != 0)
 	tsearch (node->from_string, &printlist,
 		 (__compar_fn_t) strverscmp);
-      if (strcmp (node->to_string, "INTERNAL") != 0)
+      if (strcmp (node->to_string, "INTERNAL") != 0
+	  && strcmp (node->to_string, "CHAR16") != 0)
 	tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
 
       node = node->same;
@@ -748,7 +750,8 @@ insert_cache (void)
       {
 	const char *str = strtab + hashtab[cnt].string_offset;
 
-	if (strcmp (str, "INTERNAL") != 0)
+	if (strcmp (str, "INTERNAL") != 0
+	    && strcmp (str, "CHAR16") != 0)
 	  tsearch (str, &printlist, (__compar_fn_t) strverscmp);
       }
 }
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index 8c446e1..010e0c8 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011
+# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011,2012
 #	Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
@@ -46,6 +46,7 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
 strop-tests :=  wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy
 tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
 	 tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \
+	 tst-c16c32-1 \
 	 wcsatcliff $(addprefix test-,$(strop-tests))
 
 include ../Rules
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
index c75ca3b..3fed0b5 100644
--- a/wcsmbs/c16rtomb.c
+++ b/wcsmbs/c16rtomb.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2011.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -44,7 +44,12 @@ static mbstate_t state;
 size_t
 c16rtomb (char *s, char16_t c16, mbstate_t *ps)
 {
-  char buf[MB_CUR_MAX];
+#if 1
+  // XXX The ISO C 11 spec I have does not say anything about handling
+  // XXX surrogates in this interface.
+  return wcrtomb (s, c16, ps ?: &state);
+#else
+  char buf[MB_LEN_MAX];
   struct __gconv_step_data data;
   int status;
   size_t result;
@@ -78,9 +83,9 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
     PTR_DEMANGLE (fct);
 #endif
 
-  /* If C16 is the NUL character we write into the output buffer the byte
-     sequence necessary for PS to get into the initial state, followed
-     by a NUL byte.  */
+  /* If C16 is the NUL character we write into the output buffer
+     the byte sequence necessary for PS to get into the initial
+     state, followed by a NUL byte.  */
   if (c16 == L'\0')
     {
       status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
@@ -96,7 +101,8 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
 
       status = DL_CALL_FCT (fct,
 			    (fcts->fromc16, &data, &inbuf,
-			     inbuf + sizeof (char16_t), NULL, &dummy, 0, 1));
+			     inbuf + sizeof (char16_t), NULL, &dummy,
+			     0, 1));
     }
 
   /* There must not be any problems with the conversion but illegal input
@@ -118,4 +124,5 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
     }
 
   return result;
+#endif
 }
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
index 7b5822d..df970fb 100644
--- a/wcsmbs/mbrtoc16.c
+++ b/wcsmbs/mbrtoc16.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gnu.org>, 2011.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -43,20 +43,32 @@ static mbstate_t state;
 size_t
 mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
 {
-  char16_t buf[1];
+  if (ps == NULL)
+    ps = &state;
+
+  if (ps->__count & 0x80000000)
+    {
+      /* We have to return the second word for a surrogate.  */
+      ps->__count &= 0x7fffffff;
+      *pc16 = ps->__value.__wch;
+      ps->__value.__wch = L'\0';
+      return (size_t) -3;
+    }
+
+  char16_t buf[2];
   struct __gconv_step_data data;
   int status;
   size_t result;
   size_t dummy;
   const unsigned char *inbuf, *endbuf;
-  unsigned char *outbuf = (unsigned char *) (pc16 ?: buf);
+  unsigned char *outbuf = (unsigned char *) buf;
   const struct gconv_fcts *fcts;
 
   /* Set information for this step.  */
   data.__invocation_counter = 0;
   data.__internal_use = 1;
   data.__flags = __GCONV_IS_LAST;
-  data.__statep = ps ?: &state;
+  data.__statep = ps;
   data.__trans = NULL;
 
   /* A first special case is if S is NULL.  This means put PS in the
@@ -85,9 +97,22 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   if (fcts->toc16->__shlib_handle != NULL)
     PTR_DEMANGLE (fct);
 #endif
+
+  /* We first have to check whether the character can be represented
+     without a surrogate.  If we immediately pass in a buffer large
+     enough to hold two char16_t values and the first character does
+     not require a surrogate the routine will try to convert more
+     input if N is larger than needed for the first character.  */
   status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
 			      NULL, &dummy, 0, 1));
 
+  if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
+    {
+      data.__outbufend = outbuf + 2 * sizeof (char16_t);
+      status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+				  NULL, &dummy, 0, 1));
+    }
+
   /* There must not be any problems with the conversion but illegal input
      characters.  The output buffer must be large enough, otherwise the
      definition of MB_CUR_MAX is not correct.  All the other possible
@@ -100,15 +125,28 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
       || status == __GCONV_FULL_OUTPUT)
     {
-      if (data.__outbuf != (unsigned char *) outbuf
-	  && *(char16_t *) outbuf == U('\0'))
+      if (pc16 != NULL)
+	*pc16 = buf[0];
+
+      if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
 	{
 	  /* The converted character is the NUL character.  */
 	  assert (__mbsinit (data.__statep));
 	  result = 0;
 	}
       else
-	result = inbuf - (const unsigned char *) s;
+	{
+	  result = inbuf - (const unsigned char *) s;
+
+	  if (data.__outbuf != outbuf + 2)
+	    {
+	      /* This is a surrogate.  */
+	      assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
+	      assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
+	      ps->__count |= 0x80000000;
+	      ps->__value.__wch = buf[1];
+	    }
+	}
     }
   else if (status == __GCONV_INCOMPLETE_INPUT)
     result = (size_t) -2;
diff --git a/wcsmbs/tst-c16c32-1.c b/wcsmbs/tst-c16c32-1.c
new file mode 100644
index 0000000..f4534c5
--- /dev/null
+++ b/wcsmbs/tst-c16c32-1.c
@@ -0,0 +1,131 @@
+#include <inttypes.h>
+#include <locale.h>
+#include <stdio.h>
+#include <uchar.h>
+
+
+static int
+do_test (void)
+{
+  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
+    {
+      puts ("cannot set locale");
+      return 1;
+    }
+
+  int result = 0;
+
+  char32_t c32 = 48;
+  do
+    {
+      if (c32 >= 0xd800 && c32 <= 0xe000)
+	continue;
+
+      char buf[20];
+      size_t n1 = c32rtomb (buf, c32, NULL);
+      if (n1 <= 0)
+	{
+	  printf ("c32rtomb for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
+	  result = 1;
+	  continue;
+	}
+
+      char32_t c32out;
+      size_t n2 = mbrtoc32 (&c32out, buf, n1, NULL);
+      if ((ssize_t) n2 < 0)
+	{
+	  printf ("mbrtoc32 for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
+	  result = 1;
+	  continue;
+	}
+      if (n2 != n1)
+	{
+	  printf ("mbrtoc32 for U'\\x%" PRIx32 "' consumed %zu bytes, not %zu\n",
+		  (uint32_t) c32, n2, n1);
+	  result = 1;
+	}
+      else if (c32out != c32)
+	{
+	  printf ("mbrtoc32 for U'\\x%" PRIx32 "' produced U'\\x%" PRIx32 "\n",
+		  (uint32_t) c32, (uint32_t) c32out);
+	  result = 1;
+	}
+
+      char16_t c16;
+      size_t n3 = mbrtoc16 (&c16, buf, n1, NULL);
+      if (n3 != n1)
+	{
+	  printf ("mbrtoc16 for U'\\x%" PRIx32 "' did not consume all bytes\n",
+		  (uint32_t) c32);
+	  result = 1;
+	  continue;
+	}
+      if (c32 < 0x10000)
+	{
+	  if (c16 != c32)
+	    {
+	      printf ("mbrtoc16 for U'\\x%" PRIx32 "' produce u'\\x%" PRIx16 "'\n",
+		      (uint32_t) c32, (uint16_t) c16);
+	      result = 1;
+	      continue;
+	    }
+	}
+      else
+	{
+	  buf[0] = '1';
+	  char16_t c16_2;
+	  size_t n4 = mbrtoc16 (&c16_2, buf, 1, NULL);
+	  if (n4 != (size_t) -3)
+	    {
+	      printf ("second mbrtoc16 for U'\\x%" PRIx32 "' did not return -3\n",
+		      (uint32_t) c32);
+	      result = 1;
+	      continue;
+	    }
+
+	  if (c32 != (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00))
+	    {
+	      printf ("mbrtoc16 for U'\\x%" PRIx32 "' returns U'\\x%" PRIx32 "\n",
+		      (uint32_t) c32,
+		      (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00));
+	      result = 1;
+	      continue;
+	    }
+	}
+
+      buf[0] = '\0';
+      char16_t c16_nul;
+      n3 = mbrtoc16 (&c16_nul, buf, n1, NULL);
+      if (n3 != 0)
+	{
+	  printf ("mbrtoc16 for '\\0' returns %zd\n", n3);
+	  result = 1;
+	  continue;
+	}
+
+      if (c32 < 0x10000)
+	{
+	  size_t n5 = c16rtomb (buf, c16, NULL);
+	  if ((ssize_t) n5 < 0)
+	    {
+	      printf ("c16rtomb for U'\\x%" PRIx32 "' failed with %zd\n",
+		      (uint32_t) c32, n5);
+	      result = 1;
+	      continue;
+	    }
+	  if (n5 != n1)
+	    {
+	      printf ("c16rtomb for U'\\x%" PRIx32 "' produced %zu bytes instead of %zu bytes\n",
+		      (uint32_t) c32, n5, n1);
+	      result = 1;
+	      continue;
+	    }
+	}
+    }
+  while ((c32 += 0x1111) <= U'\x12000');
+
+  return result;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c
index 547b05a..946fdaf 100644
--- a/wcsmbs/wcrtomb.c
+++ b/wcsmbs/wcrtomb.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1996-1998,2000,2002,2005,2011,2012
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -38,7 +39,7 @@ static mbstate_t state;
 size_t
 __wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
 {
-  char buf[MB_CUR_MAX];
+  char buf[MB_LEN_MAX];
   struct __gconv_step_data data;
   int status;
   size_t result;
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 212a6c8..9ce26f1 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011,2012
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -74,7 +75,7 @@ static const struct __gconv_step to_c16 =
   .__counter = INT_MAX,
   .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
   .__to_name = (char *) "UTF-16//",
-  .__fct = __gconv_transform_ascii_utf16,
+  .__fct = __gconv_transform_ascii_char16,
   .__btowc_fct = NULL,
   .__init_fct = NULL,
   .__end_fct = NULL,
@@ -93,7 +94,7 @@ static const struct __gconv_step from_c16 =
   .__counter = INT_MAX,
   .__from_name = (char *) "UTF-16//",
   .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
-  .__fct = __gconv_transform_utf16_ascii,
+  .__fct = __gconv_transform_char16_ascii,
   .__btowc_fct = NULL,
   .__init_fct = NULL,
   .__end_fct = NULL,
@@ -209,7 +210,7 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
       int use_translit;
 
       /* Allocate the gconv_fcts structure.  */
-      new_fcts = malloc (sizeof *new_fcts);
+      new_fcts = calloc (1, sizeof *new_fcts);
       if (new_fcts == NULL)
 	goto failed;
 
@@ -229,16 +230,24 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
 	 represent all others.  */
       new_fcts->towc = __wcsmbs_getfct ("INTERNAL", complete_name,
 					&new_fcts->towc_nsteps);
-      new_fcts->tomb = (new_fcts->towc != NULL
-			? __wcsmbs_getfct (complete_name, "INTERNAL",
-					   &new_fcts->tomb_nsteps)
-			: NULL);
+      if (new_fcts->towc != NULL)
+	new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL",
+					  &new_fcts->tomb_nsteps);
 
-      // XXX
-      new_fcts->toc16 = (struct __gconv_step *) &to_c16;
-      new_fcts->toc16_nsteps = 1;
-      new_fcts->fromc16 = (struct __gconv_step *) &from_c16;
-      new_fcts->fromc16_nsteps = 1;
+      if (new_fcts->tomb != NULL)
+	{
+	  new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name,
+					     &new_fcts->toc16_nsteps);
+
+	  if (new_fcts->toc16 != NULL)
+	    new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16",
+						 &new_fcts->fromc16_nsteps);
+	  else
+	    {
+	      __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps);
+	      new_fcts->toc16 = NULL;
+	    }
+	}
 
       /* If any of the conversion functions is not available we don't
 	 use any since this would mean we cannot convert back and
@@ -255,6 +264,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
 	}
       else
 	{
+	  // XXX At least for now we live with the CHAR16 not being available.
+	  if (new_fcts->toc16 == NULL)
+	    new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16;
+	  if (new_fcts->fromc16 == NULL)
+	    new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16;
+
 	  new_category->private.ctype = new_fcts;
 	  new_category->private.cleanup = &_nl_cleanup_ctype;
 	}
@@ -277,11 +292,15 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy)
   *copy = *orig;
 
   /* Now increment the usage counters.
-     Note: This assumes copy->towc_nsteps == 1 and copy->tomb_nsteps == 1.  */
+     Note: This assumes copy->*_nsteps == 1.  */
   if (copy->towc->__shlib_handle != NULL)
     ++copy->towc->__counter;
   if (copy->tomb->__shlib_handle != NULL)
     ++copy->tomb->__counter;
+  if (copy->toc16->__shlib_handle != NULL)
+    ++copy->toc16->__counter;
+  if (copy->fromc16->__shlib_handle != NULL)
+    ++copy->fromc16->__counter;
 }
 
 
@@ -296,30 +315,24 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
 
   copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps);
   if (copy->tomb == NULL)
-    goto out_mb;
-
-#if 0
-  copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", &copy->fromc16_nsteps);
-  if (copy->fromc16 == NULL)
-    goto out_fromc16;
-
-  copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, &copy->toc16_nsteps);
-  if (copy->toc16 == NULL)
-#else
-  if (0)
-#endif
     {
-#if 0
-      __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
-    out_fromc16:
-      __gconv_close_transform (copy->tomb, copy->tomb_nsteps);
-#endif
-    out_mb:
       __gconv_close_transform (copy->towc, copy->towc_nsteps);
-    out_wc:
       return 1;
     }
 
+  copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", &copy->fromc16_nsteps);
+  if (copy->fromc16 == NULL)
+    copy->toc16 = NULL;
+  else
+    {
+      copy->toc16 = __wcsmbs_getfct ("CHAR16", name, &copy->toc16_nsteps);
+      if (copy->toc16 == NULL)
+	{
+	  __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
+	  copy->fromc16 = NULL;
+	}
+    }
+
   return 0;
 }
 
@@ -335,11 +348,8 @@ _nl_cleanup_ctype (struct __locale_data *locale)
       /* Free the old conversions.  */
       __gconv_close_transform (data->tomb, data->tomb_nsteps);
       __gconv_close_transform (data->towc, data->towc_nsteps);
-#if 0
-      // XXX
       __gconv_close_transform (data->fromc16, data->fromc16_nsteps);
-      __gconv_close_transform (data->toc16, data->toc16c_nsteps);
-#endif
+      __gconv_close_transform (data->toc16, data->toc16_nsteps);
       free ((char *) data);
     }
 }

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog             |   16 +++
 iconv/gconv_builtin.h |   25 ++---
 iconv/gconv_int.h     |    8 +-
 iconv/gconv_simple.c  |  340 +++++++++++++++++++++++++++++++++++++++++++++++--
 iconv/iconv_prog.c    |    9 +-
 wcsmbs/Makefile       |    3 +-
 wcsmbs/c16rtomb.c     |   19 ++-
 wcsmbs/mbrtoc16.c     |   52 +++++++-
 wcsmbs/tst-c16c32-1.c |  131 +++++++++++++++++++
 wcsmbs/wcrtomb.c      |    5 +-
 wcsmbs/wcsmbsload.c   |   84 +++++++------
 11 files changed, 607 insertions(+), 85 deletions(-)
 create mode 100644 wcsmbs/tst-c16c32-1.c


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]