This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] gethex doesn't cope with multibyte decimalpoints


On Feb  6 16:34, Jeff Johnston wrote:
> Corinna Vinschen wrote:
>> Hi,
>>
>> AFAICS, there's a bug in gethex().  The function assumes that the
>> decimalpoint is a single char.
>> [...]
>>   
> This isn't going to work unfortunately.  The decimal point location  
> (decpt) is calculated by adding 1 to the string pointer s which in the  
> multibyte case is wrong.  This value is used to figure out the exponent  
> to adjust the result by (see the calculation of e).  Changing to reading  
> left to right makes the calculation of the result wrong if the number of  
> hex digits is larger than 8.  For example, if you have 10 hex digits,  
> you want the bottom 8 to form one long integer and the top 2 digits by  
> themselves not the other way around.

Below is another solution for this problem.  It sticks to the right-to-left
reading order in the main loop, while allowing a multibyte decimal point
to be specified.  The calculation of decpt should be correct now.
In the original code, decpt is pointing to the character succeeding the
decimal point.  The pointer s is always >= decpt.  The below code also
sets decpt to the next char succeeding the multibyte decimal point and s
is still >= decpt.  So the calculation of e is still the same as in the
original code.

The main loop now checks for the trailing byte in the decimal point char
and if it hits that char, it first checks if the strcmp would still be
within the s0 margin.  If the decimal point has been detected, s1 is set
back to the leading byte of the decimal point character.  Then the loop
continues with the next *--s1 which points to the next digit preceding
the decimal point.

I changed strtod and wcstod accordingly to cope correctly with multibyte
decimal points.  I also removed the USE_LOCALE define in gdtoa-gethex.c
and strtod.c since it's not used or set anywhere in newlib's configury.

So far newlib's locale code doesn't allow changing the locale settings
returned by localeconv(), but I'm planning to change that.


Corinna


 	* libc/stdlib/gdtoa-gethex.c: Remove use of USE_LOCALE.
	(gethex): Allow multibyte decimal point.
 	Fix compiler warnings due to different signedness of pointer types.
	* libc/stdlib/strtod.c: Remove use of USE_LOCALE.
	(_strtod_r): Allow multibyte decimal point.
	* libc/stdlib/wcstod.c (_wcstod_r): Evaluate correct wide char
	endptr position if the decimal point is a multibyte char.


Index: libc/stdlib/gdtoa-gethex.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/gdtoa-gethex.c,v
retrieving revision 1.1
diff -u -p -r1.1 gdtoa-gethex.c
--- libc/stdlib/gdtoa-gethex.c	22 Jun 2006 17:59:52 -0000	1.1
+++ libc/stdlib/gdtoa-gethex.c	23 Mar 2009 13:55:03 -0000
@@ -35,10 +35,7 @@ THIS SOFTWARE.
 #include "mprec.h"
 #include "gdtoa.h"
 #include "gd_qnan.h"
-
-#ifdef USE_LOCALE
 #include "locale.h"
-#endif
 
 unsigned char hexdig[256];
 
@@ -151,11 +148,10 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
 	int esign, havedig, irv, k, n, nbits, up, zret;
 	__ULong L, lostbits, *x;
 	Long e, e1;
-#ifdef USE_LOCALE
-	unsigned char decimalpoint = *localeconv()->decimal_point;
-#else
-#define decimalpoint '.'
-#endif
+	unsigned char *decimalpoint = (unsigned char *)
+				      localeconv()->decimal_point;
+	size_t decp_len = strlen ((const char *) decimalpoint);
+	unsigned char decp_end = decimalpoint[decp_len - 1];
 
 	if (!hexdig['0'])
 		hexdig_init();
@@ -170,9 +166,9 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
 	e = 0;
 	if (!hexdig[*s]) {
 		zret = 1;
-		if (*s != decimalpoint)
+		if (strcmp ((const char *) s, (const char *) decimalpoint) != 0)
 			goto pcheck;
-		decpt = ++s;
+		decpt = (s += decp_len);
 		if (!hexdig[*s])
 			goto pcheck;
 		while(*s == '0')
@@ -184,8 +180,9 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
 		}
 	while(hexdig[*s])
 		s++;
-	if (*s == decimalpoint && !decpt) {
-		decpt = ++s;
+	if (strcmp ((const char *) s, (const char *) decimalpoint) == 0
+	    && !decpt) {
+		decpt = (s += decp_len);
 		while(hexdig[*s])
 			s++;
 		}
@@ -226,8 +223,12 @@ _DEFUN(gethex, (ptr, sp, fpi, exp, bp, s
 	n = 0;
 	L = 0;
 	while(s1 > s0) {
-		if (*--s1 == decimalpoint)
+		if (*--s1 == decp_end && s1 - decp_len + 1 >= s0
+		    && strcmp ((const char *) s1 - decp_len + 1,
+			       (const char *) decimalpoint) == 0) {
+			s1 -= decp_len - 1; /* Note the --s1 above! */
 			continue;
+		}
 		if (n == 32) {
 			*x++ = L;
 			L = 0;
Index: libc/stdlib/strtod.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/strtod.c,v
retrieving revision 1.12
diff -u -p -r1.12 strtod.c
--- libc/stdlib/strtod.c	27 Nov 2008 20:45:37 -0000	1.12
+++ libc/stdlib/strtod.c	23 Mar 2009 13:55:03 -0000
@@ -122,9 +122,7 @@ THIS SOFTWARE.
 /* #include <fenv.h> */
 /* #endif */
 
-#ifdef USE_LOCALE
 #include "locale.h"
-#endif
 
 #ifdef IEEE_Arith
 #ifndef NO_IEEE_Scale
@@ -307,14 +305,10 @@ _DEFUN (_strtod_r, (ptr, s00, se),
 		else if (nd < 16)
 			z = 10*z + c - '0';
 	nd0 = nd;
-#ifdef USE_LOCALE
-	if (c == *localeconv()->decimal_point)
-#else
-	if (c == '.')
-#endif
+	if (strcmp (s, localeconv()->decimal_point) == 0)
 		{
 		decpt = 1;
-		c = *++s;
+		c = *(s += strlen (localeconv()->decimal_point));
 		if (!nd) {
 			for(; c == '0'; c = *++s)
 				nz++;
Index: libc/stdlib/wcstod.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/wcstod.c,v
retrieving revision 1.3
diff -u -p -r1.3 wcstod.c
--- libc/stdlib/wcstod.c	25 Feb 2009 21:33:18 -0000	1.3
+++ libc/stdlib/wcstod.c	23 Mar 2009 13:55:03 -0000
@@ -116,8 +116,10 @@ Supporting OS subroutines required: <<cl
 #include <_ansi.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <string.h>
 #include <wchar.h>
 #include <wctype.h>
+#include <locale.h>
 #include <math.h>
 
 double
@@ -167,9 +169,25 @@ _DEFUN (_wcstod_r, (ptr, nptr, endptr),
          * where it ended, count multibyte characters to find the
          * corresponding position in the wide char string.
          */
-        if (endptr != NULL)
-                /* XXX Assume each wide char is one byte. */
+        if (endptr != NULL) {
+		/* The only valid multibyte char in a float converted by
+		   strtod/wcstod is the radix char.  What we do here is,
+		   figure out if the radix char was in the valid leading
+		   float sequence in the incoming string.  If so, the
+		   multibyte float string is strlen(radix char) - 1 bytes
+		   longer than the incoming wide char string has characters.
+		   To fix endptr, reposition end as if the radix char was
+		   just one byte long.  The resulting difference (end - buf)
+		   is then equivalent to the number of valid wide characters
+		   in the input string. */
+		len = strlen (localeconv ()->decimal_point);
+		if (len > 1) {
+			char *d = strstr (buf, localeconv ()->decimal_point);
+			if (d && d < end)
+				end -= len - 1;
+		}
                 *endptr = (wchar_t *)nptr + (end - buf);
+	}
 
         _free_r(ptr, buf);
 


-- 
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]