This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix strtol in Turkish locales (bug 19242)


The implementations of strtol and related functions use
locale-specific conversions to upper case before determining whether a
character is a valid letter in the argument.  This means that in
Turkish locales such as tr_TR.UTF-8 and tr_TR.ISO-8859-9, "i" is
rejected as not being a valid digit (its locale-specific upper-case
form is not the ASCII 'I'), whereas if the base passed to strtol is 19
or more it should be interpreted as the digit with value 18.

ISO C explicitly says "The letters from a (or A) through z (or Z) are
ascribed the values 10 through 35", so it clearly intends the standard
ASCII letters; otherwise there would not generally be exactly 26
letters to which such values could be ascribed.  (White-space, by
contrast, must be identified according to the locale.)  In particular,
'i' and 'I' must be understood to be in that sequence.

This patch makes the code check for the relevant ranges of letters
rather than using toupper / isalpha (via macros) at all; locales
remain used for whitespace testing (explicitly correct according to
ISO C).  Note that the way the code worked, the only non-ASCII letter
that would previously have been accepted would have been the Turkish
'ı' (dotless 'i'), because the uppercase version of that in Turkish
locales is 'I'.  This patch means that will no longer be accepted,
which seems appropriate.

Tested for x86_64 and x86.

2015-11-13  Joseph Myers  <joseph@codesourcery.com>

	[BZ #19242]
	* stdlib/strtol_l.c (ISALPHA): Remove macro.
	(TOUPPER): Likewise.
	(ISALPHA_BASE): New macro.
	(FROM_ALPHA): Likewise.
	(INTERNAL (__strtol_l)): Use ISALPHA_BASE, FROM_ALPHA and explicit
	checks for uppercase and lowercase letters instead of using
	ISALPHA and TOUPPER.
	* stdlib/tst-strtol-locale-main.c: New file.
	* stdlib/tst-strtol-locale.c: Likewise.
	* stdlib/Makefile (tests): Add tst-strtol-locale.
	[$(run-built-tests) = yes] (LOCALES): Add tr_TR.ISO-8859-9.
	[$(run-built-tests) = yes] ($(objpfx)tst-strtol-locale.out):
	Depend on $(gen-locales).
	* wcsmbs/tst-wcstol-locale.c: New file.
	* wcsmbs/Makefile (tests): Add tst-wcstol-locale.
	[$(run-built-tests) = yes] (LOCALES): Add tr_TR.UTF-8 and
	tr_TR.ISO-8859-9.
	[$(run-built-tests) = yes] ($(objpfx)tst-wcstol-locale.out):
	Depend on $(gen-locales).

diff --git a/stdlib/Makefile b/stdlib/Makefile
index 9e0c249..e8b5b8c 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -74,7 +74,8 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext3 bug-getcontext bug-fmtmsg1		    \
 		   tst-secure-getenv tst-strtod-overflow tst-strtod-round   \
 		   tst-tininess tst-strtod-underflow tst-tls-atexit	    \
-		   tst-setcontext3 tst-tls-atexit-nodelete
+		   tst-setcontext3 tst-tls-atexit-nodelete		    \
+		   tst-strtol-locale
 tests-static	:= tst-secure-getenv
 
 modules-names	= tst-tls-atexit-lib
@@ -122,7 +123,8 @@ endif
 include ../Rules
 
 ifeq ($(run-built-tests),yes)
-LOCALES := cs_CZ.UTF-8 de_DE.UTF-8 en_US.ISO-8859-1 tr_TR.UTF-8
+LOCALES := cs_CZ.UTF-8 de_DE.UTF-8 en_US.ISO-8859-1 tr_TR.UTF-8 \
+	   tr_TR.ISO-8859-9
 include ../gen-locales.mk
 
 $(objpfx)bug-strtod2.out: $(gen-locales)
@@ -131,6 +133,7 @@ $(objpfx)tst-strtod.out: $(gen-locales)
 $(objpfx)tst-strtod3.out: $(gen-locales)
 $(objpfx)tst-strtod4.out: $(gen-locales)
 $(objpfx)tst-strtod5.out: $(gen-locales)
+$(objpfx)tst-strtol-locale.out: $(gen-locales)
 endif
 
 # Testdir has to be named stdlib and needs to be writable
diff --git a/stdlib/strtol_l.c b/stdlib/strtol_l.c
index 8f6163d..a6d8ed1 100644
--- a/stdlib/strtol_l.c
+++ b/stdlib/strtol_l.c
@@ -137,8 +137,6 @@
 # define UCHAR_TYPE wint_t
 # define STRING_TYPE wchar_t
 # define ISSPACE(Ch) __iswspace_l ((Ch), loc)
-# define ISALPHA(Ch) __iswalpha_l ((Ch), loc)
-# define TOUPPER(Ch) __towupper_l ((Ch), loc)
 #else
 # if defined _LIBC \
    || defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
@@ -150,10 +148,18 @@
 # define UCHAR_TYPE unsigned char
 # define STRING_TYPE char
 # define ISSPACE(Ch) __isspace_l ((Ch), loc)
-# define ISALPHA(Ch) __isalpha_l ((Ch), loc)
-# define TOUPPER(Ch) __toupper_l ((Ch), loc)
 #endif
 
+#define ISALPHA_BASE(Ch, BASE)				\
+  (((Ch) >= (UCHAR_TYPE) L_('A')			\
+    && (Ch) < (UCHAR_TYPE) L_('A') - 10 + (BASE))	\
+   || ((Ch) >= (UCHAR_TYPE) L_('a')			\
+       && (Ch) < (UCHAR_TYPE) L_('a') - 10 + (BASE)))
+#define FROM_ALPHA(Ch)				\
+  (((Ch) >= L_('A') && (Ch) <= L_('Z'))		\
+   ? (Ch) - L_('A') + 10			\
+   : (Ch) - L_('a') + 10)
+
 #define INTERNAL(X) INTERNAL1(X)
 #define INTERNAL1(X) __##X##_internal
 #define WEAKNAME(X) WEAKNAME1(X)
@@ -308,7 +314,7 @@ INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
   if (*s == L_('0'))
     {
-      if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
+      if ((base == 0 || base == 16) && (s[1] == L_('X') || s[1] == L_('x')))
 	{
 	  s += 2;
 	  base = 16;
@@ -355,8 +361,7 @@ INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
 			break;
 		      cnt < thousands_len; })
 # endif
-		&& (!ISALPHA (c)
-		    || (int) (TOUPPER (c) - L_('A') + 10) >= base))
+		&& !ISALPHA_BASE (c, base))
 	      break;
 
 # ifdef USE_WIDE_CHAR
@@ -403,15 +408,15 @@ INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
 		  s += thousands_len - 1;
 		  continue;
 		}
-	      if (ISALPHA (c))
-		c = TOUPPER (c) - L_('A') + 10;
+	      if (ISALPHA_BASE (c, base))
+		c = FROM_ALPHA (c);
 	      else
 		break;
 	    }
 # endif
 #endif
-	  else if (ISALPHA (c))
-	    c = TOUPPER (c) - L_('A') + 10;
+	  else if (ISALPHA_BASE (c, base))
+	    c = FROM_ALPHA (c);
 	  else
 	    break;
 	  if ((int) c >= base)
@@ -451,15 +456,15 @@ INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
 		s += thousands_len - 1;
 		continue;
 	      }
-	    if (ISALPHA (c))
-	      c = TOUPPER (c) - L_('A') + 10;
+	    if (ISALPHA_BASE (c, base))
+	      c = FROM_ALPHA (c);
 	    else
 	      break;
 	  }
 # endif
 #endif
-	else if (ISALPHA (c))
-	  c = TOUPPER (c) - L_('A') + 10;
+	else if (ISALPHA_BASE (c, base))
+	  c = FROM_ALPHA (c);
 	else
 	  break;
 	if ((int) c >= base)
@@ -514,7 +519,7 @@ noconv:
      ENDPTR points to the `x`.  */
   if (endptr != NULL)
     {
-      if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X')
+      if (save - nptr >= 2 && (save[-1] == L_('X') || save[-1] == L_('x'))
 	  && save[-2] == L_('0'))
 	*endptr = (STRING_TYPE *) &save[-1];
       else
diff --git a/stdlib/tst-strtol-locale-main.c b/stdlib/tst-strtol-locale-main.c
new file mode 100644
index 0000000..8293e07
--- /dev/null
+++ b/stdlib/tst-strtol-locale-main.c
@@ -0,0 +1,87 @@
+/* Test strtol functions work with all ASCII letters in Turkish
+   locales (bug 19242).
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#define STR_(X) #X
+#define STR(X) STR_(X)
+#define FNPFXS STR (FNPFX)
+#define CONCAT_(X, Y) X ## Y
+#define CONCAT(X, Y) CONCAT_ (X, Y)
+#define FNX(FN) CONCAT (FNPFX, FN)
+
+#define TEST(LOC, STR, EXP_VAL, FN, TYPE, FMT)				\
+  do									\
+    {									\
+      CHAR *ep;								\
+      TYPE val = FNX (FN) (STR, &ep, 36);				\
+      printf ("%s: " FNPFXS #FN " (" SFMT ") == " FMT "\n", LOC, STR, val); \
+      if (val == (TYPE) (EXP_VAL) && *ep == 0)				\
+	printf ("PASS: %s: " FNPFXS #FN " (" SFMT ")\n", LOC, STR);	\
+      else								\
+	{								\
+	  printf ("FAIL: %s: " FNPFXS #FN " (" SFMT ")\n", LOC, STR);	\
+	  result = 1;							\
+	}								\
+    }									\
+  while (0)
+
+static int
+test_one_locale (const char *loc)
+{
+  if (setlocale (LC_ALL, loc) == NULL)
+    {
+      printf ("setlocale (LC_ALL, \"%s\") failed\n", loc);
+      return 1;
+    }
+  int result = 0;
+  for (int i = 10; i < 36; i++)
+    {
+      CHAR s[2];
+      s[0] = L_('A') + i - 10;
+      s[1] = 0;
+      TEST (loc, s, i, l, long int, "%ld");
+      TEST (loc, s, i, ul, unsigned long int, "%lu");
+      TEST (loc, s, i, ll, long long int, "%lld");
+      TEST (loc, s, i, ull, unsigned long long int, "%llu");
+      s[0] = L_('a') + i - 10;
+      s[1] = 0;
+      TEST (loc, s, i, l, long int, "%ld");
+      TEST (loc, s, i, ul, unsigned long int, "%lu");
+      TEST (loc, s, i, ll, long long int, "%lld");
+      TEST (loc, s, i, ull, unsigned long long int, "%llu");
+    }
+  return result;
+}
+
+static int
+do_test (void)
+{
+  int result = 0;
+  result |= test_one_locale ("C");
+  result |= test_one_locale ("tr_TR.UTF-8");
+  result |= test_one_locale ("tr_TR.ISO-8859-9");
+  return result;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/stdlib/tst-strtol-locale.c b/stdlib/tst-strtol-locale.c
new file mode 100644
index 0000000..739d0b0
--- /dev/null
+++ b/stdlib/tst-strtol-locale.c
@@ -0,0 +1,25 @@
+/* Test strtol functions work with all ASCII letters in Turkish
+   locales (bug 19242).  Narrow string version.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define CHAR char
+#define SFMT "\"%s\""
+#define FNPFX strto
+#define L_(C) C
+
+#include <tst-strtol-locale-main.c>
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index f333fa8..c1bb937 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -47,13 +47,14 @@ strop-tests :=  wcscmp wcsncmp wmemcmp wcslen wcschr wcsrchr wcscpy wcsnlen \
 		wcscspn wmemchr wmemset
 tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
 	 tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \
-	 tst-c16c32-1 wcsatcliff $(addprefix test-,$(strop-tests))
+	 tst-c16c32-1 wcsatcliff tst-wcstol-locale \
+	 $(addprefix test-,$(strop-tests))
 
 include ../Rules
 
 ifeq ($(run-built-tests),yes)
 LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 hr_HR.ISO-8859-2 \
-	   ja_JP.EUC-JP zh_TW.EUC-TW
+	   ja_JP.EUC-JP zh_TW.EUC-TW tr_TR.UTF-8 tr_TR.ISO-8859-9
 include ../gen-locales.mk
 
 $(objpfx)tst-btowc.out: $(gen-locales)
@@ -62,6 +63,7 @@ $(objpfx)tst-mbrtowc.out: $(gen-locales)
 $(objpfx)tst-mbrtowc2.out: $(gen-locales)
 $(objpfx)tst-wcrtomb.out: $(gen-locales)
 $(objpfx)wcsmbs-tst1.out: $(gen-locales)
+$(objpfx)tst-wcstol-locale.out: $(gen-locales)
 endif
 
 CFLAGS-wcwidth.c = -I../wctype
diff --git a/wcsmbs/tst-wcstol-locale.c b/wcsmbs/tst-wcstol-locale.c
new file mode 100644
index 0000000..ea27e8e
--- /dev/null
+++ b/wcsmbs/tst-wcstol-locale.c
@@ -0,0 +1,25 @@
+/* Test strtol functions work with all ASCII letters in Turkish
+   locales (bug 19242).  Wide string version.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define CHAR wchar_t
+#define SFMT "L\"%ls\""
+#define FNPFX wcsto
+#define L_(C) L ## C
+
+#include "../stdlib/tst-strtol-locale-main.c"

-- 
Joseph S. Myers
joseph@codesourcery.com

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]