This is the mail archive of the
newlib@sourceware.org
mailing list for the newlib project.
[PATCH] setlocale: Allow caseinsensitive charsets
- From: Corinna Vinschen <vinschen at redhat dot com>
- To: newlib at sourceware dot org
- Date: Thu, 20 Aug 2009 15:36:40 +0200
- Subject: [PATCH] setlocale: Allow caseinsensitive charsets
- Reply-to: newlib at sourceware dot org
Hi,
for compatibility with other implementations, the patch below allows the
charset to be specified case-insensitively. This permits settings such as
LC_ALL=en_US.utf-8, instead of requiring "UTF-8" in all uppercase. The
charset is always stored internally in uppercase, so internal functions
that check the string returned by __locale_charset () will still see the
same uppercase string as before.
Additionally, the patch allows "UTF-8" to be written without the dash, as
"UTF8" or "utf8", just like on Linux.
Documentation is changed accordingly.
Tested on Cygwin.
Ok to apply?
Thanks,
Corinna
* libc/locale/locale.c: Document this change.
Throughout, check the charset string case-insensitively and store
the internal charset string in uppercase. Allow "UTF8" in addition
to "UTF-8".
Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.22
diff -u -p -r1.22 locale.c
--- libc/locale/locale.c 18 Jun 2009 09:13:39 -0000 1.22
+++ libc/locale/locale.c 20 Aug 2009 13:32:21 -0000
@@ -65,7 +65,10 @@ Even when using POSIX locale strings, th
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">>
with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850,
852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254,
-1255, 1256, 1257, 1258].
+1255, 1256, 1257, 1258]. Charsets are case insensitive. For instance,
+<<"UTF-8">> and <<"utf-8">> are equivalent. <<"UTF-8">> can also be
+written without dash, as in <<"UTF8">> or <<"utf8">>.
+
(<<"">> is also accepted; if given, the settings are read from the
corresponding LC_* environment variables and $LANG according to POSIX rules.
@@ -487,8 +490,10 @@ loadlocale(struct _reent *p, int categor
switch (charset[0])
{
case 'U':
- if (strcmp (charset, "UTF-8"))
+ case 'u':
+ if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
return NULL;
+ strcpy (charset, "UTF-8");
mbc_max = 6;
#ifdef _MB_CAPABLE
l_wctomb = __utf8_wctomb;
@@ -496,8 +501,10 @@ loadlocale(struct _reent *p, int categor
#endif
break;
case 'J':
- if (strcmp (charset, "JIS"))
+ case 'j':
+ if (strcasecmp (charset, "JIS"))
return NULL;
+ strcpy (charset, "JIS");
mbc_max = 8;
#ifdef _MB_CAPABLE
l_wctomb = __jis_wctomb;
@@ -506,7 +513,7 @@ loadlocale(struct _reent *p, int categor
break;
case 'E':
case 'e':
- if (!strcmp (charset, "EUCJP") || !strcmp (charset, "eucJP"))
+ if (!strcasecmp (charset, "EUCJP"))
{
strcpy (charset, "EUCJP");
mbc_max = 3;
@@ -516,7 +523,7 @@ loadlocale(struct _reent *p, int categor
#endif
}
#ifdef __CYGWIN__
- else if (!strcmp (charset, "EUCKR") || !strcmp (charset, "eucKR"))
+ else if (!strcasecmp (charset, "EUCKR"))
{
strcpy (charset, "EUCKR");
mbc_max = 2;
@@ -530,8 +537,10 @@ loadlocale(struct _reent *p, int categor
return NULL;
break;
case 'S':
- if (strcmp (charset, "SJIS"))
+ case 's':
+ if (strcasecmp (charset, "SJIS"))
return NULL;
+ strcpy (charset, "SJIS");
mbc_max = 2;
#ifdef _MB_CAPABLE
l_wctomb = __sjis_wctomb;
@@ -539,10 +548,12 @@ loadlocale(struct _reent *p, int categor
#endif
break;
case 'I':
+ case 'i':
/* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
ISO-8859-12. */
- if (strncmp (charset, "ISO-8859-", 9))
+ if (strncasecmp (charset, "ISO-8859-", 9))
return NULL;
+ strncpy (charset, "ISO", 3);
val = _strtol_r (p, charset + 9, &end, 10);
if (val < 1 || val > 16 || val == 12 || *end)
return NULL;
@@ -558,8 +569,10 @@ loadlocale(struct _reent *p, int categor
#endif
break;
case 'C':
- if (charset[1] != 'P')
+ case 'c':
+ if (charset[1] != 'P' && charset[1] != 'p')
return NULL;
+ strncpy (charset, "CP", 2);
val = _strtol_r (p, charset + 2, &end, 10);
if (*end)
return NULL;
@@ -603,8 +616,10 @@ loadlocale(struct _reent *p, int categor
}
break;
case 'A':
- if (strcmp (charset, "ASCII"))
+ case 'a':
+ if (strcasecmp (charset, "ASCII"))
return NULL;
+ strcpy (charset, "ASCII");
mbc_max = 1;
#ifdef _MB_CAPABLE
l_wctomb = __ascii_wctomb;
@@ -613,8 +628,10 @@ loadlocale(struct _reent *p, int categor
break;
#ifdef __CYGWIN__
case 'G':
- if (strcmp (charset, "GBK"))
+ case 'g':
+ if (strcasecmp (charset, "GBK"))
return NULL;
+ strcpy (charset, "GBK");
mbc_max = 2;
#ifdef _MB_CAPABLE
l_wctomb = __gbk_wctomb;
@@ -622,7 +639,8 @@ loadlocale(struct _reent *p, int categor
#endif
break;
case 'B':
- if (strcmp (charset, "BIG5") && strcmp (charset, "Big5"))
+ case 'b':
+ if (strcasecmp (charset, "BIG5"))
return NULL;
strcpy (charset, "BIG5");
mbc_max = 2;
--
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat