This is the mail archive of the
cygwin-developers
mailing list for the Cygwin project.
The old $LANG-in-a-box trick (was Re: X11R7.5 and C.UTF-8)
- From: Corinna Vinschen <corinna-cygwin at cygwin dot com>
- To: cygwin-developers at cygwin dot com
- Date: Thu, 3 Dec 2009 19:44:58 +0100
- Subject: The old $LANG-in-a-box trick (was Re: X11R7.5 and C.UTF-8)
- References: <4AE8539E.9080004@cornell.edu> <20091028172216.P60895@mail101.his.com> <4AE8BC12.1060109@cornell.edu> <416096c60910281507n4774534dode1d24ac47d5b0a2@mail.gmail.com> <4B1115EC.7010308@cornell.edu> <4B174C20.1040900@tlinx.org> <416096c60912022348i36504e14l726efc9fc9c360e6@mail.gmail.com> <20091203045401.L85368@mail101.his.com> <416096c60912030516r42f67c05yfaa3b64fcca68b43@mail.gmail.com> <20091203134837.GX8059@calimero.vinschen.de>
- Reply-to: cygwin-developers at cygwin dot com
[redirected to cygwin-developers]
On Dec 3 14:48, Corinna Vinschen wrote:
> On Dec 3 13:16, Andy Koppe wrote:
> > 2009/12/3 Thomas Dickey:
> > >> From
> > >> http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html,
> > >> §7.2:
> > >>
> > >> "The tables in Locale Definition describe the characteristics and
> > >> behavior of the POSIX locale for data consisting entirely of
> > >> characters from the portable character set and the control character
> > >> set. For other characters, the behavior is unspecified."
> > >>
> > >> This means that characters 0..127 have to be treated as ASCII, but
> > >> beyond that an implementation can do what it wants. And on Cygwin 1.7,
> > >> plain "C" actually does imply UTF-8, which happily is
> > >> backward-compatible with ASCII.
> > >
> > > That's an interpretation that so far hasn't been blessed by the standards
> > > people. Any discussion of this topic should mention that, as a caveat.
> >
> > Fair point. It also means that apps are entitled to assume that "C"
> > supports no more than ASCII, which is why Cygwin 1.7's default locale
> > is C.UTF-8. A default locale setting based on the user's language
> > selection would be better, but we don't have that (yet?).
>
> Try the attached. Note: It has a hidden "--testloop" option...
I created a new, simplified version of my tiny getlocale tool, which
prints "export LANG=..." to stdout, or "setenv LANG ..." if the -c
option has been given. See attached source.
In theory we could use it in /etc/profile.d/lang.{c}sh:
/etc/profile.d/lang.sh:
test -z "${LC_ALL:-${LC_CTYPE:-$LANG}}" && eval $(getlocale -U)
/etc/profile.d/lang.csh:
if ( $?LC_ALL == 0 && $?LC_CTYPE == 0 && $?LANG == 0 ) eval `getlocale -cU`
If that's desired, I could easily create a getlocale package for the
Base category.
One problem we still have is that the iso639 language code returned by
Windows is a three-letter code in rare cases (see the --testloop output).
This is not feasible for newlib so far. Maybe the getlocale tool should
drop back to "C" in these cases.
Corinna
--
Corinna Vinschen Please, send mails regarding Cygwin to
Cygwin Project Co-Leader cygwin AT cygwin DOT com
Red Hat
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <locale.h>
#include <windows.h>
#define VERSION "1.1"
extern char *__progname;
/* Write "<program name> (Cygwin) <version>" to stdout and terminate the
   process with exit status 0.  This function does not return.  */
void
version ()
{
  fprintf (stdout, "%s (Cygwin) %s\n", __progname, VERSION);
  exit (0);
}
/* Print the help text to STREAM and terminate the process with exit
   status STATUS.  Callers pass stdout/0 for --help and stderr/1 for an
   unrecognized option.  This function does not return.  */
void
usage (FILE * stream, int status)
{
  /* The synopsis lists -c together with the other flag options; it is
     accepted by the option parser and described in the list below.  */
  fprintf (stream,
           "\n"
           "Usage: %s [-csuU] [-l LCID]\n"
           "\n"
           "Return POSIX LANG identifier corresponding to a locale, default is the\n"
           "system default locale\n"
           "Possible options are:\n"
           "\n"
           "-c, --csh return LANG setting in C-shell syntax\n"
           "-s, --system return LANG for the system's default locale\n"
           "-u, --user return LANG for the current user's default locale\n"
           "-l, --lcid LCID return LANG for the LCID given as argument\n"
           "-U, --UTF-8 always attach .UTF-8 to LANG\n"
           "-h, --help this text\n"
           "-V, --version print the version of %s and exit\n",
           __progname, __progname);
  exit (status);
}
/* Long option table for getopt_long.  Each entry maps a long option to
   the short option character handled in main's switch.  "testloop" has
   no short equivalent in OPTS below and is not mentioned in the help
   text; it is reachable only as --testloop.  */
struct option longopts[] = {
  {"csh",      no_argument,       NULL, 'c'},
  {"system",   no_argument,       NULL, 's'},
  {"user",     no_argument,       NULL, 'u'},
  {"lcid",     required_argument, NULL, 'l'},
  {"UTF-8",    no_argument,       NULL, 'U'},
  {"help",     no_argument,       NULL, 'h'},
  {"version",  no_argument,       NULL, 'V'},
  {"testloop", no_argument,       NULL, 'T'},
  {NULL,       no_argument,       NULL, 0}      /* end-of-table marker */
};

/* Short option string for getopt_long; only -l takes an argument.  */
const char *opts = "csul:UhV";
/* Print the POSIX-style locale name ("language_COUNTRY") that Windows
   reports for the locale identifier LCID.

   LCID  Windows locale identifier to look up.
   UTF   If TRUE, always append ".UTF-8" to the name.  Otherwise ".UTF-8"
         is appended only when Windows reports a default ANSI codepage
         of 0 for the locale.
   CSH   If TRUE, print "setenv LANG "..."" (C-shell syntax); otherwise
         print "export LANG="..."".  Ignored when TEST is TRUE.
   TEST  If TRUE, print one diagnostic line with the LCID, the ISO names
         and the English language/country names instead of a shell
         command, and stay silent for locales Windows does not know.

   Returns 0 on success, 2 if the locale information cannot be fetched.

   NOTE: the ISO 639 language name is printed exactly as Windows returns
   it, including the three-letter codes Windows uses for some locales.  */
int
getlocale (LCID lcid, BOOL utf, BOOL csh, BOOL test)
{
  UINT codepage = 0;
  wchar_t iso639[10];
  wchar_t iso3166[10];

  /* With LOCALE_RETURN_NUMBER the result is stored as a number in the
     buffer, and the buffer size is still counted in WCHARs, not bytes.  */
  if (!GetLocaleInfoW (lcid,
                       LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
                       (PWCHAR) &codepage,
                       sizeof codepage / sizeof (WCHAR))
      || !GetLocaleInfoW (lcid, LOCALE_SISO639LANGNAME, iso639,
                          sizeof iso639 / sizeof iso639[0])
      || !GetLocaleInfoW (lcid, LOCALE_SISO3166CTRYNAME, iso3166,
                          sizeof iso3166 / sizeof iso3166[0]))
    {
      if (!test)
        fprintf (stderr, "%s: Nonexistent locale\n", __progname);
      return 2;
    }

  if (test)
    {
      /* Zero-initialized so that a failing lookup prints an empty name
         rather than whatever happened to be on the stack.  */
      wchar_t cty[256] = { 0 };
      wchar_t lang[256] = { 0 };

      GetLocaleInfoW (lcid, LOCALE_SENGCOUNTRY, cty,
                      sizeof cty / sizeof cty[0]);
      GetLocaleInfoW (lcid, LOCALE_SENGLANGUAGE, lang,
                      sizeof lang / sizeof lang[0]);
      printf ("0x%04x=\"%ls_%ls\", %ls (%ls)\n", (unsigned) lcid, iso639,
              iso3166, lang, cty);
      return 0;
    }

  fputs (csh ? "setenv LANG " : "export LANG=", stdout);
  printf ("\"%ls_%ls%s\"\n", iso639, iso3166,
          (utf || codepage == 0) ? ".UTF-8" : "");
  return 0;
}
int main (int argc, char **argv)
{
int opt;
LCID lcid = LOCALE_SYSTEM_DEFAULT;
BOOL csh = FALSE;
BOOL utf = FALSE;
BOOL test = FALSE;
setlocale (LC_ALL, "");
while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != EOF)
switch (opt)
{
case 's':
lcid = LOCALE_SYSTEM_DEFAULT;
break;
case 'u':
lcid = LOCALE_USER_DEFAULT;
break;
case 'c':
csh = TRUE;
break;
case 'l':
lcid = strtoul (optarg, NULL, 0);
break;
case 'U':
utf = TRUE;
break;
case 'h':
usage (stdout, 0);
break;
case 'V':
version ();
break;
case 'T':
test = TRUE;
break;
default:
usage (stderr, 1);
break;
}
if (test)
{
unsigned lang, sublang;
for (lang = 1; lang <= 0x3ff; ++lang)
for (sublang = 1; sublang <= 0x3f; ++sublang)
getlocale ((sublang << 10) | lang, FALSE, FALSE, TRUE);
return 0;
}
return getlocale (lcid, utf, csh, FALSE);
}