This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
[PATCH] Reading localized digits in scanf (third try)
- From: Hamed Malek <hamed at bamdad dot org>
- To: libc-alpha at sources dot redhat dot com
- Cc: Roozbeh Pournader <roozbeh at farsiweb dot info>, Behdad Esfahbod <behdad at cs dot toronto dot edu>
- Date: Mon, 11 Oct 2004 15:22:32 +0330
- Subject: [PATCH] Reading localized digits in scanf (third try)
- Organization: Sharif FarsiWeb, Inc.
In order to read localized digits, this patch adds a new level to the digit
sets, taken from a map in the locale file (to_inpunct). Reading localized
digits is already implemented in scanf for the 'I' flag, but that requires
the new digit set to be in the locale file, which is forbidden. This patch
adds the new level to scanf itself instead.
Please apply the patch or tell me if you need more explanation.
The patch is attached.
Hamed
Index: libc/stdio-common/vfscanf.c
===================================================================
RCS file: /cvs/glibc/libc/stdio-common/vfscanf.c,v
retrieving revision 1.110
diff -u -r1.110 vfscanf.c
--- libc/stdio-common/vfscanf.c 20 Apr 2004 18:51:32 -0000 1.110
+++ libc/stdio-common/vfscanf.c 4 Sep 2004 12:04:34 -0000
@@ -1163,9 +1163,16 @@
int level;
#ifdef COMPILE_WSCANF
const wchar_t *wcdigits[10];
+ /* Extra array to extend wcdigits[] for new digits in locale. */
+ const wchar_t *wcdigits_extended[10];
#else
const char *mbdigits[10];
+ const char *mbdigits_extended[10];
#endif
+ /* "to_inpunct" is a map from ASCII digits to their
+ equivalent in locale. This is defined for locales
+ which use an extra digits set. */
+ wctrans_t map = __wctrans ("to_inpunct");
int n;
from_level = 0;
@@ -1173,9 +1180,68 @@
to_level = _NL_CURRENT_WORD (LC_CTYPE,
_NL_CTYPE_INDIGITS_WC_LEN) - 1;
#else
- to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
+ to_level = (uint32_t)
+ curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
#endif
+ if (map)
+ {
+ /* Adding new level for extra digits set in locale file. */
+ to_level++;
+
+ for (n = 0; n < 10; n++)
+ {
+#ifdef COMPILE_WSCANF
+ wcdigits[n] = (const wchar_t *)
+ _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+
+ wchar_t *wc_extended = (wchar_t *)
+ alloca ((to_level + 2) * sizeof (wchar_t));
+ MEMCPY (wc_extended, wcdigits[n], to_level);
+ wc_extended[to_level] = __towctrans (L'0' + n, map);
+ wc_extended[to_level + 1] = '\0';
+ wcdigits_extended[n] = wc_extended;
+#else
+ mbdigits[n]
+ = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
+
+ /* Get the equivalent wide char in map. */
+ wint_t extra_wcdigit = __towctrans (L'0' + n, map);
+
+ /* Convert it to multibyte representation. */
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+
+ char extra_mbdigit[MB_LEN_MAX];
+ size_t mblen
+ = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
+
+ if (mblen == (size_t) -1)
+ {
+ /* Ignore this new level. */
+ map = NULL;
+ break;
+ }
+
+ /* Calculate the length of mbdigits[n]. */
+ const char *last_char = mbdigits[n];
+ for (level = 0; level < to_level; level++)
+ last_char = strchr (last_char, '\0') + 1;
+
+ size_t mbdigits_len = last_char - mbdigits[n];
+
+ /* Allocate memory for extended multibyte digit. */
+ mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
+
+ /* And get the mbdigits + extra_digit string. */
+ MEMCPY (mb_extended, mbdigits[n], mbdigits_len);
+ MEMCPY (mb_extended + mbdigits_len, extra_mbdigit, mblen);
+ mb_extended[mbdigits_len + mblen] = '\0';
+ mbdigits_extended[n] = mb_extended;
+#endif
+ }
+ }
+
/* Read the number into workspace. */
while (c != EOF && width != 0)
{
@@ -1185,8 +1251,12 @@
{
/* Get the string for the digits with value N. */
#ifdef COMPILE_WSCANF
- wcdigits[n] = (const wchar_t *)
- _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+ if (map)
+ wcdigits[n] = wcdigits_extended[n];
+ else
+ wcdigits[n] = (const wchar_t *)
+ _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+
wcdigits[n] += from_level;
if (c == (wint_t) *wcdigits[n])
@@ -1201,8 +1271,11 @@
const char *cmpp;
int avail = width > 0 ? width : INT_MAX;
- mbdigits[n]
- = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
+ if (map)
+ mbdigits[n] = mbdigits_extended[n];
+ else
+ mbdigits[n]
+ = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
for (level = 0; level < from_level; level++)
mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
Index: libc/localedata/locales/fa_IR
===================================================================
RCS file: /cvs/glibc/libc/localedata/locales/fa_IR,v
retrieving revision 1.11
diff -u -r1.11 fa_IR
--- libc/localedata/locales/fa_IR 17 Mar 2004 17:22:43 -0000 1.11
+++ libc/localedata/locales/fa_IR 4 Sep 2004 12:06:47 -0000
@@ -10,8 +10,8 @@
% Fax: +98 21 6019568
% Language: fa
% Territory: IR
-% Revision: 2.3
-% Date: 2004-03-16
+% Revision: 2.4
+% Date: 2004-09-04
% Users: general
% Repertoiremap:
% Charset: UTF-8
@@ -50,6 +50,20 @@
outdigit <U06F0>..<U06F9>
+map to_inpunct; /
+ (<U0030>,<U06F0>); /
+ (<U0031>,<U06F1>); /
+ (<U0032>,<U06F2>); /
+ (<U0033>,<U06F3>); /
+ (<U0034>,<U06F4>); /
+ (<U0035>,<U06F5>); /
+ (<U0036>,<U06F6>); /
+ (<U0037>,<U06F7>); /
+ (<U0038>,<U06F8>); /
+ (<U0039>,<U06F9>); /
+ (<U002E>,<U066B>); /
+ (<U002C>,<U066C>)
+
map to_outpunct; /
(<U002E>,<U066B>); /
(<U002C>,<U066C>)