This is the mail archive of the
newlib@sources.redhat.com
mailing list for the newlib project.
%ls and %lc in scanf.c
- From: "Artem B. Bityuckiy" <abityuckiy at yandex dot ru>
- To: newlib at sources dot redhat dot com
- Date: Mon, 24 Nov 2003 18:42:25 +0300
- Subject: %ls and %lc in scanf.c
Hello,
We've added widechar support to vfscanf - %S, %ls, %C, %lc modifiers.
The code was taken from FreeBSD's vfprintf.c - since
Newlib's license is BSD, I think this is legal.
The patch has been lightly tested with the default locale and a UTF-8-based
locale. If mbsrtowcs and mbrtowc work well, this should work too.
One issue I ran into is the '#ifndef CYGNUS_NEC' conditional that surrounds
calls to the __srefill function. I've added it to the %C- and %S-related code
(analogously to the %c- and %s-related code), but I'm not sure whether this
is correct.
Please take a look, and enjoy :-))
--
Best Regards,
Artem B. Bityuckiy,
St.-Petersburg, Russia.
--- /home/dedekind/work/AnonymousCVSes/Newlib/src/newlib/libc/stdio/vfscanf.c 2003-11-22 11:48:18.000000000 +0300
+++ snapshot_vfscanf.c 2003-11-24 18:07:13.000000000 +0300
@@ -138,7 +138,13 @@
#endif
#include "floatio.h"
-#define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */
+
+#if ((MAXEXP+MAXFRACT+3) > MB_LEN_MAX)
+# define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */
+#else
+# define BUF MB_LEN_MAX
+#endif
+
/* An upper bound for how long a long prints in decimal. 4 / 13 approximates
log (2). Add one char for roundoff compensation and one for the sign. */
#define MAX_LONG_LEN ((CHAR_BIT * sizeof (long) - 1) * 4 / 13 + 2)
@@ -254,14 +260,14 @@
int base = 0; /* base argument to strtol/strtoul */
int nbytes = 1; /* number of bytes read from fmt string */
wchar_t wc; /* wchar to use to read format string */
+ wchar_t *wcp; /* handy wide character pointer */
+ size_t mbslen; /* length of converted multibyte sequence */
+ mbstate_t state; /* value to keep track of multibyte state */
u_long (*ccfn) () = 0; /* conversion function (strtol/strtoul) */
char ccltab[256]; /* character class table for %[...] */
char buf[BUF]; /* buffer for numeric conversions */
char *lptr; /* literal pointer */
-#ifdef MB_CAPABLE
- mbstate_t state; /* value to keep track of multibyte state */
-#endif
char *cp;
short *sp;
@@ -428,6 +434,9 @@
c = CT_FLOAT;
break;
#endif
+ case 'S':
+ flags |= LONG;
+ /* FALLTHROUGH */
case 's':
c = CT_STRING;
@@ -439,6 +448,10 @@
c = CT_CCL;
break;
+ case 'C':
+ flags |= LONG;
+ /* FALLTHROUGH */
+
case 'c':
flags |= NOSKIP;
c = CT_CHAR;
@@ -538,20 +551,53 @@
/* scan arbitrary characters (sets NOSKIP) */
if (width == 0)
width = 1;
- if (flags & SUPPRESS)
- {
- size_t sum = 0;
-
- for (;;)
+ if (flags & LONG) {
+ if ((flags & SUPPRESS) == 0)
+ wcp = va_arg(ap, wchar_t *);
+ else
+ wcp = NULL;
+ n = 0;
+ while (width != 0) {
+ if (n == MB_CUR_MAX)
+ goto input_failure;
+ buf[n++] = *fp->_p;
+ fp->_r -= 1;
+ fp->_p += 1;
+ memset((void *)&state, '\0', sizeof(mbstate_t));
+ if ((mbslen = mbrtowc(wcp, buf, n, &state)) == (size_t)-1)
+ goto input_failure; /* Invalid sequence */
+ if (mbslen == 0 && !(flags & SUPPRESS))
+ *wcp = L'\0';
+ if (mbslen != (size_t)-2) { /* Incomplete sequence */
+ nread += n;
+ width -= 1;
+ if (!(flags & SUPPRESS))
+ wcp += 1;
+ n = 0;
+ }
+ if (fp->_r <= 0
+#ifndef CYGNUS_NEC
+ && __srefill(fp))
+#endif
{
- if ((n = fp->_r) < (int)width)
+ if (n != 0)
+ goto input_failure;
+ break;
+ }
+ }
+ if (!(flags & SUPPRESS))
+ nassigned++;
+ }
+ else if (flags & SUPPRESS)
{
+ size_t sum = 0;
+ for (;;) {
+ if ((n = fp->_r) < (int)width) {
sum += n;
width -= n;
fp->_p += n;
#ifndef CYGNUS_NEC
- if (__srefill (fp))
- {
+ if (__srefill (fp)) {
#endif
if (sum == 0)
goto input_failure;
@@ -570,8 +616,7 @@
}
nread += sum;
}
- else
- {
+ else {
#ifdef CYGNUS_NEC
/* Kludge city for the moment */
char *dest = va_arg (ap, char *);
@@ -579,8 +624,7 @@
if (fp->_r == 0)
goto input_failure;
- while (n && fp->_r)
- {
+ while (n && fp->_r) {
*dest++ = *(fp->_p++);
n--;
fp->_r--;
@@ -648,12 +692,50 @@
case CT_STRING:
/* like CCL, but zero-length string OK, & no NOSKIP */
if (width == 0)
- width = ~0;
- if (flags & SUPPRESS)
- {
+ width = (size_t)~0;
+ if (flags & LONG) {
+ /* Process %S and %ls placeholders */
+ if ((flags & SUPPRESS) == 0)
+ wcp = va_arg(ap, wchar_t *);
+ else
+ wcp = &wc;
n = 0;
- while (!isspace (*fp->_p))
- {
+ while (!isspace(*fp->_p) && width != 0) {
+ if (n == MB_CUR_MAX)
+ goto input_failure;
+ buf[n++] = *fp->_p;
+ fp->_r += 1;
+ fp->_p += 1;
+ memset((void *)&state, '\0', sizeof(mbstate_t));
+ if ((mbslen = mbrtowc(wcp, buf, n, &state)) == (size_t)-1)
+ goto input_failure;
+ if (mbslen == 0)
+ *wcp = L'\0';
+ if (mbslen != (size_t)-2) { /* Incomplete sequence */
+ if (iswspace(*wcp)) {
+ while (n != 0)
+ ungetc(buf[--n], fp);
+ break;
+ }
+ nread += n;
+ width -= 1;
+ if ((flags & SUPPRESS) == 0)
+ wcp += 1;
+ n = 0;
+ }
+ if (fp->_r <= 0 && __srefill(fp)) {
+ if (n != 0)
+ goto input_failure;
+ break;
+ }
+ }
+ if (!(flags & SUPPRESS)) {
+ *wcp = L'\0';
+ nassigned++;
+ }
+ } else if (flags & SUPPRESS) {
+ n = 0;
+ while (!isspace (*fp->_p)) {
n++, fp->_r--, fp->_p++;
if (--width == 0)
break;
@@ -662,11 +744,9 @@
}
nread += n;
}
- else
- {
+ else {
p0 = p = va_arg (ap, char *);
- while (!isspace (*fp->_p))
- {
+ while (!isspace (*fp->_p)) {
fp->_r--;
*p++ = *fp->_p++;
if (--width == 0)