This is the mail archive of the newlib@sources.redhat.com mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

%ls and %lc in scanf.c


Hello,

We've added wide-character support to vfscanf - the %S, %ls, %C and %lc conversions. The code was taken from FreeBSD's vfprintf.c - since
Newlib's license is BSD, I think this is legal.


The patch has been tested only lightly - with the default locale and a UTF-8-based locale. If mbsrtowcs and mbrtowc work correctly, this should work too.

One issue I ran into is the '#ifndef CYGNUS_NEC' conditional that surrounds calls to the __srefill function. I've added it to the %C- and %S-related code (analogously to the %c- and %s-related code), but I'm not sure whether this is correct.

Please take a look, and enjoy :-))

--
Best Regards,
Artem B. Bityuckiy,
St.-Petersburg, Russia.

--- /home/dedekind/work/AnonymousCVSes/Newlib/src/newlib/libc/stdio/vfscanf.c	2003-11-22 11:48:18.000000000 +0300
+++ snapshot_vfscanf.c	2003-11-24 18:07:13.000000000 +0300
@@ -138,7 +138,13 @@
 #endif
 
 #include "floatio.h"
-#define	BUF	(MAXEXP+MAXFRACT+3)	/* 3 = sign + decimal point + NUL */
+
+#if ((MAXEXP+MAXFRACT+3) > MB_LEN_MAX)
+#  define BUF (MAXEXP+MAXFRACT+3)        /* 3 = sign + decimal point + NUL */
+#else
+#  define BUF MB_LEN_MAX
+#endif
+
 /* An upper bound for how long a long prints in decimal.  4 / 13 approximates
    log (2).  Add one char for roundoff compensation and one for the sign.  */
 #define MAX_LONG_LEN ((CHAR_BIT * sizeof (long)  - 1) * 4 / 13 + 2)
@@ -254,14 +260,14 @@
   int base = 0;			/* base argument to strtol/strtoul */
   int nbytes = 1;               /* number of bytes read from fmt string */
   wchar_t wc;                   /* wchar to use to read format string */
+  wchar_t *wcp;                 /* handy wide character pointer */
+  size_t mbslen;                /* length of converted multibyte sequence */
+  mbstate_t state;              /* value to keep track of multibyte state */
 
   u_long (*ccfn) () = 0;	/* conversion function (strtol/strtoul) */
   char ccltab[256];		/* character class table for %[...] */
   char buf[BUF];		/* buffer for numeric conversions */
   char *lptr;                   /* literal pointer */
-#ifdef MB_CAPABLE
-  mbstate_t state;                /* value to keep track of multibyte state */
-#endif
 
   char *cp;
   short *sp;
@@ -428,6 +434,9 @@
 	  c = CT_FLOAT;
 	  break;
 #endif
+        case 'S':
+          flags |= LONG;
+          /* FALLTHROUGH */
 
 	case 's':
 	  c = CT_STRING;
@@ -439,6 +448,10 @@
 	  c = CT_CCL;
 	  break;
 
+        case 'C':
+          flags |= LONG;
+          /* FALLTHROUGH */
+
 	case 'c':
 	  flags |= NOSKIP;
 	  c = CT_CHAR;
@@ -538,20 +551,53 @@
 	  /* scan arbitrary characters (sets NOSKIP) */
 	  if (width == 0)
 	    width = 1;
-	  if (flags & SUPPRESS)
-	    {
-	      size_t sum = 0;
-
-	      for (;;)
+            if (flags & LONG) {
+                if ((flags & SUPPRESS) == 0)
+                    wcp = va_arg(ap, wchar_t *);
+                else
+                    wcp = NULL;
+                n = 0;
+                while (width != 0) {
+                    if (n == MB_CUR_MAX)
+                        goto input_failure;
+                    buf[n++] = *fp->_p;
+                    fp->_r -= 1;
+                    fp->_p += 1;
+                    memset((void *)&state, '\0', sizeof(mbstate_t));
+                    if ((mbslen = mbrtowc(wcp, buf, n, &state)) == (size_t)-1)
+                        goto input_failure; /* Invalid sequence */
+                    if (mbslen == 0 && !(flags & SUPPRESS))
+                        *wcp = L'\0';
+                    if (mbslen != (size_t)-2) { /* Incomplete sequence */
+                        nread += n;
+                        width -= 1;
+                        if (!(flags & SUPPRESS))
+                            wcp += 1;
+                        n = 0;
+                    }
+                    if (fp->_r <= 0 
+#ifndef CYGNUS_NEC
+                        && __srefill(fp)) 
+#endif                        
 		{
-		  if ((n = fp->_r) < (int)width)
+                        if (n != 0) 
+                            goto input_failure;
+                        break;
+                    }
+                }
+                if (!(flags & SUPPRESS))
+                    nassigned++;
+            } 
+            else if (flags & SUPPRESS) 
 		    {
+ 	        size_t sum = 0;
+	        for (;;) {
+	            if ((n = fp->_r) < (int)width) {
 		      sum += n;
 		      width -= n;
 		      fp->_p += n;
 #ifndef CYGNUS_NEC
-		      if (__srefill (fp))
-			{
+	                if (__srefill (fp)) {
 #endif
 			  if (sum == 0)
 			    goto input_failure;
@@ -570,8 +616,7 @@
 		}
 	      nread += sum;
 	    }
-	  else
-	    {
+	  else {
 #ifdef CYGNUS_NEC
 	      /* Kludge city for the moment */
 	      char *dest = va_arg (ap, char *);
@@ -579,8 +624,7 @@
 	      if (fp->_r == 0)
 		goto input_failure;
 
-	      while (n && fp->_r)
-		{
+	      while (n && fp->_r) {
 		  *dest++ = *(fp->_p++);
 		  n--;
 		  fp->_r--;
@@ -648,12 +692,50 @@
 	case CT_STRING:
 	  /* like CCL, but zero-length string OK, & no NOSKIP */
 	  if (width == 0)
-	    width = ~0;
-	  if (flags & SUPPRESS)
-	    {
+                width = (size_t)~0;
+            if (flags & LONG) {
+                /* Process %S and %ls placeholders */
+                if ((flags & SUPPRESS) == 0)
+                    wcp = va_arg(ap, wchar_t *);
+                else
+                    wcp = &wc;
 	      n = 0;
-	      while (!isspace (*fp->_p))
-		{
+                while (!isspace(*fp->_p) && width != 0) {
+                    if (n == MB_CUR_MAX)
+                        goto input_failure;
+                    buf[n++] = *fp->_p;
+                    fp->_r += 1;
+                    fp->_p += 1;
+                    memset((void *)&state, '\0', sizeof(mbstate_t));
+                    if ((mbslen = mbrtowc(wcp, buf, n, &state)) == (size_t)-1)
+                        goto input_failure;
+                    if (mbslen == 0)
+                        *wcp = L'\0';
+                    if (mbslen != (size_t)-2) { /* Incomplete sequence */
+                        if (iswspace(*wcp)) {
+                            while (n != 0)
+                                ungetc(buf[--n], fp);
+                            break;
+                        }
+                        nread += n;
+                        width -= 1;
+                        if ((flags & SUPPRESS) == 0)
+                            wcp += 1;
+                        n = 0;
+                    }
+                    if (fp->_r <= 0 && __srefill(fp)) {
+                        if (n != 0)
+                            goto input_failure;
+                        break;
+                    }
+                }
+                if (!(flags & SUPPRESS)) {
+                    *wcp = L'\0';
+                    nassigned++;
+                }
+            } else if (flags & SUPPRESS) {
+	      n = 0;
+	      while (!isspace (*fp->_p)) {
 		  n++, fp->_r--, fp->_p++;
 		  if (--width == 0)
 		    break;
@@ -662,11 +744,9 @@
 		}
 	      nread += n;
 	    }
-	  else
-	    {
+	  else {
 	      p0 = p = va_arg (ap, char *);
-	      while (!isspace (*fp->_p))
-		{
+	      while (!isspace (*fp->_p)) {
 		  fp->_r--;
 		  *p++ = *fp->_p++;
 		  if (--width == 0)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]