This is the mail archive of the newlib@sources.redhat.com mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: vfprintf multibyte


J. Johnston wrote:

Artem B. Bityuckiy wrote:

Hello.

I offer some optimization.

Look at CVS-Newlib newlib/libc/stdio/vfprintf.c, line 553, for example. An mbtowc call is used there; it is needed to support multibyte format strings. But in newlib such things are usually placed inside an #ifdef MB_CAPABLE/#endif pair. This is right, because it excludes unneeded code if Newlib is configured without multibyte support (if MB_CAPABLE isn't defined, then the only supported encoding is US-ASCII or, possibly, another 1-byte encoding such as iso-8859-x).

I offer a patch that excludes the mbtowc calls when newlib is built without multibyte support.

Please, see, check, comment.


The non-mb code doesn't appear to handle the end of the format string. In the old code, there is a check if _mbtowc_r returns <= 0 at which point the code breaks. For the nul terminator, _mbtowc_r will return 0. The replacement code stops the loop when *fmt is '\0' but it does not check it afterwards. The code later makes the assumption it is skipping over the format specifier (fmt++) when this could well be the nul terminator.


-- Jeff J.




Hello.
Corrected.
This time, I've tested vfprintf with both --enable-newlib-mb=yes and --enable-newlib-mb=no.
Tested using the test from glibc-2.3.2.


--
Best Regards,
Artem B. Bityuckiy,
St.-Petersburg, Russia.


--- /home/dedekind/work/AnonymousCVSes/Newlib/src/newlib/libc/stdio/vfprintf.c	2003-11-20 15:02:25.000000000 +0300
+++ newlib-1.11.0-softmine-00/newlib/libc/stdio/vfprintf.c	2003-11-21 15:06:48.000000000 +0300
@@ -404,7 +404,6 @@
 	int width;		/* width from format (%8d), or 0 */
 	int prec;		/* precision from format (%.3d), or -1 */
 	char sign;		/* sign prefix (' ', '+', '-', or \0) */
-	wchar_t wc;
 #ifdef FLOATING_POINT
 	char *decimal_point = localeconv()->decimal_point;
 	char softsign;		/* temporary negative sign for floats */
@@ -421,10 +420,7 @@
 	int ndig;		/* actual number of digits returned by cvt */
 	char expstr[7];		/* buffer for exponent string */
 #endif
-
-
 	u_quad_t _uquad;	/* integer arguments %[diouxX] */
-
 	enum { OCT, DEC, HEX } base;/* base for [diouxX] conversion */
 	int dprec;		/* a copy of prec if [diouxX], 0 otherwise */
 	int realsz;		/* field size expanded by dprec */
@@ -435,7 +431,10 @@
 	struct __siov iov[NIOV];/* ... and individual io vectors */
 	char buf[BUF];		/* space for %c, %[diouxX], %[eEfgG] */
 	char ox[2];		/* space for 0x hex-prefix */
+#ifdef MB_CAPABLE
+	wchar_t wc;
 	mbstate_t state;        /* mbtowc calls from library must not change state */
+#endif
 	char *malloc_buf = NULL;/* handy pointer for malloced buffers */
 
 	/*
@@ -449,7 +448,9 @@
 	static _CONST char zeroes[PADSIZE] =
 	 {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'};
 
+#ifdef MB_CAPABLE
         memset (&state, '\0', sizeof (state));
+#endif
 	/*
 	 * BEWARE, these `goto error' on error, and PAD uses `n'.
 	 */
@@ -550,20 +551,26 @@
 	 */
 	for (;;) {
 	        cp = fmt;
+#ifdef MB_CAPABLE
 	        while ((n = _mbtowc_r(data, &wc, fmt, MB_CUR_MAX, &state)) > 0) {
-			fmt += n;
-			if (wc == '%') {
-				fmt--;
-				break;
-			}
+                    if (wc == '%')
+                        break;
+                    fmt += n;
 		}
+#else
+                while (*fmt != '\0' && *fmt != '%')
+                    fmt += 1;
+#endif
 		if ((m = fmt - cp) != 0) {
 			PRINT(cp, m);
 			ret += m;
 		}
+#ifdef MB_CAPABLE
 		if (n <= 0)
-			goto done;
-
+#else
+                if (*fmt == '\0')
+#endif
+                    goto done;
 		fmt_anchor = fmt;
 		fmt++;		/* skip over '%' */
 
@@ -1445,38 +1452,47 @@
 	 int *arg_type, char **last_fmt) 
 {
   int ch;
-  wchar_t wc;
-  int nbytes, number, flags;
+  int number, flags;
   int spec_type;
   int numargs = *numargs_p;
   CH_CLASS chtype;
   STATE state, next_state;
   ACTION action;
   int pos, last_arg;
-  mbstate_t wc_state;
   int max_pos_arg = n;
   enum types { INT, LONG_INT, SHORT_INT, QUAD_INT, CHAR, CHAR_PTR, DOUBLE, LONG_DOUBLE, WIDE_CHAR };
-  
+#ifdef MB_CAPABLE
+  wchar_t wc;
+  mbstate_t wc_state;
+  int nbytes; 
+#endif
+    
   /* if this isn't the first call, pick up where we left off last time */
   if (*last_fmt != NULL)
     fmt = *last_fmt;
 
+#ifdef MB_CAPABLE
   memset (&wc_state, '\0', sizeof (wc_state));
+#endif
 
   /* we need to process either to end of fmt string or until we have actually
      read the desired parameter from the vararg list. */
   while (*fmt && n >= numargs)
-    {
+  {
+#ifdef MB_CAPABLE
       while ((nbytes = _mbtowc_r(data, &wc, fmt, MB_CUR_MAX, &wc_state)) > 0) 
-	{
-	  fmt += nbytes;
-	  if (wc == '%') 
-	    break;
-	}
-      
+      {
+          fmt += nbytes;
+          if (wc == '%') 
+              break;
+      }
       if (nbytes <= 0)
 	break;
-
+#else
+      while (*fmt != '\0' && *(fmt++) != '%')
+      if (*(fmt - 1) == '\0')
+          break;
+#endif
       state = START;
       flags = 0;
       pos = -1;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]