This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix regex mb char handling


Hi!

The following patch fixes segfault on:
#include <locale.h>
#include <regex.h>
#include <stdio.h>

int main (void)
{
  setlocale (LC_ALL, "zh_CN");

  regex_t re;
  if (regcomp (&re, "abc", REG_ICASE))
    {
      puts ("regcomp failed");
      return 1;
    }
  if (regexec (&re, "\xa8\xa7", 0, NULL, 0) != REG_NOMATCH)
    {
      puts ("regexec unexpectedly succeeded");
      return 1;
    }
  regfree (&re);
  return 0;
}
testcase (not included yet in the patch, because I want to find out if
some already built testing locale has similar property, particularly
that for a mb char its towupper (resp. towlower) char is not
represent in the locale's character set).

2005-03-17  Jakub Jelinek  <jakub@redhat.com>

	* posix/regcomp.c (re_compile_fastmap_iter): Fix check for failed
	__wcrtomb.  Check return values of other __wcrtomb calls.
	* posix/regex_internal.c (build_wcs_buffer, re_string_skip_chars):
	Change mbclen type to size_t.
	(build_wcs_upper_buffer): Change mbclen and mbcdlen type to size_t.
	Handle mb chars whose upper case doesn't have multibyte representation
	in locale's charset.

--- libc/posix/regcomp.c.jj	2005-02-22 10:02:42.000000000 +0100
+++ libc/posix/regcomp.c	2005-03-17 21:06:43.622233856 +0100
@@ -359,7 +359,8 @@ re_compile_fastmap_iter (bufp, init_stat
 	      memset (&state, 0, sizeof (state));
 	      if (mbrtowc (&wc, (const char *) buf, p - buf,
 			   &state) == p - buf
-		  && __wcrtomb ((char *) buf, towlower (wc), &state) > 0)
+		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
+		      != (size_t) -1))
 		re_set_fastmap (fastmap, 0, buf[0]);
 	    }
 #endif
@@ -409,12 +410,13 @@ re_compile_fastmap_iter (bufp, init_stat
 	      char buf[256];
 	      mbstate_t state;
 	      memset (&state, '\0', sizeof (state));
-	      __wcrtomb (buf, cset->mbchars[i], &state);
-	      re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
 	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
 		{
-		  __wcrtomb (buf, towlower (cset->mbchars[i]), &state);
-		  re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+		      != (size_t) -1)
+		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
 		}
 	    }
 	}
--- libc/posix/regex_internal.c.jj	2005-03-08 13:05:18.000000000 +0100
+++ libc/posix/regex_internal.c	2005-03-17 21:17:48.069329822 +0100
@@ -220,7 +220,8 @@ build_wcs_buffer (pstr)
   unsigned char buf[64];
 #endif
   mbstate_t prev_st;
-  int byte_idx, end_idx, mbclen, remain_len;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
 
   /* Build the buffers from pstr->valid_len to either pstr->len or
      pstr->bufs_len.  */
@@ -281,7 +282,8 @@ build_wcs_upper_buffer (pstr)
      re_string_t *pstr;
 {
   mbstate_t prev_st;
-  int src_idx, byte_idx, end_idx, mbclen, remain_len;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
 #ifdef _LIBC
   char buf[MB_CUR_MAX];
   assert (MB_CUR_MAX >= pstr->mb_cur_max);
@@ -318,12 +320,12 @@ build_wcs_upper_buffer (pstr)
 	  mbclen = mbrtowc (&wc,
 			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
 			     + byte_idx), remain_len, &pstr->cur_state);
-	  if (BE (mbclen > 0, 1))
+	  if (BE (mbclen + 2 > 2, 1))
 	    {
 	      wchar_t wcu = wc;
 	      if (iswlower (wc))
 		{
-		  int mbcdlen;
+		  size_t mbcdlen;
 
 		  wcu = towupper (wc);
 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
@@ -386,20 +388,20 @@ build_wcs_upper_buffer (pstr)
 	else
 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
 	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
-	if (BE (mbclen > 0, 1))
+	if (BE (mbclen + 2 > 2, 1))
 	  {
 	    wchar_t wcu = wc;
 	    if (iswlower (wc))
 	      {
-		int mbcdlen;
+		size_t mbcdlen;
 
 		wcu = towupper (wc);
 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
 		if (BE (mbclen == mbcdlen, 1))
 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
-		else
+		else if (mbcdlen != (size_t) -1)
 		  {
-		    int i;
+		    size_t i;
 
 		    if (byte_idx + mbcdlen > pstr->bufs_len)
 		      {
@@ -416,7 +418,7 @@ build_wcs_upper_buffer (pstr)
 		      }
 		    if (!pstr->offsets_needed)
 		      {
-			for (i = 0; i < byte_idx; ++i)
+			for (i = 0; i < (size_t) byte_idx; ++i)
 			  pstr->offsets[i] = i;
 			pstr->offsets_needed = 1;
 		      }
@@ -439,13 +441,15 @@ build_wcs_upper_buffer (pstr)
 		    src_idx += mbclen;
 		    continue;
 		  }
+                else
+                  memcpy (pstr->mbs + byte_idx, p, mbclen);
 	      }
 	    else
 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
 
 	    if (BE (pstr->offsets_needed != 0, 0))
 	      {
-		int i;
+		size_t i;
 		for (i = 0; i < mbclen; ++i)
 		  pstr->offsets[byte_idx + i] = src_idx + i;
 	      }
@@ -496,7 +500,8 @@ re_string_skip_chars (pstr, new_raw_idx,
      wint_t *last_wc;
 {
   mbstate_t prev_st;
-  int rawbuf_idx, mbclen;
+  int rawbuf_idx;
+  size_t mbclen;
   wchar_t wc = 0;
 
   /* Skip the characters which are not necessary to check.  */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]