This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix buffer overrun in regexp matcher


When extending regex buffers, make sure we allocate enough room for the
state log.  Merely doubling the space may not be enough if the current
node has accepted a long run of characters.  This part of the code only
triggers with multibyte characters.

Andreas.

	[BZ #15078]
	* posix/regexec.c (extend_buffers): Add parameter min_len.
	(check_matching): Pass minimum needed length.
	(clean_state_log_if_needed): Likewise.
	(get_subexp): Likewise.
	* posix/Makefile (tests): Add bug-regex34.
	(bug-regex34-ENV): Define.
	* posix/bug-regex34.c: New file.

diff --git a/NEWS b/NEWS
index 9a039d8..ec4175c 100644
--- a/NEWS
+++ b/NEWS
@@ -10,7 +10,7 @@ Version 2.18
 * The following bugs are resolved with this release:
 
   13951, 14200, 14317, 14327, 14496, 14964, 14981, 14982, 14985, 14994,
-  14996, 15003, 15020, 15023, 15036.
+  14996, 15003, 15020, 15023, 15036, 15078.
 
 
 Version 2.17
diff --git a/posix/Makefile b/posix/Makefile
index 57672d8..6ceb440 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -86,7 +86,7 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   tst-rfc3484-3 \
 		   tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \
 		   bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
-		   bug-getopt5 tst-getopt_long1
+		   bug-getopt5 tst-getopt_long1 bug-regex34
 xtests		:= bug-ga2
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
@@ -199,6 +199,7 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata
+bug-regex34-ENV = LOCPATH=$(common-objpfx)localedata
 tst-rxspencer-ARGS = --utf8 rxspencer/tests
 tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
 tst-pcre-ARGS = PCRE.tests
diff --git a/posix/bug-regex34.c b/posix/bug-regex34.c
new file mode 100644
index 0000000..bb3b613
--- /dev/null
+++ b/posix/bug-regex34.c
@@ -0,0 +1,46 @@
+/* Test re_search with multi-byte characters in UTF-8.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <string.h>
+#include <locale.h>
+#include <regex.h>
+
+static int
+do_test (void)
+{
+  struct re_pattern_buffer r;
+  /* ááááááááx */
+  const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax";
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    {
+      puts ("setlocale failed");
+      return 1;
+    }
+  memset (&r, 0, sizeof (r));
+
+  re_compile_pattern ("[^x]x", 5, &r);
+  /* This was triggering a buffer overflow.  */
+  re_search (&r, s, strlen (s), 0, strlen (s), 0);
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regexec.c b/posix/regexec.c
index 7f2de85..5ca2bf6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -197,7 +197,7 @@ static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
 static int check_node_accept (const re_match_context_t *mctx,
 			      const re_token_t *node, int idx)
      internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
      internal_function;
 
 /* Entry point for POSIX code.  */
@@ -1160,7 +1160,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match,
 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
 	      && mctx->input.valid_len < mctx->input.len))
 	{
-	  err = extend_buffers (mctx);
+	  err = extend_buffers (mctx, next_char_idx + 1);
 	  if (BE (err != REG_NOERROR, 0))
 	    {
 	      assert (err == REG_ESPACE);
@@ -1738,7 +1738,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
 	  && mctx->input.valid_len < mctx->input.len))
     {
       reg_errcode_t err;
-      err = extend_buffers (mctx);
+      err = extend_buffers (mctx, next_state_log_idx + 1);
       if (BE (err != REG_NOERROR, 0))
 	return err;
     }
@@ -2792,7 +2792,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
 		  if (bkref_str_off >= mctx->input.len)
 		    break;
 
-		  err = extend_buffers (mctx);
+		  err = extend_buffers (mctx, bkref_str_off + 1);
 		  if (BE (err != REG_NOERROR, 0))
 		    return err;
 
@@ -4102,7 +4102,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
 
 static reg_errcode_t
 internal_function __attribute_warn_unused_result__
-extend_buffers (re_match_context_t *mctx)
+extend_buffers (re_match_context_t *mctx, int min_len)
 {
   reg_errcode_t ret;
   re_string_t *pstr = &mctx->input;
@@ -4111,8 +4111,10 @@ extend_buffers (re_match_context_t *mctx)
   if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
     return REG_ESPACE;
 
-  /* Double the lengthes of the buffers.  */
-  ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2));
+  /* Double the lengthes of the buffers, but allocate at least MIN_LEN.  */
+  ret = re_string_realloc_buffers (pstr,
+				   MAX (min_len,
+					MIN (pstr->len, pstr->bufs_len * 2)));
   if (BE (ret != REG_NOERROR, 0))
     return ret;
 
-- 
1.8.1.2


-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]