This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
[PATCH] regex bug fix for gawk
- From: Isamu Hasegawa <isamu at yamato dot ibm dot com>
- To: Ulrich Drepper <drepper at redhat dot com>
- Cc: Aharon Robbins <arnold at skeeve dot com>, "Bonzini" <bonzini at gnu dot org>, shoji at jp dot ibm dot com, libc-alpha at sources dot redhat dot com
- Date: Mon, 21 Oct 2002 22:27:19 +0900
- Subject: [PATCH] regex bug fix for gawk
Hi,
The attached patch contains a test case and a fix for the bug Arnold told me about.
Concretely, the RE "[0\-9]" incorrectly matches "1" when the syntax
contains RE_BACKSLASH_ESCAPE_IN_LISTS.
Would you please try the patch?
2002-10-21 Isamu Hasegawa <isamu@yamato.ibm.com>
* posix/Makefile: Add a test case for the bug reported by Aharon
Robbins <arnold@skeeve.com>.
* posix/bug-regex13.c: New file.
* posix/regcomp.c (peek_token_bracket): Skip the byte already read.
--
Isamu Hasegawa
IBM Japan, Ltd.
diff -urNp libc-/posix/Makefile libc/posix/Makefile
--- libc-/posix/Makefile 2002-10-21 22:07:21.000000000 +0900
+++ libc/posix/Makefile 2002-10-21 22:08:24.000000000 +0900
@@ -73,7 +73,8 @@ tests := tstgetopt testfnm runtests run
tst-truncate64 tst-fork tst-fnmatch tst-regexloc tst-dir \
tst-chmod bug-regex1 bug-regex2 bug-regex3 bug-regex4 \
tst-gnuglob tst-regex bug-regex5 bug-regex6 bug-regex7 \
- bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12
+ bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
+ bug-regex13
ifeq (yes,$(build-shared))
test-srcs := globtest
tests += wordexp-test tst-exec tst-spawn
diff -urNp libc-/posix/bug-regex13.c libc/posix/bug-regex13.c
--- libc-/posix/bug-regex13.c 1970-01-01 09:00:00.000000000 +0900
+++ libc/posix/bug-regex13.c 2002-10-21 22:11:06.000000000 +0900
@@ -0,0 +1,75 @@
+/* Regular expression tests.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct
+{
+ int syntax;
+ const char *pattern;
+ const char *string;
+ int start;
+} tests[] = {
+ {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "1", -1}, /* It should not match. */
+ {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "-", 0}, /* It should match. */
+};
+
+int
+main (void)
+{
+ struct re_pattern_buffer regbuf;
+ const char *err;
+ size_t i;
+ int ret = 0;
+
+ mtrace ();
+
+ for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
+ {
+ int start;
+ re_set_syntax (tests[i].syntax);
+ memset (&regbuf, '\0', sizeof (regbuf));
+ err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern),
+ &regbuf);
+ if (err != NULL)
+ {
+ printf ("re_compile_pattern failed: %s\n", err);
+ ret = 1;
+ continue;
+ }
+
+ start = re_search (&regbuf, tests[i].string, strlen (tests[i].string),
+ 0, strlen (tests[i].string), NULL);
+ if (start != tests[i].start)
+ {
+ printf ("re_search failed %d\n", start);
+ ret = 1;
+ regfree (&regbuf);
+ continue;
+ }
+ regfree (&regbuf);
+ }
+
+ return ret;
+}
diff -urNp libc-/posix/regcomp.c libc/posix/regcomp.c
--- libc-/posix/regcomp.c 2002-10-21 22:07:15.000000000 +0900
+++ libc/posix/regcomp.c 2002-10-21 22:08:59.000000000 +0900
@@ -1690,7 +1690,8 @@ peek_token_bracket (token, input, syntax
{
/* In this case, '\' escape a character. */
unsigned char c2;
- c2 = re_string_peek_byte (input, 1);
+ re_string_skip_bytes (input, 1);
+ c2 = re_string_peek_byte (input, 0);
token->opr.c = c2;
token->type = CHARACTER;
return 1;