This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.11-24-g3933378


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  3933378fd3cbe49c1e0ca42844f959eb12f05f60 (commit)
       via  3a00b16da491d5c869795251e1ad4f43b3ba1469 (commit)
       via  815d8147a3418334ffa91e2384c6e159f0809d65 (commit)
      from  7443244740724babd575943ee33c45da326afbe7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3933378fd3cbe49c1e0ca42844f959eb12f05f60

commit 3933378fd3cbe49c1e0ca42844f959eb12f05f60
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Tue Nov 17 16:24:26 2009 -0800

    Whitespace fixes.

diff --git a/posix/bug-regex30.c b/posix/bug-regex30.c
index ef2bd76..43df64d 100644
--- a/posix/bug-regex30.c
+++ b/posix/bug-regex30.c
@@ -34,7 +34,7 @@ struct
   regmatch_t rm[5];
 } tests[] = {
   /* U+0413	\xd0\x93	CYRILLIC CAPITAL LETTER GHE
-     U+0420	\xd0\xa0        CYRILLIC CAPITAL LETTER ER 
+     U+0420	\xd0\xa0        CYRILLIC CAPITAL LETTER ER
      U+0430	\xd0\xb0	CYRILLIC SMALL LETTER A
      U+0433	\xd0\xb3	CYRILLIC SMALL LETTER GHE
      U+0440	\xd1\x80	CYRILLIC SMALL LETTER ER
@@ -83,7 +83,7 @@ do_test (void)
 
       for (n = 0; n < tests[i].nmatch; ++n)
 	if (rm[n].rm_so != tests[i].rm[n].rm_so
-              || rm[n].rm_eo != tests[i].rm[n].rm_eo)
+	      || rm[n].rm_eo != tests[i].rm[n].rm_eo)
 	  {
 	    if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
 	      break;

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3a00b16da491d5c869795251e1ad4f43b3ba1469

commit 3a00b16da491d5c869795251e1ad4f43b3ba1469
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Tue Nov 17 16:23:57 2009 -0800

    Add missing test files.

diff --git a/locale/tst-duplocale.c b/locale/tst-duplocale.c
new file mode 100644
index 0000000..53e5fbb
--- /dev/null
+++ b/locale/tst-duplocale.c
@@ -0,0 +1,14 @@
+#include <locale.h>
+#include <stdio.h>
+
+static int
+do_test (void)
+{
+  locale_t d = duplocale (LC_GLOBAL_LOCALE);
+  if (d != (locale_t) 0)
+    freelocale (d);
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/locale/tst-locname.c b/locale/tst-locname.c
new file mode 100644
index 0000000..7eb71ad
--- /dev/null
+++ b/locale/tst-locname.c
@@ -0,0 +1,20 @@
+#include <langinfo.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+static int
+do_test (void)
+{
+  const char *s = nl_langinfo (_NL_LOCALE_NAME (LC_CTYPE));
+  if (s == NULL || strcmp (s, "C") != 0)
+    {
+      printf ("incorrect locale name returned: %s, expected \"C\"\n", s);
+      return 1;
+    }
+
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=815d8147a3418334ffa91e2384c6e159f0809d65

commit 815d8147a3418334ffa91e2384c6e159f0809d65
Author: Paolo Bonzini <bonzini@gnu.org>
Date:   Tue Nov 17 16:23:24 2009 -0800

    Fix ranges with multibyte characters as endpoints.
    
    This is another bug in computing the fastmap.  It was reported by a user
    of sed because it usually shows up only with !_LIBC.  However, it is
    present with _LIBC too.
    
    The bug is that, whenever we have a range at the beginning of the regex,
    the regex must be tested on any possible multibyte character, but the
    fastmap did not include all multibyte-character lead bytes.  The reason
    _LIBC masks it is that there is generally a collation symbol for
    each possible multibyte-character lead byte, so all the lead bytes are
    generally already part of the fastmap.
    
    The tests use Cyrillic characters as an example.  With _LIBC, they pass
    without the patch too, but you can make them fail by removing the
    collation-symbol handling.

diff --git a/ChangeLog b/ChangeLog
index b92fd42..0a6ae19 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2009-11-17  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/bug-regex30.c: New file.
+	* posix/Makefile: Add rules to build and run bug-regex30.
+	* posix/regcomp.c (re_compile_fastmap_iter): Add all multibyte
+	character lead bytes when there is a range in a COMPLEX_BRACKET.
+	Reported by Oleg Bylatov.
+
 2009-11-17  Ulrich Drepper  <drepper@redhat.com>
 
 	[BZ #10969]
diff --git a/posix/Makefile b/posix/Makefile
index c9ce18b..2a467a8 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -82,7 +82,7 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
 		   bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
 		   bug-regex25 bug-regex26 bug-regex27 bug-regex28 \
-		   bug-regex29 \
+		   bug-regex29 bug-regex30 \
 		   tst-nice tst-nanosleep tst-regex2 \
 		   transbug tst-rxspencer tst-pcre tst-boost \
 		   bug-ga1 tst-vfork1 tst-vfork2 tst-vfork3 tst-waitid \
@@ -195,6 +195,7 @@ bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex25-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata
+bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata
 tst-rxspencer-ARGS = --utf8 rxspencer/tests
 tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
 tst-pcre-ARGS = PCRE.tests
diff --git a/posix/bug-regex30.c b/posix/bug-regex30.c
new file mode 100644
index 0000000..ef2bd76
--- /dev/null
+++ b/posix/bug-regex30.c
@@ -0,0 +1,103 @@
+/* Russian regular expression tests.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Paolo Bonzini <pbonzini@redhat.com>, 2009.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+/* Tests supposed to match.  */
+struct
+{
+  const char *pattern;
+  const char *string;
+  int flags, nmatch;
+  regmatch_t rm[5];
+} tests[] = {
+  /* U+0413	\xd0\x93	CYRILLIC CAPITAL LETTER GHE
+     U+0420	\xd0\xa0        CYRILLIC CAPITAL LETTER ER 
+     U+0430	\xd0\xb0	CYRILLIC SMALL LETTER A
+     U+0433	\xd0\xb3	CYRILLIC SMALL LETTER GHE
+     U+0440	\xd1\x80	CYRILLIC SMALL LETTER ER
+     U+044F	\xd1\x8f	CYRILLIC SMALL LETTER YA */
+  { "[\xd0\xb0-\xd1\x8f]", "\xd0\xb3", 0, 1,
+    { { 0, 2 } } },
+  { "[\xd0\xb0-\xd1\x8f]", "\xd0\x93", REG_ICASE, 1,
+    { { 0, 2 } } },
+  { "[\xd1\x80-\xd1\x8f]", "\xd0\xa0", REG_ICASE, 1,
+    { { 0, 2 } } },
+};
+
+
+static int
+do_test (void)
+{
+  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
+    {
+      puts ("setlocale failed");
+      return 1;
+    }
+
+  int ret = 0;
+
+  for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
+    {
+      regex_t re;
+      regmatch_t rm[5];
+      int n = regcomp (&re, tests[i].pattern, tests[i].flags);
+      if (n != 0)
+	{
+	  char buf[500];
+	  regerror (n, &re, buf, sizeof (buf));
+	  printf ("regcomp %zd failed: %s\n", i, buf);
+	  ret = 1;
+	  continue;
+	}
+
+      if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0))
+	{
+	  printf ("regexec %zd failed\n", i);
+	  ret = 1;
+	  regfree (&re);
+	  continue;
+	}
+
+      for (n = 0; n < tests[i].nmatch; ++n)
+	if (rm[n].rm_so != tests[i].rm[n].rm_so
+              || rm[n].rm_eo != tests[i].rm[n].rm_eo)
+	  {
+	    if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
+	      break;
+	    printf ("regexec match failure rm[%d] %d..%d\n",
+		    n, rm[n].rm_so, rm[n].rm_eo);
+	    ret = 1;
+	    break;
+	  }
+
+      regfree (&re);
+    }
+
+  return ret;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 446fed5..6966b5d 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -377,7 +377,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
 	     applies to multibyte character sets; for single byte character
 	     sets, the SIMPLE_BRACKET again suffices.  */
 	  if (dfa->mb_cur_max > 1
-	      && (cset->nchar_classes || cset->non_match
+	      && (cset->nchar_classes || cset->non_match || cset->nranges
 # ifdef _LIBC
 		  || cset->nequiv_classes
 # endif /* _LIBC */

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                     |    8 +++
 string/tst-strfry.c => locale/tst-duplocale.c |    9 ++--
 locale/tst-locname.c                          |   20 ++++++++
 posix/Makefile                                |    3 +-
 posix/{bug-regex18.c => bug-regex30.c}        |   63 +++++++++++++------------
 posix/regcomp.c                               |    2 +-
 6 files changed, 68 insertions(+), 37 deletions(-)
 copy string/tst-strfry.c => locale/tst-duplocale.c (53%)
 create mode 100644 locale/tst-locname.c
 copy posix/{bug-regex18.c => bug-regex30.c} (55%)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]