This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi! The following testcase (invalid UTF-8) causes a segfault in regex. I'm not sure it is a good idea to rely on _NL_COLLATE_SYMB_EXTRAMB+1 following _NL_COLLATE_SYMB_EXTRAMB in the LC_COLLATE data, but I don't see how else to easily find out where the table ends. Without that, find_collation_sequence_value loops until it falls off the cliff (likely at the end of the locale-archive mapping). 2004-01-11 Jakub Jelinek <jakub@redhat.com> * posix/regexec.c (check_node_accept_bytes): Return 0 if char_len is 0. (find_collation_sequence_value): Don't look beyond end of SYMB_EXTRAMB table. * posix/Makefile (tests): Add bug-regex23. (bug-regex23-ENV): New. --- libc/posix/regexec.c.jj 2004-02-09 14:52:33.000000000 +0100 +++ libc/posix/regexec.c 2004-02-11 16:14:13.000000000 +0100 @@ -3666,7 +3666,7 @@ check_node_accept_bytes (dfa, node_idx, } elem_len = re_string_elem_size_at (input, str_idx); - if (elem_len <= 1 && char_len <= 1) + if ((elem_len <= 1 && char_len <= 1) || char_len == 0) return 0; if (node->type == COMPLEX_BRACKET) @@ -3847,8 +3847,10 @@ find_collation_sequence_value (mbs, mbs_ int32_t idx; const unsigned char *extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; - for (idx = 0; ;) + for (idx = 0; idx < extrasize;) { int mbs_cnt, found = 0; int32_t elem_mbs_len; @@ -3878,6 +3880,7 @@ find_collation_sequence_value (mbs, mbs_ /* Skip the collation sequence value. 
*/ idx += sizeof (uint32_t); } + return UINT_MAX; } } # endif /* _LIBC */ --- libc/posix/Makefile.jj 2004-01-12 10:52:37.000000000 +0100 +++ libc/posix/Makefile 2004-02-11 15:44:45.000000000 +0100 @@ -79,7 +79,7 @@ tests := tstgetopt testfnm runtests run bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \ bug-regex13 bug-regex14 bug-regex15 bug-regex16 \ bug-regex17 bug-regex18 bug-regex19 bug-regex20 \ - bug-regex21 bug-regex22 tst-nice tst-nanosleep \ + bug-regex21 bug-regex22 bug-regex23 tst-nice tst-nanosleep \ transbug tst-rxspencer tst-pcre tst-boost ifeq (yes,$(build-shared)) test-srcs := globtest @@ -163,6 +163,7 @@ bug-regex18-ENV = LOCPATH=$(common-objpf bug-regex19-ENV = LOCPATH=$(common-objpfx)localedata bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata tst-rxspencer-ARGS = --utf8 rxspencer/tests tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata tst-pcre-ARGS = PCRE.tests --- libc/posix/bug-regex23.c.jj 2004-02-11 15:39:24.000000000 +0100 +++ libc/posix/bug-regex23.c 2004-02-11 15:40:17.000000000 +0100 @@ -0,0 +1,35 @@ +/* Test we don't segfault on invalid UTF-8 sequence. + Copyright (C) 2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2004. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <locale.h> +#include <regex.h> +#include <string.h> + +int +main (void) +{ + regex_t r; + + memset (&r, 0, sizeof (r)); + setlocale (LC_ALL, "de_DE.UTF-8"); + regcomp (&r, "[-a-z_0-9.]+@[-a-z_0-9.]+", REG_EXTENDED | REG_ICASE); + regexec (&r, "\xe7\xb7\x95\xe7\x97", 0, NULL, 0); + return 0; +} Jakub
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |