This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi! As http://sources.redhat.com/bugzilla/show_bug.cgi?id=693 shows, old GNU regex, as well as dfa.c and perl regex all implement \B as negation of \b (but still regex.texi documents it as an empty string inside of word, rather than empty string inside of word or surrounded by non-word characters). 2005-01-26 Jakub Jelinek <jakub@redhat.com> [BZ #693] * posix/regex_internal.h (DUMMY_CONSTRAINT): Rename to... (WORD_DELIM_CONSTRAINT): ...this. (NOT_WORD_DELIM_CONSTRAINT): Define. (re_context_type): Add INSIDE_NOTWORD and NOT_WORD_DELIM, change WORD_DELIM to use WORD_DELIM_CONSTRAINT. * posix/regcomp.c (peek_token): For \B create NOT_WORD_DELIM anchor instead of INSIDE_WORD. (parse_expression): Handle NOT_WORD_DELIM constraint. * posix/bug-regex19.c (tests): Adjust tests that relied on \B being inside word instead of not word delim. * posix/tst-rxspencer.c (mb_frob_pattern): Don't frob escaped characters. * posix/rxspencer/tests: Add some new tests. --- libc/posix/regex_internal.h.jj 2005-01-08 16:50:24.000000000 +0100 +++ libc/posix/regex_internal.h 2005-01-26 14:56:20.210528592 +0100 @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. @@ -143,18 +143,21 @@ static inline void bitset_mask (bitset d #define NEXT_NEWLINE_CONSTRAINT 0x0020 #define PREV_BEGBUF_CONSTRAINT 0x0040 #define NEXT_ENDBUF_CONSTRAINT 0x0080 -#define DUMMY_CONSTRAINT 0x0100 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 typedef enum { INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, LINE_FIRST = PREV_NEWLINE_CONSTRAINT, LINE_LAST = NEXT_NEWLINE_CONSTRAINT, BUF_FIRST = PREV_BEGBUF_CONSTRAINT, BUF_LAST = NEXT_ENDBUF_CONSTRAINT, - WORD_DELIM = DUMMY_CONSTRAINT + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT } re_context_type; typedef struct --- libc/posix/regcomp.c.jj 2005-01-19 14:13:00.000000000 +0100 +++ libc/posix/regcomp.c 2005-01-26 15:01:58.715941197 +0100 @@ -1859,7 +1859,7 @@ peek_token (token, input, syntax) if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; - token->opr.ctx_type = INSIDE_WORD; + token->opr.ctx_type = NOT_WORD_DELIM; } break; case 'w': @@ -2349,15 +2349,25 @@ parse_expression (regexp, preg, token, s break; case ANCHOR: if ((token->opr.ctx_type - & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST)) + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) && dfa->word_ops_used == 0) init_word_char (dfa); - if (token->opr.ctx_type == WORD_DELIM) + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) { bin_tree_t *tree_first, *tree_last; - token->opr.ctx_type = WORD_FIRST; - tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token); - token->opr.ctx_type = WORD_LAST; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, token); token->type = OP_ALT; tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, token); --- libc/posix/bug-regex19.c.jj 2003-12-22 13:38:11.000000000 +0100 +++ libc/posix/bug-regex19.c 2005-01-26 15:10:51.334648249 +0100 @@ -1,5 +1,5 @@ /* Regular expression tests. - Copyright (C) 2003 Free Software Foundation, Inc. + Copyright (C) 2003, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. @@ -170,22 +170,22 @@ static struct test_s {ERE, "[^k]\\B[^k]", "kBk", 0, -1}, {ERE, "[^C]\\B[^C]", "CCCABA", 0, 3}, {ERE, "[^C]\\B[^C]", "CBC", 0, -1}, - {ERE, ".(\\b|\\B).", "=~AB", 0, 1}, + {ERE, ".(\\b|\\B).", "=~AB", 0, 0}, {ERE, ".(\\b|\\B).", "A=C", 0, 0}, {ERE, ".(\\b|\\B).", "ABC", 0, 0}, - {ERE, ".(\\b|\\B).", "=~\\!", 0, -1}, - {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 1}, + {ERE, ".(\\b|\\B).", "=~\\!", 0, 0}, + {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0}, {ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0}, {ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0}, - {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 3}, - {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, -1}, - {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, -1}, - {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 1}, + {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0}, + {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0}, + {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0}, + {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0}, {ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0}, {ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0}, - {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 3}, - {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, -1}, - {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, -1}, + {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0}, + {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0}, + {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0}, {ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0}, {ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1}, {ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1}, --- libc/posix/tst-rxspencer.c.jj 2003-12-29 15:01:28.000000000 +0100 +++ libc/posix/tst-rxspencer.c 2005-01-26 17:01:31.234511142 +0100 @@ -1,5 +1,5 @@ /* Regular expression tests. - Copyright (C) 2003 Free Software Foundation, Inc. + Copyright (C) 2003, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. @@ -127,14 +127,15 @@ mb_frob_string (const char *str, const c } /* Like mb_frob_string, but don't replace anything between - [: and :], [. and .] or [= and =]. */ + [: and :], [. and .] or [= and =] or characters escaped + with a backslash. */ static char * mb_frob_pattern (const char *str, const char *letters) { char *ret, *dst; const char *src; - int in_class = 0; + int in_class = 0, escaped = 0; if (str == NULL) return NULL; @@ -144,7 +145,18 @@ mb_frob_pattern (const char *str, const return NULL; for (src = str, dst = ret; *src; ++src) - if (!in_class && strchr (letters, *src)) + if (*src == '\\') + { + escaped ^= 1; + *dst++ = *src; + } + else if (escaped) + { + escaped = 0; + *dst++ = *src; + continue; + } + else if (!in_class && strchr (letters, *src)) dst = mb_replace (dst, *src); else { --- libc/posix/rxspencer/tests.jj 2004-11-19 13:38:46.000000000 +0100 +++ libc/posix/rxspencer/tests 2005-01-26 16:35:32.596730739 +0100 @@ -526,3 +526,12 @@ a((b+|((c)*)))+d - abcd abcd c,c,c,c (((\b))){0} - x @x -,-,- a(((.*)))b((\2)){0}c - abc abc @bc,@bc,@bc,-,- a(((.*)))b((\1)){0}c - axbc axbc x,x,x,-,- + +\b & SaT @aT +\b & aT @aT +a.*\b & abT ab +\b & STSS +\B & abc @bc +\B & aSbTc +\B & SaT @SaT +\B & aSTSb @TSb Jakub
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |