* [PATCH] Small regex tweaks
@ 2003-11-26 3:24 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2003-11-26 3:24 UTC (permalink / raw)
To: Ulrich Drepper, Roland McGrath; +Cc: Glibc hackers
Hi!
Small regex tweaks:
1) regexec would happily return success with match registers pointing
outside of the string limits in some cases.
2) Avoids a segfault in bug-regex11.c when the currently failing tests are
uncommented. As far as I can tell, state_log[i] == NULL is a completely
legal situation, so prune_impossible_nodes needs to cope with it.
3) We don't need to set has_plural_match when creating just a
COMPLEX_BRACKET rather than SIMPLE_BRACKET OP_ALT COMPLEX_BRACKET.
4) build_charclass_op creates SIMPLE_BRACKET OP_ALT COMPLEX_BRACKET,
but did not set has_plural_match. Looks like a bug to me.
2003-11-25 Jakub Jelinek <jakub@redhat.com>
* posix/regexec.c (re_search_internal): If prune_impossible_nodes
returned REG_NOMATCH, set match_last to -1. Don't initialize
pmatch[0] needlessly. Fix comment.
(prune_impossible_nodes): Don't segfault on NULL state_log entry.
(set_regs): Fix comment.
* posix/regcomp.c (parse_bracket_exp): Only set has_plural_match
if adding both SIMPLE_BRACKET and COMPLEX_BRACKET.
(build_charclass_op): Set has_plural_match if adding both
SIMPLE_BRACKET and COMPLEX_BRACKET.
* posix/bug-regex11.c (tests): Fix register values for one commented
out test. Add new tests.
--- libc/posix/regexec.c.jj 2003-11-25 13:06:02.000000000 +0100
+++ libc/posix/regexec.c 2003-11-25 20:02:56.000000000 +0100
@@ -766,6 +766,7 @@ re_search_internal (preg, string, length
break;
if (BE (err != REG_NOMATCH, 0))
goto free_return;
+ match_last = -1;
}
else
break; /* We found a match. */
@@ -785,7 +786,7 @@ re_search_internal (preg, string, length
int reg_idx;
/* Initialize registers. */
- for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
/* Set the points where matching start/end. */
@@ -801,7 +802,8 @@ re_search_internal (preg, string, length
}
/* At last, add the offset to the each registers, since we slided
- the buffers so that We can assume that the matching starts from 0. */
+ the buffers so that we could assume that the matching starts
+ from 0. */
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
if (pmatch[reg_idx].rm_so != -1)
{
@@ -869,7 +871,8 @@ prune_impossible_nodes (preg, mctx)
ret = REG_NOMATCH;
goto free_return;
}
- } while (!mctx->state_log[match_last]->halt);
+ } while (mctx->state_log[match_last] == NULL
+ || !mctx->state_log[match_last]->halt);
halt_node = check_halt_state_context (preg,
mctx->state_log[match_last],
mctx, match_last);
@@ -1236,7 +1239,7 @@ pop_fail_stack (fs, pidx, nregs, regs, e
/* Set the positions where the subexpressions are starts/ends to registers
PMATCH.
Note: We assume that pmatch[0] is already set, and
- pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
+ pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
static reg_errcode_t
set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
--- libc/posix/regcomp.c.jj 2003-11-24 23:49:53.000000000 +0100
+++ libc/posix/regcomp.c 2003-11-25 19:59:13.000000000 +0100
@@ -3213,7 +3213,6 @@ parse_bracket_exp (regexp, dfa, token, s
int sbc_idx;
/* Build a tree for complex bracket. */
dfa->has_mb_node = 1;
- dfa->has_plural_match = 1;
for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
if (sbcset[sbc_idx])
break;
@@ -3233,6 +3232,7 @@ parse_bracket_exp (regexp, dfa, token, s
goto parse_bracket_exp_espace;
/* Then join them by ALT node. */
alt_token.type = OP_ALT;
+ dfa->has_plural_match = 1;
work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
if (BE (mbc_tree != NULL, 1))
return work_tree;
@@ -3627,6 +3627,7 @@ build_charclass_op (dfa, trans, class_na
goto build_word_op_espace;
/* Then join them by ALT node. */
alt_token.type = OP_ALT;
+ dfa->has_plural_match = 1;
tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, &alt_token);
if (BE (mbc_tree != NULL, 1))
return tree;
--- libc/posix/bug-regex11.c.jj 2003-11-21 23:49:48.000000000 +0100
+++ libc/posix/bug-regex11.c 2003-11-25 20:08:39.000000000 +0100
@@ -54,13 +54,22 @@ struct
{ "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
{ "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
/* More tests on backreferences. */
+ { "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
+ { "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } },
{ "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
{ "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
{ "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
+ { "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } },
+ { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
+ { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
+ { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
#if 0
/* XXX Not used since they fail so far. */
- { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 1, 2 } } },
+ { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
{ "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
+ { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
+ { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
+ { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
{ "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
{ "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
"level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
Jakub
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2003-11-25 19:19 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-11-26 3:24 [PATCH] Small regex tweaks Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).