* [PATCH 1/3] posix: add (failing) test for REG_STARTEND
@ 2023-04-22 2:21 наб
2023-04-22 2:22 ` [PATCH 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: наб @ 2023-04-22 2:21 UTC (permalink / raw)
To: libc-alpha
[-- Attachment #1: Type: text/plain, Size: 6149 bytes --]
This test passes on NetBSD, the illumos gate, and musl
with https://www.openwall.com/lists/musl/2023/04/20/2;
it's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it currently fails with
tst-reg-startend.c: ^a: a^@c: no match$
tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
tst-reg-startend.c: ^a: abc: no match$
tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
tst-reg-startend.c: ^a.c$: a^@c: no match$
tst-reg-startend.c: ^a.c$: abc: no match$
tst-reg-startend.c: ^a.*c$: a^@c: no match$
tst-reg-startend.c: ^a.*c$: abc: no match$
tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
tst-reg-startend.c: ^a[^c]c$: abc: no match$
tst-reg-startend.c: ^a..: a^@c: no match$
tst-reg-startend.c: ^a..: abc: no match$
tst-reg-startend.c: ..c: a^@c: no match$
The test may also be compiled stand-alone (-DSTANDALONE)
and on all platforms that have the interface
(hence the macro to initialise regmatch_ts,
which start with pointer fields on the illumos gate),
for ease of testing and inclusion in other test suites.
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
Please keep me in CC, as I'm not subscribed.
posix/Makefile | 1 +
posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+)
create mode 100644 posix/tst-reg-startend.c
diff --git a/posix/Makefile b/posix/Makefile
index cc77e939ad..24aeb781ca 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
tst-posix_spawn-setsid \
tst-preadwrite \
tst-preadwrite64 \
+ tst-reg-startend \
tst-regcomp-truncated \
tst-regex \
tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..ed2be224f4
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,124 @@
+/* Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted.
+
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+
+#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
+#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
+
+static const regmatch_t bound = M(1, 4);
+
+static const char *const regex_ac[] =
+ {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
+static const char *const regex_aa[] =
+ {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
+static const char *const data_aa[] = {"_a\0adef", "_abadef"};
+static const regmatch_t results_ac[] =
+ {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
+static const regmatch_t results_aa[] =
+ {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
+static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
+ sizeof(results_ac) / sizeof(*results_ac), "");
+static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
+ sizeof(results_aa) / sizeof(*results_aa), "");
+
+
+static bool
+testbunch (const char *const *regexes, const char *const data[static 2],
+ const regmatch_t *results)
+{
+#define BASEERR(data) \
+ err = true, \
+ fprintf (stdout, __FILE__ ": %s: ", *regexes), \
+ fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+ bool err = false;
+ for (; *regexes; ++regexes, ++results)
+ {
+ regex_t rgx;
+ assert (!regcomp (&rgx, *regexes, 0));
+
+ for (size_t i = 0; i < 2; ++i)
+ {
+ regmatch_t match = bound;
+ if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
+ BASEERR(data), fputs (": no match\n", stdout);
+
+ if (!MEQ(match, *results))
+ BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+ (int)results->rm_so, (int)results->rm_eo,
+ (int)match.rm_so, (int)match.rm_eo);
+ }
+
+ regfree(&rgx);
+ }
+
+ return err;
+}
+
+
+static const char *const ać_data[2] = {"_aaćdef", "_aćdef"};
+static const bool ać_exp[] = {false, true};
+
+static bool
+testać (void)
+{
+ bool err = false;
+ regex_t rgx;
+ const char *const regexes[] = {"ać"};
+ assert (!regcomp (&rgx, *regexes, 0));
+
+ for (size_t i = 0; i < 2; ++i)
+ {
+ regmatch_t match = bound;
+ if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
+ BASEERR(ać_data), fprintf (stdout, ": %s match\n",
+ ać_exp[i] ? "no" : "yes");
+
+ if (!MEQ(match, bound))
+ BASEERR(ać_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+ (int)bound.rm_so, (int)bound.rm_eo,
+ (int)match.rm_so, (int)match.rm_eo);
+ }
+
+ regfree(&rgx);
+ return err;
+}
+
+
+static int
+do_test (int argc, char **argv)
+{
+ (void) argc, (void) argv;
+ assert (setlocale (LC_ALL, "C.UTF-8"));
+
+ return testbunch (regex_ac, data_ac, results_ac) ||
+ testbunch (regex_aa, data_aa, results_aa) ||
+ testać ();
+}
+
+
+#ifndef STANDALONE
+#include "../test-skeleton.c"
+#else
+int
+main(int argc, char **argv)
+{
+ return do_test(argc, argv);
+}
+#endif
--
2.30.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL
2023-04-22 2:21 [PATCH 1/3] posix: add (failing) test for REG_STARTEND наб
@ 2023-04-22 2:22 ` наб
2023-04-22 2:23 ` [PATCH 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
2023-04-22 7:11 ` [PATCH 1/3] posix: add (failing) test for REG_STARTEND Andreas Schwab
2 siblings, 0 replies; 5+ messages in thread
From: наб @ 2023-04-22 2:22 UTC (permalink / raw)
To: libc-alpha
[-- Attachment #1: Type: text/plain, Size: 1379 bytes --]
The POSIX API always stops at the first NUL, so there's no change for that.
The BSD REG_STARTEND API, with its explicit range, can include NULs
within that range, and those NULs are matched with . and [^].
Heretofore, for a string of "a\0c", glibc would match "[^q]c", but not
".c". This is both inconsistent and nonconformant to BSD REG_STARTEND.
With this patch, they're identical like you'd expect, and the
tst-reg-startend.c: ..c: a^@c: no match$
failure is removed.
Another approach would be to remove it from _RE_SYNTAX_POSIX_COMMON,
but it's unclear to me what the custody chain is like for that and what
other regex APIs glibc offers that could be affected by this.
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
Please keep me in CC, as I'm not subscribed.
posix/regcomp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 647b18ba9e..cbd9bfc673 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -462,7 +462,7 @@ regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
{
reg_errcode_t ret;
reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
- : RE_SYNTAX_POSIX_BASIC);
+ : RE_SYNTAX_POSIX_BASIC) & ~RE_DOT_NOT_NULL;
preg->buffer = NULL;
preg->allocated = 0;
--
2.30.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
2023-04-22 2:21 [PATCH 1/3] posix: add (failing) test for REG_STARTEND наб
2023-04-22 2:22 ` [PATCH 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
@ 2023-04-22 2:23 ` наб
2023-04-22 7:11 ` [PATCH 1/3] posix: add (failing) test for REG_STARTEND Andreas Schwab
2 siblings, 0 replies; 5+ messages in thread
From: наб @ 2023-04-22 2:23 UTC (permalink / raw)
To: libc-alpha
[-- Attachment #1: Type: text/plain, Size: 3402 bytes --]
re_search_internal () starts with
/* If initial states with non-begbuf contexts have no elements,
the regex must be anchored. If preg->newline_anchor is set,
we'll never use init_state_nl, so do not check it. */
if (dfa->init_state->nodes.nelem == 0
&& dfa->init_state_word->nodes.nelem == 0
&& (dfa->init_state_nl->nodes.nelem == 0
|| !preg->newline_anchor))
{
if (start != 0 && last_start != 0)
return REG_NOMATCH;
start = last_start = 0;
}
and heretofore start and last_start (for example when "abc", {1, 2},
so matching just the "b") were != 0, and the return was taken for a "^b"
regex, which is erroneous.
Fix this by giving re_search_internal (string+rm_so, start=0),
then fixing up the returned matches in an after-pass.
This brings us to compatibility with the BSD spec and implementations.
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
Please keep me in CC, as I'm not subscribed.
posix/regexec.c | 41 ++++++++++++++++++++++++++++-------------
1 file changed, 28 insertions(+), 13 deletions(-)
diff --git a/posix/regexec.c b/posix/regexec.c
index bd0cd412d0..2ef868e1f6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -187,38 +187,53 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
string; if REG_NOTEOL is set, then $ does not match at the end.
Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
- EFLAGS is invalid. */
+ EFLAGS is invalid.
+
+ If REG_STARTEND, the bounds are
+ [STRING + PMATCH->rm_so, STRING + PMATCH->rm_eo)
+ instead of the usual
+ [STRING, STRING + strlen(STRING)),
+ but returned matches are still referenced to STRING,
+ and matching is unaffected (i.e. "abc", {1, 2} matches regex "^b$").
+ re_search_internal () has a built-in assumption of
+ (start != 0) <=> (^ doesn't match), so give it a truncated view
+ and fix up the matches afterward. */
int
regexec (const regex_t *__restrict preg, const char *__restrict string,
size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
{
reg_errcode_t err;
- Idx start, length;
+ Idx startoff = 0, length;
re_dfa_t *dfa = preg->buffer;
+ size_t i = 0;
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
return REG_BADPAT;
if (eflags & REG_STARTEND)
{
- start = pmatch[0].rm_so;
- length = pmatch[0].rm_eo;
+ startoff = pmatch[0].rm_so;
+ string += startoff;
+ length = pmatch[0].rm_eo - startoff;
}
else
- {
- start = 0;
- length = strlen (string);
- }
+ length = strlen (string);
lock_lock (dfa->lock);
if (preg->no_sub)
- err = re_search_internal (preg, string, length, start, length,
- length, 0, NULL, eflags);
- else
- err = re_search_internal (preg, string, length, start, length,
- length, nmatch, pmatch, eflags);
+ nmatch = 0;
+ err = re_search_internal (preg, string, length, 0, length,
+ length, nmatch, pmatch, eflags);
lock_unlock (dfa->lock);
+
+ if (err == REG_NOERROR && startoff)
+ for (i = 0; i < nmatch; ++i)
+ if (pmatch[i].rm_so != -1)
+ {
+ pmatch[i].rm_so += startoff;
+ pmatch[i].rm_eo += startoff;
+ }
return err != REG_NOERROR;
}
--
2.30.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/3] posix: add (failing) test for REG_STARTEND
2023-04-22 2:21 [PATCH 1/3] posix: add (failing) test for REG_STARTEND наб
2023-04-22 2:22 ` [PATCH 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
2023-04-22 2:23 ` [PATCH 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
@ 2023-04-22 7:11 ` Andreas Schwab
2023-04-22 11:12 ` [PATCH v2 " наб
2 siblings, 1 reply; 5+ messages in thread
From: Andreas Schwab @ 2023-04-22 7:11 UTC (permalink / raw)
To: наб via Libc-alpha; +Cc: наб
tst-reg-startend.c:75:27: error: stray ‘\304’ in program
static const char *const ać_data[2] = {"_aaćdef", "_aćdef"};
^
tst-reg-startend.c:75:28: error: stray ‘\207’ in program
static const char *const ać_data[2] = {"_aaćdef", "_aćdef"};
^
tst-reg-startend.c:75:29: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘_data’
static const char *const ać_data[2] = {"_aaćdef", "_aćdef"};
^~~~~
tst-reg-startend.c:76:20: error: stray ‘\304’ in program
static const bool ać_exp[] = {false, true};
^
tst-reg-startend.c:76:21: error: stray ‘\207’ in program
static const bool ać_exp[] = {false, true};
^
tst-reg-startend.c:76:22: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘_exp’
static const bool ać_exp[] = {false, true};
^~~~
tst-reg-startend.c:79:6: error: stray ‘\304’ in program
testać (void)
^
tst-reg-startend.c:79:7: error: stray ‘\207’ in program
testać (void)
^
tst-reg-startend.c: In function ‘testa’:
tst-reg-startend.c:89:27: error: stray ‘\304’ in program
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^
tst-reg-startend.c:89:28: error: stray ‘\207’ in program
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^
tst-reg-startend.c:89:26: error: ‘a’ undeclared (first use in this function)
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^
tst-reg-startend.c:89:26: note: each undeclared identifier is reported only once for each function it appears in
tst-reg-startend.c:89:29: error: expected ‘)’ before ‘_data’
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^~~~~
tst-reg-startend.c:89:11: error: too few arguments to function ‘regexec’
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^~~~~~~
In file included from ../include/regex.h:2:0,
from tst-reg-startend.c:15:
../posix/regex.h:679:12: note: declared here
extern int regexec (const regex_t *_Restrict_ __preg,
^~~~~~~
tst-reg-startend.c:89:68: error: stray ‘\304’ in program
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^
tst-reg-startend.c:89:69: error: stray ‘\207’ in program
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^
tst-reg-startend.c:89:70: error: expected ‘)’ before ‘_exp’
if (regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == ać_exp[i])
^~~~
tst-reg-startend.c:90:18: error: stray ‘\304’ in program
BASEERR(ać_data), fprintf (stdout, ": %s match\n",
^
tst-reg-startend.c:48:13: note: in definition of macro ‘BASEERR’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^~~~
tst-reg-startend.c:90:19: error: stray ‘\207’ in program
BASEERR(ać_data), fprintf (stdout, ": %s match\n",
^
tst-reg-startend.c:48:13: note: in definition of macro ‘BASEERR’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^~~~
tst-reg-startend.c:90:20: error: expected ‘)’ before ‘_data’
BASEERR(ać_data), fprintf (stdout, ": %s match\n",
^
tst-reg-startend.c:48:13: note: in definition of macro ‘BASEERR’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^~~~
tst-reg-startend.c:48:5: error: too few arguments to function ‘fwrite’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^
tst-reg-startend.c:90:9: note: in expansion of macro ‘BASEERR’
BASEERR(ać_data), fprintf (stdout, ": %s match\n",
^~~~~~~
In file included from ../include/stdio.h:14:0,
from tst-reg-startend.c:16:
../libio/stdio.h:739:15: note: declared here
extern size_t fwrite (const void *__restrict __ptr, size_t __size,
^~~~~~
tst-reg-startend.c:91:37: error: stray ‘\304’ in program
ać_exp[i] ? "no" : "yes");
^
tst-reg-startend.c:91:38: error: stray ‘\207’ in program
ać_exp[i] ? "no" : "yes");
^
tst-reg-startend.c:91:39: error: expected ‘)’ before ‘_exp’
ać_exp[i] ? "no" : "yes");
^~~~
tst-reg-startend.c:90:26: error: left-hand operand of comma expression has no effect [-Werror=unused-value]
BASEERR(ać_data), fprintf (stdout, ": %s match\n",
^
tst-reg-startend.c:94:18: error: stray ‘\304’ in program
BASEERR(ać_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
^
tst-reg-startend.c:48:13: note: in definition of macro ‘BASEERR’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^~~~
tst-reg-startend.c:94:19: error: stray ‘\207’ in program
BASEERR(ać_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
^
tst-reg-startend.c:48:13: note: in definition of macro ‘BASEERR’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^~~~
tst-reg-startend.c:94:20: error: expected ‘)’ before ‘_data’
BASEERR(ać_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
^
tst-reg-startend.c:48:13: note: in definition of macro ‘BASEERR’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^~~~
tst-reg-startend.c:48:5: error: too few arguments to function ‘fwrite’
fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
^
tst-reg-startend.c:94:9: note: in expansion of macro ‘BASEERR’
BASEERR(ać_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
^~~~~~~
In file included from ../include/stdio.h:14:0,
from tst-reg-startend.c:16:
../libio/stdio.h:739:15: note: declared here
extern size_t fwrite (const void *__restrict __ptr, size_t __size,
^~~~~~
tst-reg-startend.c:94:26: error: left-hand operand of comma expression has no effect [-Werror=unused-value]
BASEERR(ać_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
^
tst-reg-startend.c: In function ‘do_test’:
tst-reg-startend.c:112:15: error: stray ‘\304’ in program
testać ();
^
tst-reg-startend.c:112:16: error: stray ‘\207’ in program
testać ();
^
--
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2 1/3] posix: add (failing) test for REG_STARTEND
2023-04-22 7:11 ` [PATCH 1/3] posix: add (failing) test for REG_STARTEND Andreas Schwab
@ 2023-04-22 11:12 ` наб
0 siblings, 0 replies; 5+ messages in thread
From: наб @ 2023-04-22 11:12 UTC (permalink / raw)
To: Andreas Schwab; +Cc: libc-alpha
[-- Attachment #1: Type: text/plain, Size: 6300 bytes --]
This test passes on NetBSD, the illumos gate, and musl
with https://www.openwall.com/lists/musl/2023/04/20/2;
it's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it currently fails with
tst-reg-startend.c: ^a: a^@c: no match$
tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
tst-reg-startend.c: ^a: abc: no match$
tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
tst-reg-startend.c: ^a.c$: a^@c: no match$
tst-reg-startend.c: ^a.c$: abc: no match$
tst-reg-startend.c: ^a.*c$: a^@c: no match$
tst-reg-startend.c: ^a.*c$: abc: no match$
tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
tst-reg-startend.c: ^a[^c]c$: abc: no match$
tst-reg-startend.c: ^a..: a^@c: no match$
tst-reg-startend.c: ^a..: abc: no match$
tst-reg-startend.c: ..c: a^@c: no match$
The test may also be compiled stand-alone (-DSTANDALONE)
and on all platforms that have the interface
(hence the macro to initialise regmatch_ts,
which start with pointer fields on the illumos gate),
for ease of testing and inclusion in other test suites.
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
Hm, yeah; worked for me on clang trunk and bullseye gcc so I left it,
but other platforms hating the UTF-8 identifier isn't entirely unexpected.
Changed to "mb", with multibyte characters only in strings.
posix/Makefile | 1 +
posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+)
create mode 100644 posix/tst-reg-startend.c
diff --git a/posix/Makefile b/posix/Makefile
index cc77e939ad..24aeb781ca 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
tst-posix_spawn-setsid \
tst-preadwrite \
tst-preadwrite64 \
+ tst-reg-startend \
tst-regcomp-truncated \
tst-regex \
tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..c3bfac0359
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,124 @@
+/* Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted.
+
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+
+#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
+#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
+
+static const regmatch_t bound = M(1, 4);
+
+static const char *const regex_ac[] =
+ {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
+static const char *const regex_aa[] =
+ {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
+static const char *const data_aa[] = {"_a\0adef", "_abadef"};
+static const regmatch_t results_ac[] =
+ {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
+static const regmatch_t results_aa[] =
+ {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
+static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
+ sizeof(results_ac) / sizeof(*results_ac), "");
+static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
+ sizeof(results_aa) / sizeof(*results_aa), "");
+
+
+static bool
+testbunch (const char *const *regexes, const char *const data[static 2],
+ const regmatch_t *results)
+{
+#define BASEERR(data) \
+ err = true, \
+ fprintf (stdout, __FILE__ ": %s: ", *regexes), \
+ fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+ bool err = false;
+ for (; *regexes; ++regexes, ++results)
+ {
+ regex_t rgx;
+ assert (!regcomp (&rgx, *regexes, 0));
+
+ for (size_t i = 0; i < 2; ++i)
+ {
+ regmatch_t match = bound;
+ if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
+ BASEERR(data), fputs (": no match\n", stdout);
+
+ if (!MEQ(match, *results))
+ BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+ (int)results->rm_so, (int)results->rm_eo,
+ (int)match.rm_so, (int)match.rm_eo);
+ }
+
+ regfree(&rgx);
+ }
+
+ return err;
+}
+
+
+static const char *const mb_data[2] = {"_aaćdef", "_aćdef"};
+static const bool mb_exp[] = {false, true};
+
+static bool
+testmb (void)
+{
+ bool err = false;
+ regex_t rgx;
+ const char *const regexes[] = {"ać"};
+ assert (!regcomp (&rgx, *regexes, 0));
+
+ for (size_t i = 0; i < 2; ++i)
+ {
+ regmatch_t match = bound;
+ if (regexec (&rgx, mb_data[i], 1, &match, REG_STARTEND) == mb_exp[i])
+ BASEERR(mb_data), fprintf (stdout, ": %s match\n",
+ mb_exp[i] ? "no" : "yes");
+
+ if (!MEQ(match, bound))
+ BASEERR(mb_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+ (int)bound.rm_so, (int)bound.rm_eo,
+ (int)match.rm_so, (int)match.rm_eo);
+ }
+
+ regfree(&rgx);
+ return err;
+}
+
+
+static int
+do_test (int argc, char **argv)
+{
+ (void) argc, (void) argv;
+ assert (setlocale (LC_ALL, "C.UTF-8"));
+
+ return testbunch (regex_ac, data_ac, results_ac) ||
+ testbunch (regex_aa, data_aa, results_aa) ||
+ testmb ();
+}
+
+
+#ifndef STANDALONE
+#include "../test-skeleton.c"
+#else
+int
+main(int argc, char **argv)
+{
+ return do_test(argc, argv);
+}
+#endif
--
2.30.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-04-22 11:12 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-22 2:21 [PATCH 1/3] posix: add (failing) test for REG_STARTEND наб
2023-04-22 2:22 ` [PATCH 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
2023-04-22 2:23 ` [PATCH 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
2023-04-22 7:11 ` [PATCH 1/3] posix: add (failing) test for REG_STARTEND Andreas Schwab
2023-04-22 11:12 ` [PATCH v2 " наб
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).