public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND
@ 2023-05-07 22:56 наб
  2023-05-07 22:56 ` [PATCH v4 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: наб @ 2023-05-07 22:56 UTC (permalink / raw)
  Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 6133 bytes --]

This test passes on NetBSD, the illumos gate, and musl
with https://www.openwall.com/lists/musl/2023/04/20/2;
it's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it currently fails with
  tst-reg-startend.c: ^a: a^@c: no match$
  tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a: abc: no match$
  tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a.c$: a^@c: no match$
  tst-reg-startend.c: ^a.c$: abc: no match$
  tst-reg-startend.c: ^a.*c$: a^@c: no match$
  tst-reg-startend.c: ^a.*c$: abc: no match$
  tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
  tst-reg-startend.c: ^a[^c]c$: abc: no match$
  tst-reg-startend.c: ^a..: a^@c: no match$
  tst-reg-startend.c: ^a..: abc: no match$
  tst-reg-startend.c: ..c: a^@c: no match$

The test may also be compiled stand-alone (-DSTANDALONE)
and on all platforms that have the interface
(hence the macro to initialise regmatch_ts,
 which start with pointer fields on the illumos gate),
for ease of testing and inclusion in other test suites.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
Resending after a week; clean rebase.

 posix/Makefile           |   1 +
 posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 posix/tst-reg-startend.c

diff --git a/posix/Makefile b/posix/Makefile
index cc77e939ad..24aeb781ca 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
   tst-posix_spawn-setsid \
   tst-preadwrite \
   tst-preadwrite64 \
+  tst-reg-startend \
   tst-regcomp-truncated \
   tst-regex \
   tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..c3bfac0359
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,124 @@
+/* Permission to use, copy, modify, and/or distribute this software for any
+   purpose with or without fee is hereby granted.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+
+#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
+#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
+
+static const regmatch_t bound = M(1, 4);
+
+static const char *const regex_ac[] =
+  {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
+static const char *const regex_aa[] =
+  {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
+static const char *const data_aa[] = {"_a\0adef", "_abadef"};
+static const regmatch_t results_ac[] =
+  {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
+static const regmatch_t results_aa[] =
+  {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
+static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
+              sizeof(results_ac) / sizeof(*results_ac), "");
+static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
+              sizeof(results_aa) / sizeof(*results_aa), "");
+
+
+static bool
+testbunch (const char *const *regexes, const char *const data[static 2],
+           const regmatch_t *results)
+{
+#define BASEERR(data)                              \
+  err = true,                                      \
+    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
+    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+  bool err = false;
+  for (; *regexes; ++regexes, ++results)
+    {
+      regex_t rgx;
+      assert (!regcomp (&rgx, *regexes, 0));
+
+      for (size_t i = 0; i < 2; ++i)
+        {
+          regmatch_t match = bound;
+          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
+            BASEERR(data), fputs (": no match\n", stdout);
+
+          if (!MEQ(match, *results))
+            BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                    (int)results->rm_so, (int)results->rm_eo,
+                                    (int)match.rm_so, (int)match.rm_eo);
+        }
+
+      regfree(&rgx);
+    }
+
+  return err;
+}
+
+
+static const char *const mb_data[2] = {"_aaćdef", "_aćdef"};
+static const bool mb_exp[] = {false, true};
+
+static bool
+testmb (void)
+{
+  bool err = false;
+  regex_t rgx;
+  const char *const regexes[] = {"ać"};
+  assert (!regcomp (&rgx, *regexes, 0));
+
+  for (size_t i = 0; i < 2; ++i)
+    {
+      regmatch_t match = bound;
+      if (regexec (&rgx, mb_data[i], 1, &match, REG_STARTEND) == mb_exp[i])
+        BASEERR(mb_data), fprintf (stdout, ": %s match\n",
+                                   mb_exp[i] ? "no" : "yes");
+
+      if (!MEQ(match, bound))
+        BASEERR(mb_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                   (int)bound.rm_so, (int)bound.rm_eo,
+                                   (int)match.rm_so, (int)match.rm_eo);
+    }
+
+  regfree(&rgx);
+  return err;
+}
+
+
+static int
+do_test (int argc, char **argv)
+{
+  (void) argc, (void) argv;
+  assert (setlocale (LC_ALL, "C.UTF-8"));
+
+  return testbunch (regex_ac, data_ac, results_ac) ||
+         testbunch (regex_aa, data_aa, results_aa) ||
+         testmb ();
+}
+
+
+#ifndef STANDALONE
+#include "../test-skeleton.c"
+#else
+int
+main(int argc, char **argv)
+{
+  return do_test(argc, argv);
+}
+#endif
-- 
2.30.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL
  2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
@ 2023-05-07 22:56 ` наб
  2023-05-07 22:56 ` [PATCH v4 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: наб @ 2023-05-07 22:56 UTC (permalink / raw)
  Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1331 bytes --]

The POSIX API always stops at first NUL so there's no change for that.

The BSD REG_STARTEND API, with its explicit range, can include NULs
within that range, and those NULs are matched with . and [^].

Heretofore, for a string of "a\0c", glibc would match "[^q]c", but not
".c". This is both inconsistent and nonconformant to BSD REG_STARTEND.

With this patch, they're identical like you'd expect, and the
  tst-reg-startend.c: ..c: a^@c: no match$
failure is removed.

Another approach would be to remove it from _RE_SYNTAX_POSIX_COMMON,
but it's unclear to me what the custody chain is like for that and what
other regex APIs glibc offers that could be affected by this.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/regcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/posix/regcomp.c b/posix/regcomp.c
index 12650714c0..a928ef6c2d 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -462,7 +462,7 @@ regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
 {
   reg_errcode_t ret;
   reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
-			 : RE_SYNTAX_POSIX_BASIC);
+			 : RE_SYNTAX_POSIX_BASIC) & ~RE_DOT_NOT_NULL;
 
   preg->buffer = NULL;
   preg->allocated = 0;
-- 
2.30.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
  2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
  2023-05-07 22:56 ` [PATCH v4 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
@ 2023-05-07 22:56 ` наб
  2023-05-29 18:11   ` Adhemerval Zanella Netto
  2023-05-29 13:22 ` [PATCH v5 1/3] posix: add (failing) test for REG_STARTEND наб
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: наб @ 2023-05-07 22:56 UTC (permalink / raw)
  Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 3354 bytes --]

re_search_internal () starts with
  /* If initial states with non-begbuf contexts have no elements,
     the regex must be anchored.  If preg->newline_anchor is set,
     we'll never use init_state_nl, so do not check it.  */
  if (dfa->init_state->nodes.nelem == 0
      && dfa->init_state_word->nodes.nelem == 0
      && (dfa->init_state_nl->nodes.nelem == 0
	  || !preg->newline_anchor))
    {
      if (start != 0 && last_start != 0)
        return REG_NOMATCH;
      start = last_start = 0;
    }
and heretofore start and last_start (for example when "abc", {1, 2},
so matching just the "b") were != 0, and the return was taken for a "^b"
regex, which is erroneous.

Fix this by giving re_search_internal (string+rm_so, start=0),
then fixing up the returned matches in an after-pass.

This brings us to compatibility with the BSD spec and implementations.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/regexec.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/posix/regexec.c b/posix/regexec.c
index bd0cd412d0..2ef868e1f6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -187,38 +187,53 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
    string; if REG_NOTEOL is set, then $ does not match at the end.
 
    Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
-   EFLAGS is invalid.  */
+   EFLAGS is invalid.
+
+   If REG_STARTEND, the bounds are
+     [STRING + PMATCH->rm_so, STRING + PMATCH->rm_eo)
+   instead of the usual
+     [STRING, STRING + strlen(STRING)),
+   but returned matches are still referenced to STRING,
+   and matching is unaffected (i.e. "abc", {1, 2} matches regex "^b$").
+   re_search_internal () has a built-in assumption of
+   (start != 0) <=> (^ doesn't match), so give it a truncated view
+   and fix up the matches afterward.  */
 
 int
 regexec (const regex_t *__restrict preg, const char *__restrict string,
 	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 {
   reg_errcode_t err;
-  Idx start, length;
+  Idx startoff = 0, length;
   re_dfa_t *dfa = preg->buffer;
+  size_t i = 0;
 
   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
     return REG_BADPAT;
 
   if (eflags & REG_STARTEND)
     {
-      start = pmatch[0].rm_so;
-      length = pmatch[0].rm_eo;
+      startoff = pmatch[0].rm_so;
+      string += startoff;
+      length = pmatch[0].rm_eo - startoff;
     }
   else
-    {
-      start = 0;
-      length = strlen (string);
-    }
+    length = strlen (string);
 
   lock_lock (dfa->lock);
   if (preg->no_sub)
-    err = re_search_internal (preg, string, length, start, length,
-			      length, 0, NULL, eflags);
-  else
-    err = re_search_internal (preg, string, length, start, length,
-			      length, nmatch, pmatch, eflags);
+    nmatch = 0;
+  err = re_search_internal (preg, string, length, 0, length,
+			    length, nmatch, pmatch, eflags);
   lock_unlock (dfa->lock);
+
+  if (err == REG_NOERROR && startoff)
+    for (i = 0; i < nmatch; ++i)
+      if (pmatch[i].rm_so != -1)
+	{
+	  pmatch[i].rm_so += startoff;
+	  pmatch[i].rm_eo += startoff;
+	}
   return err != REG_NOERROR;
 }
 
-- 
2.30.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v5 1/3] posix: add (failing) test for REG_STARTEND
  2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
  2023-05-07 22:56 ` [PATCH v4 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
  2023-05-07 22:56 ` [PATCH v4 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
@ 2023-05-29 13:22 ` наб
  2023-05-29 13:22 ` [PATCH v5 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: наб @ 2023-05-29 13:22 UTC (permalink / raw)
  Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 6092 bytes --]

This test passes on NetBSD, the illumos gate, and musl
with https://www.openwall.com/lists/musl/2023/04/20/2;
it's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it currently fails with
  tst-reg-startend.c: ^a: a^@c: no match$
  tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a: abc: no match$
  tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a.c$: a^@c: no match$
  tst-reg-startend.c: ^a.c$: abc: no match$
  tst-reg-startend.c: ^a.*c$: a^@c: no match$
  tst-reg-startend.c: ^a.*c$: abc: no match$
  tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
  tst-reg-startend.c: ^a[^c]c$: abc: no match$
  tst-reg-startend.c: ^a..: a^@c: no match$
  tst-reg-startend.c: ^a..: abc: no match$
  tst-reg-startend.c: ..c: a^@c: no match$

The test may also be compiled stand-alone (-DSTANDALONE)
and on all platforms that have the interface
(hence the macro to initialise regmatch_ts,
 which start with pointer fields on the illumos gate),
for ease of testing and inclusion in other test suites.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/Makefile           |   1 +
 posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 posix/tst-reg-startend.c

diff --git a/posix/Makefile b/posix/Makefile
index e19b74cd67..abc0ff1f60 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
   tst-posix_spawn-setsid \
   tst-preadwrite \
   tst-preadwrite64 \
+  tst-reg-startend \
   tst-regcomp-truncated \
   tst-regex \
   tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..c3bfac0359
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,124 @@
+/* Permission to use, copy, modify, and/or distribute this software for any
+   purpose with or without fee is hereby granted.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+
+#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
+#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
+
+static const regmatch_t bound = M(1, 4);
+
+static const char *const regex_ac[] =
+  {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
+static const char *const regex_aa[] =
+  {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
+static const char *const data_aa[] = {"_a\0adef", "_abadef"};
+static const regmatch_t results_ac[] =
+  {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
+static const regmatch_t results_aa[] =
+  {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
+static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
+              sizeof(results_ac) / sizeof(*results_ac), "");
+static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
+              sizeof(results_aa) / sizeof(*results_aa), "");
+
+
+static bool
+testbunch (const char *const *regexes, const char *const data[static 2],
+           const regmatch_t *results)
+{
+#define BASEERR(data)                              \
+  err = true,                                      \
+    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
+    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+  bool err = false;
+  for (; *regexes; ++regexes, ++results)
+    {
+      regex_t rgx;
+      assert (!regcomp (&rgx, *regexes, 0));
+
+      for (size_t i = 0; i < 2; ++i)
+        {
+          regmatch_t match = bound;
+          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
+            BASEERR(data), fputs (": no match\n", stdout);
+
+          if (!MEQ(match, *results))
+            BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                    (int)results->rm_so, (int)results->rm_eo,
+                                    (int)match.rm_so, (int)match.rm_eo);
+        }
+
+      regfree(&rgx);
+    }
+
+  return err;
+}
+
+
+static const char *const mb_data[2] = {"_aaćdef", "_aćdef"};
+static const bool mb_exp[] = {false, true};
+
+static bool
+testmb (void)
+{
+  bool err = false;
+  regex_t rgx;
+  const char *const regexes[] = {"ać"};
+  assert (!regcomp (&rgx, *regexes, 0));
+
+  for (size_t i = 0; i < 2; ++i)
+    {
+      regmatch_t match = bound;
+      if (regexec (&rgx, mb_data[i], 1, &match, REG_STARTEND) == mb_exp[i])
+        BASEERR(mb_data), fprintf (stdout, ": %s match\n",
+                                   mb_exp[i] ? "no" : "yes");
+
+      if (!MEQ(match, bound))
+        BASEERR(mb_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                   (int)bound.rm_so, (int)bound.rm_eo,
+                                   (int)match.rm_so, (int)match.rm_eo);
+    }
+
+  regfree(&rgx);
+  return err;
+}
+
+
+static int
+do_test (int argc, char **argv)
+{
+  (void) argc, (void) argv;
+  assert (setlocale (LC_ALL, "C.UTF-8"));
+
+  return testbunch (regex_ac, data_ac, results_ac) ||
+         testbunch (regex_aa, data_aa, results_aa) ||
+         testmb ();
+}
+
+
+#ifndef STANDALONE
+#include "../test-skeleton.c"
+#else
+int
+main(int argc, char **argv)
+{
+  return do_test(argc, argv);
+}
+#endif
-- 
2.30.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v5 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL
  2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
                   ` (2 preceding siblings ...)
  2023-05-29 13:22 ` [PATCH v5 1/3] posix: add (failing) test for REG_STARTEND наб
@ 2023-05-29 13:22 ` наб
  2023-05-29 13:22 ` [PATCH v5 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
  2023-05-29 17:37 ` [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND Adhemerval Zanella Netto
  5 siblings, 0 replies; 16+ messages in thread
From: наб @ 2023-05-29 13:22 UTC (permalink / raw)
  Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1331 bytes --]

The POSIX API always stops at first NUL so there's no change for that.

The BSD REG_STARTEND API, with its explicit range, can include NULs
within that range, and those NULs are matched with . and [^].

Heretofore, for a string of "a\0c", glibc would match "[^q]c", but not
".c". This is both inconsistent and nonconformant to BSD REG_STARTEND.

With this patch, they're identical like you'd expect, and the
  tst-reg-startend.c: ..c: a^@c: no match$
failure is removed.

Another approach would be to remove it from _RE_SYNTAX_POSIX_COMMON,
but it's unclear to me what the custody chain is like for that and what
other regex APIs glibc offers that could be affected by this.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/regcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/posix/regcomp.c b/posix/regcomp.c
index 12650714c0..a928ef6c2d 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -462,7 +462,7 @@ regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
 {
   reg_errcode_t ret;
   reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
-			 : RE_SYNTAX_POSIX_BASIC);
+			 : RE_SYNTAX_POSIX_BASIC) & ~RE_DOT_NOT_NULL;
 
   preg->buffer = NULL;
   preg->allocated = 0;
-- 
2.30.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v5 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
  2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
                   ` (3 preceding siblings ...)
  2023-05-29 13:22 ` [PATCH v5 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
@ 2023-05-29 13:22 ` наб
  2023-05-29 17:37 ` [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND Adhemerval Zanella Netto
  5 siblings, 0 replies; 16+ messages in thread
From: наб @ 2023-05-29 13:22 UTC (permalink / raw)
  Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 3354 bytes --]

re_search_internal () starts with
  /* If initial states with non-begbuf contexts have no elements,
     the regex must be anchored.  If preg->newline_anchor is set,
     we'll never use init_state_nl, so do not check it.  */
  if (dfa->init_state->nodes.nelem == 0
      && dfa->init_state_word->nodes.nelem == 0
      && (dfa->init_state_nl->nodes.nelem == 0
	  || !preg->newline_anchor))
    {
      if (start != 0 && last_start != 0)
        return REG_NOMATCH;
      start = last_start = 0;
    }
and heretofore start and last_start (for example when "abc", {1, 2},
so matching just the "b") were != 0, and the return was taken for a "^b"
regex, which is erroneous.

Fix this by giving re_search_internal (string+rm_so, start=0),
then fixing up the returned matches in an after-pass.

This brings us to compatibility with the BSD spec and implementations.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/regexec.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/posix/regexec.c b/posix/regexec.c
index bd0cd412d0..2ef868e1f6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -187,38 +187,53 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
    string; if REG_NOTEOL is set, then $ does not match at the end.
 
    Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
-   EFLAGS is invalid.  */
+   EFLAGS is invalid.
+
+   If REG_STARTEND, the bounds are
+     [STRING + PMATCH->rm_so, STRING + PMATCH->rm_eo)
+   instead of the usual
+     [STRING, STRING + strlen(STRING)),
+   but returned matches are still referenced to STRING,
+   and matching is unaffected (i.e. "abc", {1, 2} matches regex "^b$").
+   re_search_internal () has a built-in assumption of
+   (start != 0) <=> (^ doesn't match), so give it a truncated view
+   and fix up the matches afterward.  */
 
 int
 regexec (const regex_t *__restrict preg, const char *__restrict string,
 	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 {
   reg_errcode_t err;
-  Idx start, length;
+  Idx startoff = 0, length;
   re_dfa_t *dfa = preg->buffer;
+  size_t i = 0;
 
   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
     return REG_BADPAT;
 
   if (eflags & REG_STARTEND)
     {
-      start = pmatch[0].rm_so;
-      length = pmatch[0].rm_eo;
+      startoff = pmatch[0].rm_so;
+      string += startoff;
+      length = pmatch[0].rm_eo - startoff;
     }
   else
-    {
-      start = 0;
-      length = strlen (string);
-    }
+    length = strlen (string);
 
   lock_lock (dfa->lock);
   if (preg->no_sub)
-    err = re_search_internal (preg, string, length, start, length,
-			      length, 0, NULL, eflags);
-  else
-    err = re_search_internal (preg, string, length, start, length,
-			      length, nmatch, pmatch, eflags);
+    nmatch = 0;
+  err = re_search_internal (preg, string, length, 0, length,
+			    length, nmatch, pmatch, eflags);
   lock_unlock (dfa->lock);
+
+  if (err == REG_NOERROR && startoff)
+    for (i = 0; i < nmatch; ++i)
+      if (pmatch[i].rm_so != -1)
+	{
+	  pmatch[i].rm_so += startoff;
+	  pmatch[i].rm_eo += startoff;
+	}
   return err != REG_NOERROR;
 }
 
-- 
2.30.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND
  2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
                   ` (4 preceding siblings ...)
  2023-05-29 13:22 ` [PATCH v5 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
@ 2023-05-29 17:37 ` Adhemerval Zanella Netto
  2023-05-29 20:10   ` наб
  5 siblings, 1 reply; 16+ messages in thread
From: Adhemerval Zanella Netto @ 2023-05-29 17:37 UTC (permalink / raw)
  To: наб, Carlos O'Donell; +Cc: libc-alpha



On 07/05/23 19:56, наб via Libc-alpha wrote:
> This test passes on NetBSD, the illumos gate, and musl
> with https://www.openwall.com/lists/musl/2023/04/20/2;
> it's nothing revolutionary and the behaviour it tests
> is largely guaranteed by the 4.4BSD-Lite manual;
> nevertheless, it currently fails with
>   tst-reg-startend.c: ^a: a^@c: no match$
>   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a: abc: no match$
>   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a.c$: a^@c: no match$
>   tst-reg-startend.c: ^a.c$: abc: no match$
>   tst-reg-startend.c: ^a.*c$: a^@c: no match$
>   tst-reg-startend.c: ^a.*c$: abc: no match$
>   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
>   tst-reg-startend.c: ^a[^c]c$: abc: no match$
>   tst-reg-startend.c: ^a..: a^@c: no match$
>   tst-reg-startend.c: ^a..: abc: no match$
>   tst-reg-startend.c: ..c: a^@c: no match$
> 
> The test may also be compiled stand-alone (-DSTANDALONE)
> and on all platforms that have the interface
> (hence the macro to initialise regmatch_ts,
>  which start with pointer fields on the illumos gate),
> for ease of testing and inclusion in other test suites.

Tests that trigger new regressions should either be marked as XFAIL
or, as in this case, be moved after the patch that actually fixes them.

> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
> ---
> Resending after a week; clean rebase.
> 
>  posix/Makefile           |   1 +
>  posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 125 insertions(+)
>  create mode 100644 posix/tst-reg-startend.c
> 
> diff --git a/posix/Makefile b/posix/Makefile
> index cc77e939ad..24aeb781ca 100644
> --- a/posix/Makefile
> +++ b/posix/Makefile
> @@ -295,6 +295,7 @@ tests := \
>    tst-posix_spawn-setsid \
>    tst-preadwrite \
>    tst-preadwrite64 \
> +  tst-reg-startend \
>    tst-regcomp-truncated \
>    tst-regex \
>    tst-regex2 \
> diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
> new file mode 100644
> index 0000000000..c3bfac0359
> --- /dev/null
> +++ b/posix/tst-reg-startend.c
> @@ -0,0 +1,124 @@
> +/* Permission to use, copy, modify, and/or distribute this software for any
> +   purpose with or without fee is hereby granted.
> +
> +   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> +   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> +   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */

I am not sure if we can accept such a license. It is not the one currently
used for newer submissions, including tests (LGPL 2.1).

> +
> +#include <assert.h>
> +#include <locale.h>
> +#include <string.h>
> +#include <regex.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +
> +
> +#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
> +#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
> +
> +static const regmatch_t bound = M(1, 4);
> +
> +static const char *const regex_ac[] =
> +  {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
> +static const char *const regex_aa[] =
> +  {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
> +static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
> +static const char *const data_aa[] = {"_a\0adef", "_abadef"};
> +static const regmatch_t results_ac[] =
> +  {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
> +static const regmatch_t results_aa[] =
> +  {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
> +static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
> +              sizeof(results_ac) / sizeof(*results_ac), "");
> +static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
> +              sizeof(results_aa) / sizeof(*results_aa), "");


Instead of the static_assert, why not put the input arguments and the
expected result in the same struct?

> +
> +
> +static bool
> +testbunch (const char *const *regexes, const char *const data[static 2],
> +           const regmatch_t *results)
> +{
> +#define BASEERR(data)                              \
> +  err = true,                                      \
> +    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
> +    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)


We have macros in support/check.h that already log and handle the required
boilerplate to report test issues.  Newer tests should use them.

> +
> +  bool err = false;
> +  for (; *regexes; ++regexes, ++results)
> +    {
> +      regex_t rgx;
> +      assert (!regcomp (&rgx, *regexes, 0));
> +
> +      for (size_t i = 0; i < 2; ++i)
> +        {
> +          regmatch_t match = bound;
> +          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
> +            BASEERR(data), fputs (": no match\n", stdout);
> +
> +          if (!MEQ(match, *results))
> +            BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                    (int)results->rm_so, (int)results->rm_eo,
> +                                    (int)match.rm_so, (int)match.rm_eo);
> +        }
> +
> +      regfree(&rgx);
> +    }
> +
> +  return err;
> +}
> +
> +
> +static const char *const mb_data[2] = {"_aaćdef", "_aćdef"};
> +static const bool mb_exp[] = {false, true};
> +
> +static bool
> +testmb (void)
> +{
> +  bool err = false;
> +  regex_t rgx;
> +  const char *const regexes[] = {"ać"};
> +  assert (!regcomp (&rgx, *regexes, 0));
> +
> +  for (size_t i = 0; i < 2; ++i)

We have the array_length macro to avoid putting array sizes everywhere (and
it works better if we want to extend the tests).

> +    {
> +      regmatch_t match = bound;
> +      if (regexec (&rgx, mb_data[i], 1, &match, REG_STARTEND) == mb_exp[i])
> +        BASEERR(mb_data), fprintf (stdout, ": %s match\n",
> +                                   mb_exp[i] ? "no" : "yes");
> +
> +      if (!MEQ(match, bound))
> +        BASEERR(mb_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                   (int)bound.rm_so, (int)bound.rm_eo,
> +                                   (int)match.rm_so, (int)match.rm_eo);
> +    }
> +
> +  regfree(&rgx);
> +  return err;
> +}
> +
> +
> +static int
> +do_test (int argc, char **argv)
> +{
> +  (void) argc, (void) argv;

Not really needed here.

> +  assert (setlocale (LC_ALL, "C.UTF-8"));
> +
> +  return testbunch (regex_ac, data_ac, results_ac) ||
> +         testbunch (regex_aa, data_aa, results_aa) ||
> +         testmb ();
> +}
> +
> +
> +#ifndef STANDALONE
> +#include "../test-skeleton.c"

Use #include <support/test-driver.c> instead.

> +#else
> +int
> +main(int argc, char **argv)
> +{
> +  return do_test(argc, argv);
> +}
> +#endif

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
  2023-05-07 22:56 ` [PATCH v4 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
@ 2023-05-29 18:11   ` Adhemerval Zanella Netto
  0 siblings, 0 replies; 16+ messages in thread
From: Adhemerval Zanella Netto @ 2023-05-29 18:11 UTC (permalink / raw)
  To: наб, Paul Eggert; +Cc: libc-alpha

Hey Paul,

Could you take a look at whether this change makes sense? We usually tend to
just sync with gnulib, and you and other gnulib developers seem way more active
working on this code than the glibc community.

On 07/05/23 19:56, наб via Libc-alpha wrote:
> re_search_internal () starts with
>   /* If initial states with non-begbuf contexts have no elements,
>      the regex must be anchored.  If preg->newline_anchor is set,
>      we'll never use init_state_nl, so do not check it.  */
>   if (dfa->init_state->nodes.nelem == 0
>       && dfa->init_state_word->nodes.nelem == 0
>       && (dfa->init_state_nl->nodes.nelem == 0
> 	  || !preg->newline_anchor))
>     {
>       if (start != 0 && last_start != 0)
>         return REG_NOMATCH;
>       start = last_start = 0;
>     }
> and heretofor start and last_start (for example when "abc", {1, 2},
> so matching just the "b") were != 0, and the return was taken for a "^b"
> regex, which is erroneous.
> 
> Fix this by giving re_search_internal (string+rm_so, start=0),
> then fixing up the returned matches in an after-pass.
> 
> This brings us to compatibility with the BSD spec and implementations.
> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
> ---
>  posix/regexec.c | 41 ++++++++++++++++++++++++++++-------------
>  1 file changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/posix/regexec.c b/posix/regexec.c
> index bd0cd412d0..2ef868e1f6 100644
> --- a/posix/regexec.c
> +++ b/posix/regexec.c
> @@ -187,38 +187,53 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
>     string; if REG_NOTEOL is set, then $ does not match at the end.
>  
>     Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
> -   EFLAGS is invalid.  */
> +   EFLAGS is invalid.
> +
> +   If REG_STARTEND, the bounds are
> +     [STRING + PMATCH->rm_so, STRING + PMATCH->rm_eo)
> +   instead of the usual
> +     [STRING, STRING + strlen(STRING)),
> +   but returned matches are still referenced to STRING,
> +   and matching is unaffected (i.e. "abc", {1, 2} matches regex "^b$").
> +   re_search_internal () has a built-in assumption of
> +   (start != 0) <=> (^ doesn't match), so give it a truncated view
> +   and fix up the matches afterward.  */
>  
>  int
>  regexec (const regex_t *__restrict preg, const char *__restrict string,
>  	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
>  {
>    reg_errcode_t err;
> -  Idx start, length;
> +  Idx startoff = 0, length;
>    re_dfa_t *dfa = preg->buffer;
> +  size_t i = 0;
>  
>    if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
>      return REG_BADPAT;
>  
>    if (eflags & REG_STARTEND)
>      {
> -      start = pmatch[0].rm_so;
> -      length = pmatch[0].rm_eo;
> +      startoff = pmatch[0].rm_so;
> +      string += startoff;
> +      length = pmatch[0].rm_eo - startoff;
>      }
>    else
> -    {
> -      start = 0;
> -      length = strlen (string);
> -    }
> +    length = strlen (string);
>  
>    lock_lock (dfa->lock);
>    if (preg->no_sub)
> -    err = re_search_internal (preg, string, length, start, length,
> -			      length, 0, NULL, eflags);
> -  else
> -    err = re_search_internal (preg, string, length, start, length,
> -			      length, nmatch, pmatch, eflags);
> +    nmatch = 0;
> +  err = re_search_internal (preg, string, length, 0, length,
> +			    length, nmatch, pmatch, eflags);
>    lock_unlock (dfa->lock);
> +
> +  if (err == REG_NOERROR && startoff)
> +    for (i = 0; i < nmatch; ++i)
> +      if (pmatch[i].rm_so != -1)
> +	{
> +	  pmatch[i].rm_so += startoff;
> +	  pmatch[i].rm_eo += startoff;
> +	}
>    return err != REG_NOERROR;
>  }
>  

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND
  2023-05-29 17:37 ` [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND Adhemerval Zanella Netto
@ 2023-05-29 20:10   ` наб
  2023-05-29 20:23     ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 16+ messages in thread
From: наб @ 2023-05-29 20:10 UTC (permalink / raw)
  To: Adhemerval Zanella Netto; +Cc: Carlos O'Donell, libc-alpha

[-- Attachment #1: Type: text/plain, Size: 10942 bytes --]

On Mon, May 29, 2023 at 02:37:39PM -0300, Adhemerval Zanella Netto wrote:
> On 07/05/23 19:56, наб via Libc-alpha wrote:
> > This test passes on NetBSD, the illumos gate, and musl
> > with https://www.openwall.com/lists/musl/2023/04/20/2;
> > it's nothing revolutionary and the behaviour it tests
> > is largely guaranteed by the 4.4BSD-Lite manual;
> > nevertheless, it currently fails with
> >   tst-reg-startend.c: ^a: a^@c: no match$
> >   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
> >   tst-reg-startend.c: ^a: abc: no match$
> >   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
> >   tst-reg-startend.c: ^a.c$: a^@c: no match$
> >   tst-reg-startend.c: ^a.c$: abc: no match$
> >   tst-reg-startend.c: ^a.*c$: a^@c: no match$
> >   tst-reg-startend.c: ^a.*c$: abc: no match$
> >   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
> >   tst-reg-startend.c: ^a[^c]c$: abc: no match$
> >   tst-reg-startend.c: ^a..: a^@c: no match$
> >   tst-reg-startend.c: ^a..: abc: no match$
> >   tst-reg-startend.c: ..c: a^@c: no match$
> > 
> > The test may also be compiled stand-alone (-DSTANDALONE)
> > and on all platforms that have the interface
> > (hence the macro to initialise regmatch_ts,
> >  which start with pointer fields on the illumos gate),
> > for ease of testing and inclusion in other test suites.
> Tests that should triggers newer regressions should be either marks as XFAIL,
> or in this case, move after the patch that actually fixes it. 
I've moved it to the end as 3/3 locally.

> > diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
> > new file mode 100644
> > index 0000000000..c3bfac0359
> > --- /dev/null
> > +++ b/posix/tst-reg-startend.c
> > @@ -0,0 +1,124 @@
> > +/* Permission to use, copy, modify, and/or distribute this software for any
> > +   purpose with or without fee is hereby granted.
> > +
> > +   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > +   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > +   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > +   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
> I am not sure if we can accept such license. It not the current one used for
> newer submission, including tests (LGPL 2.1).
There are tests and other glibc code under ISC-style licences already
but sure; I've slapped Unlicense text on this, which appears to be
preferred by the Foundation since I'm an EU citizen
(and the Unlicense is also already used in glibc).

> > +static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
> > +              sizeof(results_ac) / sizeof(*results_ac), "");
> > +static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
> > +              sizeof(results_aa) / sizeof(*results_aa), "");
> Instead of the static_assert, why not add the input arguments and the
> expect result on same struct?
Applied.

> > +static bool
> > +testbunch (const char *const *regexes, const char *const data[static 2],
> > +           const regmatch_t *results)
> > +{
> > +#define BASEERR(data)                              \
> > +  err = true,                                      \
> > +    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
> > +    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
> We have macros that already log and handle the required boilerplate to
> report tests issues on support/check.h.  Newer tests should use it.
I've used those for recording errors, but since the error logging
takes format strings, it doesn't allow arbitrary data to be logged.

> > +  for (size_t i = 0; i < 2; ++i)
> We have array_length macro to avoid putting array sizes everywhere (and they
> work better if we want to extend the tests).
Converted everything into 0-terminated arrays.

> > +static int
> > +do_test (int argc, char **argv)
> > +{
> > +  (void) argc, (void) argv;
> Not really need here.
And forbidden by the new wrapper.

> > +  assert (setlocale (LC_ALL, "C.UTF-8"));
> > +
> > +  return testbunch (regex_ac, data_ac, results_ac) ||
> > +         testbunch (regex_aa, data_aa, results_aa) ||
> > +         testmb ();
> > +}
> > +
> > +
> > +#ifndef STANDALONE
> > +#include "../test-skeleton.c"
> Use #include <support/test-driver.c> instead.
Applied.

v6 scissor-patch follows.

Best,
-- >8 --
Date: Fri, 21 Apr 2023 23:57:16 +0200
Subject: [PATCH v5 3/3] posix: add test for REG_STARTEND

This test passes on NetBSD, the illumos gate, musl with
https://www.openwall.com/lists/musl/2023/05/14/1,
and now glibc.
It's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it used to fail with
  tst-reg-startend.c: ^a: a^@c: no match$
  tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a: abc: no match$
  tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a.c$: a^@c: no match$
  tst-reg-startend.c: ^a.c$: abc: no match$
  tst-reg-startend.c: ^a.*c$: a^@c: no match$
  tst-reg-startend.c: ^a.*c$: abc: no match$
  tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
  tst-reg-startend.c: ^a[^c]c$: abc: no match$
  tst-reg-startend.c: ^a..: a^@c: no match$
  tst-reg-startend.c: ^a..: abc: no match$
  tst-reg-startend.c: ..c: a^@c: no match$

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/Makefile           |   1 +
 posix/tst-reg-startend.c | 142 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 posix/tst-reg-startend.c

diff --git a/posix/Makefile b/posix/Makefile
index e19b74cd67..abc0ff1f60 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
   tst-posix_spawn-setsid \
   tst-preadwrite \
   tst-preadwrite64 \
+  tst-reg-startend \
   tst-regcomp-truncated \
   tst-regex \
   tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..854d430676
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,142 @@
+/* This is free and unencumbered software released into the public domain.
+
+   Anyone is free to copy, modify, publish, use, compile, sell, or
+   distribute this software, either in source code form or as a compiled
+   binary, for any purpose, commercial or non-commercial, and by any
+   means.
+
+   In jurisdictions that recognize copyright laws, the author or authors
+   of this software dedicate any and all copyright interest in the
+   software to the public domain. We make this dedication for the benefit
+   of the public at large and to the detriment of our heirs and
+   successors. We intend this dedication to be an overt act of
+   relinquishment in perpetuity of all present and future rights to this
+   software under copyright law.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.  */
+
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <support/check.h>
+
+
+static const regmatch_t bound = {1, 4};
+
+
+struct reg_res {
+  const char *regex;
+  regmatch_t result;
+};
+static const struct reg_res reg_res_ac[] = {
+  {"^a",       {1, 2}},
+  {"c$",       {3, 4}},
+  {"^a.c$",    {1, 4}},
+  {"^a.*c$",   {1, 4}},
+  {"^a[^c]c$", {1, 4}},
+  {"^a..",     {1, 4}},
+  {"..c",      {1, 4}},
+  {"[^z]c",    {2, 4}},
+  {}
+};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef", NULL};
+
+static const struct reg_res reg_res_aa[] = {
+  {"^",             {1, 1}},
+  {"^a",            {1, 2}},
+  {"a$",            {3, 4}},
+  {"^\\(a\\).\\1$", {1, 4}},
+  {"^a[^a]*" ,      {1, 3}},
+  {}
+};
+static const char *const data_aa[] = {"_a\0adef", "_abadef", NULL};
+
+
+static void
+testbunch (const struct reg_res *reg_reses, const char *const *const data)
+{
+#define BASEERR(data)                                      \
+  support_record_failure (),                               \
+    fprintf (stdout, __FILE__ ": %s: ", reg_reses->regex), \
+    fwrite (data + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+  for (; reg_reses->regex; ++reg_reses)
+    {
+      regex_t rgx;
+      assert (!regcomp (&rgx, reg_reses->regex, 0));
+
+      for (const char *const *dt = data; *dt; ++dt)
+        {
+          regmatch_t match = bound;
+          if (regexec (&rgx, *dt, 1, &match, REG_STARTEND))
+            BASEERR(dt), fputs (": no match\n", stdout);
+
+          if (memcmp(&match, &reg_reses->result, sizeof (regmatch_t)))
+            BASEERR(dt), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                  (int)reg_reses->result.rm_so,
+                                  (int)reg_reses->result.rm_eo,
+                                  (int)match.rm_so, (int)match.rm_eo);
+        }
+
+      regfree(&rgx);
+    }
+}
+
+
+struct mb_data_exp {
+  const char *data;
+  bool exp;
+};
+static const struct mb_data_exp mb_data_exp[] = {
+  {"_aaćdef", false},
+  {"_aćdef", true},
+  {}
+};
+
+static void
+testmb (void)
+{
+  regex_t rgx;
+  const struct reg_res reg_reses[] = {{"ać"}};
+  assert (!regcomp (&rgx, reg_reses->regex, 0));
+
+  for (const struct mb_data_exp *de = mb_data_exp; de->data; ++de)
+    {
+      regmatch_t match = bound;
+      if (regexec (&rgx, de->data, 1, &match, REG_STARTEND) == de->exp)
+        BASEERR(de->data), fprintf (stdout, ": %s match\n",
+                                    de->exp ? "no" : "yes");
+
+      if (memcmp(&match, &bound, sizeof (regmatch_t)))
+        BASEERR(de->data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                    (int)bound.rm_so, (int)bound.rm_eo,
+                                    (int)match.rm_so, (int)match.rm_eo);
+    }
+
+  regfree(&rgx);
+}
+
+
+static int
+do_test (void)
+{
+  assert (setlocale (LC_ALL, "C.UTF-8"));
+
+  testbunch (reg_res_ac, data_ac);
+  testbunch (reg_res_aa, data_aa);
+  testmb ();
+  return 0;
+}
+
+
+#include <support/test-driver.c>
-- 
2.30.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND
  2023-05-29 20:10   ` наб
@ 2023-05-29 20:23     ` Adhemerval Zanella Netto
  2023-06-12  0:47       ` [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
                         ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Adhemerval Zanella Netto @ 2023-05-29 20:23 UTC (permalink / raw)
  To: наб, Carlos O'Donell; +Cc: libc-alpha



On 29/05/23 17:10, наб wrote:
> On Mon, May 29, 2023 at 02:37:39PM -0300, Adhemerval Zanella Netto wrote:
>> On 07/05/23 19:56, наб via Libc-alpha wrote:
>>> This test passes on NetBSD, the illumos gate, and musl
>>> with https://www.openwall.com/lists/musl/2023/04/20/2;
>>> it's nothing revolutionary and the behaviour it tests
>>> is largely guaranteed by the 4.4BSD-Lite manual;
>>> nevertheless, it currently fails with
>>>   tst-reg-startend.c: ^a: a^@c: no match$
>>>   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
>>>   tst-reg-startend.c: ^a: abc: no match$
>>>   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
>>>   tst-reg-startend.c: ^a.c$: a^@c: no match$
>>>   tst-reg-startend.c: ^a.c$: abc: no match$
>>>   tst-reg-startend.c: ^a.*c$: a^@c: no match$
>>>   tst-reg-startend.c: ^a.*c$: abc: no match$
>>>   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
>>>   tst-reg-startend.c: ^a[^c]c$: abc: no match$
>>>   tst-reg-startend.c: ^a..: a^@c: no match$
>>>   tst-reg-startend.c: ^a..: abc: no match$
>>>   tst-reg-startend.c: ..c: a^@c: no match$
>>>
>>> The test may also be compiled stand-alone (-DSTANDALONE)
>>> and on all platforms that have the interface
>>> (hence the macro to initialise regmatch_ts,
>>>  which start with pointer fields on the illumos gate),
>>> for ease of testing and inclusion in other test suites.
>> Tests that should triggers newer regressions should be either marks as XFAIL,
>> or in this case, move after the patch that actually fixes it. 
> I've moved it to the end as 3/3 locally.
> 
>>> diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
>>> new file mode 100644
>>> index 0000000000..c3bfac0359
>>> --- /dev/null
>>> +++ b/posix/tst-reg-startend.c
>>> @@ -0,0 +1,124 @@
>>> +/* Permission to use, copy, modify, and/or distribute this software for any
>>> +   purpose with or without fee is hereby granted.
>>> +
>>> +   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>>> +   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>>> +   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>>> +   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>>> +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>>> +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>>> +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
>> I am not sure if we can accept such license. It not the current one used for
>> newer submission, including tests (LGPL 2.1).
> There are tests and other glibc code under ISC-style licences already
> but sure; I've slapped Unlicense text on this, which appears to be
> preferred by the Foundation since I'm an EU citizen
> (and the Unlicense is also already used in glibc).

Those are usually pretty old tests that were glued together from other projects,
and added before we had a clear definition of how tests should be added.

At least from our current Contribution Checklist [1], new code should follow
LGPL 2.1; but other maintainers can correct me (I am not really an expert in
this area).

[1] https://sourceware.org/glibc/wiki/Contribution%20checklist#Copyright_and_license

> 
>>> +static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
>>> +              sizeof(results_ac) / sizeof(*results_ac), "");
>>> +static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
>>> +              sizeof(results_aa) / sizeof(*results_aa), "");
>> Instead of the static_assert, why not add the input arguments and the
>> expect result on same struct?
> Applied.
> 
>>> +static bool
>>> +testbunch (const char *const *regexes, const char *const data[static 2],
>>> +           const regmatch_t *results)
>>> +{
>>> +#define BASEERR(data)                              \
>>> +  err = true,                                      \
>>> +    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
>>> +    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
>> We have macros that already log and handle the required boilerplate to
>> report tests issues on support/check.h.  Newer tests should use it.
> I've used those for recording errors, but since the error logging
> takes format strings, it don't allow arbitrary data being logged.

Fair enough.

> 
>>> +  for (size_t i = 0; i < 2; ++i)
>> We have array_length macro to avoid putting array sizes everywhere (and they
>> work better if we want to extend the tests).
> Converted everything into 0-terminated arrays.
> 
>>> +static int
>>> +do_test (int argc, char **argv)
>>> +{
>>> +  (void) argc, (void) argv;
>> Not really need here.
> And forbidden by the new wrapper.
> 
>>> +  assert (setlocale (LC_ALL, "C.UTF-8"));
>>> +
>>> +  return testbunch (regex_ac, data_ac, results_ac) ||
>>> +         testbunch (regex_aa, data_aa, results_aa) ||
>>> +         testmb ();
>>> +}
>>> +
>>> +
>>> +#ifndef STANDALONE
>>> +#include "../test-skeleton.c"
>> Use #include <support/test-driver.c> instead.
> Applied.
> 
> v6 scissor-patch follows.

So the only thing I am not really sure about is whether we can accept tests that
do not follow LGPL 2.1.  Carlos, do you know if we have any blocker regarding it?

> 
> Best,
> -- >8 --
> Date: Fri, 21 Apr 2023 23:57:16 +0200
> Subject: [PATCH v5 3/3] posix: add test for REG_STARTEND
> 
> This test passes on NetBSD, the illumos gate, musl with
> https://www.openwall.com/lists/musl/2023/05/14/1,
> and now glibc.
> It's nothing revolutionary and the behaviour it tests
> is largely guaranteed by the 4.4BSD-Lite manual;
> nevertheless, it used to fail with
>   tst-reg-startend.c: ^a: a^@c: no match$
>   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a: abc: no match$
>   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a.c$: a^@c: no match$
>   tst-reg-startend.c: ^a.c$: abc: no match$
>   tst-reg-startend.c: ^a.*c$: a^@c: no match$
>   tst-reg-startend.c: ^a.*c$: abc: no match$
>   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
>   tst-reg-startend.c: ^a[^c]c$: abc: no match$
>   tst-reg-startend.c: ^a..: a^@c: no match$
>   tst-reg-startend.c: ^a..: abc: no match$
>   tst-reg-startend.c: ..c: a^@c: no match$
> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
> ---
>  posix/Makefile           |   1 +
>  posix/tst-reg-startend.c | 142 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 143 insertions(+)
>  create mode 100644 posix/tst-reg-startend.c
> 
> diff --git a/posix/Makefile b/posix/Makefile
> index e19b74cd67..abc0ff1f60 100644
> --- a/posix/Makefile
> +++ b/posix/Makefile
> @@ -295,6 +295,7 @@ tests := \
>    tst-posix_spawn-setsid \
>    tst-preadwrite \
>    tst-preadwrite64 \
> +  tst-reg-startend \
>    tst-regcomp-truncated \
>    tst-regex \
>    tst-regex2 \
> diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
> new file mode 100644
> index 0000000000..854d430676
> --- /dev/null
> +++ b/posix/tst-reg-startend.c
> @@ -0,0 +1,142 @@
> +/* This is free and unencumbered software released into the public domain.
> +
> +   Anyone is free to copy, modify, publish, use, compile, sell, or
> +   distribute this software, either in source code form or as a compiled
> +   binary, for any purpose, commercial or non-commercial, and by any
> +   means.
> +
> +   In jurisdictions that recognize copyright laws, the author or authors
> +   of this software dedicate any and all copyright interest in the
> +   software to the public domain. We make this dedication for the benefit
> +   of the public at large and to the detriment of our heirs and
> +   successors. We intend this dedication to be an overt act of
> +   relinquishment in perpetuity of all present and future rights to this
> +   software under copyright law.
> +
> +   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> +   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> +   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> +   OTHER DEALINGS IN THE SOFTWARE.  */
> +
> +
> +#include <assert.h>
> +#include <locale.h>
> +#include <string.h>
> +#include <regex.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <support/check.h>
> +
> +
> +static const regmatch_t bound = {1, 4};
> +
> +
> +struct reg_res {
> +  const char *regex;
> +  regmatch_t result;
> +};
> +static const struct reg_res reg_res_ac[] = {
> +  {"^a",       {1, 2}},
> +  {"c$",       {3, 4}},
> +  {"^a.c$",    {1, 4}},
> +  {"^a.*c$",   {1, 4}},
> +  {"^a[^c]c$", {1, 4}},
> +  {"^a..",     {1, 4}},
> +  {"..c",      {1, 4}},
> +  {"[^z]c",    {2, 4}},
> +  {}
> +};
> +static const char *const data_ac[] = {"_a\0cdef", "_abcdef", NULL};
> +
> +static const struct reg_res reg_res_aa[] = {
> +  {"^",             {1, 1}},
> +  {"^a",            {1, 2}},
> +  {"a$",            {3, 4}},
> +  {"^\\(a\\).\\1$", {1, 4}},
> +  {"^a[^a]*" ,      {1, 3}},
> +  {}
> +};
> +static const char *const data_aa[] = {"_a\0adef", "_abadef", NULL};
> +
> +
> +static void
> +testbunch (const struct reg_res *reg_reses, const char *const *const data)
> +{
> +#define BASEERR(data)                                      \
> +  support_record_failure (),                               \
> +    fprintf (stdout, __FILE__ ": %s: ", reg_reses->regex), \
> +    fwrite (data + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
> +
> +  for (; reg_reses->regex; ++reg_reses)
> +    {
> +      regex_t rgx;
> +      assert (!regcomp (&rgx, reg_reses->regex, 0));
> +
> +      for (const char *const *dt = data; *dt; ++dt)
> +        {
> +          regmatch_t match = bound;
> +          if (regexec (&rgx, *dt, 1, &match, REG_STARTEND))
> +            BASEERR(dt), fputs (": no match\n", stdout);
> +
> +          if (memcmp(&match, &reg_reses->result, sizeof (regmatch_t)))
> +            BASEERR(dt), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                  (int)reg_reses->result.rm_so,
> +                                  (int)reg_reses->result.rm_eo,
> +                                  (int)match.rm_so, (int)match.rm_eo);
> +        }
> +
> +      regfree(&rgx);
> +    }
> +}
> +
> +
> +struct mb_data_exp {
> +  const char *data;
> +  bool exp;
> +};
> +static const struct mb_data_exp mb_data_exp[] = {
> +  {"_aaćdef", false},
> +  {"_aćdef", true},
> +  {}
> +};
> +
> +static void
> +testmb (void)
> +{
> +  regex_t rgx;
> +  const struct reg_res reg_reses[] = {{"ać"}};
> +  assert (!regcomp (&rgx, reg_reses->regex, 0));
> +
> +  for (const struct mb_data_exp *de = mb_data_exp; de->data; ++de)
> +    {
> +      regmatch_t match = bound;
> +      if (regexec (&rgx, de->data, 1, &match, REG_STARTEND) == de->exp)
> +        BASEERR(de->data), fprintf (stdout, ": %s match\n",
> +                                    de->exp ? "no" : "yes");
> +
> +      if (memcmp(&match, &bound, sizeof (regmatch_t)))
> +        BASEERR(de->data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                    (int)bound.rm_so, (int)bound.rm_eo,
> +                                    (int)match.rm_so, (int)match.rm_eo);
> +    }
> +
> +  regfree(&rgx);
> +}
> +
> +
> +static int
> +do_test (void)
> +{
> +  assert (setlocale (LC_ALL, "C.UTF-8"));
> +
> +  testbunch (reg_res_ac, data_ac);
> +  testbunch (reg_res_aa, data_aa);
> +  testmb ();
> +  return 0;
> +}
> +
> +
> +#include <support/test-driver.c>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL
  2023-05-29 20:23     ` Adhemerval Zanella Netto
@ 2023-06-12  0:47       ` наб
  2023-06-12 13:11         ` Carlos O'Donell
  2023-06-12  0:47       ` [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
  2023-06-12  0:47       ` [PATCH v7 3/3] posix: add test for REG_STARTEND наб
  2 siblings, 1 reply; 16+ messages in thread
From: наб @ 2023-06-12  0:47 UTC (permalink / raw)
  To: Adhemerval Zanella Netto; +Cc: libc-alpha, Carlos O'Donell

[-- Attachment #1: Type: text/plain, Size: 1377 bytes --]

The POSIX API always stops at first NUL so there's no change for that.

The BSD REG_STARTEND API, with its explicit range, can include NULs
within that range, and those NULs are matched with . and [^].

Heretofore, for a string of "a\0c", glibc would match "[^q]c", but not
".c". This is both inconsistent and nonconformant to BSD REG_STARTEND.

With this patch, they're identical like you'd expect, and the
  tst-reg-startend.c: ..c: a^@c: no match$
failure is removed.

Another approach would be to remove it from _RE_SYNTAX_POSIX_COMMON,
but it's unclear to me what the custody chain is like for that and what
other regex APIs glibc offers that could be affected by this.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
No changes (rebased cleanly); full resend.

 posix/regcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/posix/regcomp.c b/posix/regcomp.c
index 12650714c0..a928ef6c2d 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -462,7 +462,7 @@ regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
 {
   reg_errcode_t ret;
   reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
-			 : RE_SYNTAX_POSIX_BASIC);
+			 : RE_SYNTAX_POSIX_BASIC) & ~RE_DOT_NOT_NULL;
 
   preg->buffer = NULL;
   preg->allocated = 0;
-- 
2.39.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
  2023-05-29 20:23     ` Adhemerval Zanella Netto
  2023-06-12  0:47       ` [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
@ 2023-06-12  0:47       ` наб
  2023-06-12 13:11         ` Carlos O'Donell
  2023-06-12  0:47       ` [PATCH v7 3/3] posix: add test for REG_STARTEND наб
  2 siblings, 1 reply; 16+ messages in thread
From: наб @ 2023-06-12  0:47 UTC (permalink / raw)
  To: Adhemerval Zanella Netto; +Cc: libc-alpha, Carlos O'Donell

[-- Attachment #1: Type: text/plain, Size: 3356 bytes --]

re_search_internal () starts with
  /* If initial states with non-begbuf contexts have no elements,
     the regex must be anchored.  If preg->newline_anchor is set,
     we'll never use init_state_nl, so do not check it.  */
  if (dfa->init_state->nodes.nelem == 0
      && dfa->init_state_word->nodes.nelem == 0
      && (dfa->init_state_nl->nodes.nelem == 0
	  || !preg->newline_anchor))
    {
      if (start != 0 && last_start != 0)
        return REG_NOMATCH;
      start = last_start = 0;
    }
and heretofore start and last_start (for example when "abc", {1, 2},
so matching just the "b") were != 0, and the return was taken for a "^b"
regex, which is erroneous.

Fix this by giving re_search_internal (string+rm_so, start=0),
then fixing up the returned matches in an after-pass.

This brings us to compatibility with the BSD spec and implementations.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/regexec.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/posix/regexec.c b/posix/regexec.c
index bd0cd412d0..2ef868e1f6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -187,38 +187,53 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
    string; if REG_NOTEOL is set, then $ does not match at the end.
 
    Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
-   EFLAGS is invalid.  */
+   EFLAGS is invalid.
+
+   If REG_STARTEND, the bounds are
+     [STRING + PMATCH->rm_so, STRING + PMATCH->rm_eo)
+   instead of the usual
+     [STRING, STRING + strlen(STRING)),
+   but returned matches are still referenced to STRING,
+   and matching is unaffected (i.e. "abc", {1, 2} matches regex "^b$").
+   re_search_internal () has a built-in assumption of
+   (start != 0) <=> (^ doesn't match), so give it a truncated view
+   and fix up the matches afterward.  */
 
 int
 regexec (const regex_t *__restrict preg, const char *__restrict string,
 	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 {
   reg_errcode_t err;
-  Idx start, length;
+  Idx startoff = 0, length;
   re_dfa_t *dfa = preg->buffer;
+  size_t i = 0;
 
   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
     return REG_BADPAT;
 
   if (eflags & REG_STARTEND)
     {
-      start = pmatch[0].rm_so;
-      length = pmatch[0].rm_eo;
+      startoff = pmatch[0].rm_so;
+      string += startoff;
+      length = pmatch[0].rm_eo - startoff;
     }
   else
-    {
-      start = 0;
-      length = strlen (string);
-    }
+    length = strlen (string);
 
   lock_lock (dfa->lock);
   if (preg->no_sub)
-    err = re_search_internal (preg, string, length, start, length,
-			      length, 0, NULL, eflags);
-  else
-    err = re_search_internal (preg, string, length, start, length,
-			      length, nmatch, pmatch, eflags);
+    nmatch = 0;
+  err = re_search_internal (preg, string, length, 0, length,
+			    length, nmatch, pmatch, eflags);
   lock_unlock (dfa->lock);
+
+  if (err == REG_NOERROR && startoff)
+    for (i = 0; i < nmatch; ++i)
+      if (pmatch[i].rm_so != -1)
+	{
+	  pmatch[i].rm_so += startoff;
+	  pmatch[i].rm_eo += startoff;
+	}
   return err != REG_NOERROR;
 }
 
-- 
2.39.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v7 3/3] posix: add test for REG_STARTEND
  2023-05-29 20:23     ` Adhemerval Zanella Netto
  2023-06-12  0:47       ` [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
  2023-06-12  0:47       ` [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
@ 2023-06-12  0:47       ` наб
  2 siblings, 0 replies; 16+ messages in thread
From: наб @ 2023-06-12  0:47 UTC (permalink / raw)
  To: Adhemerval Zanella Netto; +Cc: libc-alpha, Carlos O'Donell

[-- Attachment #1: Type: text/plain, Size: 6178 bytes --]

This test passes on NetBSD, the illumos gate, musl with
https://www.openwall.com/lists/musl/2023/05/14/1,
and now glibc.
It's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it used to fail with
  tst-reg-startend.c: ^a: a^@c: no match$
  tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a: abc: no match$
  tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a.c$: a^@c: no match$
  tst-reg-startend.c: ^a.c$: abc: no match$
  tst-reg-startend.c: ^a.*c$: a^@c: no match$
  tst-reg-startend.c: ^a.*c$: abc: no match$
  tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
  tst-reg-startend.c: ^a[^c]c$: abc: no match$
  tst-reg-startend.c: ^a..: a^@c: no match$
  tst-reg-startend.c: ^a..: abc: no match$
  tst-reg-startend.c: ..c: a^@c: no match$

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/Makefile           |   1 +
 posix/tst-reg-startend.c | 142 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 posix/tst-reg-startend.c

diff --git a/posix/Makefile b/posix/Makefile
index e19b74cd67..abc0ff1f60 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
   tst-posix_spawn-setsid \
   tst-preadwrite \
   tst-preadwrite64 \
+  tst-reg-startend \
   tst-regcomp-truncated \
   tst-regex \
   tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..854d430676
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,142 @@
+/* This is free and unencumbered software released into the public domain.
+
+   Anyone is free to copy, modify, publish, use, compile, sell, or
+   distribute this software, either in source code form or as a compiled
+   binary, for any purpose, commercial or non-commercial, and by any
+   means.
+
+   In jurisdictions that recognize copyright laws, the author or authors
+   of this software dedicate any and all copyright interest in the
+   software to the public domain. We make this dedication for the benefit
+   of the public at large and to the detriment of our heirs and
+   successors. We intend this dedication to be an overt act of
+   relinquishment in perpetuity of all present and future rights to this
+   software under copyright law.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.  */
+
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <support/check.h>
+
+
+static const regmatch_t bound = {1, 4};
+
+
+struct reg_res {
+  const char *regex;
+  regmatch_t result;
+};
+static const struct reg_res reg_res_ac[] = {
+  {"^a",       {1, 2}},
+  {"c$",       {3, 4}},
+  {"^a.c$",    {1, 4}},
+  {"^a.*c$",   {1, 4}},
+  {"^a[^c]c$", {1, 4}},
+  {"^a..",     {1, 4}},
+  {"..c",      {1, 4}},
+  {"[^z]c",    {2, 4}},
+  {}
+};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef", NULL};
+
+static const struct reg_res reg_res_aa[] = {
+  {"^",             {1, 1}},
+  {"^a",            {1, 2}},
+  {"a$",            {3, 4}},
+  {"^\\(a\\).\\1$", {1, 4}},
+  {"^a[^a]*" ,      {1, 3}},
+  {}
+};
+static const char *const data_aa[] = {"_a\0adef", "_abadef", NULL};
+
+
+static void
+testbunch (const struct reg_res *reg_reses, const char *const *const data)
+{
+#define BASEERR(data)                                      \
+  support_record_failure (),                               \
+    fprintf (stdout, __FILE__ ": %s: ", reg_reses->regex), \
+    fwrite (data + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+  for (; reg_reses->regex; ++reg_reses)
+    {
+      regex_t rgx;
+      assert (!regcomp (&rgx, reg_reses->regex, 0));
+
+      for (const char *const *dt = data; *dt; ++dt)
+        {
+          regmatch_t match = bound;
+          if (regexec (&rgx, *dt, 1, &match, REG_STARTEND))
+            BASEERR(dt), fputs (": no match\n", stdout);
+
+          if (memcmp(&match, &reg_reses->result, sizeof (regmatch_t)))
+            BASEERR(dt), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                  (int)reg_reses->result.rm_so,
+                                  (int)reg_reses->result.rm_eo,
+                                  (int)match.rm_so, (int)match.rm_eo);
+        }
+
+      regfree(&rgx);
+    }
+}
+
+
+struct mb_data_exp {
+  const char *data;
+  bool exp;
+};
+static const struct mb_data_exp mb_data_exp[] = {
+  {"_aaćdef", false},
+  {"_aćdef", true},
+  {}
+};
+
+static void
+testmb (void)
+{
+  regex_t rgx;
+  const struct reg_res reg_reses[] = {{"ać"}};
+  assert (!regcomp (&rgx, reg_reses->regex, 0));
+
+  for (const struct mb_data_exp *de = mb_data_exp; de->data; ++de)
+    {
+      regmatch_t match = bound;
+      if (regexec (&rgx, de->data, 1, &match, REG_STARTEND) == de->exp)
+        BASEERR(de->data), fprintf (stdout, ": %s match\n",
+                                    de->exp ? "no" : "yes");
+
+      if (memcmp(&match, &bound, sizeof (regmatch_t)))
+        BASEERR(de->data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                    (int)bound.rm_so, (int)bound.rm_eo,
+                                    (int)match.rm_so, (int)match.rm_eo);
+    }
+
+  regfree(&rgx);
+}
+
+
+static int
+do_test (void)
+{
+  assert (setlocale (LC_ALL, "C.UTF-8"));
+
+  testbunch (reg_res_ac, data_ac);
+  testbunch (reg_res_aa, data_aa);
+  testmb ();
+  return 0;
+}
+
+
+#include <support/test-driver.c>
-- 
2.39.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL
  2023-06-12  0:47       ` [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
@ 2023-06-12 13:11         ` Carlos O'Donell
  0 siblings, 0 replies; 16+ messages in thread
From: Carlos O'Donell @ 2023-06-12 13:11 UTC (permalink / raw)
  To: наб, Adhemerval Zanella Netto; +Cc: libc-alpha

On 6/11/23 20:47, наб wrote:
> The POSIX API always stops at first NUL so there's no change for that.
> 
> The BSD REG_STARTEND API, with its explicit range, can include NULs
> within that range, and those NULs are matched with . and [^].
> 
> Heretofore, for a string of "a\0c", glibc would match "[^q]c", but not
> ".c". This is both inconsistent and nonconformant to BSD REG_STARTEND.
> 
> With this patch, they're identical like you'd expect, and the
>   tst-reg-startend.c: ..c: a^@c: no match$
> failure is removed.
> 
> Another approach would be to remove it from _RE_SYNTAX_POSIX_COMMON,
> but it's unclear to me what the custody chain is like for that and what
> other regex APIs glibc offers that could be affected by this.
> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>

These changes are being made to sources shared between gnulib and glibc.

As the files are listed in SHARED-SOURCES we cannot easily accept changes to them
since they should be shared with gnulib.

Would you be willing to disclaim these changes or assign copyright?

> ---
> No changes (rebased cleanly); full resend.
> 
>  posix/regcomp.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/posix/regcomp.c b/posix/regcomp.c
> index 12650714c0..a928ef6c2d 100644
> --- a/posix/regcomp.c
> +++ b/posix/regcomp.c
> @@ -462,7 +462,7 @@ regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
>  {
>    reg_errcode_t ret;
>    reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
> -			 : RE_SYNTAX_POSIX_BASIC);
> +			 : RE_SYNTAX_POSIX_BASIC) & ~RE_DOT_NOT_NULL;
>  
>    preg->buffer = NULL;
>    preg->allocated = 0;

-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
  2023-06-12  0:47       ` [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
@ 2023-06-12 13:11         ` Carlos O'Donell
  2023-06-12 14:03           ` наб
  0 siblings, 1 reply; 16+ messages in thread
From: Carlos O'Donell @ 2023-06-12 13:11 UTC (permalink / raw)
  To: наб, Adhemerval Zanella Netto, Paul Eggert; +Cc: libc-alpha

On 6/11/23 20:47, наб wrote:
> re_search_internal () starts with
>   /* If initial states with non-begbuf contexts have no elements,
>      the regex must be anchored.  If preg->newline_anchor is set,
>      we'll never use init_state_nl, so do not check it.  */
>   if (dfa->init_state->nodes.nelem == 0
>       && dfa->init_state_word->nodes.nelem == 0
>       && (dfa->init_state_nl->nodes.nelem == 0
> 	  || !preg->newline_anchor))
>     {
>       if (start != 0 && last_start != 0)
>         return REG_NOMATCH;
>       start = last_start = 0;
>     }
> and heretofore start and last_start (for example when "abc", {1, 2},
> so matching just the "b") were != 0, and the return was taken for a "^b"
> regex, which is erroneous.
> 
> Fix this by giving re_search_internal (string+rm_so, start=0),
> then fixing up the returned matches in an after-pass.
> 
> This brings us to compatibility with the BSD spec and implementations.
> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>

These changes are being made to sources shared between gnulib and glibc.

As the files are listed in SHARED-SOURCES we cannot easily accept changes to them
via DCO since they should be shared with gnulib which still requires copyright
assignment.

Would you be willing to disclaim these changes or assign copyright?

> ---
>  posix/regexec.c | 41 ++++++++++++++++++++++++++++-------------
>  1 file changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/posix/regexec.c b/posix/regexec.c
> index bd0cd412d0..2ef868e1f6 100644
> --- a/posix/regexec.c
> +++ b/posix/regexec.c
> @@ -187,38 +187,53 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
>     string; if REG_NOTEOL is set, then $ does not match at the end.
>  
>     Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
> -   EFLAGS is invalid.  */
> +   EFLAGS is invalid.
> +
> +   If REG_STARTEND, the bounds are
> +     [STRING + PMATCH->rm_so, STRING + PMATCH->rm_eo)
> +   instead of the usual
> +     [STRING, STRING + strlen(STRING)),
> +   but returned matches are still referenced to STRING,
> +   and matching is unaffected (i.e. "abc", {1, 2} matches regex "^b$").
> +   re_search_internal () has a built-in assumption of
> +   (start != 0) <=> (^ doesn't match), so give it a truncated view
> +   and fix up the matches afterward.  */
>  
>  int
>  regexec (const regex_t *__restrict preg, const char *__restrict string,
>  	 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
>  {
>    reg_errcode_t err;
> -  Idx start, length;
> +  Idx startoff = 0, length;
>    re_dfa_t *dfa = preg->buffer;
> +  size_t i = 0;
>  
>    if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
>      return REG_BADPAT;
>  
>    if (eflags & REG_STARTEND)
>      {
> -      start = pmatch[0].rm_so;
> -      length = pmatch[0].rm_eo;
> +      startoff = pmatch[0].rm_so;
> +      string += startoff;
> +      length = pmatch[0].rm_eo - startoff;
>      }
>    else
> -    {
> -      start = 0;
> -      length = strlen (string);
> -    }
> +    length = strlen (string);
>  
>    lock_lock (dfa->lock);
>    if (preg->no_sub)
> -    err = re_search_internal (preg, string, length, start, length,
> -			      length, 0, NULL, eflags);
> -  else
> -    err = re_search_internal (preg, string, length, start, length,
> -			      length, nmatch, pmatch, eflags);
> +    nmatch = 0;
> +  err = re_search_internal (preg, string, length, 0, length,
> +			    length, nmatch, pmatch, eflags);
>    lock_unlock (dfa->lock);
> +
> +  if (err == REG_NOERROR && startoff)
> +    for (i = 0; i < nmatch; ++i)
> +      if (pmatch[i].rm_so != -1)
> +	{
> +	  pmatch[i].rm_so += startoff;
> +	  pmatch[i].rm_eo += startoff;
> +	}
>    return err != REG_NOERROR;
>  }
>  

-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor
  2023-06-12 13:11         ` Carlos O'Donell
@ 2023-06-12 14:03           ` наб
  0 siblings, 0 replies; 16+ messages in thread
From: наб @ 2023-06-12 14:03 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: Adhemerval Zanella Netto, Paul Eggert, libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1598 bytes --]

On Mon, Jun 12, 2023 at 09:11:54AM -0400, Carlos O'Donell wrote:
> On 6/11/23 20:47, наб wrote:
> > re_search_internal () starts with
> >   /* If initial states with non-begbuf contexts have no elements,
> >      the regex must be anchored.  If preg->newline_anchor is set,
> >      we'll never use init_state_nl, so do not check it.  */
> >   if (dfa->init_state->nodes.nelem == 0
> >       && dfa->init_state_word->nodes.nelem == 0
> >       && (dfa->init_state_nl->nodes.nelem == 0
> > 	  || !preg->newline_anchor))
> >     {
> >       if (start != 0 && last_start != 0)
> >         return REG_NOMATCH;
> >       start = last_start = 0;
> >     }
> > and heretofore start and last_start (for example when "abc", {1, 2},
> > so matching just the "b") were != 0, and the return was taken for a "^b"
> > regex, which is erroneous.
> > 
> > Fix this by giving re_search_internal (string+rm_so, start=0),
> > then fixing up the returned matches in an after-pass.
> > 
> > This brings us to compatibility with the BSD spec and implementations.
> > 
> > Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
> 
> These changes are being made to sources shared between gnulib and glibc.
> 
> As the files are listed in SHARED-SOURCES we cannot easily accept changes to them
> via DCO since they should be shared with gnulib which still requires copyright
> assignment.
> 
> Would you be willing to disclaim these changes or assign copyright?

Quite happy to disclaim all patches here, yeah;
the process itself is unclear to me, however. 

Best,

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-06-12 14:03 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-07 22:56 [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND наб
2023-05-07 22:56 ` [PATCH v4 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
2023-05-07 22:56 ` [PATCH v4 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
2023-05-29 18:11   ` Adhemerval Zanella Netto
2023-05-29 13:22 ` [PATCH v5 1/3] posix: add (failing) test for REG_STARTEND наб
2023-05-29 13:22 ` [PATCH v5 2/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
2023-05-29 13:22 ` [PATCH v5 3/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
2023-05-29 17:37 ` [PATCH v4 1/3] posix: add (failing) test for REG_STARTEND Adhemerval Zanella Netto
2023-05-29 20:10   ` наб
2023-05-29 20:23     ` Adhemerval Zanella Netto
2023-06-12  0:47       ` [PATCH v7 1/3] posix: regcomp(): clear RE_DOT_NOT_NULL наб
2023-06-12 13:11         ` Carlos O'Donell
2023-06-12  0:47       ` [PATCH v7 2/3] posix: regexec(): fix REG_STARTEND, pmatch->rm_so != 0 w/^ anchor наб
2023-06-12 13:11         ` Carlos O'Donell
2023-06-12 14:03           ` наб
2023-06-12  0:47       ` [PATCH v7 3/3] posix: add test for REG_STARTEND наб

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).