public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 0/1] Adding mempbrk() function to glibc
@ 2022-06-13  2:57 brandonfoongyankai
  2022-06-13  2:57 ` [PATCH 1/1] string: Add basic implementation of mempbrk brandonfoongyankai
  2022-06-13 13:29 ` [PATCH 0/1] Adding mempbrk() function to glibc Florian Weimer
  0 siblings, 2 replies; 4+ messages in thread
From: brandonfoongyankai @ 2022-06-13  2:57 UTC (permalink / raw)
  To: libc-alpha; +Cc: Brandon Foong

From: Brandon Foong <brandonfoongyankai@gmail.com>

Hi,

This follows up on a previous thread regarding the addition of mempbrk (https://sourceware.org/pipermail/libc-alpha/2022-January/135013.html). I have added a simple implementation of mempbrk, as well as its corresponding tests. 

Brandon Foong (1):
  string: Add basic implementation of mempbrk

 include/string.h      |   1 +
 string/Makefile       |   2 +
 string/Versions       |   1 +
 string/mempbrk.c      |  56 ++++++++++
 string/string.h       |  15 +++
 string/test-mempbrk.c | 242 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 317 insertions(+)
 create mode 100644 string/mempbrk.c
 create mode 100644 string/test-mempbrk.c

-- 
2.17.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/1] string: Add basic implementation of mempbrk
  2022-06-13  2:57 [PATCH 0/1] Adding mempbrk() function to glibc brandonfoongyankai
@ 2022-06-13  2:57 ` brandonfoongyankai
  2022-06-13 13:29 ` [PATCH 0/1] Adding mempbrk() function to glibc Florian Weimer
  1 sibling, 0 replies; 4+ messages in thread
From: brandonfoongyankai @ 2022-06-13  2:57 UTC (permalink / raw)
  To: libc-alpha; +Cc: Brandon Foong

From: Brandon Foong <brandonfoongyankai@gmail.com>

---
 include/string.h      |   1 +
 string/Makefile       |   2 +
 string/Versions       |   1 +
 string/mempbrk.c      |  56 ++++++++++
 string/string.h       |  15 +++
 string/test-mempbrk.c | 242 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 317 insertions(+)
 create mode 100644 string/mempbrk.c
 create mode 100644 string/test-mempbrk.c

diff --git a/include/string.h b/include/string.h
index 21f641a413..ff84288d65 100644
--- a/include/string.h
+++ b/include/string.h
@@ -151,6 +151,7 @@ libc_hidden_builtin_proto (strrchr)
 libc_hidden_builtin_proto (strspn)
 libc_hidden_builtin_proto (strstr)
 libc_hidden_builtin_proto (ffs)
+libc_hidden_builtin_proto (mempbrk)
 
 #if IS_IN (rtld)
 extern __typeof (__stpcpy) __stpcpy attribute_hidden;
diff --git a/string/Makefile b/string/Makefile
index 641e062bbb..3d8819d9f0 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -114,6 +114,7 @@ routines := \
   swab \
   wordcopy \
   xpg-strerror \
+  mempbrk \
 # routines
 
 tests := \
@@ -183,6 +184,7 @@ tests := \
   tst-svc \
   tst-svc2 \
   tst-xbzero-opt \
+  test-mempbrk \
 # tests
 
 # Both tests require the .mo translation files generated by msgfmt.
diff --git a/string/Versions b/string/Versions
index 864c4cf7a4..1f071a1da5 100644
--- a/string/Versions
+++ b/string/Versions
@@ -91,5 +91,6 @@ libc {
   }
   GLIBC_2.35 {
     __memcmpeq;
+    mempbrk;
   }
 }
diff --git a/string/mempbrk.c b/string/mempbrk.c
new file mode 100644
index 0000000000..575409b1a2
--- /dev/null
+++ b/string/mempbrk.c
@@ -0,0 +1,56 @@
+/* Copyright (C) 1991-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+#include <stdint.h>
+
+#undef mempbrk
+
+#ifndef MEMPBRK
+# define MEMPBRK mempbrk
+#endif
+
+/* Find the first occurrence of any character in ACCEPT
+   in the first N bytes of S. */
+void *
+MEMPBRK (const void *s, const char *accept, size_t n)
+{
+  const unsigned char *t;
+
+  if (__glibc_unlikely (accept[0] == '\0'))
+    return NULL;
+
+  if (__glibc_unlikely (accept[1] == '\0'))
+    return memchr (s, accept[0], n);
+
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  for (t = (unsigned char*) accept; *t; ++t)
+    p[*t] = 1;
+
+  for (t = (unsigned char*) s; n > 0; --n, ++t)
+    if (p[*t])
+      return (void *) t;
+
+  return NULL;
+}
+libc_hidden_builtin_def (mempbrk)
diff --git a/string/string.h b/string/string.h
index 54dd8344de..24badfef50 100644
--- a/string/string.h
+++ b/string/string.h
@@ -136,6 +136,21 @@ extern void *memrchr (const void *__s, int __c, size_t __n)
 # endif
 #endif
 
+/* Find the first occurrence of any character in ACCEPT
+   in the first N bytes of S. */
+#ifdef __CORRECT_ISO_CPP_STRING_H_PROTO
+extern "C++"
+{
+extern void *mempbrk (void *__s, const char *__accept, size_t __n)
+     __THROW __asm ("mempbrk") __attribute_pure__ __nonnull ((1, 2));
+extern const void *mempbrk (const void *__s, const char *__accept, size_t __n)
+     __THROW __asm ("mempbrk") __attribute_pure__ __nonnull ((1, 2));
+}
+#else
+extern void *mempbrk (const void *__s, const char *__accept, size_t __n)
+     __THROW __attribute_pure__ __nonnull ((1, 2));
+#endif
+
 
 /* Copy SRC to DEST.  */
 extern char *strcpy (char *__restrict __dest, const char *__restrict __src)
diff --git a/string/test-mempbrk.c b/string/test-mempbrk.c
new file mode 100644
index 0000000000..8d50039952
--- /dev/null
+++ b/string/test-mempbrk.c
@@ -0,0 +1,242 @@
+/* Test and measure strpbrk functions.
+   Copyright (C) 1999-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef WIDE
+# define CHAR char
+# define UCHAR unsigned char
+# define STRLEN strlen
+# define STRCHR strchr
+# define BIG_CHAR CHAR_MAX
+# define SMALL_CHAR 127
+#else
+# include <wchar.h>
+# define CHAR wchar_t
+# define UCHAR wchar_t
+# define STRLEN wcslen
+# define STRCHR wcschr
+# define BIG_CHAR WCHAR_MAX
+# define SMALL_CHAR 1273
+#endif /* WIDE */
+
+#ifndef MEMPBRK_RESULT
+# define TEST_MAIN
+# ifndef WIDE
+#  define TEST_NAME "mempbrk"
+# else
+#  define TEST_NAME "wmempbrk"
+# endif /* WIDE */
+# include "test-string.h"
+
+# ifndef WIDE
+#  define MEMPBRK mempbrk
+#  define SIMPLE_MEMPBRK simple_mempbrk
+# else
+#  include <wchar.h>
+#  define MEMPBRK wmempbrk
+#  define SIMPLE_MEMPBRK simple_wmempbrk
+# endif /* WIDE */
+
+typedef void *(*proto_t) (const void *, const CHAR *, size_t);
+
+IMPL (MEMPBRK, 1)
+
+/* Naive implementation to verify results.  */
+
+CHAR *
+SIMPLE_MEMPBRK (const void *s, const CHAR *rej, size_t n)
+{
+  const CHAR *r;
+  CHAR c;
+
+  while (c = *(char *) s++, n--)
+    for (r = rej; *r != '\0'; ++r)
+      if (*r == c)
+       return (void *) s - 1;
+  return NULL;
+}
+
+#endif /* !MEMPBRK_RESULT */
+
+static void
+do_one_test (impl_t *impl, const void *s, const CHAR *acc, size_t n, void * exp_res)
+{
+  void * res = CALL (impl, s, acc, n);
+  if (res != exp_res)
+    {
+      error (0, 0, "Wrong result in function %s %p %p", impl->name,
+	      res, exp_res);
+      ret = 1;
+      return;
+    }
+}
+
+static void
+do_test (size_t align, size_t pos, size_t len)
+{
+  size_t i;
+  int c, byte_exists;
+  void * result;
+  CHAR *rej, *s;
+
+  align &= 7;
+  if ((align + pos + 10) * sizeof (CHAR) >= page_size || len > 240)
+    return;
+
+  rej = (CHAR *) (buf2) + (random () & 255);
+  s = (CHAR *) (buf1) + align;
+
+  for (i = 0; i < len; ++i)
+    {
+      rej[i] = random () & BIG_CHAR;
+      if (!rej[i])
+	rej[i] = random () & BIG_CHAR;
+      if (!rej[i])
+	rej[i] = 1 + (random () & SMALL_CHAR);
+    }
+  rej[len] = '\0';
+  for (c = 1; c <= BIG_CHAR; ++c)
+    if (STRCHR (rej, c) == NULL)
+      break;
+
+  for (i = 0; i < pos + 10; ++i)
+    {
+      s[i] = random () & BIG_CHAR;
+      if (STRCHR (rej, s[i]))
+	{
+	  s[i] = random () & BIG_CHAR;
+	  if (STRCHR (rej, s[i]))
+	    s[i] = c;
+	}
+    }
+  byte_exists = (random() & 1) && len;
+  s[pos] = byte_exists ? rej[random () % len] : s[pos];
+  result = byte_exists ? s + pos : NULL;
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s, rej, pos + 10, result);
+}
+
+static void
+do_random_tests (void)
+{
+  size_t i, j, n, align, pos, len, rlen;
+  void * result;
+  int c, byte_exists;
+  UCHAR *p = (UCHAR *) (buf1 + page_size) - 512;
+  UCHAR *rej;
+
+  for (n = 0; n < ITERATIONS; n++)
+    {
+      align = random () & 15;
+      pos = random () & 511;
+      if (pos + align >= 511)
+	pos = 510 - align - (random () & 7);
+      len = random () & 511;
+      if (pos >= len && (random () & 1))
+	len = pos + 1 + (random () & 7);
+      if (len + align >= 512)
+	len = 511 - align - (random () & 7);
+      if (random () & 1)
+	rlen = random () & 63;
+      else
+	rlen = random () & 15;
+      rej = (UCHAR *) (buf2 + page_size) - rlen - 1 - (random () & 7);
+      for (i = 0; i < rlen; ++i)
+	{
+	  rej[i] = random () & BIG_CHAR;
+	  if (!rej[i])
+	    rej[i] = random () & BIG_CHAR;
+	  if (!rej[i])
+	    rej[i] = 1 + (random () & SMALL_CHAR);
+	}
+      rej[i] = '\0';
+      for (c = 1; c <= BIG_CHAR; ++c)
+	if (STRCHR ((CHAR *) rej, c) == NULL)
+	  break;
+      j = (pos > len ? pos : len) + align + 64;
+      if (j > 512)
+	j = 512;
+
+      byte_exists = (random() & 1) && rlen;
+      for (i = 0; i < j; i++)
+	{
+	  if (i == pos + align)
+	    p[i] = byte_exists ? rej[random () % rlen] : '\0';
+	  else if (i < align || i > len + align)
+	    p[i] = random () & BIG_CHAR;
+	  else
+	    {
+	      p[i] = random () & BIG_CHAR;
+	      if (STRCHR ((CHAR *) rej, p[i]))
+		{
+		  p[i] = random () & BIG_CHAR;
+		  if (STRCHR ((CHAR *) rej, p[i]))
+		    p[i] = c;
+		}
+	    }
+	}
+
+      result = pos < len && byte_exists ? (CHAR *) p + align + pos : NULL;
+
+      FOR_EACH_IMPL (impl, 1)
+	if (CALL (impl, (CHAR *) p + align, (CHAR *) rej, len) != result)
+	  {
+	    error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %p, %zd, %zd, %zd) %p != %p",
+		   n, impl->name, align, rej, rlen, pos, len,
+		   (void *) CALL (impl, (CHAR *) (p + align), (CHAR *) rej, len),
+		   (void *) result);
+	    ret = 1;
+	  }
+    }
+}
+
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  printf ("%32s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  for (i = 0; i < 32; ++i)
+    {
+      do_test (0, 512, i);
+      do_test (i, 512, i);
+    }
+
+  for (i = 1; i < 8; ++i)
+    {
+      do_test (0, 16 << i, 4);
+      do_test (i, 16 << i, 4);
+    }
+
+  for (i = 1; i < 8; ++i)
+    do_test (i, 64, 10);
+
+  for (i = 0; i < 64; ++i)
+    do_test (0, i, 6);
+
+  do_random_tests ();
+  return ret;
+}
+
+#include <support/test-driver.c>
-- 
2.17.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/1] Adding mempbrk() function to glibc
  2022-06-13  2:57 [PATCH 0/1] Adding mempbrk() function to glibc brandonfoongyankai
  2022-06-13  2:57 ` [PATCH 1/1] string: Add basic implementation of mempbrk brandonfoongyankai
@ 2022-06-13 13:29 ` Florian Weimer
  2022-06-15 22:23   ` Jeff Law
  1 sibling, 1 reply; 4+ messages in thread
From: Florian Weimer @ 2022-06-13 13:29 UTC (permalink / raw)
  To: brandonfoongyankai--- via Libc-alpha; +Cc: brandonfoongyankai

* brandonfoongyankai:

> From: Brandon Foong <brandonfoongyankai@gmail.com>
>
> Hi,
>
> This follows up on a previous thread regarding the addition of mempbrk (https://sourceware.org/pipermail/libc-alpha/2022-January/135013.html). I have added a simple implementation of mempbrk, as well as its corresponding tests. 

Andrew Pinski's advice is that this should be fixed in the compiler or
libstdc++:

| Really the issue is:
|   _8 = __builtin_memchr ("eE", _7, 2);
|   if (_8 != 0B)
| 
| 
| Should just be expanded to _7 == 'e' || _7 == 'E' .
| 
| That is:
| int f(char a)
| {
|    return  __builtin_memchr ("e", a, 1) != 0;
| }
| int f1(char a)
| {
|    return a == 'e';
| }
| int g(char a)
| {
|    return  __builtin_memchr ("eE", a, 2) != 0;
| }
| int g1(char a)
| {
|    return a == 'e' || a == 'E';
| }
| 
| f and f1 should produce the same assembly
| and g and g1 should produce the same (similar) assembly.

<https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103798>

The challenge with mempbrk (and similar functions in general) is that
the setup cost is so high.  The interface is just isn't very good.  It
works for the programmer, but the compiler-generated code and the
library implementing it should likely use a different interface.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/1] Adding mempbrk() function to glibc
  2022-06-13 13:29 ` [PATCH 0/1] Adding mempbrk() function to glibc Florian Weimer
@ 2022-06-15 22:23   ` Jeff Law
  0 siblings, 0 replies; 4+ messages in thread
From: Jeff Law @ 2022-06-15 22:23 UTC (permalink / raw)
  To: libc-alpha



On 6/13/2022 7:29 AM, Florian Weimer via Libc-alpha wrote:
> * brandonfoongyankai:
>
>> From: Brandon Foong <brandonfoongyankai@gmail.com>
>>
>> Hi,
>>
>> This follows up on a previous thread regarding the addition of mempbrk (https://sourceware.org/pipermail/libc-alpha/2022-January/135013.html). I have added a simple implementation of mempbrk, as well as its corresponding tests.
> Andrew Pinski's advice is that this should be fixed in the compiler or
> libstdc++:
>
> | Really the issue is:
> |   _8 = __builtin_memchr ("eE", _7, 2);
> |   if (_8 != 0B)
> |
> |
> | Should just be expanded to _7 == 'e' || _7 == 'E' .
> |
> | That is:
> | int f(char a)
> | {
> |    return  __builtin_memchr ("e", a, 1) != 0;
> | }
> | int f1(char a)
> | {
> |    return a == 'e';
> | }
> | int g(char a)
> | {
> |    return  __builtin_memchr ("eE", a, 2) != 0;
> | }
> | int g1(char a)
> | {
> |    return a == 'e' || a == 'E';
> | }
> |
> | f and f1 should produce the same assembly
> | and g and g1 should produce the same (similar) assembly.
>
> <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103798>
Yea, I'd tend to agree with Andrew on this.  Paying the call overhead 
for something this trivial is crazy.

jeff


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-06-15 22:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-13  2:57 [PATCH 0/1] Adding mempbrk() function to glibc brandonfoongyankai
2022-06-13  2:57 ` [PATCH 1/1] string: Add basic implementation of mempbrk brandonfoongyankai
2022-06-13 13:29 ` [PATCH 0/1] Adding mempbrk() function to glibc Florian Weimer
2022-06-15 22:23   ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).