public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] strchr, strchrnul: implement strchr() as strchrnul()
@ 2023-12-05 14:58 James Tirta Halim
  2023-12-05 15:23 ` Corinna Vinschen
  0 siblings, 1 reply; 4+ messages in thread
From: James Tirta Halim @ 2023-12-05 14:58 UTC (permalink / raw)
  To: newlib; +Cc: James Tirta Halim

---
 newlib/libc/string/strchr.c    | 81 +---------------------------------
 newlib/libc/string/strchrnul.c | 77 +++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 81 deletions(-)

diff --git a/newlib/libc/string/strchr.c b/newlib/libc/string/strchr.c
index 96f30be04..382275b1d 100644
--- a/newlib/libc/string/strchr.c
+++ b/newlib/libc/string/strchr.c
@@ -30,87 +30,10 @@ QUICKREF
 #include <string.h>
 #include <limits.h>
 
-/* Nonzero if X is not aligned on a "long" boundary.  */
-#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
-
-/* How many bytes are loaded each iteration of the word copy loop.  */
-#define LBLOCKSIZE (sizeof (long))
-
-#if LONG_MAX == 2147483647L
-#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
-#else
-#if LONG_MAX == 9223372036854775807L
-/* Nonzero if X (a long int) contains a NULL byte. */
-#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
-#else
-#error long int is not a 32bit or 64bit type.
-#endif
-#endif
-
-/* DETECTCHAR returns nonzero if (long)X contains the byte used
-   to fill (long)MASK. */
-#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
-
 char *
 strchr (const char *s1,
 	int i)
 {
-  const unsigned char *s = (const unsigned char *)s1;
-  unsigned char c = i;
-
-#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
-  unsigned long mask,j;
-  unsigned long *aligned_addr;
-
-  /* Special case for finding 0.  */
-  if (!c)
-    {
-      while (UNALIGNED (s))
-        {
-          if (!*s)
-            return (char *) s;
-          s++;
-        }
-      /* Operate a word at a time.  */
-      aligned_addr = (unsigned long *) s;
-      while (!DETECTNULL (*aligned_addr))
-        aligned_addr++;
-      /* Found the end of string.  */
-      s = (const unsigned char *) aligned_addr;
-      while (*s)
-        s++;
-      return (char *) s;
-    }
-
-  /* All other bytes.  Align the pointer, then search a long at a time.  */
-  while (UNALIGNED (s))
-    {
-      if (!*s)
-        return NULL;
-      if (*s == c)
-        return (char *) s;
-      s++;
-    }
-
-  mask = c;
-  for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
-    mask = (mask << j) | mask;
-
-  aligned_addr = (unsigned long *) s;
-  while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
-    aligned_addr++;
-
-  /* The block of bytes currently pointed to by aligned_addr
-     contains either a null or the target char, or both.  We
-     catch it using the bytewise search.  */
-
-  s = (unsigned char *) aligned_addr;
-
-#endif /* not PREFER_SIZE_OVER_SPEED */
-
-  while (*s && *s != c)
-    s++;
-  if (*s == c)
-    return (char *)s;
-  return NULL;
+  s1 = strchrnul(s1, i);
+  return s1 && *s1 ? (char *) s1 : NULL;
 }
diff --git a/newlib/libc/string/strchrnul.c b/newlib/libc/string/strchrnul.c
index f5c3eb25d..69f66db63 100644
--- a/newlib/libc/string/strchrnul.c
+++ b/newlib/libc/string/strchrnul.c
@@ -29,12 +29,85 @@ QUICKREF
 */
 
 #include <string.h>
+#include <limits.h>
+
+/* Nonzero if X is not aligned on a "long" boundary.  */
+#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
+
+/* How many bytes are loaded each iteration of the word copy loop.  */
+#define LBLOCKSIZE (sizeof (long))
+
+#if LONG_MAX == 2147483647L
+#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
+#else
+#if LONG_MAX == 9223372036854775807L
+/* Nonzero if X (a long int) contains a NULL byte. */
+#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
+#else
+#error long int is not a 32bit or 64bit type.
+#endif
+#endif
+
+/* DETECTCHAR returns nonzero if (long)X contains the byte used
+   to fill (long)MASK. */
+#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
 
 char *
 strchrnul (const char *s1,
 	int i)
 {
-  char *s = strchr(s1, i);
+  const unsigned char *s = (const unsigned char *)s1;
+  unsigned char c = i;
+
+#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+  unsigned long mask,j;
+  unsigned long *aligned_addr;
+
+  /* Special case for finding 0.  */
+  if (!c)
+    {
+      while (UNALIGNED (s))
+        {
+          if (!*s)
+            return (char *) s;
+          s++;
+        }
+      /* Operate a word at a time.  */
+      aligned_addr = (unsigned long *) s;
+      while (!DETECTNULL (*aligned_addr))
+        aligned_addr++;
+      /* Found the end of string.  */
+      s = (const unsigned char *) aligned_addr;
+      while (*s)
+        s++;
+      return (char *) s;
+    }
+
+  /* All other bytes.  Align the pointer, then search a long at a time.  */
+  while (UNALIGNED (s))
+    {
+      if (!*s || *s == c)
+        return (char *) s;
+      s++;
+    }
+
+  mask = c;
+  for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
+    mask = (mask << j) | mask;
+
+  aligned_addr = (unsigned long *) s;
+  while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
+    aligned_addr++;
+
+  /* The block of bytes currently pointed to by aligned_addr
+     contains either a null or the target char, or both.  We
+     catch it using the bytewise search.  */
+
+  s = (unsigned char *) aligned_addr;
+
+#endif /* not PREFER_SIZE_OVER_SPEED */
 
-  return s ? s : (char *)s1 + strlen(s1);
+  while (*s && *s != c)
+    s++;
+  return (char *) s;
 }
-- 
2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] strchr, strchrnul: implement strchr() as strchrnul()
  2023-12-05 14:58 [PATCH] strchr, strchrnul: implement strchr() as strchrnul() James Tirta Halim
@ 2023-12-05 15:23 ` Corinna Vinschen
  2023-12-05 15:52   ` James
  0 siblings, 1 reply; 4+ messages in thread
From: Corinna Vinschen @ 2023-12-05 15:23 UTC (permalink / raw)
  To: James Tirta Halim; +Cc: newlib

Why?

Please add a descriptive commit message with the reasoning and the
desired result for this change.


Thanks,
Corinna

On Dec  5 21:58, James Tirta Halim wrote:
> ---
>  newlib/libc/string/strchr.c    | 81 +---------------------------------
>  newlib/libc/string/strchrnul.c | 77 +++++++++++++++++++++++++++++++-
>  2 files changed, 77 insertions(+), 81 deletions(-)
> 
> diff --git a/newlib/libc/string/strchr.c b/newlib/libc/string/strchr.c
> index 96f30be04..382275b1d 100644
> --- a/newlib/libc/string/strchr.c
> +++ b/newlib/libc/string/strchr.c
> @@ -30,87 +30,10 @@ QUICKREF
>  #include <string.h>
>  #include <limits.h>
>  
> -/* Nonzero if X is not aligned on a "long" boundary.  */
> -#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
> -
> -/* How many bytes are loaded each iteration of the word copy loop.  */
> -#define LBLOCKSIZE (sizeof (long))
> -
> -#if LONG_MAX == 2147483647L
> -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
> -#else
> -#if LONG_MAX == 9223372036854775807L
> -/* Nonzero if X (a long int) contains a NULL byte. */
> -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
> -#else
> -#error long int is not a 32bit or 64bit type.
> -#endif
> -#endif
> -
> -/* DETECTCHAR returns nonzero if (long)X contains the byte used
> -   to fill (long)MASK. */
> -#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
> -
>  char *
>  strchr (const char *s1,
>  	int i)
>  {
> -  const unsigned char *s = (const unsigned char *)s1;
> -  unsigned char c = i;
> -
> -#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> -  unsigned long mask,j;
> -  unsigned long *aligned_addr;
> -
> -  /* Special case for finding 0.  */
> -  if (!c)
> -    {
> -      while (UNALIGNED (s))
> -        {
> -          if (!*s)
> -            return (char *) s;
> -          s++;
> -        }
> -      /* Operate a word at a time.  */
> -      aligned_addr = (unsigned long *) s;
> -      while (!DETECTNULL (*aligned_addr))
> -        aligned_addr++;
> -      /* Found the end of string.  */
> -      s = (const unsigned char *) aligned_addr;
> -      while (*s)
> -        s++;
> -      return (char *) s;
> -    }
> -
> -  /* All other bytes.  Align the pointer, then search a long at a time.  */
> -  while (UNALIGNED (s))
> -    {
> -      if (!*s)
> -        return NULL;
> -      if (*s == c)
> -        return (char *) s;
> -      s++;
> -    }
> -
> -  mask = c;
> -  for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
> -    mask = (mask << j) | mask;
> -
> -  aligned_addr = (unsigned long *) s;
> -  while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
> -    aligned_addr++;
> -
> -  /* The block of bytes currently pointed to by aligned_addr
> -     contains either a null or the target char, or both.  We
> -     catch it using the bytewise search.  */
> -
> -  s = (unsigned char *) aligned_addr;
> -
> -#endif /* not PREFER_SIZE_OVER_SPEED */
> -
> -  while (*s && *s != c)
> -    s++;
> -  if (*s == c)
> -    return (char *)s;
> -  return NULL;
> +  s1 = strchrnul(s1, i);
> +  return s1 && *s1 ? (char *) s1 : NULL;
>  }
> diff --git a/newlib/libc/string/strchrnul.c b/newlib/libc/string/strchrnul.c
> index f5c3eb25d..69f66db63 100644
> --- a/newlib/libc/string/strchrnul.c
> +++ b/newlib/libc/string/strchrnul.c
> @@ -29,12 +29,85 @@ QUICKREF
>  */
>  
>  #include <string.h>
> +#include <limits.h>
> +
> +/* Nonzero if X is not aligned on a "long" boundary.  */
> +#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
> +
> +/* How many bytes are loaded each iteration of the word copy loop.  */
> +#define LBLOCKSIZE (sizeof (long))
> +
> +#if LONG_MAX == 2147483647L
> +#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
> +#else
> +#if LONG_MAX == 9223372036854775807L
> +/* Nonzero if X (a long int) contains a NULL byte. */
> +#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
> +#else
> +#error long int is not a 32bit or 64bit type.
> +#endif
> +#endif
> +
> +/* DETECTCHAR returns nonzero if (long)X contains the byte used
> +   to fill (long)MASK. */
> +#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
>  
>  char *
>  strchrnul (const char *s1,
>  	int i)
>  {
> -  char *s = strchr(s1, i);
> +  const unsigned char *s = (const unsigned char *)s1;
> +  unsigned char c = i;
> +
> +#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> +  unsigned long mask,j;
> +  unsigned long *aligned_addr;
> +
> +  /* Special case for finding 0.  */
> +  if (!c)
> +    {
> +      while (UNALIGNED (s))
> +        {
> +          if (!*s)
> +            return (char *) s;
> +          s++;
> +        }
> +      /* Operate a word at a time.  */
> +      aligned_addr = (unsigned long *) s;
> +      while (!DETECTNULL (*aligned_addr))
> +        aligned_addr++;
> +      /* Found the end of string.  */
> +      s = (const unsigned char *) aligned_addr;
> +      while (*s)
> +        s++;
> +      return (char *) s;
> +    }
> +
> +  /* All other bytes.  Align the pointer, then search a long at a time.  */
> +  while (UNALIGNED (s))
> +    {
> +      if (!*s || *s == c)
> +        return (char *) s;
> +      s++;
> +    }
> +
> +  mask = c;
> +  for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
> +    mask = (mask << j) | mask;
> +
> +  aligned_addr = (unsigned long *) s;
> +  while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
> +    aligned_addr++;
> +
> +  /* The block of bytes currently pointed to by aligned_addr
> +     contains either a null or the target char, or both.  We
> +     catch it using the bytewise search.  */
> +
> +  s = (unsigned char *) aligned_addr;
> +
> +#endif /* not PREFER_SIZE_OVER_SPEED */
>  
> -  return s ? s : (char *)s1 + strlen(s1);
> +  while (*s && *s != c)
> +    s++;
> +  return (char *) s;
>  }
> -- 
> 2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] strchr, strchrnul: implement strchr() as strchrnul()
  2023-12-05 15:23 ` Corinna Vinschen
@ 2023-12-05 15:52   ` James
  2023-12-05 16:39     ` Richard Earnshaw
  0 siblings, 1 reply; 4+ messages in thread
From: James @ 2023-12-05 15:52 UTC (permalink / raw)
  To: newlib, James Tirta Halim; +Cc: vinschen

[-- Attachment #1: Type: text/plain, Size: 943 bytes --]

Currently, strchrnul() is implemented as strchr() and falls back to a
strlen() to find the null-terminator if the character is not
found, scanning the string twice. However, strchr() is going to scan the
whole string anyway and discard the pointer to the null-terminator if the
character is not found, returning NULL.

Instead, we can just implement strchr() with discarding strchrnul()'s
pointer to null-terminator returning NULL and by that avoid calling
strlen() in strchrnul() if a character is not found.

I made a typo in the strchr(), it should be:
  return s1 && *s1 ? (char *) s1 : NULL;
which should be:
  return *s1 ? (char *) s1 : NULL;
since strchrnul will never return NULL.

We can avoid the strchrnul() function call in strchr() by implementing it
in a separate header like str-two-way.h and including them in both files.
The same could be done with the strlen() part in strchr() since the
implementations look to be the same.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] strchr, strchrnul: implement strchr() as strchrnul()
  2023-12-05 15:52   ` James
@ 2023-12-05 16:39     ` Richard Earnshaw
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Earnshaw @ 2023-12-05 16:39 UTC (permalink / raw)
  To: James, newlib; +Cc: vinschen



On 05/12/2023 15:52, James wrote:
> Currently, strchrnul() is implemented as strchr() and falls back to a 
> strlen() to find the null-terminator if the character is not 
> found, scanning the string twice. However, strchr() is going to scan the 
> whole string anyway and discard the pointer to the null-terminator if 
> the character is not found, returning NULL.
> 
> Instead, we can just implement strchr() with discarding strchrnul()'s 
> pointer to null-terminator returning NULL and by that avoid calling 
> strlen() in strchrnul() if a character is not found.
> 
> I made a typo in the strchr(), it should be:
>    return s1 && *s1 ? (char *) s1 : NULL;
> which should be:
>    return *s1 ? (char *) s1 : NULL;
> since strchrnul will never return NULL.
> 
> We can avoid the strchrnul() function call in strchr() by implementing 
> it in a separate header like str-two-way.h and including them in both 
> files. The same could be done with the strlen() part in strchr() since 
> the implementations look to be the same.

strchrnul is not standard, it's a GNU extension.  So technically, it 
should be possible to redefine strchrnul to something else and strchr 
should still work as expected.

The other issue is that many ports implement strchr in assembly for 
performance and rely on strchrnul calling that to avoid having too many 
functions implemented in assembly.

So I'm not convinced this is a good idea.

R.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-12-05 16:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-05 14:58 [PATCH] strchr, strchrnul: implement strchr() as strchrnul() James Tirta Halim
2023-12-05 15:23 ` Corinna Vinschen
2023-12-05 15:52   ` James
2023-12-05 16:39     ` Richard Earnshaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).