public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v3] Improve strtok(_r) performance
@ 2016-12-13 13:20 Wilco Dijkstra
  2016-12-14 14:07 ` Adhemerval Zanella
  2016-12-14 17:43 ` Gabriel F. T. Gomes
  0 siblings, 2 replies; 7+ messages in thread
From: Wilco Dijkstra @ 2016-12-13 13:20 UTC (permalink / raw)
  To: libc-alpha; +Cc: nd

Version 3 fixes a minor style issue in bench-strtok.c.

Improve strtok and strtok_r performance.  Instead of calling strpbrk which
calls strcspn, call strcspn directly so we get the end of the token without
an extra call to rawmemchr.  Also avoid an unnecessary call to strcspn after
the last token by adding an early exit for an empty string.  Change strtok
to tailcall strtok_r to avoid unnecessary code duplication.

Remove the special header optimization for strtok_r of a 1-character
constant string - both strspn and strcspn contain optimizations for this
case.  Benchmarking this showed similar performance in the worst case,
but up to 5.5x better performance in the "found" case for large inputs.

Passes regression tests, OK for commit?

ChangeLog:
2015-12-13  Wilco Dijkstra  <wdijkstr@arm.com>

	* benchtests/bench-strtok.c (oldstrtok): Add old implementation.
	* string/strtok.c (strtok): Change to tailcall __strtok_r.
	* string/strtok_r.c (__strtok_r): Optimize for performance.
	* string/string-inlines.c (__old_strtok_r_1c): New function.
	* string/bits/string2.h (__strtok_r): Move to string-inlines.c
--
diff --git a/benchtests/bench-strtok.c b/benchtests/bench-strtok.c
index eeb798f01575c712b958ad1b7d5f88f91e158202..41e0e45db818428a3f3a940fa1a70ca3850e84b3 100644
--- a/benchtests/bench-strtok.c
+++ b/benchtests/bench-strtok.c
@@ -20,13 +20,41 @@
 #define TEST_NAME "strtok"
 #include "bench-string.h"
 
-#define STRTOK strtok_string
-#include <string/strtok.c>
+char *
+oldstrtok (char *s, const char *delim)
+{
+  static char *olds;
+  char *token;
+
+  if (s == NULL)
+    s = olds;
+
+  /* Scan leading delimiters.  */
+  s += strspn (s, delim);
+  if (*s == '\0')
+    {
+      olds = s;
+      return NULL;
+    }
 
+  /* Find the end of the token.  */
+  token = s;
+  s = strpbrk (token, delim);
+  if (s == NULL)
+    /* This token finishes the string.  */
+    olds = __rawmemchr (token, '\0');
+  else
+    {
+      /* Terminate the token and make OLDS point past it.  */
+      *s = '\0';
+      olds = s + 1;
+    }
+  return token;
+}
 
 typedef char *(*proto_t) (const char *, const char *);
 
-IMPL (strtok_string, 0)
+IMPL (oldstrtok, 0)
 IMPL (strtok, 1)
 
 static void
diff --git a/string/bits/string2.h b/string/bits/string2.h
index ca1eda9bd127aadb01ab5c929d16621357ddc6e6..e39d4f1a85c25a4f47418e6a5613b27177ca6cbb 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -180,45 +180,6 @@ extern void *__rawmemchr (const void *__s, int __c);
 #endif
 
 
-#if !defined _HAVE_STRING_ARCH_strtok_r || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strtok_r
-#  define __strtok_r(s, sep, nextp) \
-  (__extension__ (__builtin_constant_p (sep) && __string2_1bptr_p (sep)	      \
-		  && ((const char *) (sep))[0] != '\0'			      \
-		  && ((const char *) (sep))[1] == '\0'			      \
-		  ? __strtok_r_1c (s, ((const char *) (sep))[0], nextp)       \
-		  : __strtok_r (s, sep, nextp)))
-# endif
-
-__STRING_INLINE char *__strtok_r_1c (char *__s, char __sep, char **__nextp);
-__STRING_INLINE char *
-__strtok_r_1c (char *__s, char __sep, char **__nextp)
-{
-  char *__result;
-  if (__s == NULL)
-    __s = *__nextp;
-  while (*__s == __sep)
-    ++__s;
-  __result = NULL;
-  if (*__s != '\0')
-    {
-      __result = __s++;
-      while (*__s != '\0')
-	if (*__s++ == __sep)
-	  {
-	    __s[-1] = '\0';
-	    break;
-	  }
-    }
-  *__nextp = __s;
-  return __result;
-}
-# ifdef __USE_POSIX
-#  define strtok_r(s, sep, nextp) __strtok_r (s, sep, nextp)
-# endif
-#endif
-
-
 #if !defined _HAVE_STRING_ARCH_strsep || defined _FORCE_INLINES
 # ifndef _HAVE_STRING_ARCH_strsep
 
diff --git a/string/string-inlines.c b/string/string-inlines.c
index 1091468519e1561ac2a4e9c3ed6eb75ee9fdf43f..d43e5897c37430e5f97940469d65e7ddbdcbd09c 100644
--- a/string/string-inlines.c
+++ b/string/string-inlines.c
@@ -35,6 +35,36 @@
 
 #include "shlib-compat.h"
 
+#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_25)
+/* The inline functions are not used from GLIBC 2.25 and forward, however
+   they are required to provide the symbols through string-inlines.c
+   (if inlining is not possible for compatibility reasons).  */
+
+char *
+__old_strtok_r_1c (char *__s, char __sep, char **__nextp)
+{
+  char *__result;
+  if (__s == NULL)
+    __s = *__nextp;
+  while (*__s == __sep)
+    ++__s;
+  __result = NULL;
+  if (*__s != '\0')
+    {
+      __result = __s++;
+      while (*__s != '\0')
+	if (*__s++ == __sep)
+	  {
+	    __s[-1] = '\0';
+	    break;
+	  }
+    }
+  *__nextp = __s;
+  return __result;
+}
+compat_symbol (libc, __old_strtok_r_1c, __strtok_r_1c, GLIBC_2_1_1);
+#endif
+
 #if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
 /* The inline functions are not used from GLIBC 2.24 and forward, however
    they are required to provide the symbols through string-inlines.c
diff --git a/string/strtok.c b/string/strtok.c
index 7a4574db5c80501e47d045ad4347e8a287b32191..482cdc1da45a71173080b6eff3857e863b5977ea 100644
--- a/string/strtok.c
+++ b/string/strtok.c
@@ -18,14 +18,6 @@
 #include <string.h>
 
 
-static char *olds;
-
-#undef strtok
-
-#ifndef STRTOK
-# define STRTOK strtok
-#endif
-
 /* Parse S into tokens separated by characters in DELIM.
    If S is NULL, the last string strtok() was called with is
    used.  For example:
@@ -36,32 +28,8 @@ static char *olds;
 		// s = "abc\0=-def\0"
 */
 char *
-STRTOK (char *s, const char *delim)
+strtok (char *s, const char *delim)
 {
-  char *token;
-
-  if (s == NULL)
-    s = olds;
-
-  /* Scan leading delimiters.  */
-  s += strspn (s, delim);
-  if (*s == '\0')
-    {
-      olds = s;
-      return NULL;
-    }
-
-  /* Find the end of the token.  */
-  token = s;
-  s = strpbrk (token, delim);
-  if (s == NULL)
-    /* This token finishes the string.  */
-    olds = __rawmemchr (token, '\0');
-  else
-    {
-      /* Terminate the token and make OLDS point past it.  */
-      *s = '\0';
-      olds = s + 1;
-    }
-  return token;
+  static char *olds;
+  return __strtok_r (s, delim, &olds);
 }
diff --git a/string/strtok_r.c b/string/strtok_r.c
index f351304766108dad2c1cff881ad3bebae821b2a0..2d251f90d79b6c546e80e1d25c03955ea8dad92b 100644
--- a/string/strtok_r.c
+++ b/string/strtok_r.c
@@ -22,14 +22,10 @@
 
 #include <string.h>
 
-#undef strtok_r
-#undef __strtok_r
-
 #ifndef _LIBC
 /* Get specification.  */
 # include "strtok_r.h"
 # define __strtok_r strtok_r
-# define __rawmemchr strchr
 #endif
 
 /* Parse S into tokens separated by characters in DELIM.
@@ -45,11 +41,17 @@
 char *
 __strtok_r (char *s, const char *delim, char **save_ptr)
 {
-  char *token;
+  char *end;
 
   if (s == NULL)
     s = *save_ptr;
 
+  if (*s == '\0')
+    {
+      *save_ptr = s;
+      return NULL;
+    }
+
   /* Scan leading delimiters.  */
   s += strspn (s, delim);
   if (*s == '\0')
@@ -59,18 +61,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr)
     }
 
   /* Find the end of the token.  */
-  token = s;
-  s = strpbrk (token, delim);
-  if (s == NULL)
-    /* This token finishes the string.  */
-    *save_ptr = __rawmemchr (token, '\0');
-  else
+  end = s + strcspn (s, delim);
+  if (*end == '\0')
     {
-      /* Terminate the token and make *SAVE_PTR point past it.  */
-      *s = '\0';
-      *save_ptr = s + 1;
+      *save_ptr = end;
+      return s;
     }
-  return token;
+
+  /* Terminate the token and make *SAVE_PTR point past it.  */
+  *end = '\0';
+  *save_ptr = end + 1;
+  return s;
 }
 #ifdef weak_alias
 libc_hidden_def (__strtok_r)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] Improve strtok(_r) performance
  2016-12-13 13:20 [PATCH v3] Improve strtok(_r) performance Wilco Dijkstra
@ 2016-12-14 14:07 ` Adhemerval Zanella
  2016-12-14 17:43 ` Gabriel F. T. Gomes
  1 sibling, 0 replies; 7+ messages in thread
From: Adhemerval Zanella @ 2016-12-14 14:07 UTC (permalink / raw)
  To: Wilco Dijkstra, libc-alpha; +Cc: nd

LGTM.

On 13/12/2016 11:20, Wilco Dijkstra wrote:
> Version 3 fixes a minor style issue in bench-strtok.c.
> 
> Improve strtok and strtok_r performance.  Instead of calling strpbrk which
> calls strcspn, call strcspn directly so we get the end of the token without
> an extra call to rawmemchr.  Also avoid an unnecessary call to strcspn after
> the last token by adding an early exit for an empty string.  Change strtok
> to tailcall strtok_r to avoid unnecessary code duplication.
> 
> Remove the special header optimization for strtok_r of a 1-character
> constant string - both strspn and strcspn contain optimizations for this
> case.  Benchmarking this showed similar performance in the worst case,
> but up to 5.5x better performance in the "found" case for large inputs.
> 
> Passes regression tests, OK for commit?
> 
> ChangeLog:
> 2015-12-13  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* benchtests/bench-strtok.c (oldstrtok): Add old implementation.
> 	* string/strtok.c (strtok): Change to tailcall __strtok_r.
> 	* string/strtok_r.c (__strtok_r): Optimize for performance.
> 	* string/string-inlines.c (__old_strtok_r_1c): New function.
> 	* string/bits/string2.h (__strtok_r): Move to string-inlines.c
> --
> diff --git a/benchtests/bench-strtok.c b/benchtests/bench-strtok.c
> index eeb798f01575c712b958ad1b7d5f88f91e158202..41e0e45db818428a3f3a940fa1a70ca3850e84b3 100644
> --- a/benchtests/bench-strtok.c
> +++ b/benchtests/bench-strtok.c
> @@ -20,13 +20,41 @@
>  #define TEST_NAME "strtok"
>  #include "bench-string.h"
>  
> -#define STRTOK strtok_string
> -#include <string/strtok.c>
> +char *
> +oldstrtok (char *s, const char *delim)
> +{
> +  static char *olds;
> +  char *token;
> +
> +  if (s == NULL)
> +    s = olds;
> +
> +  /* Scan leading delimiters.  */
> +  s += strspn (s, delim);
> +  if (*s == '\0')
> +    {
> +      olds = s;
> +      return NULL;
> +    }
>  
> +  /* Find the end of the token.  */
> +  token = s;
> +  s = strpbrk (token, delim);
> +  if (s == NULL)
> +    /* This token finishes the string.  */
> +    olds = __rawmemchr (token, '\0');
> +  else
> +    {
> +      /* Terminate the token and make OLDS point past it.  */
> +      *s = '\0';
> +      olds = s + 1;
> +    }
> +  return token;
> +}
>  
>  typedef char *(*proto_t) (const char *, const char *);
>  
> -IMPL (strtok_string, 0)
> +IMPL (oldstrtok, 0)
>  IMPL (strtok, 1)
>  
>  static void
> diff --git a/string/bits/string2.h b/string/bits/string2.h
> index ca1eda9bd127aadb01ab5c929d16621357ddc6e6..e39d4f1a85c25a4f47418e6a5613b27177ca6cbb 100644
> --- a/string/bits/string2.h
> +++ b/string/bits/string2.h
> @@ -180,45 +180,6 @@ extern void *__rawmemchr (const void *__s, int __c);
>  #endif
>  
>  
> -#if !defined _HAVE_STRING_ARCH_strtok_r || defined _FORCE_INLINES
> -# ifndef _HAVE_STRING_ARCH_strtok_r
> -#  define __strtok_r(s, sep, nextp) \
> -  (__extension__ (__builtin_constant_p (sep) && __string2_1bptr_p (sep)	      \
> -		  && ((const char *) (sep))[0] != '\0'			      \
> -		  && ((const char *) (sep))[1] == '\0'			      \
> -		  ? __strtok_r_1c (s, ((const char *) (sep))[0], nextp)       \
> -		  : __strtok_r (s, sep, nextp)))
> -# endif
> -
> -__STRING_INLINE char *__strtok_r_1c (char *__s, char __sep, char **__nextp);
> -__STRING_INLINE char *
> -__strtok_r_1c (char *__s, char __sep, char **__nextp)
> -{
> -  char *__result;
> -  if (__s == NULL)
> -    __s = *__nextp;
> -  while (*__s == __sep)
> -    ++__s;
> -  __result = NULL;
> -  if (*__s != '\0')
> -    {
> -      __result = __s++;
> -      while (*__s != '\0')
> -	if (*__s++ == __sep)
> -	  {
> -	    __s[-1] = '\0';
> -	    break;
> -	  }
> -    }
> -  *__nextp = __s;
> -  return __result;
> -}
> -# ifdef __USE_POSIX
> -#  define strtok_r(s, sep, nextp) __strtok_r (s, sep, nextp)
> -# endif
> -#endif
> -
> -
>  #if !defined _HAVE_STRING_ARCH_strsep || defined _FORCE_INLINES
>  # ifndef _HAVE_STRING_ARCH_strsep
>  
> diff --git a/string/string-inlines.c b/string/string-inlines.c
> index 1091468519e1561ac2a4e9c3ed6eb75ee9fdf43f..d43e5897c37430e5f97940469d65e7ddbdcbd09c 100644
> --- a/string/string-inlines.c
> +++ b/string/string-inlines.c
> @@ -35,6 +35,36 @@
>  
>  #include "shlib-compat.h"
>  
> +#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_25)
> +/* The inline functions are not used from GLIBC 2.25 and forward, however
> +   they are required to provide the symbols through string-inlines.c
> +   (if inlining is not possible for compatibility reasons).  */
> +
> +char *
> +__old_strtok_r_1c (char *__s, char __sep, char **__nextp)
> +{
> +  char *__result;
> +  if (__s == NULL)
> +    __s = *__nextp;
> +  while (*__s == __sep)
> +    ++__s;
> +  __result = NULL;
> +  if (*__s != '\0')
> +    {
> +      __result = __s++;
> +      while (*__s != '\0')
> +	if (*__s++ == __sep)
> +	  {
> +	    __s[-1] = '\0';
> +	    break;
> +	  }
> +    }
> +  *__nextp = __s;
> +  return __result;
> +}
> +compat_symbol (libc, __old_strtok_r_1c, __strtok_r_1c, GLIBC_2_1_1);
> +#endif
> +
>  #if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
>  /* The inline functions are not used from GLIBC 2.24 and forward, however
>     they are required to provide the symbols through string-inlines.c
> diff --git a/string/strtok.c b/string/strtok.c
> index 7a4574db5c80501e47d045ad4347e8a287b32191..482cdc1da45a71173080b6eff3857e863b5977ea 100644
> --- a/string/strtok.c
> +++ b/string/strtok.c
> @@ -18,14 +18,6 @@
>  #include <string.h>
>  
>  
> -static char *olds;
> -
> -#undef strtok
> -
> -#ifndef STRTOK
> -# define STRTOK strtok
> -#endif
> -
>  /* Parse S into tokens separated by characters in DELIM.
>     If S is NULL, the last string strtok() was called with is
>     used.  For example:
> @@ -36,32 +28,8 @@ static char *olds;
>  		// s = "abc\0=-def\0"
>  */
>  char *
> -STRTOK (char *s, const char *delim)
> +strtok (char *s, const char *delim)
>  {
> -  char *token;
> -
> -  if (s == NULL)
> -    s = olds;
> -
> -  /* Scan leading delimiters.  */
> -  s += strspn (s, delim);
> -  if (*s == '\0')
> -    {
> -      olds = s;
> -      return NULL;
> -    }
> -
> -  /* Find the end of the token.  */
> -  token = s;
> -  s = strpbrk (token, delim);
> -  if (s == NULL)
> -    /* This token finishes the string.  */
> -    olds = __rawmemchr (token, '\0');
> -  else
> -    {
> -      /* Terminate the token and make OLDS point past it.  */
> -      *s = '\0';
> -      olds = s + 1;
> -    }
> -  return token;
> +  static char *olds;
> +  return __strtok_r (s, delim, &olds);
>  }
> diff --git a/string/strtok_r.c b/string/strtok_r.c
> index f351304766108dad2c1cff881ad3bebae821b2a0..2d251f90d79b6c546e80e1d25c03955ea8dad92b 100644
> --- a/string/strtok_r.c
> +++ b/string/strtok_r.c
> @@ -22,14 +22,10 @@
>  
>  #include <string.h>
>  
> -#undef strtok_r
> -#undef __strtok_r
> -
>  #ifndef _LIBC
>  /* Get specification.  */
>  # include "strtok_r.h"
>  # define __strtok_r strtok_r
> -# define __rawmemchr strchr
>  #endif
>  
>  /* Parse S into tokens separated by characters in DELIM.
> @@ -45,11 +41,17 @@
>  char *
>  __strtok_r (char *s, const char *delim, char **save_ptr)
>  {
> -  char *token;
> +  char *end;
>  
>    if (s == NULL)
>      s = *save_ptr;
>  
> +  if (*s == '\0')
> +    {
> +      *save_ptr = s;
> +      return NULL;
> +    }
> +
>    /* Scan leading delimiters.  */
>    s += strspn (s, delim);
>    if (*s == '\0')
> @@ -59,18 +61,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr)
>      }
>  
>    /* Find the end of the token.  */
> -  token = s;
> -  s = strpbrk (token, delim);
> -  if (s == NULL)
> -    /* This token finishes the string.  */
> -    *save_ptr = __rawmemchr (token, '\0');
> -  else
> +  end = s + strcspn (s, delim);
> +  if (*end == '\0')
>      {
> -      /* Terminate the token and make *SAVE_PTR point past it.  */
> -      *s = '\0';
> -      *save_ptr = s + 1;
> +      *save_ptr = end;
> +      return s;
>      }
> -  return token;
> +
> +  /* Terminate the token and make *SAVE_PTR point past it.  */
> +  *end = '\0';
> +  *save_ptr = end + 1;
> +  return s;
>  }
>  #ifdef weak_alias
>  libc_hidden_def (__strtok_r)
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] Improve strtok(_r) performance
  2016-12-13 13:20 [PATCH v3] Improve strtok(_r) performance Wilco Dijkstra
  2016-12-14 14:07 ` Adhemerval Zanella
@ 2016-12-14 17:43 ` Gabriel F. T. Gomes
  2016-12-14 18:04   ` Wilco Dijkstra
  1 sibling, 1 reply; 7+ messages in thread
From: Gabriel F. T. Gomes @ 2016-12-14 17:43 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: libc-alpha, nd

Could this have broken string/test-rawmemchr ?

I am seeing several lines like this:

string/test-rawmemchr: Iteration 108 - wrong result in function __rawmemchr_ppc (10, 65, 140, 5) (nil) != 0x3fffa2effe0f, p 0x3fffa2effe00

On Tue, 13 Dec 2016 13:20:20 +0000
Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:

> Version 3 fixes a minor style issue in bench-strtok.c.
> 
> Improve strtok and strtok_r performance.  Instead of calling strpbrk which
> calls strcspn, call strcspn directly so we get the end of the token without
> an extra call to rawmemchr.  Also avoid an unnecessary call to strcspn after
> the last token by adding an early exit for an empty string.  Change strtok
> to tailcall strtok_r to avoid unnecessary code duplication.
> 
> Remove the special header optimization for strtok_r of a 1-character
> constant string - both strspn and strcspn contain optimizations for this
> case.  Benchmarking this showed similar performance in the worst case,
> but up to 5.5x better performance in the "found" case for large inputs.
> 
> Passes regression tests, OK for commit?
> 
> ChangeLog:
> 2015-12-13  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* benchtests/bench-strtok.c (oldstrtok): Add old implementation.
> 	* string/strtok.c (strtok): Change to tailcall __strtok_r.
> 	* string/strtok_r.c (__strtok_r): Optimize for performance.
> 	* string/string-inlines.c (__old_strtok_r_1c): New function.
> 	* string/bits/string2.h (__strtok_r): Move to string-inlines.c
> --
> diff --git a/benchtests/bench-strtok.c b/benchtests/bench-strtok.c
> index eeb798f01575c712b958ad1b7d5f88f91e158202..41e0e45db818428a3f3a940fa1a70ca3850e84b3 100644
> --- a/benchtests/bench-strtok.c
> +++ b/benchtests/bench-strtok.c
> @@ -20,13 +20,41 @@
>  #define TEST_NAME "strtok"
>  #include "bench-string.h"
>  
> -#define STRTOK strtok_string
> -#include <string/strtok.c>
> +char *
> +oldstrtok (char *s, const char *delim)
> +{
> +  static char *olds;
> +  char *token;
> +
> +  if (s == NULL)
> +    s = olds;
> +
> +  /* Scan leading delimiters.  */
> +  s += strspn (s, delim);
> +  if (*s == '\0')
> +    {
> +      olds = s;
> +      return NULL;
> +    }
>  
> +  /* Find the end of the token.  */
> +  token = s;
> +  s = strpbrk (token, delim);
> +  if (s == NULL)
> +    /* This token finishes the string.  */
> +    olds = __rawmemchr (token, '\0');
> +  else
> +    {
> +      /* Terminate the token and make OLDS point past it.  */
> +      *s = '\0';
> +      olds = s + 1;
> +    }
> +  return token;
> +}
>  
>  typedef char *(*proto_t) (const char *, const char *);
>  
> -IMPL (strtok_string, 0)
> +IMPL (oldstrtok, 0)
>  IMPL (strtok, 1)
>  
>  static void
> diff --git a/string/bits/string2.h b/string/bits/string2.h
> index ca1eda9bd127aadb01ab5c929d16621357ddc6e6..e39d4f1a85c25a4f47418e6a5613b27177ca6cbb 100644
> --- a/string/bits/string2.h
> +++ b/string/bits/string2.h
> @@ -180,45 +180,6 @@ extern void *__rawmemchr (const void *__s, int __c);
>  #endif
>  
>  
> -#if !defined _HAVE_STRING_ARCH_strtok_r || defined _FORCE_INLINES
> -# ifndef _HAVE_STRING_ARCH_strtok_r
> -#  define __strtok_r(s, sep, nextp) \
> -  (__extension__ (__builtin_constant_p (sep) && __string2_1bptr_p (sep)	      \
> -		  && ((const char *) (sep))[0] != '\0'			      \
> -		  && ((const char *) (sep))[1] == '\0'			      \
> -		  ? __strtok_r_1c (s, ((const char *) (sep))[0], nextp)       \
> -		  : __strtok_r (s, sep, nextp)))
> -# endif
> -
> -__STRING_INLINE char *__strtok_r_1c (char *__s, char __sep, char **__nextp);
> -__STRING_INLINE char *
> -__strtok_r_1c (char *__s, char __sep, char **__nextp)
> -{
> -  char *__result;
> -  if (__s == NULL)
> -    __s = *__nextp;
> -  while (*__s == __sep)
> -    ++__s;
> -  __result = NULL;
> -  if (*__s != '\0')
> -    {
> -      __result = __s++;
> -      while (*__s != '\0')
> -	if (*__s++ == __sep)
> -	  {
> -	    __s[-1] = '\0';
> -	    break;
> -	  }
> -    }
> -  *__nextp = __s;
> -  return __result;
> -}
> -# ifdef __USE_POSIX
> -#  define strtok_r(s, sep, nextp) __strtok_r (s, sep, nextp)
> -# endif
> -#endif
> -
> -
>  #if !defined _HAVE_STRING_ARCH_strsep || defined _FORCE_INLINES
>  # ifndef _HAVE_STRING_ARCH_strsep
>  
> diff --git a/string/string-inlines.c b/string/string-inlines.c
> index 1091468519e1561ac2a4e9c3ed6eb75ee9fdf43f..d43e5897c37430e5f97940469d65e7ddbdcbd09c 100644
> --- a/string/string-inlines.c
> +++ b/string/string-inlines.c
> @@ -35,6 +35,36 @@
>  
>  #include "shlib-compat.h"
>  
> +#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_25)
> +/* The inline functions are not used from GLIBC 2.25 and forward, however
> +   they are required to provide the symbols through string-inlines.c
> +   (if inlining is not possible for compatibility reasons).  */
> +
> +char *
> +__old_strtok_r_1c (char *__s, char __sep, char **__nextp)
> +{
> +  char *__result;
> +  if (__s == NULL)
> +    __s = *__nextp;
> +  while (*__s == __sep)
> +    ++__s;
> +  __result = NULL;
> +  if (*__s != '\0')
> +    {
> +      __result = __s++;
> +      while (*__s != '\0')
> +	if (*__s++ == __sep)
> +	  {
> +	    __s[-1] = '\0';
> +	    break;
> +	  }
> +    }
> +  *__nextp = __s;
> +  return __result;
> +}
> +compat_symbol (libc, __old_strtok_r_1c, __strtok_r_1c, GLIBC_2_1_1);
> +#endif
> +
>  #if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
>  /* The inline functions are not used from GLIBC 2.24 and forward, however
>     they are required to provide the symbols through string-inlines.c
> diff --git a/string/strtok.c b/string/strtok.c
> index 7a4574db5c80501e47d045ad4347e8a287b32191..482cdc1da45a71173080b6eff3857e863b5977ea 100644
> --- a/string/strtok.c
> +++ b/string/strtok.c
> @@ -18,14 +18,6 @@
>  #include <string.h>
>  
>  
> -static char *olds;
> -
> -#undef strtok
> -
> -#ifndef STRTOK
> -# define STRTOK strtok
> -#endif
> -
>  /* Parse S into tokens separated by characters in DELIM.
>     If S is NULL, the last string strtok() was called with is
>     used.  For example:
> @@ -36,32 +28,8 @@ static char *olds;
>  		// s = "abc\0=-def\0"
>  */
>  char *
> -STRTOK (char *s, const char *delim)
> +strtok (char *s, const char *delim)
>  {
> -  char *token;
> -
> -  if (s == NULL)
> -    s = olds;
> -
> -  /* Scan leading delimiters.  */
> -  s += strspn (s, delim);
> -  if (*s == '\0')
> -    {
> -      olds = s;
> -      return NULL;
> -    }
> -
> -  /* Find the end of the token.  */
> -  token = s;
> -  s = strpbrk (token, delim);
> -  if (s == NULL)
> -    /* This token finishes the string.  */
> -    olds = __rawmemchr (token, '\0');
> -  else
> -    {
> -      /* Terminate the token and make OLDS point past it.  */
> -      *s = '\0';
> -      olds = s + 1;
> -    }
> -  return token;
> +  static char *olds;
> +  return __strtok_r (s, delim, &olds);
>  }
> diff --git a/string/strtok_r.c b/string/strtok_r.c
> index f351304766108dad2c1cff881ad3bebae821b2a0..2d251f90d79b6c546e80e1d25c03955ea8dad92b 100644
> --- a/string/strtok_r.c
> +++ b/string/strtok_r.c
> @@ -22,14 +22,10 @@
>  
>  #include <string.h>
>  
> -#undef strtok_r
> -#undef __strtok_r
> -
>  #ifndef _LIBC
>  /* Get specification.  */
>  # include "strtok_r.h"
>  # define __strtok_r strtok_r
> -# define __rawmemchr strchr
>  #endif
>  
>  /* Parse S into tokens separated by characters in DELIM.
> @@ -45,11 +41,17 @@
>  char *
>  __strtok_r (char *s, const char *delim, char **save_ptr)
>  {
> -  char *token;
> +  char *end;
>  
>    if (s == NULL)
>      s = *save_ptr;
>  
> +  if (*s == '\0')
> +    {
> +      *save_ptr = s;
> +      return NULL;
> +    }
> +
>    /* Scan leading delimiters.  */
>    s += strspn (s, delim);
>    if (*s == '\0')
> @@ -59,18 +61,17 @@ __strtok_r (char *s, const char *delim, char **save_ptr)
>      }
>  
>    /* Find the end of the token.  */
> -  token = s;
> -  s = strpbrk (token, delim);
> -  if (s == NULL)
> -    /* This token finishes the string.  */
> -    *save_ptr = __rawmemchr (token, '\0');
> -  else
> +  end = s + strcspn (s, delim);
> +  if (*end == '\0')
>      {
> -      /* Terminate the token and make *SAVE_PTR point past it.  */
> -      *s = '\0';
> -      *save_ptr = s + 1;
> +      *save_ptr = end;
> +      return s;
>      }
> -  return token;
> +
> +  /* Terminate the token and make *SAVE_PTR point past it.  */
> +  *end = '\0';
> +  *save_ptr = end + 1;
> +  return s;
>  }
>  #ifdef weak_alias
>  libc_hidden_def (__strtok_r)
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] Improve strtok(_r) performance
  2016-12-14 17:43 ` Gabriel F. T. Gomes
@ 2016-12-14 18:04   ` Wilco Dijkstra
  2016-12-14 18:25     ` Wilco Dijkstra
  0 siblings, 1 reply; 7+ messages in thread
From: Wilco Dijkstra @ 2016-12-14 18:04 UTC (permalink / raw)
  To: Gabriel F. T. Gomes; +Cc: libc-alpha, nd

Gabriel F. T. Gomes wrote:
>    
> Could this have broken string/test-rawmemchr ?

Well the test passes for me, even if I disable AArch64 rawmemchr.S.

> I am seeing several lines like this:
>
> string/test-rawmemchr: Iteration 108 - wrong result in function __rawmemchr_ppc (10, 65, 140, 5) (nil) != 0x3fffa2effe0f, p 0x3fffa2effe00

Since rawmemchr now defers to memchr, could there be an issue in memchr? It seems to return NULL, which is not possible.

Wilco

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] Improve strtok(_r) performance
  2016-12-14 18:04   ` Wilco Dijkstra
@ 2016-12-14 18:25     ` Wilco Dijkstra
  2016-12-14 19:26       ` Adhemerval Zanella
  0 siblings, 1 reply; 7+ messages in thread
From: Wilco Dijkstra @ 2016-12-14 18:25 UTC (permalink / raw)
  To: Gabriel F. T. Gomes; +Cc: libc-alpha, nd

Hi,

In powerpc64/power7/memchr.S this bit looks suspicious:

L(done):
#ifdef __LITTLE_ENDIAN__
        addi    r0,r3,-1
        andc    r0,r0,r3
        popcntd r0,r0         /* Count trailing zeros.  */
#else
        cntlzd  r0,r3         /* Count leading zeros before the match.  */
#endif
        cmpld   r8,r7         /* Are we on the last dword?  */
        srdi    r0,r0,3       /* Convert leading/trailing zeros to bytes.  */
        add     r3,r8,r0
        cmpld   cr7,r0,r5     /* If on the last dword, check byte offset.  */
        bnelr
        blelr   cr7
        li      r3,0
        blr

When the size is the maximum and the input pointer is at offset 2 or larger within an 8-byte aligned address, r7 == r8, and it will return NULL if it matches in the first 8 bytes since the match appears to be after the end pointer. When it matches later, r7 != r8 and it works.

Wilco


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] Improve strtok(_r) performance
  2016-12-14 18:25     ` Wilco Dijkstra
@ 2016-12-14 19:26       ` Adhemerval Zanella
  2016-12-14 19:43         ` Adhemerval Zanella
  0 siblings, 1 reply; 7+ messages in thread
From: Adhemerval Zanella @ 2016-12-14 19:26 UTC (permalink / raw)
  To: libc-alpha



On 14/12/2016 16:25, Wilco Dijkstra wrote:
> Hi,
> 
> In powerpc64/power7/memchr.S this bit looks suspicious:
> 
> L(done):
> #ifdef __LITTLE_ENDIAN__
>         addi    r0,r3,-1
>         andc    r0,r0,r3
>         popcntd r0,r0         /* Count trailing zeros.  */
> #else
>         cntlzd  r0,r3         /* Count leading zeros before the match.  */
> #endif
>         cmpld   r8,r7         /* Are we on the last dword?  */
>         srdi    r0,r0,3       /* Convert leading/trailing zeros to bytes.  */
>         add     r3,r8,r0
>         cmpld   cr7,r0,r5     /* If on the last dword, check byte offset.  */
>         bnelr
>         blelr   cr7
>         li      r3,0
>         blr
> 
> When the size is the maximum and the input pointer is at offset 2 or larger within an 8-byte aligned address, r7 == r8, and it will return NULL if it matches in the first 8 bytes since the match appears to be after the end pointer. When it matches later, r7 != r8 and it works.

I think this snippet is ok, the problem is at r7 calculation where it should
handle overflow.  This simple test triggers the issue:

#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>

void *
my_rawmemchr (const void *s, int c)
{ 
  if (c != '\0')
    return memchr (s, c, (size_t)-1);
  return (char *)s + strlen (s);
}

int main ()
{
  // p=0x3fffb057fe00 | aling=10
  int seek_char = 0x41;
  size_t align = 10;
  unsigned char input [32];
  input[10] = 0x34;
  input[11] = 0x78;
  input[12] = 0x3d;
  input[13] = 0x7b;
  input[14] = 0xa1;
  input[15] = seek_char;

  printf ("%p\n", my_rawmemchr (input+align, seek_char));
  printf ("%p\n", rawmemchr (input+align, seek_char));
  return 0;
}

On POWER7 memchr.S:

 24 ENTRY (__memchr)
 25         CALL_MCOUNT 3
 26         dcbt    0,r3
 27         clrrdi  r8,r3,3
 28         insrdi  r4,r4,8,48
 29         add     r7,r3,r5      /* Calculate the last acceptable address.  */

And using the example above:

(gdb) i r r3 r5
r3             0x3fffffffeab2   70368744172210
r5             0xffffffffffffffff       18446744073709551615
(gdb) ni
30      in ../sysdeps/powerpc/powerpc64/power7/memchr.S
(gdb) i r r7
r7             0x3fffffffeab1   70368744172209

If we set r7 to maximum pointer value (0xf...f) memchr returns the expected result.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] Improve strtok(_r) performance
  2016-12-14 19:26       ` Adhemerval Zanella
@ 2016-12-14 19:43         ` Adhemerval Zanella
  0 siblings, 0 replies; 7+ messages in thread
From: Adhemerval Zanella @ 2016-12-14 19:43 UTC (permalink / raw)
  To: libc-alpha



On 14/12/2016 17:26, Adhemerval Zanella wrote:
> 
> 
> On 14/12/2016 16:25, Wilco Dijkstra wrote:
>> Hi,
>>
>> In powerpc64/power7/memchr.S this bit looks suspicious:
>>
>> L(done):
>> #ifdef __LITTLE_ENDIAN__
>>         addi    r0,r3,-1
>>         andc    r0,r0,r3
>>         popcntd r0,r0         /* Count trailing zeros.  */
>> #else
>>         cntlzd  r0,r3         /* Count leading zeros before the match.  */
>> #endif
>>         cmpld   r8,r7         /* Are we on the last dword?  */
>>         srdi    r0,r0,3       /* Convert leading/trailing zeros to bytes.  */
>>         add     r3,r8,r0
>>         cmpld   cr7,r0,r5     /* If on the last dword, check byte offset.  */
>>         bnelr
>>         blelr   cr7
>>         li      r3,0
>>         blr
>>
>> When the size is the maximum and the input pointer is at offset 2 or larger within an 8-byte aligned address, r7 == r8, and it will return NULL if it matches in the first 8 bytes since the match appears to be after the end pointer. When it matches later, r7 != r8 and it works.
> 
> I think this snippet is ok, the problem is at r7 calculation where it should
> handle overflow.  This simple test triggers the issue:
> 
> #define _GNU_SOURCE 1
> #include <string.h>
> #include <stdio.h>
> 
> void *
> my_rawmemchr (const void *s, int c)
> { 
>   if (c != '\0')
>     return memchr (s, c, (size_t)-1);
>   return (char *)s + strlen (s);
> }
> 
> int main ()
> {
>   // p=0x3fffb057fe00 | aling=10
>   int seek_char = 0x41;
>   size_t align = 10;
>   unsigned char input [32];
>   input[10] = 0x34;
>   input[11] = 0x78;
>   input[12] = 0x3d;
>   input[13] = 0x7b;
>   input[14] = 0xa1;
>   input[15] = seek_char;
> 
>   printf ("%p\n", my_rawmemchr (input+align, seek_char));
>   printf ("%p\n", rawmemchr (input+align, seek_char));
>   return 0;
> }
> 
> On POWER7 memchr.S:
> 
>  24 ENTRY (__memchr)
>  25         CALL_MCOUNT 3
>  26         dcbt    0,r3
>  27         clrrdi  r8,r3,3
>  28         insrdi  r4,r4,8,48
>  29         add     r7,r3,r5      /* Calculate the last acceptable address.  */
> 
> And using the example above:
> 
> (gdb) i r r3 r5
> r3             0x3fffffffeab2   70368744172210
> r5             0xffffffffffffffff       18446744073709551615
> (gdb) ni
> 30      in ../sysdeps/powerpc/powerpc64/power7/memchr.S
> (gdb) i r r7
> r7             0x3fffffffeab1   70368744172209
> 
> If we set r7 to maximum pointer value (0xf...f) memchr returns the expected result.
> 

This patch should fix test-rawmemchr by adding a saturated addition when
calculating the last double address to check.  I will prepare a patch
when the make check finished on the powerpc64le machine I have access.

diff --git a/sysdeps/powerpc/powerpc64/power7/memchr.S b/sysdeps/powerpc/powerpc64/power7/memchr.S
index 03f0d7c..34605a7 100644
--- a/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ b/sysdeps/powerpc/powerpc64/power7/memchr.S
@@ -26,7 +26,15 @@ ENTRY (__memchr)
        dcbt    0,r3
        clrrdi  r8,r3,3
        insrdi  r4,r4,8,48
-       add     r7,r3,r5      /* Calculate the last acceptable address.  */
+
+       /* Calculate the last acceptable address and also take care of possible
+          overflow by using a large size.  */
+       add     r7,r3,r5
+       subfc   r6,r3,r7
+       subfe   r9,r9,r9
+       extsw   r6,r9
+       or      r7,r7,r6
+
        insrdi  r4,r4,16,32
        cmpldi  r5,32
        li      r9, -1

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-12-14 19:43 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-13 13:20 [PATCH v3] Improve strtok(_r) performance Wilco Dijkstra
2016-12-14 14:07 ` Adhemerval Zanella
2016-12-14 17:43 ` Gabriel F. T. Gomes
2016-12-14 18:04   ` Wilco Dijkstra
2016-12-14 18:25     ` Wilco Dijkstra
2016-12-14 19:26       ` Adhemerval Zanella
2016-12-14 19:43         ` Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).