public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 3/4] Improve generic strpbrk performance
  2016-03-28 15:20 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
@ 2016-03-28 15:20 ` Adhemerval Zanella
  2016-03-28 15:20 ` [PATCH 1/4] Improve generic strcspn performance Adhemerval Zanella
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-28 15:20 UTC (permalink / raw)
  To: libc-alpha

Now that strcspn has a faster implementation, it is faster to just call
it and test the character it stops at than to reimplement strpbrk itself.
As with the strcspn optimization, it is generally at least 10 times
faster than the existing implementation on bench-strpbrk on a few
AArch64 implementations.

Also the string/bits/string2.h inlines no longer make sense, as the
current implementation already implements most of the optimizations.

Tested on x86_64, i386, and aarch64.

	* string/strpbrk.c (strpbrk): Rewrite function.
	* string/bits/string2.h (strpbrk): Use __builtin_strpbrk.
---
 ChangeLog             |  3 +++
 string/bits/string2.h | 41 ++++++-----------------------------------
 string/strpbrk.c      | 12 ++----------
 3 files changed, 11 insertions(+), 45 deletions(-)

diff --git a/string/bits/string2.h b/string/bits/string2.h
index a1684eb..76a179a 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -999,43 +999,14 @@ __strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
 
 
 /* Find the first occurrence in S of any character in ACCEPT.  */
-#if !defined _HAVE_STRING_ARCH_strpbrk || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strpbrk
-#  if __GNUC_PREREQ (3, 2)
-#   define strpbrk(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strpbrk (s, accept)				      \
-	 : ((__a0 = ((const char  *) (accept))[0], __a0 == '\0')	      \
-	    ? ((void) (s), (char *) NULL)				      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __builtin_strchr (s, __a0)				      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strpbrk_c2 (s, __a0, __a1)			      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strpbrk_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strpbrk (s, accept))))))		      \
-      : __builtin_strpbrk (s, accept)); })
-#  else
-#   define strpbrk(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__a0 = ((const char  *) (accept))[0], __a0 == '\0')		      \
-	 ? ((void) (s), (char *) NULL)					      \
-	 : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')		      \
-	    ? strchr (s, __a0)						      \
-	    : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-	       ? __strpbrk_c2 (s, __a0, __a1)				      \
-	       : (((const char *) (accept))[3] == '\0'			      \
-		  ? __strpbrk_c3 (s, __a0, __a1, __a2)			      \
-		  : strpbrk (s, accept)))))				      \
-      : strpbrk (s, accept)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strpbrk
+# if __GNUC_PREREQ (3, 2)
+#  define strpbrk(s, accept) __builtin_strpbrk (s, accept)
 # endif
 
+/* The inline functions are not used from GLIBC 2.24 and forward, however
+   they are required to provide the symbols through string-inlines.c
+   (if inlining is not possible for compatibility reasons).  */
 __STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1,
 				    int __accept2);
 __STRING_INLINE char *
diff --git a/string/strpbrk.c b/string/strpbrk.c
index fddd473..1ede719 100644
--- a/string/strpbrk.c
+++ b/string/strpbrk.c
@@ -27,15 +27,7 @@
 char *
 STRPBRK (const char *s, const char *accept)
 {
-  while (*s != '\0')
-    {
-      const char *a = accept;
-      while (*a != '\0')
-	if (*a++ == *s)
-	  return (char *) s;
-      ++s;
-    }
-
-  return NULL;
+  s += strcspn (s, accept);
+  return *s ? (char *)s : NULL;
 }
 libc_hidden_builtin_def (strpbrk)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-28 15:20 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
  2016-03-28 15:20 ` [PATCH 3/4] Improve generic strpbrk performance Adhemerval Zanella
  2016-03-28 15:20 ` [PATCH 1/4] Improve generic strcspn performance Adhemerval Zanella
@ 2016-03-28 15:20 ` Adhemerval Zanella
  2016-03-28 16:10   ` Paul E. Murphy
  2016-03-30 13:14   ` Tulio Magno Quites Machado Filho
  2016-03-28 15:20 ` [PATCH 2/4] Improve generic strspn performance Adhemerval Zanella
  3 siblings, 2 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-28 15:20 UTC (permalink / raw)
  To: libc-alpha

This patch removes the powerpc64 optimized strspn, strcspn, and
strpbrk assembly implementations now that the default C ones
implement the same strategy.  On the internal glibc benchtests the
current implementations show similar performance with -O2.

Tested on powerpc64le (POWER8).

	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
	* sysdeps/powerpc/powerpc64/strpbrk.S: Likewise.
	* sysdeps/powerpc/powerpc64/strspn.S: Likewise.
---
 ChangeLog                           |   4 +
 sysdeps/powerpc/powerpc64/strcspn.S | 127 -------------------------------
 sysdeps/powerpc/powerpc64/strpbrk.S | 135 ---------------------------------
 sysdeps/powerpc/powerpc64/strspn.S  | 144 ------------------------------------
 4 files changed, 4 insertions(+), 406 deletions(-)
 delete mode 100644 sysdeps/powerpc/powerpc64/strcspn.S
 delete mode 100644 sysdeps/powerpc/powerpc64/strpbrk.S
 delete mode 100644 sysdeps/powerpc/powerpc64/strspn.S

diff --git a/sysdeps/powerpc/powerpc64/strcspn.S b/sysdeps/powerpc/powerpc64/strcspn.S
deleted file mode 100644
index 31e619d..0000000
--- a/sysdeps/powerpc/powerpc64/strcspn.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Optimized strcspn implementation for PowerPC64.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* size_t [r3] strcspn (const char [r4] *s, const char [r5] *reject)  */
-
-EALIGN (strcspn, 4, 0)
-	CALL_MCOUNT 3
-
-	/* The idea to speed up the algorithm is to create a lookup table
-	   for fast check if input character should be considered.  For ASCII
-	   or ISO-8859-X character sets it has 256 positions.  */
-
-	/* PPC64 ELF ABI stack is aligned to 16 bytes.  */
-	addi 	r9,r1,-256
-	/* Clear the table with 0 values  */
-	li	r6, 0
-	li	r8, 4
-	mtctr	r8
-	mr	r10, r9
-	.align 	4
-L(zerohash):
-	std	r6, 0(r10)
-	std	r6, 8(r10)
-	std	r6, 16(r10)
-	std	r6, 24(r10)
-	std	r6, 32(r10)
-	std	r6, 40(r10)
-	std	r6, 48(r10)
-	std	r6, 56(r10)
-	addi	r10, r10, 64
-	bdnz	L(zerohash)
-
-	lbz	r10,0(r4)
-	cmpdi	cr7,r10,0	/* reject[0] == '\0' ?  */
-	li	r8,1
-	beq     cr7,L(finish_table)  /* If reject[0] == '\0' skip  */
-
-	/* Initialize the table as:
-	   for (i=0; reject[i]; i++
-	     table[reject[i]]] = 1  */
-	.align	4
-L(init_table):
-	stbx	r8,r9,r10
-	lbzu	r10,1(r4)
-	cmpdi	cr7,r10,0           /* If reject[0] == '\0' finish  */
-	bne	cr7,L(init_table)
-L(finish_table):
-	/* set table[0] = 1  */
-	li 	r10,1
-	stb	r10,0(r9)
-	li	r10,0
-	b	L(mainloop)
-
-	/* Unrool the loop 4 times and check using the table as:
-	   i = 0;
-	   while (1)
-	     {
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	     }  */
-	.align 4
-L(unroll):
-	lbz	r8,1(r3)
-	addi	r10,r10,4
-	lbzx	r8,r9,r8
-	cmpwi	r7,r8,1
-	beq	cr7,L(end)
-	lbz	r8,2(r3)
-	addi	r3,r3,4
-	lbzx	r8,r9,r8
-	cmpwi	cr7,r8,1
-	beq	cr7,L(end2)
-	lbz	r8,3(r7)
-	lbzx	r8,r9,r8
-	cmpwi	cr7,r8,1
-	beq	cr7,L(end3)
-L(mainloop):
-	lbz	r8,0(r3)
-	mr	r7,r3
-	addi	r6,r10,1
-	addi	r4,r10,2
-	addi	r5,r10,3
-	lbzx	r8,r9,8
-	cmpwi	cr7,r8,1
-	bne	cr7,L(unroll)
-	mr	r3,r10
-	blr
-
-	.align 4
-L(end):
-	mr	r3,r6
-	blr
-
-	.align 4
-L(end2):
-	mr	r3,r4
-	blr
-
-	.align 4
-L(end3):
-	mr	r3,r5
-	blr
-END (strcspn)
-libc_hidden_builtin_def (strcspn)
diff --git a/sysdeps/powerpc/powerpc64/strpbrk.S b/sysdeps/powerpc/powerpc64/strpbrk.S
deleted file mode 100644
index 5e9d1a6..0000000
--- a/sysdeps/powerpc/powerpc64/strpbrk.S
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Optimized strpbrk implementation for PowerPC64.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* char [r3] *strpbrk(const char [r4] *s, const char [r5] *accept)  */
-
-EALIGN (strpbrk, 4, 0)
-	CALL_MCOUNT 3
-
-	lbz	r10,0(r4)
-	cmpdi	cr7,r10,0	/* accept[0] == '\0' ?  */
-	beq	cr7,L(nullfound)
-
-	/* The idea to speed up the algorithm is to create a lookup table
-	   for fast check if input character should be considered.  For ASCII
-	   or ISO-8859-X character sets it has 256 positions.  */
-
-	/* PPC64 ELF ABI stack is aligned to 16 bytes.  */
-	addi 	r9,r1,-256
-	/* Clear the table with 0 values  */
-	li	r6, 0
-	li	r7, 4
-	mtctr	r7
-	mr	r8, r9
-	.align 	4
-L(zerohash):
-	std	r6, 0(r8)
-	std	r6, 8(r8)
-	std	r6, 16(r8)
-	std	r6, 24(r8)
-	std	r6, 32(r8)
-	std	r6, 40(r8)
-	std	r6, 48(r8)
-	std	r6, 56(r8)
-	addi	r8, r8, 64
-	bdnz	L(zerohash)
-
-	/* Initialize the table as:
-	   for (i=0; accept[i]; i++
-	     table[accept[i]]] = 1  */
-	li      r0,1
-	.align 4
-L(init_table):
-	stbx	r0,r9,r10
-	lbzu	r10,1(r4)
-	cmpdi	r0,r10,0
-	bne	cr0,L(init_table)
-L(finish_table):
-	/* set table[0] = 1  */
-	li	r4,1
-	stb	r4,0(r9)
-	b	L(mainloop)
-
-	/* Unrool the loop 4 times and check using the table as:
-	   i = 0;
-	   while (1)
-	     {
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	     }  */
-	.align 4
-L(unroll):
-	lbz	r0,1(r3)
-	lbzx	r8,r9,r0
-	cmpwi	cr6,r8,1
-	beq	cr6,L(checkend2)
-	lbz	r10,2(r3)
-	lbzx	r4,r9,r10
-	cmpwi	cr7,r4,1
-	beq	cr7,L(checkend3)
-	lbz	r12,3(r3)
-	addi	r3,r3,4
-	lbzx	r11,r9,r12
-	cmpwi	cr0,r11,1
-	beq	cr0,L(checkend)
-L(mainloop):
-	lbz	r12,0(r3)
-	addi	r11,r3,1
-	addi	r5,r3,2
-	addi	r7,r3,3
-	lbzx	r6,r9,r12
-	cmpwi	cr1,r6,1
-	bne	cr1,L(unroll)
-	cmpdi	cr0,r12,0
-	beq	cr0,L(nullfound)
-L(end):
-	blr
-
-	.align 4
-L(checkend):
-	cmpdi	cr1,r12,0
-	mr	r3,r7
-	bne	cr1,L(end)
-L(nullfound):
-	/* return NULL  */
-	li 3,0
-	blr
-
-	.align 4
-L(checkend2):
-	cmpdi	cr7,r0,0
-	mr	r3,r11
-	beq	cr7,L(nullfound)
-	blr
-
-	.align 4
-L(checkend3):
-	cmpdi	cr6,r10,0
-	mr	r3,r5
-	beq	cr6,L(nullfound)
-	blr
-END (strpbrk)
-libc_hidden_builtin_def (strpbrk)
diff --git a/sysdeps/powerpc/powerpc64/strspn.S b/sysdeps/powerpc/powerpc64/strspn.S
deleted file mode 100644
index cf10da1..0000000
--- a/sysdeps/powerpc/powerpc64/strspn.S
+++ /dev/null
@@ -1,144 +0,0 @@
-/* Optimized strspn implementation for PowerPC64.
-
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* size_t [r3] strspn (const char *string [r3],
-                       const char *needleAccept [r4]  */
-
-/* Performance gains are grabbed through following techniques:
-
-   > hashing of needle.
-   > hashing avoids scanning of duplicate entries in needle
-     across the string.
-   > unrolling when scanning for character in string
-     across hash table.  */
-
-/* Algorithm is as below:
-   1. A empty hash table/dictionary is created comprising of
-      256 ascii character set
-   2. When hash entry is found in needle , the hash index
-      is initialized to 1
-   3. The string is scanned until end and for every character,
-      its corresponding hash index is compared.
-   4. initial length of string (count) until first hit of
-      accept needle to be found is set to 0
-   4. If hash index is set to 1 for the index of string,
-      count is returned.
-   5. Otherwise count is incremented and scanning continues
-      until end of string.  */
-
-#include <sysdep.h>
-
-EALIGN(strspn, 4, 0)
-	CALL_MCOUNT 3
-
-	/* PPC64 ELF ABI stack is aligned to 16 bytes.  */
-	addi 	r9,r1,-256
-	/* Clear the table with 0 values  */
-	li	r6, 0
-	li	r8, 4
-	mtctr	r8
-	mr	r10, r9
-	.align 	4
-L(zerohash):
-	std	r6, 0(r10)
-	std	r6, 8(r10)
-	std	r6, 16(r10)
-	std	r6, 24(r10)
-	std	r6, 32(r10)
-	std	r6, 40(r10)
-	std	r6, 48(r10)
-	std	r6, 56(r10)
-	addi	r10, r10, 64
-	bdnz	L(zerohash)
-
-	lbz	r10,0(r4)
-	li r8, 1		/* r8=1, marker into hash if found in
-				   needle  */
-	cmpdi cr7, r10, 0	/* accept needle is NULL  */
-	beq cr7, L(skipHashing)	/* if needle is NULL, skip hashing  */
-
-	.align 4		/* align section to 16 byte boundary  */
-L(hashing):
-	stbx r8, r9, r10	/* update hash with marker for the pivot of
-				   the needle  */
-	lbzu r10, 1(r4)		/* load needle into r10 and update to next  */
-	cmpdi cr7, r10, 0	/* if needle is has reached NULL, continue  */
-	bne cr7, L(hashing)	/* loop to hash the needle  */
-
-L(skipHashing):
-	li r10, 0		/* load counter = 0  */
-	b L(beginScan)
-
-	.align 4		/* align section to 16 byte boundary  */
-L(scanUnroll):
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	cmpwi cr7, r8, 0	/* if we hit marker in hash, we have found
-				   accept needle  */
-	beq cr7, L(ret1stIndex)	/* we have hit accept needle, return the
-				   count  */
-
-	lbz r8, 1(r3)		/* load string[1] into r8  */
-	addi r10, r10, 4	/* increment counter  */
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	cmpwi cr7, r8, 0	/* if we hit marker in hash, we have found
-				   accept needle  */
-	beq cr7, L(ret2ndIndex)	/* we have hit accept needle, return the
-				   count  */
-
-	lbz r8, 2(r3)		/* load string[2] into r8  */
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	cmpwi cr7, r8, 0	/* if we hit marker in hash, we have found
-				   accept needle  */
-	beq cr7, L(ret3rdIndex)	/* we have hit accept needle, return the
-				   count  */
-
-	lbz r8, 3(r3)		/* load string[3] into r8  */
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	addi r3, r3, 4		/* unroll factor , increment string by 4  */
-	cmpwi cr7, r8, 0	/* if we hit marker in hash, we have found
-				   accept needle  */
-	beq cr7,L(ret4thIndex)	/* we have hit accept needle, return the
-				   count  */
-
-L(beginScan):
-	lbz r8, 0(r3)		/* load string[0] into r8  */
-	addi r6, r10, 1		/* place holder for counter + 1  */
-	addi r5, r10, 2		/* place holder for counter + 2  */
-	addi r4, r10, 3		/* place holder for counter + 3  */
-	cmpdi cr7, r8, 0	/* if we hit marker in hash, we have found
-				   accept needle  */
-	bne cr7, L(scanUnroll)	/* continue scanning  */
-
-L(ret1stIndex):
-	mr r3, r10		/* update r3 for return  */
-	blr			/* return  */
-
-L(ret2ndIndex):
-	mr r3, r6		/* update r3 for return  */
-	blr			/* return  */
-
-L(ret3rdIndex):
-	mr r3, r5		/* update r3 for return  */
-	blr			/* return  */
-
-L(ret4thIndex):
-	mr r3, r4		/* update r3 for return  */
-	blr			/* done  */
-END(strspn)
-libc_hidden_builtin_def (strspn)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/4] Improve generic strcspn performance
  2016-03-28 15:20 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
  2016-03-28 15:20 ` [PATCH 3/4] Improve generic strpbrk performance Adhemerval Zanella
@ 2016-03-28 15:20 ` Adhemerval Zanella
  2016-03-29 13:02   ` [PATCH 2/4] Improve generic strspn performance Wilco Dijkstra
                     ` (2 more replies)
  2016-03-28 15:20 ` [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation Adhemerval Zanella
  2016-03-28 15:20 ` [PATCH 2/4] Improve generic strspn performance Adhemerval Zanella
  3 siblings, 3 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-28 15:20 UTC (permalink / raw)
  To: libc-alpha; +Cc: Wilco Dijkstra

From: Wilco Dijkstra <wdijkstr@arm.com>

Improve strcspn performance using a much faster algorithm.  It is kept simple
so it works well on most targets.  It is generally at least 10 times faster
than the existing implementation on bench-strcspn on a few AArch64
implementations, and for some tests 100 times as fast (repeatedly calling
strchr on a small string is extremely slow...).

In fact the string/bits/string2.h inlines no longer make sense, as GCC
already uses strlen if reject is an empty string, strchrnul is 5 times as
fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than
the strcspn main loop for large strings (though reject length 2-4 could be
special cased in the future to gain even more performance).

Tested on x86_64, i686, and aarch64.

	* string/strcspn.c (strcspn): Rewrite function.
	* string/bits/string2.h (strcspn): Use __builtin_strcspn.
---
 ChangeLog             |  6 ++++++
 string/bits/string2.h | 41 ++++++-----------------------------------
 string/strcspn.c      | 44 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 48 insertions(+), 43 deletions(-)

diff --git a/string/bits/string2.h b/string/bits/string2.h
index 8200ef1..1b87686 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -905,43 +905,14 @@ __stpcpy_small (char *__dest,
 
 /* Return the length of the initial segment of S which
    consists entirely of characters not in REJECT.  */
-#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strcspn
-#  if __GNUC_PREREQ (3, 2)
-#   define strcspn(s, reject) \
-  __extension__								      \
-  ({ char __r0, __r1, __r2;						      \
-     (__builtin_constant_p (reject) && __string2_1bptr_p (reject)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strcspn (s, reject)				      \
-	 : ((__r0 = ((const char *) (reject))[0], __r0 == '\0')		      \
-	    ? strlen (s)						      \
-	    : ((__r1 = ((const char *) (reject))[1], __r1 == '\0')	      \
-	       ? __strcspn_c1 (s, __r0)					      \
-	       : ((__r2 = ((const char *) (reject))[2], __r2 == '\0')	      \
-		  ? __strcspn_c2 (s, __r0, __r1)			      \
-		  : (((const char *) (reject))[3] == '\0'		      \
-		     ? __strcspn_c3 (s, __r0, __r1, __r2)		      \
-		     : __builtin_strcspn (s, reject))))))		      \
-      : __builtin_strcspn (s, reject)); })
-#  else
-#   define strcspn(s, reject) \
-  __extension__								      \
-  ({ char __r0, __r1, __r2;						      \
-     (__builtin_constant_p (reject) && __string2_1bptr_p (reject)	      \
-      ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0')		      \
-	 ? strlen (s)							      \
-	 : ((__r1 = ((const char *) (reject))[1], __r1 == '\0')		      \
-	    ? __strcspn_c1 (s, __r0)					      \
-	    : ((__r2 = ((const char *) (reject))[2], __r2 == '\0')	      \
-	       ? __strcspn_c2 (s, __r0, __r1)				      \
-	       : (((const char *) (reject))[3] == '\0'			      \
-		  ? __strcspn_c3 (s, __r0, __r1, __r2)			      \
-		  : strcspn (s, reject)))))				      \
-      : strcspn (s, reject)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strcspn
+# if __GNUC_PREREQ (3, 2)
+#  define strcspn(s, reject) __builtin_strcspn (s, reject)
 # endif
 
+ /* The inline functions are not used from GLIBC 2.24 and forward, however
+    they are required to provide the symbols through string-inlines.c
+    (if inlining is not possible for compatibility reasons).  */
 __STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
 __STRING_INLINE size_t
 __strcspn_c1 (const char *__s, int __reject)
diff --git a/string/strcspn.c b/string/strcspn.c
index 8888919..89ba4ca 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -26,16 +26,44 @@
 /* Return the length of the maximum initial segment of S
    which contains no characters from REJECT.  */
 size_t
-STRCSPN (const char *s, const char *reject)
+STRCSPN (const char *str, const char *reject)
 {
-  size_t count = 0;
+  if (reject[0] == '\0' || reject[1] == '\0')
+    return __strchrnul (str, reject [0]) - str;
 
-  while (*s != '\0')
-    if (strchr (reject, *s++) == NULL)
-      ++count;
-    else
-      return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
 
-  return count;
+  unsigned char *s = (unsigned char*) reject;
+  unsigned char tmp;
+  do
+    p[tmp = *s++] = 1;
+  while (tmp);
+
+  s = (unsigned char*) str;
+  if (p[s[0]]) return 0;
+  if (p[s[1]]) return 1;
+  if (p[s[2]]) return 2;
+  if (p[s[3]]) return 3;
+
+  s = (unsigned char *) ((size_t)s & ~3);
+
+  unsigned int c0, c1, c2, c3;
+  do
+    {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+    }
+  while ((c0 | c1 | c2 | c3) == 0);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3;
 }
 libc_hidden_builtin_def (strcspn)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 0/4] Improve generic strspn/strcspn/strpbrk
@ 2016-03-28 15:20 Adhemerval Zanella
  2016-03-28 15:20 ` [PATCH 3/4] Improve generic strpbrk performance Adhemerval Zanella
                   ` (3 more replies)
  0 siblings, 4 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-28 15:20 UTC (permalink / raw)
  To: libc-alpha

This is a follow-up to Wilco's previous patch [1] to optimize strcspn
that also optimizes the generic strspn and strpbrk.  I used the
same strategy Wilco used on strcspn for strspn and rewrote
strpbrk to just use strcspn instead.

I also tried playing with compiler options to see whether the compiler
could avoid generating the memset call when initializing the lookup
table, but without success.  This is similar to the strategy I used
in the powerpc64 str{c}spn optimization.

Wilco's initial approach was to remove the __strcspn_c{1,2,3}
inline functions in the string2.h header, however they are part of
the ABI (to support compilers that do not inline the calls) and it
is not safe to remove them.  I have added them back, although the
new strcspn macro does not use them, and I also used the same
strategy for both strspn and strpbrk.

Performance-wise the algorithm is similar to the current optimized
assembly ones already in GLIBC (x86 and powerpc).  In fact, for
powerpc64 the algorithm's performance is similar to the assembly
routines, which led me to remove them.  The i686 default one
is slightly faster, while the SSE4.1 variant shows much better
performance (through the use of SIMD instructions).

[1] https://sourceware.org/ml/libc-alpha/2016-01/msg00173.html

Adhemerval Zanella (3):
  Improve generic strspn performance
  Improve generic strpbrk performance
  Remove powerpc64 strspn, strcspn, and strpbrk implementation

Wilco Dijkstra (1):
  Improve generic strcspn performance

 ChangeLog                           |  18 +++++
 string/bits/string2.h               | 123 +++++-------------------------
 string/strcspn.c                    |  44 +++++++++--
 string/strpbrk.c                    |  12 +--
 string/strspn.c                     |  56 ++++++++++----
 sysdeps/powerpc/powerpc64/strcspn.S | 127 -------------------------------
 sysdeps/powerpc/powerpc64/strpbrk.S | 135 ---------------------------------
 sysdeps/powerpc/powerpc64/strspn.S  | 144 ------------------------------------
 8 files changed, 115 insertions(+), 544 deletions(-)
 delete mode 100644 sysdeps/powerpc/powerpc64/strcspn.S
 delete mode 100644 sysdeps/powerpc/powerpc64/strpbrk.S
 delete mode 100644 sysdeps/powerpc/powerpc64/strspn.S

-- 
1.9.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/4] Improve generic strspn performance
  2016-03-28 15:20 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
                   ` (2 preceding siblings ...)
  2016-03-28 15:20 ` [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation Adhemerval Zanella
@ 2016-03-28 15:20 ` Adhemerval Zanella
  2016-03-29 20:32   ` Tulio Magno Quites Machado Filho
  3 siblings, 1 reply; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-28 15:20 UTC (permalink / raw)
  To: libc-alpha

As with strcspn, this patch improves strspn performance using a much
faster algorithm.  It first constructs a 256-entry table based on
the accept string and then uses it as a lookup table for the
input string.  As with the strcspn optimization, it is generally at
least 10 times faster than the existing implementation on bench-strspn
on a few AArch64 implementations.

Also the string/bits/string2.h inlines no longer make sense, as the
current implementation already implements most of the optimizations.

Tested on x86_64, i686, and aarch64.

	* string/strspn.c (strspn): Rewrite function.
	* string/bits/string2.h (strspn): Use __builtin_strspn.
---
 ChangeLog             |  5 +++++
 string/bits/string2.h | 41 ++++++-------------------------------
 string/strspn.c       | 56 +++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/string/bits/string2.h b/string/bits/string2.h
index 1b87686..a1684eb 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -952,43 +952,14 @@ __strcspn_c3 (const char *__s, int __reject1, int __reject2,
 
 /* Return the length of the initial segment of S which
    consists entirely of characters in ACCEPT.  */
-#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strspn
-#  if __GNUC_PREREQ (3, 2)
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strspn (s, accept)					      \
-	 : ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	    ? ((void) (s), (size_t) 0)					      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __strspn_c1 (s, __a0)					      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strspn_c2 (s, __a0, __a1)				      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strspn_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strspn (s, accept))))))		      \
-      : __builtin_strspn (s, accept)); })
-#  else
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	 ? ((void) (s), (size_t) 0)					      \
-	 : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')		      \
-	    ? __strspn_c1 (s, __a0)					      \
-	    : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-	       ? __strspn_c2 (s, __a0, __a1)				      \
-	       : (((const char *) (accept))[3] == '\0'			      \
-		  ? __strspn_c3 (s, __a0, __a1, __a2)			      \
-		  : strspn (s, accept)))))				      \
-      : strspn (s, accept)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strspn
+# if __GNUC_PREREQ (3, 2)
+#  define strspn(s, accept) __builtin_strspn (s, accept)
 # endif
 
+/* The inline functions are not used from GLIBC 2.24 and forward, however
+   they are required to provide the symbols through string-inlines.c
+   (if inlining is not possible for compatibility reasons).  */
 __STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept);
 __STRING_INLINE size_t
 __strspn_c1 (const char *__s, int __accept)
diff --git a/string/strspn.c b/string/strspn.c
index f0635c1..0547f41 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -25,23 +25,49 @@
 /* Return the length of the maximum initial segment
    of S which contains only characters in ACCEPT.  */
 size_t
-STRSPN (const char *s, const char *accept)
+STRSPN (const char *str, const char *accept)
 {
-  const char *p;
-  const char *a;
-  size_t count = 0;
-
-  for (p = s; *p != '\0'; ++p)
-    {
-      for (a = accept; *a != '\0'; ++a)
-	if (*p == *a)
-	  break;
-      if (*a == '\0')
-	return count;
-      else
-	++count;
+  if (accept[0] == '\0')
+    return 0;
+  if (accept[1] == '\0')
+    { 
+      const char *a = str;
+      for (; *str == *accept; str++);
+      return str - a;
     }
 
-  return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  unsigned char *s = (unsigned char*) accept;
+  /* Different from strcspn it does not add the NULL on the table
+     so can avoid check if str[i] is NULL, since table['\0'] will
+     be 0 and thus stopping the loop check.  */
+  do
+    p[*s++] = 1;
+  while (*s);
+
+  s = (unsigned char*) str;
+  if (!p[s[0]]) return 0;
+  if (!p[s[1]]) return 1;
+  if (!p[s[2]]) return 2;
+  if (!p[s[3]]) return 3;
+          
+  s = (unsigned char *) ((size_t)(s) & ~3);
+  unsigned int c0, c1, c2, c3; 
+  do {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+  } while ((c0 && c1 && c2 && c3) == 1);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 && c1) == 0 ? count - !c0 + 1 : count - !c2 + 3;
 }
 libc_hidden_builtin_def (strspn)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-28 15:20 ` [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation Adhemerval Zanella
@ 2016-03-28 16:10   ` Paul E. Murphy
  2016-03-28 17:56     ` Adhemerval Zanella
  2016-03-30 13:14   ` Tulio Magno Quites Machado Filho
  1 sibling, 1 reply; 22+ messages in thread
From: Paul E. Murphy @ 2016-03-28 16:10 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha; +Cc: Tulio Magno Quites Machado Filho

Hi Adhemerval,

Thanks for commonizing that. I'm surprised it hasn't been done earlier.

Anyhow, coincidental timing. I posted https://sourceware.org/ml/libc-alpha/2016-03/msg00642.html
friday to optimize strspn with vector bit permutes. We'll need to coordinate
these patches.

Thanks,
Paul

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-28 16:10   ` Paul E. Murphy
@ 2016-03-28 17:56     ` Adhemerval Zanella
  0 siblings, 0 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-28 17:56 UTC (permalink / raw)
  To: Paul E. Murphy, libc-alpha; +Cc: Tulio Magno Quites Machado Filho

On 28-03-2016 13:08, Paul E. Murphy wrote:
> Hi Adhemerval,
> 
> Thanks for commonizing that. I'm surprised it hasn't been done earlier.
> 
> Anyhow, coincidental timing. I posted https://sourceware.org/ml/libc-alpha/2016-03/msg00642.html
> friday to optimize strspn with vector bit permutes. We'll need to coordinate
> these patches.
> 
> Thanks,
> Paul
> 

Indeed; however, the fix is simply a matter of adjusting
sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.{s,c} as follows:

---
#include <string.h>

#define STRSPN __strspn_ppc

#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(name)

extern __typeof (strspn) __strspn_ppc;

#include <string/strspn.c>
---

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] Improve generic strspn performance
  2016-03-28 15:20 ` [PATCH 1/4] Improve generic strcspn performance Adhemerval Zanella
@ 2016-03-29 13:02   ` Wilco Dijkstra
  2016-03-29 14:08     ` Adhemerval Zanella
  2016-03-30 17:47   ` [PATCH 1/4] Improve generic strcspn performance Richard Henderson
  2016-03-31 17:00   ` Richard Henderson
  2 siblings, 1 reply; 22+ messages in thread
From: Wilco Dijkstra @ 2016-03-29 13:02 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha; +Cc: nd

Adhemerval Zanella wrote:

> +  if (accept[0] == '\0')
> +    return 0;
> +  if (accept[1] == '\0')
> +    { 

GCC doesn't get the static branch prediction correct for the 2nd if,
so it would be useful to add __glibc_unlikely given that single-character
accepts are rare.

> +  s = (unsigned char *) ((size_t)(s) & ~3);
> +  unsigned int c0, c1, c2, c3; 
> +  do {
> +      s += 4;
> +      c0 = p[s[0]];
> +      c1 = p[s[1]];
> +      c2 = p[s[2]];
> +      c3 = p[s[3]];
> +  } while ((c0 && c1 && c2 && c3) == 1);

That should use '&' rather than '&&' and '!= 0' similar to how I did it in strcspn.
This will use 3 AND(S) instructions and a single branch.

> +
> +  size_t count = s - (unsigned char *) str;
> +  return (c0 && c1) == 0 ? count - !c0 + 1 : count - !c2 + 3;

Again, c0 & c1 is better and allows CSE with the while expression above.
Also -!c0 +1 is equivalent to c0, -!c2 + 3 is equivalent to c2 + 2 - this is simpler
and faster.

Otherwise it looks good, and thanks for doing this one too!

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] Improve generic strspn performance
  2016-03-29 13:02   ` [PATCH 2/4] Improve generic strspn performance Wilco Dijkstra
@ 2016-03-29 14:08     ` Adhemerval Zanella
  0 siblings, 0 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-29 14:08 UTC (permalink / raw)
  To: Wilco Dijkstra, libc-alpha; +Cc: nd



On 29-03-2016 10:02, Wilco Dijkstra wrote:
> Adhemerval Zanella wrote:
> 
>> +  if (accept[0] == '\0')
>> +    return 0;
>> +  if (accept[1] == '\0')
>> +    { 
> 
> GCC doesn't get the static branch prediction correct for the 2nd if,
> so it would be useful to add __glibc_unlikely given that single-character
> accepts are rare.
> 
>> +  s = (unsigned char *) ((size_t)(s) & ~3);
>> +  unsigned int c0, c1, c2, c3; 
>> +  do {
>> +      s += 4;
>> +      c0 = p[s[0]];
>> +      c1 = p[s[1]];
>> +      c2 = p[s[2]];
>> +      c3 = p[s[3]];
>> +  } while ((c0 && c1 && c2 && c3) == 1);
> 
> That should use '&' rather than '&&' and '!= 0' similar to how I did it in strcspn.
> This will use 3 AND(S) instructions and a single branch.
> 
>> +
>> +  size_t count = s - (unsigned char *) str;
>> +  return (c0 && c1) == 0 ? count - !c0 + 1 : count - !c2 + 3;
> 
> Again, c0 & c1 is better and allows CSE with the while expression above.
> Also -!c0 +1 is equivalent to c0, -!c2 + 3 is equivalent to c2 + 2 - this is simpler
> and faster.
> 
> Otherwise it looks good, and thanks for doing this one too!
> 
> Cheers,
> Wilco
> 

Thanks for the review.  I think this version fixes the points you noted:

diff --git a/string/strspn.c b/string/strspn.c
index f0635c1..15d7fa7 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -25,23 +25,49 @@
 /* Return the length of the maximum initial segment
    of S which contains only characters in ACCEPT.  */
 size_t
-STRSPN (const char *s, const char *accept)
+STRSPN (const char *str, const char *accept)
 {
-  const char *p;
-  const char *a;
-  size_t count = 0;
-
-  for (p = s; *p != '\0'; ++p)
-    {
-      for (a = accept; *a != '\0'; ++a)
-	if (*p == *a)
-	  break;
-      if (*a == '\0')
-	return count;
-      else
-	++count;
+  if (accept[0] == '\0')
+    return 0;
+  if (__glibc_unlikely (accept[1] == '\0'))
+    { 
+      const char *a = str;
+      for (; *str == *accept; str++);
+      return str - a;
     }
 
-  return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  unsigned char *s = (unsigned char*) accept;
+  /* Unlike strcspn, the NUL terminator is not added to the table:
+     table['\0'] stays 0, so the loops below stop at the end of STR
+     without needing a separate check for str[i] == '\0'.  */
+  do
+    p[*s++] = 1;
+  while (*s);
+
+  s = (unsigned char*) str;
+  if (!p[s[0]]) return 0;
+  if (!p[s[1]]) return 1;
+  if (!p[s[2]]) return 2;
+  if (!p[s[3]]) return 3;
+          
+  s = (unsigned char *) ((size_t)(s) & ~3);
+  unsigned int c0, c1, c2, c3; 
+  do {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+  } while ((c0 & c1 & c2 & c3) != 0);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 & c1) == 0 ? count + c0 : count + c2 + 2;
 }
 libc_hidden_builtin_def (strspn)

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/4] Improve generic strspn performance
  2016-03-28 15:20 ` [PATCH 2/4] Improve generic strspn performance Adhemerval Zanella
@ 2016-03-29 20:32   ` Tulio Magno Quites Machado Filho
  0 siblings, 0 replies; 22+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-03-29 20:32 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha

Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> As for strcspn, this patch improves strspn performance using a much
> faster algorithm.  It first constructs a 256-entry table based on
> the accept string and then uses it as a lookup table for the
> input string.  As for strcspn optimization, it is generally at least
> 10 times faster than the existing implementation on bench-strspn
> on a few AArch64 implementations.
>
> Also the string/bits/string2.h inlines make no longer sense, as current
> implementation will already implement most of the optimizations.
>
> Tested on x86_64, i686, and aarch64.

And on powerpc64 and powerpc64le.

Git reported some trailing whitespaces in this patch.

> diff --git a/string/strspn.c b/string/strspn.c
> index f0635c1..0547f41 100644
> --- a/string/strspn.c
> +++ b/string/strspn.c
> @@ -25,23 +25,49 @@
>  /* Return the length of the maximum initial segment
>     of S which contains only characters in ACCEPT.  */
>  size_t
> -STRSPN (const char *s, const char *accept)
> +STRSPN (const char *str, const char *accept)
>  {
> -  const char *p;
> -  const char *a;
> -  size_t count = 0;
> -
> -  for (p = s; *p != '\0'; ++p)
> -    {
> -      for (a = accept; *a != '\0'; ++a)
> -	if (*p == *a)
> -	  break;
> -      if (*a == '\0')
> -	return count;
> -      else
> -	++count;
> +  if (accept[0] == '\0')
> +    return 0;
> +  if (accept[1] == '\0')
> +    { 

Here.

> +      const char *a = str;
> +      for (; *str == *accept; str++);
> +      return str - a;
>      }
>
> -  return count;
> +  /* Use multiple small memsets to enable inlining on most targets.  */
> +  unsigned char table[256];
> +  unsigned char *p = memset (table, 0, 64);
> +  memset (p + 64, 0, 64);
> +  memset (p + 128, 0, 64);
> +  memset (p + 192, 0, 64);
> +
> +  unsigned char *s = (unsigned char*) accept;
> +  /* Different from strcspn it does not add the NULL on the table
> +     so can avoid check if str[i] is NULL, since table['\0'] will
> +     be 0 and thus stopping the loop check.  */
> +  do
> +    p[*s++] = 1;
> +  while (*s);
> +
> +  s = (unsigned char*) str;
> +  if (!p[s[0]]) return 0;
> +  if (!p[s[1]]) return 1;
> +  if (!p[s[2]]) return 2;
> +  if (!p[s[3]]) return 3;
> +          

Here.

> +  s = (unsigned char *) ((size_t)(s) & ~3);
> +  unsigned int c0, c1, c2, c3; 

And here.

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-28 15:20 ` [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation Adhemerval Zanella
  2016-03-28 16:10   ` Paul E. Murphy
@ 2016-03-30 13:14   ` Tulio Magno Quites Machado Filho
  2016-03-30 17:06     ` Adhemerval Zanella
  1 sibling, 1 reply; 22+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-03-30 13:14 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha

Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> This patch removes the powerpc64 optimized strspn, strcspn, and
> strpbrk assembly implementation now that the default C one
> implements the same strategy.  On internal glibc benchtests
> current implementations shows similar performance with -O2.
>
> Tested on powerpc64le (POWER8).
>
> 	* sysdeps/powerpc/powerpc64/strspn.S: Likewise.

I tested this on POWER7 as well.  Although it isn't as good as in POWER8,
it's competitive.  I agree to remove it.

> 	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
> 	* sysdeps/powerpc/powerpc64/strpbrk.S: Likewise.

But the removal of these 2 brings some serious performance degradation.
I think we should keep them for now.

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-30 13:14   ` Tulio Magno Quites Machado Filho
@ 2016-03-30 17:06     ` Adhemerval Zanella
  2016-03-30 18:14       ` Tulio Magno Quites Machado Filho
  0 siblings, 1 reply; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-30 17:06 UTC (permalink / raw)
  To: Tulio Magno Quites Machado Filho, libc-alpha



On 30-03-2016 10:14, Tulio Magno Quites Machado Filho wrote:
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> 
>> This patch removes the powerpc64 optimized strspn, strcspn, and
>> strpbrk assembly implementation now that the default C one
>> implements the same strategy.  On internal glibc benchtests
>> current implementations shows similar performance with -O2.
>>
>> Tested on powerpc64le (POWER8).
>>
>> 	* sysdeps/powerpc/powerpc64/strspn.S: Likewise.
> 
> I tested this on POWER7 as well.  Although it isn't as good as in POWER8,
> it's competitive.  I agree to remove it.
> 
>> 	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
>> 	* sysdeps/powerpc/powerpc64/strpbrk.S: Likewise.
> 
> But the removal of these 2 brings some serious performance degradation.
> I think we should keep them for now.

Do you mean regression on POWER7 only or for POWER8 as well? 
And what do you mean by 'serious'?  I am asking because the
algorithm is essentially the same, with the slight difference that the
powerpc64 version neither aligns nor tries to read four bytes at a
time. I would expect some degradation for strpbrk, since the generic
implementation now calls strcspn.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] Improve generic strcspn performance
  2016-03-28 15:20 ` [PATCH 1/4] Improve generic strcspn performance Adhemerval Zanella
  2016-03-29 13:02   ` [PATCH 2/4] Improve generic strspn performance Wilco Dijkstra
@ 2016-03-30 17:47   ` Richard Henderson
  2016-03-30 18:01     ` Wilco Dijkstra
  2016-03-31 17:00   ` Richard Henderson
  2 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2016-03-30 17:47 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha; +Cc: Wilco Dijkstra

On 03/28/2016 08:19 AM, Adhemerval Zanella wrote:
> + /* The inline functions are not used from GLIBC 2.24 and forward, however
> +    they are required to provide the symbols through string-inlines.c
> +    (if inlining is not possible for compatibility reasons).  */
>  __STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
>  __STRING_INLINE size_t
>  __strcspn_c1 (const char *__s, int __reject)

They could, however, be moved out of the installed header file and be given
compatibility symbol versions.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] Improve generic strcspn performance
  2016-03-30 17:47   ` [PATCH 1/4] Improve generic strcspn performance Richard Henderson
@ 2016-03-30 18:01     ` Wilco Dijkstra
  2016-03-30 18:24       ` Adhemerval Zanella
  2016-03-30 18:46       ` Richard Henderson
  0 siblings, 2 replies; 22+ messages in thread
From: Wilco Dijkstra @ 2016-03-30 18:01 UTC (permalink / raw)
  To: Richard Henderson, Adhemerval Zanella, libc-alpha; +Cc: nd


Richard Henderson wrote:
> On 03/28/2016 08:19 AM, Adhemerval Zanella wrote:
> > + /* The inline functions are not used from GLIBC 2.24 and forward, however
> > +    they are required to provide the symbols through string-inlines.c
> > +    (if inlining is not possible for compatibility reasons).  */
> >  __STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
> >  __STRING_INLINE size_t
> >  __strcspn_c1 (const char *__s, int __reject)
>
> They could, however, be moved out of the installed header file and be given
> compatibility symbol versions.

I have several patches out for review that move all the inlines from string2.h 
to string/string-inlines.c for backwards compatibility and discontinue the complex
inlining of some code from string2.h so one no longer needs to worry about
accidental ABI changes.

See http://patchwork.sourceware.org/patch/10936/ for the strcspn move (this builds on
http://patchwork.sourceware.org/patch/10933/ and http://patchwork.sourceware.org/patch/10934/).

What do you mean with "compatibility symbol versions"?

Wilco 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-30 17:06     ` Adhemerval Zanella
@ 2016-03-30 18:14       ` Tulio Magno Quites Machado Filho
  2016-03-30 19:57         ` Adhemerval Zanella
  0 siblings, 1 reply; 22+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-03-30 18:14 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha

Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> On 30-03-2016 10:14, Tulio Magno Quites Machado Filho wrote:
>> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>> 
>>> 	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
>>> 	* sysdeps/powerpc/powerpc64/strpbrk.S: Likewise.
>> 
>> But the removal of these 2 brings some serious performance degradation.
>> I think we should keep them for now.
>
> Do you mean regression on POWER7 only or for POWER8 as well? 

Both.

> And what do you mean by 'serious'?

The new strcspn C implementation spent 8x the time of the powerpc64 assembly.
While strpbrk spent ~12x the time.
Using the benchtests.

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] Improve generic strcspn performance
  2016-03-30 18:01     ` Wilco Dijkstra
@ 2016-03-30 18:24       ` Adhemerval Zanella
  2016-03-30 18:46       ` Richard Henderson
  1 sibling, 0 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-30 18:24 UTC (permalink / raw)
  To: Wilco Dijkstra, Richard Henderson, libc-alpha; +Cc: nd



On 30-03-2016 15:00, Wilco Dijkstra wrote:
> 
> Richard Henderson wrote:
>> On 03/28/2016 08:19 AM, Adhemerval Zanella wrote:
>>> + /* The inline functions are not used from GLIBC 2.24 and forward, however
>>> +    they are required to provide the symbols through string-inlines.c
>>> +    (if inlining is not possible for compatibility reasons).  */
>>>  __STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
>>>  __STRING_INLINE size_t
>>>  __strcspn_c1 (const char *__s, int __reject)
>>
>> They could, however, be moved out of the installed header file and be given
>> compatibility symbol versions.
> 
> I have several patches out for review that move all the inlines from string2.h 
> to string/string-inlines.c for backwards compatibility and discontinue the complex
> inlining of some code from string2.h so one no longer needs to worry about
> accidental ABI changes.
> 
> See http://patchwork.sourceware.org/patch/10936/ for the strcspn move (this builds on
> http://patchwork.sourceware.org/patch/10933/ and http://patchwork.sourceware.org/patch/10934/).
> 
> What do you mean with "compatibility symbol versions"?
> 
> Wilco 
> 

My understanding is that, since GLIBC will not emit the
__str{spn,cspn,pbrk} symbols anymore, they can now be built in
compatibility mode only.  I will change it for this patch series and
check out the ones you noted.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] Improve generic strcspn performance
  2016-03-30 18:01     ` Wilco Dijkstra
  2016-03-30 18:24       ` Adhemerval Zanella
@ 2016-03-30 18:46       ` Richard Henderson
  1 sibling, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2016-03-30 18:46 UTC (permalink / raw)
  To: Wilco Dijkstra, Adhemerval Zanella, libc-alpha; +Cc: nd

On 03/30/2016 11:00 AM, Wilco Dijkstra wrote:
> 
> Richard Henderson wrote:
>> On 03/28/2016 08:19 AM, Adhemerval Zanella wrote:
>>> + /* The inline functions are not used from GLIBC 2.24 and forward, however
>>> +    they are required to provide the symbols through string-inlines.c
>>> +    (if inlining is not possible for compatibility reasons).  */
>>>  __STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
>>>  __STRING_INLINE size_t
>>>  __strcspn_c1 (const char *__s, int __reject)
>>
>> They could, however, be moved out of the installed header file and be given
>> compatibility symbol versions.
> 
> I have several patches out for review that move all the inlines from string2.h 
> to string/string-inlines.c for backwards compatibility and discontinue the complex
> inlining of some code from string2.h so one no longer needs to worry about
> accidental ABI changes.
> 
> See http://patchwork.sourceware.org/patch/10936/ for the strcspn move (this builds on
> http://patchwork.sourceware.org/patch/10933/ and http://patchwork.sourceware.org/patch/10934/).
> 
> What do you mean with "compatibility symbol versions"?

It means that the symbols are not available with a default version, and so new
programs cannot link against the symbol.  However, old programs that are
already linked will continue to use the old symbol version.

See e.g. nptl/pt-fork.c, wherein "fork" and "__fork" were removed from the
default symbols for libpthread, and so now are exported only as compatibility
symbols.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-30 18:14       ` Tulio Magno Quites Machado Filho
@ 2016-03-30 19:57         ` Adhemerval Zanella
  2016-03-30 21:45           ` Tulio Magno Quites Machado Filho
  0 siblings, 1 reply; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-30 19:57 UTC (permalink / raw)
  To: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 1479 bytes --]



On 30-03-2016 15:14, Tulio Magno Quites Machado Filho wrote:
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> 
>> On 30-03-2016 10:14, Tulio Magno Quites Machado Filho wrote:
>>> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>>>
>>>> 	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
>>>> 	* sysdeps/powerpc/powerpc64/strpbrk.S: Likewise.
>>>
>>> But the removal of these 2 brings some serious performance degradation.
>>> I think we should keep them for now.
>>
>> Do you mean regression on POWER7 only or for POWER8 as well? 
> 
> Both.
> 
>> And what do you mean by 'serious'?
> 
> The new strcspn C implementation spent 8x the time of the powerpc64 assembly.
> While strpbrk spent ~12x the time.
> Using the benchtests.
> 

This is definitely not what I am seeing in the environment I am using
(powerpc64le, POWER8E (raw), 3.5GHz, --with-cpu=power7, CFLAGS=-O3,
gcc 4.8.4).
I am sending you the benchtests data as attachments.

For strcspn, if reject has length 0 or 1, strchrnul will be used, which
is way faster than constructing the table and reading byte by byte.
Also, for the rest of the inputs, the C version is in fact slightly faster.

It is the same for strpbrk, since it will call strcspn in the end.
The only path where it will be faster is when accept[0] == '\0', since
the powerpc64 algorithm has a fast path to return 0 in this case
(where the C implementation will call strchrnul).

I am not seeing much difference with the default optimization (-O2)
either.

[-- Attachment #2: bench-strcspn-base.out --]
[-- Type: text/plain, Size: 9387 bytes --]

                                	strcspn	simple_strcspn	stupid_strcspn
Length  512, alignment  0, rej len  0:	128.422	227.219	455.562
Length  512, alignment  0, rej len  0:	127.984	227.328	454.797
Length  512, alignment  0, rej len  1:	127.703	502.953	525.672
Length  512, alignment  1, rej len  1:	128.609	502.594	526.344
Length  512, alignment  0, rej len  2:	128.219	793.281	742.531
Length  512, alignment  2, rej len  2:	127.484	757.312	740.688
Length  512, alignment  0, rej len  3:	128.156	1196.06	1128
Length  512, alignment  3, rej len  3:	129.016	931.328	985.562
Length  512, alignment  0, rej len  4:	128.609	4227.89	1170.81
Length  512, alignment  4, rej len  4:	128.641	4223.08	1170.67
Length  512, alignment  0, rej len  5:	128.875	1376.97	1384.28
Length  512, alignment  5, rej len  5:	129.672	1377.48	1380.06
Length  512, alignment  0, rej len  6:	129.203	1708.42	1612.5
Length  512, alignment  6, rej len  6:	128.859	1692.61	1613.03
Length  512, alignment  0, rej len  7:	129.297	3508.08	3436.64
Length  512, alignment  7, rej len  7:	130.938	3434.67	3438.05
Length  512, alignment  0, rej len  8:	129.812	2017.94	2041.5
Length  512, alignment  0, rej len  8:	130.031	2017.17	2040.72
Length  512, alignment  0, rej len  9:	130.266	2601.59	2278.92
Length  512, alignment  1, rej len  9:	131	2514.72	2311.7
Length  512, alignment  0, rej len 10:	130.703	2444.09	2464.25
Length  512, alignment  2, rej len 10:	130.297	2445.75	2464.92
Length  512, alignment  0, rej len 11:	130.969	2665.77	2891.3
Length  512, alignment  3, rej len 11:	131.844	2654.03	2675.64
Length  512, alignment  0, rej len 12:	131.203	2887.67	2898.72
Length  512, alignment  4, rej len 12:	131.156	2974.91	2904.7
Length  512, alignment  0, rej len 13:	131.422	3078.86	3300.47
Length  512, alignment  5, rej len 13:	132.375	3068.33	3098.16
Length  512, alignment  0, rej len 14:	131.219	6712.7	6062.61
Length  512, alignment  6, rej len 14:	131.25	6788.25	6058.97
Length  512, alignment  0, rej len 15:	131.953	6989.05	6303.98
Length  512, alignment  7, rej len 15:	132.641	7231.27	6305.5
Length  512, alignment  0, rej len 16:	132.266	7414.98	6536.98
Length  512, alignment  0, rej len 16:	132.266	7211.02	6595.03
Length  512, alignment  0, rej len 17:	132.938	7423.56	6765.81
Length  512, alignment  1, rej len 17:	134.219	7465.7	6832.23
Length  512, alignment  0, rej len 18:	133.547	4151.47	12783
Length  512, alignment  2, rej len 18:	133.328	4191.38	7062.42
Length  512, alignment  0, rej len 19:	133.828	4767.27	4369.67
Length  512, alignment  3, rej len 19:	135.203	4838.03	6219.95
Length  512, alignment  0, rej len 20:	134.281	5896.31	4843.36
Length  512, alignment  4, rej len 20:	133.844	5898.67	4773.36
Length  512, alignment  0, rej len 21:	134.75	8659.16	6678.89
Length  512, alignment  5, rej len 21:	136.172	6970.73	6810.22
Length  512, alignment  0, rej len 22:	138.688	6920.62	8729.5
Length  512, alignment  6, rej len 22:	138.844	6986.19	6902.38
Length  512, alignment  0, rej len 23:	139.344	7132.86	7553.72
Length  512, alignment  7, rej len 23:	140.266	7199.47	7137.47
Length  512, alignment  0, rej len 24:	140.062	7612.83	7341.11
Length  512, alignment  0, rej len 24:	139.703	7360.91	7334.28
Length  512, alignment  0, rej len 25:	140.688	7596.69	7597.62
Length  512, alignment  1, rej len 25:	141.406	7598.59	7597.66
Length  512, alignment  0, rej len 26:	140.875	7887.5	7774.39
Length  512, alignment  2, rej len 26:	140.938	8122.03	7795.39
Length  512, alignment  0, rej len 27:	140.906	8180.59	8062.3
Length  512, alignment  3, rej len 27:	142.203	8187.14	8054.39
Length  512, alignment  0, rej len 28:	141.688	8392.16	8309.39
Length  512, alignment  4, rej len 28:	141.547	8394	8299.14
Length  512, alignment  0, rej len 29:	142.312	8777.47	8500.94
Length  512, alignment  5, rej len 29:	143.266	8586.08	8507.03
Length  512, alignment  0, rej len 30:	142.906	8791.2	8721.89
Length  512, alignment  6, rej len 30:	142.594	8790.55	8707.36
Length  512, alignment  0, rej len 31:	142.906	9259.94	8928.67
Length  512, alignment  7, rej len 31:	143.891	9033.44	8913.73
Length   32, alignment  0, rej len  4:	17.7656	200.188	83.1094
Length   32, alignment  1, rej len  4:	17.5469	264.547	86.9375
Length   64, alignment  0, rej len  4:	24.75	465.266	158.562
Length   64, alignment  2, rej len  4:	24.7188	436.891	157.078
Length  128, alignment  0, rej len  4:	39.5781	992	299.969
Length  128, alignment  3, rej len  4:	40.0781	1009.91	300.094
Length  256, alignment  0, rej len  4:	69.3594	2192.12	586.359
Length  256, alignment  4, rej len  4:	69.2344	2216.08	585.609
Length  512, alignment  0, rej len  4:	127.922	4362.44	1166.25
Length  512, alignment  5, rej len  4:	129.156	4435.39	1165.72
Length 1024, alignment  0, rej len  4:	246.969	6236.97	2319.8
Length 1024, alignment  6, rej len  4:	246.703	8768.05	2321.05
Length 2048, alignment  0, rej len  4:	483.594	17973.9	4635.91
Length 2048, alignment  7, rej len  4:	487.219	17118.2	4622.03
Length   64, alignment  1, rej len 10:	27.3125	330.156	316.219
Length   64, alignment  2, rej len 10:	26.5469	313.484	320.375
Length   64, alignment  3, rej len 10:	26.8906	317.25	320.422
Length   64, alignment  4, rej len 10:	27.0156	310.672	315.75
Length   64, alignment  5, rej len 10:	27.0781	314.516	318.484
Length   64, alignment  6, rej len 10:	27.0312	313.203	319.031
Length   64, alignment  7, rej len 10:	27.0938	314.031	320.594
Length    0, alignment  0, rej len  6:	7.54688	2.3125	7.20312
Length    1, alignment  0, rej len  6:	8.59375	14.0469	12.5625
Length    2, alignment  0, rej len  6:	8.39062	9.14062	14.25
Length    3, alignment  0, rej len  6:	8.54688	19.8281	23.9688
Length    4, alignment  0, rej len  6:	8.6875	22.6875	27.6875
Length    5, alignment  0, rej len  6:	9.45312	21.3594	25.4688
Length    6, alignment  0, rej len  6:	9.15625	26.3281	30.0781
Length    7, alignment  0, rej len  6:	9.1875	30.0156	33.4219
Length    8, alignment  0, rej len  6:	9.125	31.7344	35.5781
Length    9, alignment  0, rej len  6:	9.625	40.8125	40.5
Length   10, alignment  0, rej len  6:	9.875	38.0781	42.0156
Length   11, alignment  0, rej len  6:	10.0625	42.4844	45.0312
Length   12, alignment  0, rej len  6:	9.71875	45.5938	48.3125
Length   13, alignment  0, rej len  6:	10.4844	48.4844	51.0781
Length   14, alignment  0, rej len  6:	10.6406	52.0469	55.9062
Length   15, alignment  0, rej len  6:	10.6094	54.1875	55.7188
Length   16, alignment  0, rej len  6:	10.375	58.6875	60.6094
Length   17, alignment  0, rej len  6:	11.5781	67.8281	64.6875
Length   18, alignment  0, rej len  6:	11.3906	63.0156	67.4062
Length   19, alignment  0, rej len  6:	11.2656	66.5625	70.6094
Length   20, alignment  0, rej len  6:	11.2812	72.4844	74.8281
Length   21, alignment  0, rej len  6:	12.2969	73.3125	76.0469
Length   22, alignment  0, rej len  6:	12.1719	76.3594	77.4531
Length   23, alignment  0, rej len  6:	12.6562	80.2188	84.0625
Length   24, alignment  0, rej len  6:	11.9844	83.6406	88.125
Length   25, alignment  0, rej len  6:	15.875	85.9219	87.0312
Length   26, alignment  0, rej len  6:	16.5469	98.0781	94.8125
Length   27, alignment  0, rej len  6:	17.5469	100.281	96.9375
Length   28, alignment  0, rej len  6:	17.2656	94.5625	96.2031
Length   29, alignment  0, rej len  6:	17.7344	98.5625	103.312
Length   30, alignment  0, rej len  6:	17.875	104.828	105.453
Length   31, alignment  0, rej len  6:	17.9531	105.266	109.109
Length   32, alignment  0, rej len  6:	18.1094	109.891	110.484
Length   33, alignment  0, rej len  6:	18.6406	113.297	113.984
Length   34, alignment  0, rej len  6:	18.7031	115	118.5
Length   35, alignment  0, rej len  6:	18.9531	119.078	119.375
Length   36, alignment  0, rej len  6:	19.1406	129.297	125.469
Length   37, alignment  0, rej len  6:	19.0625	126.953	128.078
Length   38, alignment  0, rej len  6:	19.5312	138.078	132.75
Length   39, alignment  0, rej len  6:	19.6562	127.797	133.375
Length   40, alignment  0, rej len  6:	20.2188	133.016	135.719
Length   41, alignment  0, rej len  6:	20.3125	150.156	140.891
Length   42, alignment  0, rej len  6:	20.6094	138.656	141.516
Length   43, alignment  0, rej len  6:	20.6875	145.562	147.5
Length   44, alignment  0, rej len  6:	20.9531	156.641	151.031
Length   45, alignment  0, rej len  6:	21.4688	150.047	153.719
Length   46, alignment  0, rej len  6:	21.5	154.719	154.172
Length   47, alignment  0, rej len  6:	21.5625	155.719	156.188
Length   48, alignment  0, rej len  6:	21.6406	157.391	159.234
Length   49, alignment  0, rej len  6:	21.8438	164.609	163.469
Length   50, alignment  0, rej len  6:	22.0625	167.562	169.312
Length   51, alignment  0, rej len  6:	22.625	170.125	170.859
Length   52, alignment  0, rej len  6:	22.7656	169.203	172.719
Length   53, alignment  0, rej len  6:	23.0469	191.438	177.578
Length   54, alignment  0, rej len  6:	24.2656	180.266	181.203
Length   55, alignment  0, rej len  6:	24.2656	193.547	184.844
Length   56, alignment  0, rej len  6:	23.75	198.953	187.938
Length   57, alignment  0, rej len  6:	23.9375	184.797	188.094
Length   58, alignment  0, rej len  6:	25.625	208.5	194.125
Length   59, alignment  0, rej len  6:	24.4688	199.031	196.203
Length   60, alignment  0, rej len  6:	24.6562	199.703	198.297
Length   61, alignment  0, rej len  6:	24.9688	207.547	202.234
Length   62, alignment  0, rej len  6:	26.2031	208.875	205.438
Length   63, alignment  0, rej len  6:	26.9844	210.328	209.219

[-- Attachment #3: bench-strcspn-patch.out --]
[-- Type: text/plain, Size: 9351 bytes --]

                                	strcspn	simple_strcspn	stupid_strcspn
Length  512, alignment  0, rej len  0:	30.8281	228.438	455.844
Length  512, alignment  0, rej len  0:	30.5781	228.422	455.359
Length  512, alignment  0, rej len  1:	30.3281	503.047	525.734
Length  512, alignment  1, rej len  1:	30.25	503.656	525.062
Length  512, alignment  0, rej len  2:	118.844	898.906	739.328
Length  512, alignment  2, rej len  2:	120.781	812.203	743.906
Length  512, alignment  0, rej len  3:	120.484	933.859	1032.83
Length  512, alignment  3, rej len  3:	119.328	934.562	1030.2
Length  512, alignment  0, rej len  4:	120.562	4582.17	1172.33
Length  512, alignment  4, rej len  4:	119.375	4463.53	1170.03
Length  512, alignment  0, rej len  5:	118.609	1380.27	1445.47
Length  512, alignment  5, rej len  5:	121.781	1385.53	1382.23
Length  512, alignment  0, rej len  6:	121.125	1708.62	1613.8
Length  512, alignment  6, rej len  6:	122.672	1712.03	1613.95
Length  512, alignment  0, rej len  7:	121.344	3438.31	3433.64
Length  512, alignment  7, rej len  7:	122.656	3582.81	3432.95
Length  512, alignment  0, rej len  8:	121	2010.2	2043.27
Length  512, alignment  0, rej len  8:	121.172	2010.88	2042.39
Length  512, alignment  0, rej len  9:	120.672	2584.86	2294.7
Length  512, alignment  1, rej len  9:	121.047	2226.33	2312.17
Length  512, alignment  0, rej len 10:	120.766	2449.53	2476.66
Length  512, alignment  2, rej len 10:	121.891	2764.81	2476.3
Length  512, alignment  0, rej len 11:	121.75	2664.52	2682.58
Length  512, alignment  3, rej len 11:	121.562	2662.45	2682.06
Length  512, alignment  0, rej len 12:	122.797	2892.45	2906.58
Length  512, alignment  4, rej len 12:	121.875	2891.06	2910.88
Length  512, alignment  0, rej len 13:	121.203	3214.92	3113.84
Length  512, alignment  5, rej len 13:	120.75	3088.09	3109.42
Length  512, alignment  0, rej len 14:	122.344	6628.83	6064.28
Length  512, alignment  6, rej len 14:	120.953	6610.45	6125.94
Length  512, alignment  0, rej len 15:	121.906	6931.39	6504.91
Length  512, alignment  7, rej len 15:	121.469	6939.05	6369.52
Length  512, alignment  0, rej len 16:	122.906	7182.42	6767.98
Length  512, alignment  0, rej len 16:	122.969	7206.17	6544.19
Length  512, alignment  0, rej len 17:	122.156	7548.8	6762.89
Length  512, alignment  1, rej len 17:	125.312	7476.7	6767.03
Length  512, alignment  0, rej len 18:	125	4272.11	7244.67
Length  512, alignment  2, rej len 18:	124.906	4166.55	7014.36
Length  512, alignment  0, rej len 19:	122.75	4384.55	4385.72
Length  512, alignment  3, rej len 19:	124.859	4451.66	4379.7
Length  512, alignment  0, rej len 20:	123.672	5182.55	4820.5
Length  512, alignment  4, rej len 20:	124.391	5899.25	4822.28
Length  512, alignment  0, rej len 21:	128.516	6787.83	6713.72
Length  512, alignment  5, rej len 21:	129.5	6913.53	7190.72
Length  512, alignment  0, rej len 22:	130.656	7012.83	6896.02
Length  512, alignment  6, rej len 22:	131.812	6951.5	6895.67
Length  512, alignment  0, rej len 23:	131.5	7149.08	7192.59
Length  512, alignment  7, rej len 23:	129.812	7148.02	7129.81
Length  512, alignment  0, rej len 24:	130.375	7347.7	7743.53
Length  512, alignment  0, rej len 24:	130.203	7342.94	7480.36
Length  512, alignment  0, rej len 25:	131.5	7598.86	7599.28
Length  512, alignment  1, rej len 25:	130.734	7605.09	7549.78
Length  512, alignment  0, rej len 26:	132.109	7937.88	7874.22
Length  512, alignment  2, rej len 26:	131.438	7944.22	8109.36
Length  512, alignment  0, rej len 27:	130.984	8216.77	8082.97
Length  512, alignment  3, rej len 27:	129.422	8212.08	8061.84
Length  512, alignment  0, rej len 28:	132.312	8426.66	8318.17
Length  512, alignment  4, rej len 28:	131.734	8406.52	8312.45
Length  512, alignment  0, rej len 29:	130.312	8840.38	8438.86
Length  512, alignment  5, rej len 29:	132.328	8648.34	8470.8
Length  512, alignment  0, rej len 30:	132.156	8861.14	8729.22
Length  512, alignment  6, rej len 30:	134.641	8838.42	8746.12
Length  512, alignment  0, rej len 31:	132.922	9075.7	9156.62
Length  512, alignment  7, rej len 31:	133.078	9072.17	8957.02
Length   32, alignment  0, rej len  4:	14.3438	212.344	83.7031
Length   32, alignment  1, rej len  4:	13.7812	263.672	87.1875
Length   64, alignment  0, rej len  4:	26.5781	477.406	158.016
Length   64, alignment  2, rej len  4:	26	499.406	156.703
Length  128, alignment  0, rej len  4:	39.9062	1025.47	299.328
Length  128, alignment  3, rej len  4:	39.7656	1023.25	299.344
Length  256, alignment  0, rej len  4:	67.5938	2098.67	586.734
Length  256, alignment  4, rej len  4:	66.1562	2114.23	585.453
Length  512, alignment  0, rej len  4:	120.875	4272	1167.88
Length  512, alignment  5, rej len  4:	121.969	4280.02	1181.12
Length 1024, alignment  0, rej len  4:	223.625	5593.78	2318.53
Length 1024, alignment  6, rej len  4:	222.844	8599.42	2318.97
Length 2048, alignment  0, rej len  4:	429.656	17639.2	4618.69
Length 2048, alignment  7, rej len  4:	429.781	17838.5	4617.67
Length   64, alignment  1, rej len 10:	27.0312	316.375	318.422
Length   64, alignment  2, rej len 10:	28	314.922	320.344
Length   64, alignment  3, rej len 10:	27.2812	315.969	321.266
Length   64, alignment  4, rej len 10:	27.9062	311.469	318
Length   64, alignment  5, rej len 10:	27.3281	314.719	320.641
Length   64, alignment  6, rej len 10:	28.6094	314.578	320.609
Length   64, alignment  7, rej len 10:	27.8906	314.953	321.219
Length    0, alignment  0, rej len  6:	7.73438	1.9375	7.40625
Length    1, alignment  0, rej len  6:	7.85938	13.6719	12.5938
Length    2, alignment  0, rej len  6:	7.14062	9.14062	14.0469
Length    3, alignment  0, rej len  6:	7.60938	20.0625	24.3281
Length    4, alignment  0, rej len  6:	8.15625	22.7031	27.0156
Length    5, alignment  0, rej len  6:	7.64062	21.2812	25.875
Length    6, alignment  0, rej len  6:	8.25	25.9844	30.2031
Length    7, alignment  0, rej len  6:	7.8125	30.2344	33.5781
Length    8, alignment  0, rej len  6:	9.5	32.9219	35.5
Length    9, alignment  0, rej len  6:	8.67188	42.1562	41.1094
Length   10, alignment  0, rej len  6:	8.75	40.2812	42
Length   11, alignment  0, rej len  6:	8.76562	43.8906	44.2969
Length   12, alignment  0, rej len  6:	9.67188	47.625	48.9531
Length   13, alignment  0, rej len  6:	9.28125	48.5469	50.75
Length   14, alignment  0, rej len  6:	9.84375	54.7188	55.5781
Length   15, alignment  0, rej len  6:	9.39062	57.75	56.0469
Length   16, alignment  0, rej len  6:	11.625	60.2656	60.2031
Length   17, alignment  0, rej len  6:	10.7031	70.3594	65.5625
Length   18, alignment  0, rej len  6:	10.3438	68.9062	66.9844
Length   19, alignment  0, rej len  6:	10.3281	69.2031	69.3906
Length   20, alignment  0, rej len  6:	11.3438	76.7188	74.5156
Length   21, alignment  0, rej len  6:	11.1562	77.75	75.4688
Length   22, alignment  0, rej len  6:	12.375	83.5469	78.6094
Length   23, alignment  0, rej len  6:	11.9375	83.6875	82.6562
Length   24, alignment  0, rej len  6:	12.8594	89.7969	87.0312
Length   25, alignment  0, rej len  6:	12.75	91.875	88.7969
Length   26, alignment  0, rej len  6:	12.3594	101.672	94.3438
Length   27, alignment  0, rej len  6:	12.125	112.75	96.6875
Length   28, alignment  0, rej len  6:	13.7969	106.344	96.8125
Length   29, alignment  0, rej len  6:	13.0469	106.141	101.109
Length   30, alignment  0, rej len  6:	13.2969	108.016	104.625
Length   31, alignment  0, rej len  6:	13.3281	114.359	107.969
Length   32, alignment  0, rej len  6:	14.5	110.906	111.609
Length   33, alignment  0, rej len  6:	14	123.422	113.266
Length   34, alignment  0, rej len  6:	14.2344	120.969	117.5
Length   35, alignment  0, rej len  6:	14.2656	121.891	119.25
Length   36, alignment  0, rej len  6:	15.2031	144.281	124.938
Length   37, alignment  0, rej len  6:	14.9062	139.812	126.891
Length   38, alignment  0, rej len  6:	14.9219	143.156	132.5
Length   39, alignment  0, rej len  6:	14.7969	142.719	132.266
Length   40, alignment  0, rej len  6:	15.8281	140.047	135.422
Length   41, alignment  0, rej len  6:	15.25	152.688	140.219
Length   42, alignment  0, rej len  6:	15.5156	148.484	139.938
Length   43, alignment  0, rej len  6:	15.7031	152.547	145.609
Length   44, alignment  0, rej len  6:	16.8125	170.531	150.359
Length   45, alignment  0, rej len  6:	16.8594	156.297	152.75
Length   46, alignment  0, rej len  6:	16.8281	159.266	154.203
Length   47, alignment  0, rej len  6:	16.6875	175.391	155.234
Length   48, alignment  0, rej len  6:	25.6406	170.953	160.594
Length   49, alignment  0, rej len  6:	25.5	182.328	162.734
Length   50, alignment  0, rej len  6:	22.3906	176.047	168.125
Length   51, alignment  0, rej len  6:	22.6094	184.938	169.875
Length   52, alignment  0, rej len  6:	23.8594	183.375	172.312
Length   53, alignment  0, rej len  6:	23.7031	200.688	177.531
Length   54, alignment  0, rej len  6:	24.0469	192	180.859
Length   55, alignment  0, rej len  6:	23.9688	211.562	184.891
Length   56, alignment  0, rej len  6:	24.3594	217.969	187.562
Length   57, alignment  0, rej len  6:	24.5312	217.359	187.703
Length   58, alignment  0, rej len  6:	24.5781	209.406	194.094
Length   59, alignment  0, rej len  6:	24.7656	205.891	194.953
Length   60, alignment  0, rej len  6:	25.7031	208.781	198.016
Length   61, alignment  0, rej len  6:	25.5781	210.562	202.156
Length   62, alignment  0, rej len  6:	25.4375	215.375	204.812
Length   63, alignment  0, rej len  6:	26.2656	218.781	208.625

[-- Attachment #4: bench-strpbrk-base.out --]
[-- Type: text/plain, Size: 9386 bytes --]

                                	strpbrk	simple_strpbrk	stupid_strpbrk
Length  512, alignment  0, rej len  0:	3.32812	571.312	600.641
Length  512, alignment  0, rej len  0:	3.1875	570.938	600.031
Length  512, alignment  0, rej len  1:	167.891	690.969	694.469
Length  512, alignment  1, rej len  1:	167.312	691.812	695
Length  512, alignment  0, rej len  2:	166.953	2624.94	977.734
Length  512, alignment  2, rej len  2:	166.297	2624.2	983.766
Length  512, alignment  0, rej len  3:	165.656	3284.27	1304.61
Length  512, alignment  3, rej len  3:	166.422	3283.77	1293.73
Length  512, alignment  0, rej len  4:	166.328	4501.44	1549.88
Length  512, alignment  4, rej len  4:	166.578	4129.94	1546.94
Length  512, alignment  0, rej len  5:	166.797	4413.64	1822.59
Length  512, alignment  5, rej len  5:	166.562	4415.8	1824.94
Length  512, alignment  0, rej len  6:	166.625	4649.62	4342.05
Length  512, alignment  6, rej len  6:	167.109	4648.95	4232.84
Length  512, alignment  0, rej len  7:	167.625	4883.31	2393.58
Length  512, alignment  7, rej len  7:	168.328	4957.83	2391.19
Length  512, alignment  0, rej len  8:	167.922	5223.75	2693.02
Length  512, alignment  0, rej len  8:	168.109	5224.8	2692.55
Length  512, alignment  0, rej len  9:	168.125	5824.36	2996.23
Length  512, alignment  1, rej len  9:	169.562	5528.22	2956.69
Length  512, alignment  0, rej len 10:	168.547	5881.34	3254.45
Length  512, alignment  2, rej len 10:	168.531	5800.03	3257.47
Length  512, alignment  0, rej len 11:	168.5	6109.39	3536.62
Length  512, alignment  3, rej len 11:	168.938	6212.09	3536.06
Length  512, alignment  0, rej len 12:	171.031	6344.77	3831.66
Length  512, alignment  4, rej len 12:	168.766	6348.02	3838.38
Length  512, alignment  0, rej len 13:	169.422	7073.3	4100.12
Length  512, alignment  5, rej len 13:	169.734	6964.66	4094.66
Length  512, alignment  0, rej len 14:	170.891	7042.03	8111.47
Length  512, alignment  6, rej len 14:	170.406	6970.62	8030.34
Length  512, alignment  0, rej len 15:	170.578	7430.56	8398.62
Length  512, alignment  7, rej len 15:	170.297	7457.28	8589.69
Length  512, alignment  0, rej len 16:	170.375	7736	8732.7
Length  512, alignment  0, rej len 16:	170.656	7745	8648.5
Length  512, alignment  0, rej len 17:	171.578	8165.05	8942.62
Length  512, alignment  1, rej len 17:	171.656	8078.28	8944.56
Length  512, alignment  0, rej len 18:	172.234	8726.38	5738.89
Length  512, alignment  2, rej len 18:	172	8401.2	5742.19
Length  512, alignment  0, rej len 19:	171.891	8711.75	6030.14
Length  512, alignment  3, rej len 19:	173.609	8630.44	6022.58
Length  512, alignment  0, rej len 20:	172.406	8975.27	6304.23
Length  512, alignment  4, rej len 20:	173.703	9073.08	5952.91
Length  512, alignment  0, rej len 21:	164.172	8834.38	8326.31
Length  512, alignment  5, rej len 21:	164.422	8824.62	8554.05
Length  512, alignment  0, rej len 22:	168.719	9045.92	8609.53
Length  512, alignment  6, rej len 22:	169.406	9026.95	8878.55
Length  512, alignment  0, rej len 23:	169.5	9234.98	8901.08
Length  512, alignment  7, rej len 23:	170.125	9249.12	8902.78
Length  512, alignment  0, rej len 24:	170.375	9536.64	9153.67
Length  512, alignment  0, rej len 24:	169.938	9541.92	9380.88
Length  512, alignment  0, rej len 25:	170.953	9994.41	9496.52
Length  512, alignment  1, rej len 25:	170.703	9837.92	9476.8
Length  512, alignment  0, rej len 26:	171.422	10246.9	9702.78
Length  512, alignment  2, rej len 26:	171.641	10228.8	9991.16
Length  512, alignment  0, rej len 27:	172.109	10568.4	10056
Length  512, alignment  3, rej len 27:	172.188	10574.7	10046
Length  512, alignment  0, rej len 28:	172.203	10849.7	10383.8
Length  512, alignment  4, rej len 28:	172.016	11081.6	10351.5
Length  512, alignment  0, rej len 29:	172.812	11052	10604
Length  512, alignment  5, rej len 29:	173.406	11034.5	10628.9
Length  512, alignment  0, rej len 30:	173.375	11371.2	10862.7
Length  512, alignment  6, rej len 30:	191.906	11365.4	10860.3
Length  512, alignment  0, rej len 31:	173.766	11685.4	11125.5
Length  512, alignment  7, rej len 31:	174.109	11810.7	11137.4
Length   32, alignment  0, rej len  4:	16.0625	241.266	105.766
Length   32, alignment  1, rej len  4:	15.8281	239.812	104.141
Length   64, alignment  0, rej len  4:	30.5625	484.672	194.406
Length   64, alignment  2, rej len  4:	30.375	484.375	200.438
Length  128, alignment  0, rej len  4:	48.8438	972.625	381.453
Length  128, alignment  3, rej len  4:	48.7812	972.562	373.75
Length  256, alignment  0, rej len  4:	84.7188	1947.25	986.844
Length  256, alignment  4, rej len  4:	84.7969	1946.8	731.5
Length  512, alignment  0, rej len  4:	156.641	3897.45	1457.31
Length  512, alignment  5, rej len  4:	156.781	3897.23	1462.75
Length 1024, alignment  0, rej len  4:	300.609	7805.02	2889.56
Length 1024, alignment  6, rej len  4:	300.641	7797.88	2894.14
Length 2048, alignment  0, rej len  4:	589.047	15618.7	5761.38
Length 2048, alignment  7, rej len  4:	589.609	15605.9	5780.72
Length   64, alignment  1, rej len 10:	32.8125	732.547	395.75
Length   64, alignment  2, rej len 10:	32.5938	692.703	398.641
Length   64, alignment  3, rej len 10:	32.8281	693.984	405.859
Length   64, alignment  4, rej len 10:	33.375	689.594	394.531
Length   64, alignment  5, rej len 10:	33.1094	692.812	398.75
Length   64, alignment  6, rej len 10:	32.9531	692.531	398.266
Length   64, alignment  7, rej len 10:	32.6562	693.281	399.281
Length    0, alignment  0, rej len  6:	9.03125	3.34375	8.92188
Length    1, alignment  0, rej len  6:	10.1406	13.4062	15.6719
Length    2, alignment  0, rej len  6:	9.95312	18.3906	17.7656
Length    3, alignment  0, rej len  6:	10.875	29.3125	30.5312
Length    4, alignment  0, rej len  6:	10.2969	41.4375	37.1562
Length    5, alignment  0, rej len  6:	11.2812	43.6562	42.6875
Length    6, alignment  0, rej len  6:	11.9062	53.1562	53.3906
Length    7, alignment  0, rej len  6:	11.875	62.4844	61.4219
Length    8, alignment  0, rej len  6:	11.5469	69.3125	67.4219
Length    9, alignment  0, rej len  6:	11.625	82.1094	78.4531
Length   10, alignment  0, rej len  6:	12	86.9062	83.75
Length   11, alignment  0, rej len  6:	12.4531	94.4219	89.9062
Length   12, alignment  0, rej len  6:	11.8281	104.219	99.6406
Length   13, alignment  0, rej len  6:	12.5781	111.047	105.719
Length   14, alignment  0, rej len  6:	13.0781	121.844	115.234
Length   15, alignment  0, rej len  6:	13.9062	129.719	119.359
Length   16, alignment  0, rej len  6:	12.9219	136.844	129.375
Length   17, alignment  0, rej len  6:	13.5781	149.203	140.188
Length   18, alignment  0, rej len  6:	14.8125	154.5	144.438
Length   19, alignment  0, rej len  6:	14.4531	163.938	153.266
Length   20, alignment  0, rej len  6:	13.8125	172.969	161.703
Length   21, alignment  0, rej len  6:	14.2969	180.766	168.016
Length   22, alignment  0, rej len  6:	15.4531	189.062	174.797
Length   23, alignment  0, rej len  6:	15.4531	197.953	185.312
Length   24, alignment  0, rej len  6:	14.4219	228.734	193.531
Length   25, alignment  0, rej len  6:	16.1875	214.5	198.297
Length   26, alignment  0, rej len  6:	15.5625	225.641	211.047
Length   27, alignment  0, rej len  6:	16.1719	234.125	218.172
Length   28, alignment  0, rej len  6:	16.2812	239.734	221.406
Length   29, alignment  0, rej len  6:	16.2344	248.719	231.797
Length   30, alignment  0, rej len  6:	16.5938	256.703	238.016
Length   31, alignment  0, rej len  6:	16.8906	266.641	246.828
Length   32, alignment  0, rej len  6:	16.4688	297.75	254.891
Length   33, alignment  0, rej len  6:	23.1562	282.344	260.656
Length   34, alignment  0, rej len  6:	22.8594	292.672	271.062
Length   35, alignment  0, rej len  6:	23.75	350.906	277.953
Length   36, alignment  0, rej len  6:	23.125	311.062	287.953
Length   37, alignment  0, rej len  6:	23.8438	319.344	294.234
Length   38, alignment  0, rej len  6:	23.8281	327.781	304.609
Length   39, alignment  0, rej len  6:	24.6875	374.922	307.578
Length   40, alignment  0, rej len  6:	25.7031	342.188	315.266
Length   41, alignment  0, rej len  6:	24.9531	353.828	327.312
Length   42, alignment  0, rej len  6:	25.9375	359.312	330.641
Length   43, alignment  0, rej len  6:	25.5156	368.938	340.734
Length   44, alignment  0, rej len  6:	25.625	379.578	350.094
Length   45, alignment  0, rej len  6:	26.4375	387.375	356.625
Length   46, alignment  0, rej len  6:	26.6719	393.781	363.125
Length   47, alignment  0, rej len  6:	27.0156	402.531	369.422
Length   48, alignment  0, rej len  6:	27.4219	410.828	377.641
Length   49, alignment  0, rej len  6:	28.1719	419.203	385.438
Length   50, alignment  0, rej len  6:	27.7812	429.797	395.641
Length   51, alignment  0, rej len  6:	28.4062	436	400.984
Length   52, alignment  0, rej len  6:	28.5781	445.375	408.094
Length   53, alignment  0, rej len  6:	30.0938	456.75	419.672
Length   54, alignment  0, rej len  6:	30.125	464.016	426.25
Length   55, alignment  0, rej len  6:	29.3125	473.688	436.312
Length   56, alignment  0, rej len  6:	29.0781	482.172	443.438
Length   57, alignment  0, rej len  6:	30.2656	490.516	447.234
Length   58, alignment  0, rej len  6:	31.25	499.484	459.125
Length   59, alignment  0, rej len  6:	30.2656	505.391	463.422
Length   60, alignment  0, rej len  6:	30.3281	574.391	470.969
Length   61, alignment  0, rej len  6:	32.5156	522.594	479.766
Length   62, alignment  0, rej len  6:	30.7344	532.453	488.25
Length   63, alignment  0, rej len  6:	31.7969	542.391	496.984

[-- Attachment #5: bench-strpbrk-patch.out --]
[-- Type: text/plain, Size: 9367 bytes --]

                                	strpbrk	simple_strpbrk	stupid_strpbrk
Length  512, alignment  0, rej len  0:	31.9844	432.281	454.812
Length  512, alignment  0, rej len  0:	31.5156	432.484	453.984
Length  512, alignment  0, rej len  1:	31.6875	519.812	525.562
Length  512, alignment  1, rej len  1:	31.0781	519.844	525.125
Length  512, alignment  0, rej len  2:	120.766	1986.5	755.453
Length  512, alignment  2, rej len  2:	119.797	1985.59	753.984
Length  512, alignment  0, rej len  3:	120.297	2485.22	994.922
Length  512, alignment  3, rej len  3:	121.484	2729.67	995.156
Length  512, alignment  0, rej len  4:	121.531	3158.44	1174.41
Length  512, alignment  4, rej len  4:	119.562	3157.81	1168.62
Length  512, alignment  0, rej len  5:	121.031	3529.47	1380.53
Length  512, alignment  5, rej len  5:	122.859	3385.02	1380.77
Length  512, alignment  0, rej len  6:	123.25	3515.09	3208.81
Length  512, alignment  6, rej len  6:	122.141	3514.66	3202.94
Length  512, alignment  0, rej len  7:	121.75	3742.19	1828.61
Length  512, alignment  7, rej len  7:	123.094	3838.39	1826.2
Length  512, alignment  0, rej len  8:	122.312	3948.89	2046.92
Length  512, alignment  0, rej len  8:	120.812	3948.42	2045.77
Length  512, alignment  0, rej len  9:	122.609	4195.7	2243.88
Length  512, alignment  1, rej len  9:	122.5	4194.5	2243.05
Length  512, alignment  0, rej len 10:	122.781	4711.84	2472.77
Length  512, alignment  2, rej len 10:	121.203	4395.06	2475.17
Length  512, alignment  0, rej len 11:	119.016	4635.25	2685.88
Length  512, alignment  3, rej len 11:	122.531	4631.94	2683.97
Length  512, alignment  0, rej len 12:	125.062	5135.8	2920.31
Length  512, alignment  4, rej len 12:	122.25	4823.7	2924.8
Length  512, alignment  0, rej len 13:	122.844	5073.91	3367.11
Length  512, alignment  5, rej len 13:	122.281	5068.56	3421.7
Length  512, alignment  0, rej len 14:	123.625	5284.17	6070.98
Length  512, alignment  6, rej len 14:	123.484	5577.41	6072.08
Length  512, alignment  0, rej len 15:	123	5602.22	6406.64
Length  512, alignment  7, rej len 15:	123.906	5601.11	6331.78
Length  512, alignment  0, rej len 16:	123.578	5851.16	6523.03
Length  512, alignment  0, rej len 16:	122.75	5942.39	6531.67
Length  512, alignment  0, rej len 17:	125.391	6121.16	6765.05
Length  512, alignment  1, rej len 17:	125.688	6189.09	6963.7
Length  512, alignment  0, rej len 18:	125.406	6345.72	4162.02
Length  512, alignment  2, rej len 18:	125.422	6347.86	4164.7
Length  512, alignment  0, rej len 19:	123.562	6661.36	4386.5
Length  512, alignment  3, rej len 19:	125.75	6558.55	4379.56
Length  512, alignment  0, rej len 20:	124.609	6758.45	4670.8
Length  512, alignment  4, rej len 20:	124.047	6759.48	4596.5
Length  512, alignment  0, rej len 21:	128.031	7267.75	6692.92
Length  512, alignment  5, rej len 21:	129.219	7142.05	6690.05
Length  512, alignment  0, rej len 22:	131.609	7220.78	6899.05
Length  512, alignment  6, rej len 22:	132.297	7284.2	6896.22
Length  512, alignment  0, rej len 23:	131.25	7393.2	7110.58
Length  512, alignment  7, rej len 23:	130.359	7397.89	7375.52
Length  512, alignment  0, rej len 24:	131.047	7634.12	7359.72
Length  512, alignment  0, rej len 24:	130.594	7626.22	7406.2
Length  512, alignment  0, rej len 25:	131.766	7992.25	7571.67
Length  512, alignment  1, rej len 25:	132.031	7882.83	7588.42
Length  512, alignment  0, rej len 26:	132.672	8196.73	8012.22
Length  512, alignment  2, rej len 26:	131.922	8215.14	7813.8
Length  512, alignment  0, rej len 27:	131.234	8475.47	8040.23
Length  512, alignment  3, rej len 27:	130.406	8478.97	8034.33
Length  512, alignment  0, rej len 28:	131.125	8690.69	8255.8
Length  512, alignment  4, rej len 28:	132.906	8914.58	8246.28
Length  512, alignment  0, rej len 29:	133.812	8823.16	8474.53
Length  512, alignment  5, rej len 29:	133.25	8825.83	8461.36
Length  512, alignment  0, rej len 30:	133.609	9107.45	8705.95
Length  512, alignment  6, rej len 30:	136.438	9105.73	8888.67
Length  512, alignment  0, rej len 31:	135	9368	8913.77
Length  512, alignment  7, rej len 31:	133.938	9348.05	8912.02
Length   32, alignment  0, rej len  4:	14.8594	194.516	83.1094
Length   32, alignment  1, rej len  4:	14.625	194.281	83.1875
Length   64, alignment  0, rej len  4:	27.4688	391.953	155.547
Length   64, alignment  2, rej len  4:	26.8281	405.516	155.141
Length  128, alignment  0, rej len  4:	40.8594	787.266	304.891
Length  128, alignment  3, rej len  4:	40.9375	787.562	299.016
Length  256, alignment  0, rej len  4:	68.3594	1576.23	586.25
Length  256, alignment  4, rej len  4:	67.3594	1576.14	585.359
Length  512, alignment  0, rej len  4:	121.391	3157.28	1165.92
Length  512, alignment  5, rej len  4:	122.453	3157.03	1172.22
Length 1024, alignment  0, rej len  4:	224.203	6326.06	2317.02
Length 1024, alignment  6, rej len  4:	225.016	6536.86	2317.48
Length 2048, alignment  0, rej len  4:	430.906	12641.5	4617.19
Length 2048, alignment  7, rej len  4:	428.438	12663.4	4617.78
Length   64, alignment  1, rej len 10:	28.7344	580.859	319.016
Length   64, alignment  2, rej len 10:	29.5469	555.625	321.125
Length   64, alignment  3, rej len 10:	28.3906	557.547	321.797
Length   64, alignment  4, rej len 10:	29.2031	552.922	318.031
Length   64, alignment  5, rej len 10:	28.7344	554.969	320.547
Length   64, alignment  6, rej len 10:	29.5781	554.891	321.188
Length   64, alignment  7, rej len 10:	28.9062	555.516	321.562
Length    0, alignment  0, rej len  6:	8.10938	2.53125	7.20312
Length    1, alignment  0, rej len  6:	8.73438	10.5781	12.6094
Length    2, alignment  0, rej len  6:	8.15625	14.875	13.7344
Length    3, alignment  0, rej len  6:	8.45312	23.3438	24.6406
Length    4, alignment  0, rej len  6:	9.09375	31.5	30.1875
Length    5, alignment  0, rej len  6:	8.85938	34.6875	33.8438
Length    6, alignment  0, rej len  6:	9.39062	42.4844	42.7188
Length    7, alignment  0, rej len  6:	9.14062	49.8906	49.0469
Length    8, alignment  0, rej len  6:	10.5	55.4219	53.7031
Length    9, alignment  0, rej len  6:	9.98438	65.2031	62.5469
Length   10, alignment  0, rej len  6:	10.1719	69.6094	67.0312
Length   11, alignment  0, rej len  6:	10.0781	75.5781	71.5625
Length   12, alignment  0, rej len  6:	10.9531	83.125	79.4375
Length   13, alignment  0, rej len  6:	10.4375	89	84.3438
Length   14, alignment  0, rej len  6:	10.7344	97.625	92.3906
Length   15, alignment  0, rej len  6:	11.2656	103.688	96.1406
Length   16, alignment  0, rej len  6:	11.6875	109.953	103.531
Length   17, alignment  0, rej len  6:	11.2969	119.453	111.719
Length   18, alignment  0, rej len  6:	11.7188	123.938	115.734
Length   19, alignment  0, rej len  6:	11.6562	131.594	122.547
Length   20, alignment  0, rej len  6:	12.5469	138.922	129.375
Length   21, alignment  0, rej len  6:	12.1562	144.938	134.422
Length   22, alignment  0, rej len  6:	12.8125	151.375	140.359
Length   23, alignment  0, rej len  6:	12.4375	158.672	148.406
Length   24, alignment  0, rej len  6:	13.4531	177.922	155
Length   25, alignment  0, rej len  6:	13.2812	172.062	158.797
Length   26, alignment  0, rej len  6:	13.6406	181	168.391
Length   27, alignment  0, rej len  6:	13.125	187.625	173.969
Length   28, alignment  0, rej len  6:	14.6094	192.375	177.094
Length   29, alignment  0, rej len  6:	14	199.078	185.203
Length   30, alignment  0, rej len  6:	14.2344	205.812	190.641
Length   31, alignment  0, rej len  6:	14.2188	213.453	197.875
Length   32, alignment  0, rej len  6:	15.375	236.438	204.094
Length   33, alignment  0, rej len  6:	14.9219	226.344	208.828
Length   34, alignment  0, rej len  6:	15	234.609	217.297
Length   35, alignment  0, rej len  6:	14.9844	259.656	222.547
Length   36, alignment  0, rej len  6:	15.9062	249.375	230.312
Length   37, alignment  0, rej len  6:	15.6719	256.062	235.797
Length   38, alignment  0, rej len  6:	15.8125	262.75	243.625
Length   39, alignment  0, rej len  6:	15.875	289.109	246.469
Length   40, alignment  0, rej len  6:	16.8281	274.656	252.828
Length   41, alignment  0, rej len  6:	16.5156	283.562	261.953
Length   42, alignment  0, rej len  6:	16.9219	287.734	264.625
Length   43, alignment  0, rej len  6:	16.6406	295.953	272.594
Length   44, alignment  0, rej len  6:	17.7812	304.219	280.531
Length   45, alignment  0, rej len  6:	17.5312	310.344	285.938
Length   46, alignment  0, rej len  6:	17.7188	315.438	291.109
Length   47, alignment  0, rej len  6:	17.7188	322.578	295.5
Length   48, alignment  0, rej len  6:	26.5	329.172	302.641
Length   49, alignment  0, rej len  6:	26.0312	335.25	308.797
Length   50, alignment  0, rej len  6:	23.5781	344.375	317.016
Length   51, alignment  0, rej len  6:	24.5312	349.5	321.422
Length   52, alignment  0, rej len  6:	24.6875	356.734	327.391
Length   53, alignment  0, rej len  6:	24.8594	366.156	335.828
Length   54, alignment  0, rej len  6:	25.2031	371.656	341.328
Length   55, alignment  0, rej len  6:	24.9688	379.859	348.938
Length   56, alignment  0, rej len  6:	25.0156	386.562	355.047
Length   57, alignment  0, rej len  6:	25.75	391.844	358.328
Length   58, alignment  0, rej len  6:	25.3125	400.359	564.906
Length   59, alignment  0, rej len  6:	25.875	404.531	371.531
Length   60, alignment  0, rej len  6:	27.0938	446.172	377.5
Length   61, alignment  0, rej len  6:	26.5312	418.562	384.656
Length   62, alignment  0, rej len  6:	26.2969	426.5	391.234
Length   63, alignment  0, rej len  6:	27.1094	434.578	398.344

[-- Attachment #6: bench-strspn-base.out --]
[-- Type: text/plain, Size: 9251 bytes --]

                                	strspn	simple_strspn	stupid_strspn
Length  512, alignment  0, acc len  1:	158.688	363.656	570.016
Length  512, alignment  1, acc len  1:	157.969	364.578	567.641
Length  512, alignment  0, acc len  2:	157.078	1525.09	1415.5
Length  512, alignment  2, acc len  2:	157.891	1393.67	1466.19
Length  512, alignment  0, acc len  3:	156.406	1630.22	1327.44
Length  512, alignment  3, acc len  3:	156.922	1659.16	1306.72
Length  512, alignment  0, acc len  4:	157.016	1839.75	1545.73
Length  512, alignment  4, acc len  4:	156.203	1935	1544.92
Length  512, alignment  0, acc len  5:	156.562	2384.78	1941.69
Length  512, alignment  5, acc len  5:	156.016	2186.39	1727.41
Length  512, alignment  0, acc len  6:	156.375	2650.61	1977.38
Length  512, alignment  6, acc len  6:	157.016	2589.7	1986.8
Length  512, alignment  0, acc len  7:	157.375	2965.27	2375.7
Length  512, alignment  7, acc len  7:	157	2958.78	2298.7
Length  512, alignment  0, acc len  8:	158.422	3107.53	2646.33
Length  512, alignment  0, acc len  8:	157.141	3056.38	2774.64
Length  512, alignment  0, acc len  9:	157.828	3315.69	2949.06
Length  512, alignment  1, acc len  9:	158.438	3374.55	2936.45
Length  512, alignment  0, acc len 10:	158	3276.64	2887.52
Length  512, alignment  2, acc len 10:	158.031	3615.41	3116.66
Length  512, alignment  0, acc len 11:	158.734	3980.84	3266.09
Length  512, alignment  3, acc len 11:	158.812	3861.12	3615.14
Length  512, alignment  0, acc len 12:	158.078	3921.94	3784.84
Length  512, alignment  4, acc len 12:	158.703	3851.88	3790.64
Length  512, alignment  0, acc len 13:	160.609	4153.08	3935.19
Length  512, alignment  5, acc len 13:	160.031	3926.42	3760.8
Length  512, alignment  0, acc len 14:	159.828	4169.02	4101.16
Length  512, alignment  6, acc len 14:	160.531	4281.33	4152.53
Length  512, alignment  0, acc len 15:	159.094	4441.59	4387.81
Length  512, alignment  7, acc len 15:	159.844	4827.41	4536.67
Length  512, alignment  0, acc len 16:	159.469	4560.52	4546.67
Length  512, alignment  0, acc len 16:	159.719	4642.5	4571.16
Length  512, alignment  0, acc len 17:	160.672	4707.52	4658.72
Length  512, alignment  1, acc len 17:	160.141	4678.16	4700.55
Length  512, alignment  0, acc len 18:	159.984	4833.14	4723.58
Length  512, alignment  2, acc len 18:	160.234	4890.98	4894.98
Length  512, alignment  0, acc len 19:	162.688	5649.73	4593.56
Length  512, alignment  3, acc len 19:	162.109	5389.47	5034.64
Length  512, alignment  0, acc len 20:	163.5	5059.75	5031.05
Length  512, alignment  4, acc len 20:	162.016	5165.59	5166.3
Length  512, alignment  0, acc len 21:	163	5297.09	5265.48
Length  512, alignment  5, acc len 21:	162.391	5248.69	5244.28
Length  512, alignment  0, acc len 22:	166.297	5446.33	5466.03
Length  512, alignment  6, acc len 22:	167.703	5261.39	5336.91
Length  512, alignment  0, acc len 23:	167.766	5936.36	5705.61
Length  512, alignment  7, acc len 23:	168.641	5516.36	5624.83
Length  512, alignment  0, acc len 24:	167.828	5425.11	5389.17
Length  512, alignment  0, acc len 24:	168.641	5785.23	5837.66
Length  512, alignment  0, acc len 25:	168.828	5565.75	5523.59
Length  512, alignment  1, acc len 25:	168.703	5539.2	5548.2
Length  512, alignment  0, acc len 26:	169.125	6035.78	6356.83
Length  512, alignment  2, acc len 26:	169.406	5922.55	5905.88
Length  512, alignment  0, acc len 27:	169.438	5842.27	5894.55
Length  512, alignment  3, acc len 27:	171.031	5784.33	5711.06
Length  512, alignment  0, acc len 28:	170.453	6288.53	6297.58
Length  512, alignment  4, acc len 28:	168.953	6337.27	6421.39
Length  512, alignment  0, acc len 29:	169.984	5858.84	5824.91
Length  512, alignment  5, acc len 29:	171.141	6401.27	6195.45
Length  512, alignment  0, acc len 30:	171.375	6594.3	6534.14
Length  512, alignment  6, acc len 30:	171.688	6094.19	6089.11
Length  512, alignment  0, acc len 31:	172.328	6575.41	6606.22
Length  512, alignment  7, acc len 31:	172.703	6720.2	6789.62
Length   32, alignment  0, acc len  4:	23.2969	74.5	67.6094
Length   32, alignment  1, acc len  4:	23.6875	74.6406	63.0938
Length   64, alignment  0, acc len  4:	32.2656	155.828	117.234
Length   64, alignment  2, acc len  4:	32.6406	190.516	141.516
Length  128, alignment  0, acc len  4:	50.2031	356.578	304.188
Length  128, alignment  3, acc len  4:	50.6094	330.891	267.141
Length  256, alignment  0, acc len  4:	85.4062	839.469	656.359
Length  256, alignment  4, acc len  4:	85.4062	877.984	686.641
Length  512, alignment  0, acc len  4:	156.141	1936.41	1530.06
Length  512, alignment  5, acc len  4:	157.562	2170.42	1590.73
Length 1024, alignment  0, acc len  4:	297.078	4232.92	3818.31
Length 1024, alignment  6, acc len  4:	297.422	4251.73	3745.47
Length 2048, alignment  0, acc len  4:	579.547	9231.22	8625.72
Length 2048, alignment  7, acc len  4:	579.953	9336.56	8436.3
Length   64, alignment  1, acc len 10:	35.0156	368.609	276.219
Length   64, alignment  2, acc len 10:	35.7812	348.922	257.062
Length   64, alignment  3, acc len 10:	35.1875	425.312	320.812
Length   64, alignment  4, acc len 10:	35.1406	378.812	315.875
Length   64, alignment  5, acc len 10:	55.9219	397.266	293.875
Length   64, alignment  6, acc len 10:	34.2188	444.625	355.578
Length   64, alignment  7, acc len 10:	34.5156	321.359	263.234
Length    0, alignment  0, acc len  6:	9.79688	5.5	13.9531
Length    1, alignment  0, acc len  6:	10.2031	8.28125	13.4844
Length    2, alignment  0, acc len  6:	10.8438	11.1875	16.3438
Length    3, alignment  0, acc len  6:	11.0469	14.875	19.7969
Length    4, alignment  0, acc len  6:	11.8594	18.7812	22.0781
Length    5, alignment  0, acc len  6:	11.25	20.6875	23.25
Length    6, alignment  0, acc len  6:	11.4219	20.4531	23.9375
Length    7, alignment  0, acc len  6:	11.3125	25.1562	23.8906
Length    8, alignment  0, acc len  6:	12.4219	38.875	40.1562
Length    9, alignment  0, acc len  6:	13	35.3438	34.125
Length   10, alignment  0, acc len  6:	13.6094	37.8594	36.3438
Length   11, alignment  0, acc len  6:	12.6875	41.9062	37.75
Length   12, alignment  0, acc len  6:	13.0625	44.6094	43.2344
Length   13, alignment  0, acc len  6:	14.2188	55.5	54.6094
Length   14, alignment  0, acc len  6:	13.5469	46.1406	43.7344
Length   15, alignment  0, acc len  6:	13.5781	53.8906	49.3594
Length   16, alignment  0, acc len  6:	15.0625	52.7344	46.125
Length   17, alignment  0, acc len  6:	15.9844	54.9531	53.8594
Length   18, alignment  0, acc len  6:	15.4844	80.2031	59.1562
Length   19, alignment  0, acc len  6:	15.5312	65.8125	54.3125
Length   20, alignment  0, acc len  6:	16.0156	74.4688	58.9688
Length   21, alignment  0, acc len  6:	16.0625	70.5156	61.1719
Length   22, alignment  0, acc len  6:	16.5625	70.0625	57.6094
Length   23, alignment  0, acc len  6:	16.625	65.7969	59.6719
Length   24, alignment  0, acc len  6:	25.5156	78.1719	65.75
Length   25, alignment  0, acc len  6:	22.5938	87.5938	67.6562
Length   26, alignment  0, acc len  6:	23.0156	101.25	80.6562
Length   27, alignment  0, acc len  6:	22.4531	118.109	84.7344
Length   28, alignment  0, acc len  6:	23.7344	100.031	89.4688
Length   29, alignment  0, acc len  6:	23.0156	99.9531	80.7188
Length   30, alignment  0, acc len  6:	23.375	112.859	88.2656
Length   31, alignment  0, acc len  6:	23.3906	135.25	92.4062
Length   32, alignment  0, acc len  6:	24.2031	115.656	90.3438
Length   33, alignment  0, acc len  6:	24.5156	113.328	88.1875
Length   34, alignment  0, acc len  6:	24.3594	108	90.8906
Length   35, alignment  0, acc len  6:	24.8594	130.875	100.406
Length   36, alignment  0, acc len  6:	25.1875	116.094	86.4688
Length   37, alignment  0, acc len  6:	25.4219	155.484	110.641
Length   38, alignment  0, acc len  6:	25.5	143.281	92.8281
Length   39, alignment  0, acc len  6:	25.5938	119.109	96.3281
Length   40, alignment  0, acc len  6:	26.1406	136	99.3125
Length   41, alignment  0, acc len  6:	27.3594	167.25	110.984
Length   42, alignment  0, acc len  6:	27.7656	142.281	132.328
Length   43, alignment  0, acc len  6:	27.1406	142.406	116.938
Length   44, alignment  0, acc len  6:	28.0625	158.891	126.141
Length   45, alignment  0, acc len  6:	28.2344	130.797	109.531
Length   46, alignment  0, acc len  6:	28.4531	151.141	132.047
Length   47, alignment  0, acc len  6:	28.3906	157.109	125.422
Length   48, alignment  0, acc len  6:	28.5469	180.125	130.172
Length   49, alignment  0, acc len  6:	29.9531	162.25	133.75
Length   50, alignment  0, acc len  6:	28.7812	188.016	144.891
Length   51, alignment  0, acc len  6:	29.4531	187.828	134.422
Length   52, alignment  0, acc len  6:	29.9844	196.031	134.516
Length   53, alignment  0, acc len  6:	30.6875	234.094	170.344
Length   54, alignment  0, acc len  6:	30.3125	174.641	145.312
Length   55, alignment  0, acc len  6:	29.9219	181.969	144.5
Length   56, alignment  0, acc len  6:	31.1562	214.297	150.641
Length   57, alignment  0, acc len  6:	31.6094	172.156	136.844
Length   58, alignment  0, acc len  6:	31.9062	214.953	175.203
Length   59, alignment  0, acc len  6:	31.875	222.125	165.078
Length   60, alignment  0, acc len  6:	32.4219	227.766	154.016
Length   61, alignment  0, acc len  6:	32.7969	230.016	168.328
Length   62, alignment  0, acc len  6:	32.8438	209.641	168.281
Length   63, alignment  0, acc len  6:	32.0938	205.359	159.859

[-- Attachment #7: bench-strspn-patch.out --]
[-- Type: text/plain, Size: 9239 bytes --]

                                	strspn	simple_strspn	stupid_strspn
Length  512, alignment  0, acc len  1:	220	289.984	528.609
Length  512, alignment  1, acc len  1:	268.688	289.594	527.844
Length  512, alignment  0, acc len  2:	111.938	1022.72	1151.69
Length  512, alignment  2, acc len  2:	114.594	1011.97	1211.52
Length  512, alignment  0, acc len  3:	111.188	1202.47	1044.94
Length  512, alignment  3, acc len  3:	112.312	1237	1048.23
Length  512, alignment  0, acc len  4:	113	1481.73	1192.55
Length  512, alignment  4, acc len  4:	112.5	1482.81	1212.41
Length  512, alignment  0, acc len  5:	111.719	1835.44	1557.42
Length  512, alignment  5, acc len  5:	111.438	1672.19	1393.5
Length  512, alignment  0, acc len  6:	111.078	1968.72	1613.12
Length  512, alignment  6, acc len  6:	111.75	2186.75	1639.25
Length  512, alignment  0, acc len  7:	112.969	2342.31	1973.11
Length  512, alignment  7, acc len  7:	113.266	2302.25	1941.7
Length  512, alignment  0, acc len  8:	111.438	2496.97	2179.03
Length  512, alignment  0, acc len  8:	112.453	2408.95	2193.41
Length  512, alignment  0, acc len  9:	112.719	2606.36	2415.39
Length  512, alignment  1, acc len  9:	112.609	2651.72	2363.64
Length  512, alignment  0, acc len 10:	113.406	2573.05	2355.75
Length  512, alignment  2, acc len 10:	113.859	2834.88	2640.03
Length  512, alignment  0, acc len 11:	112.859	2829.08	2676.97
Length  512, alignment  3, acc len 11:	113.812	3067.55	2920.31
Length  512, alignment  0, acc len 12:	113.656	3103.72	3065.81
Length  512, alignment  4, acc len 12:	113.547	3117.33	3070.61
Length  512, alignment  0, acc len 13:	113.891	3200.64	3469.22
Length  512, alignment  5, acc len 13:	114.047	3343.5	3126.77
Length  512, alignment  0, acc len 14:	114.312	3300.22	3372.05
Length  512, alignment  6, acc len 14:	114.359	3380.97	3350.34
Length  512, alignment  0, acc len 15:	113.828	3526.45	3604.19
Length  512, alignment  7, acc len 15:	114.438	3696.12	3694.73
Length  512, alignment  0, acc len 16:	113.953	3616.84	3716.67
Length  512, alignment  0, acc len 16:	113.5	3651.16	3740.81
Length  512, alignment  0, acc len 17:	115.812	3738.92	3792.25
Length  512, alignment  1, acc len 17:	113.375	4153.62	3857.61
Length  512, alignment  0, acc len 18:	115.641	3772.41	3869.94
Length  512, alignment  2, acc len 18:	115.016	4120.28	4017.11
Length  512, alignment  0, acc len 19:	115.781	3634.44	3808.55
Length  512, alignment  3, acc len 19:	164.297	3993.97	4186.23
Length  512, alignment  0, acc len 20:	116.812	4010.36	4005.97
Length  512, alignment  4, acc len 20:	115.125	4138.95	4162.77
Length  512, alignment  0, acc len 21:	116.766	4188.08	4365.45
Length  512, alignment  5, acc len 21:	115.281	4212.64	4284.53
Length  512, alignment  0, acc len 22:	118.406	4364.14	4415.62
Length  512, alignment  6, acc len 22:	119.281	4209.67	4345.05
Length  512, alignment  0, acc len 23:	118.094	4969.12	4674.56
Length  512, alignment  7, acc len 23:	119.594	4429.22	4529.36
Length  512, alignment  0, acc len 24:	118.859	4349.09	4409.11
Length  512, alignment  0, acc len 24:	119.969	4726.88	4761.03
Length  512, alignment  0, acc len 25:	119.5	4384.31	4521.44
Length  512, alignment  1, acc len 25:	120.812	4440.06	4532.08
Length  512, alignment  0, acc len 26:	121.109	4852.56	5039.61
Length  512, alignment  2, acc len 26:	120.031	4759.64	4832.75
Length  512, alignment  0, acc len 27:	120.141	4933.09	4821.77
Length  512, alignment  3, acc len 27:	121.234	4590.89	4733.67
Length  512, alignment  0, acc len 28:	120.953	5067.5	5158.39
Length  512, alignment  4, acc len 28:	121.094	5121.14	5192.17
Length  512, alignment  0, acc len 29:	121.156	4690.94	4821.72
Length  512, alignment  5, acc len 29:	122.016	4955.44	5059.11
Length  512, alignment  0, acc len 30:	122.516	5287.14	5327
Length  512, alignment  6, acc len 30:	124.531	4895.97	5012.22
Length  512, alignment  0, acc len 31:	122.516	5300.39	5369.83
Length  512, alignment  7, acc len 31:	123.656	5421.92	5523.91
Length   32, alignment  0, acc len  4:	13.9375	63.6406	54.5938
Length   32, alignment  1, acc len  4:	14.4375	55.6094	56.5
Length   64, alignment  0, acc len  4:	23.4219	125.094	95.2656
Length   64, alignment  2, acc len  4:	24.1094	146.766	114.203
Length  128, alignment  0, acc len  4:	36	281.156	218.281
Length  128, alignment  3, acc len  4:	36.5625	250.266	201.594
Length  256, alignment  0, acc len  4:	61.0938	629.281	507.031
Length  256, alignment  4, acc len  4:	62.0469	666.344	536.609
Length  512, alignment  0, acc len  4:	111.703	1482.39	1232.56
Length  512, alignment  5, acc len  4:	113.031	1471.25	1244.05
Length 1024, alignment  0, acc len  4:	214.156	3133.48	3026.95
Length 1024, alignment  6, acc len  4:	212.766	3194.31	2937.48
Length 2048, alignment  0, acc len  4:	415.719	6929.59	6891.05
Length 2048, alignment  7, acc len  4:	416.062	6993.75	6735.89
Length   64, alignment  1, acc len 10:	25.8438	497.516	228.781
Length   64, alignment  2, acc len 10:	25.6094	272.219	211.734
Length   64, alignment  3, acc len 10:	26.2188	338.219	255.578
Length   64, alignment  4, acc len 10:	25.5312	293.859	255.266
Length   64, alignment  5, acc len 10:	26.0625	309.969	241.094
Length   64, alignment  6, acc len 10:	25.375	351.219	285.844
Length   64, alignment  7, acc len 10:	25.9375	249.875	215.234
Length    0, alignment  0, acc len  6:	6.10938	4.625	10.4688
Length    1, alignment  0, acc len  6:	6.96875	6.25	10.7344
Length    2, alignment  0, acc len  6:	7.5625	8.1875	13.2188
Length    3, alignment  0, acc len  6:	7.03125	12.5156	15.7344
Length    4, alignment  0, acc len  6:	7.6875	15.1875	17.1406
Length    5, alignment  0, acc len  6:	7.51562	17.1406	18.7812
Length    6, alignment  0, acc len  6:	7.5	16.1562	20.0625
Length    7, alignment  0, acc len  6:	7.6875	21.1406	19.5469
Length    8, alignment  0, acc len  6:	8.40625	30.6875	38.6094
Length    9, alignment  0, acc len  6:	8.21875	28.9219	28.3281
Length   10, alignment  0, acc len  6:	9.3125	29.875	29.375
Length   11, alignment  0, acc len  6:	9.125	36.4844	31.6719
Length   12, alignment  0, acc len  6:	8.84375	36	32.5625
Length   13, alignment  0, acc len  6:	9.21875	43.125	37.9062
Length   14, alignment  0, acc len  6:	9.1875	35.8906	36.3594
Length   15, alignment  0, acc len  6:	9.54688	41	37.2031
Length   16, alignment  0, acc len  6:	10.0938	42.8125	38.2969
Length   17, alignment  0, acc len  6:	10.5625	45.2969	39.7188
Length   18, alignment  0, acc len  6:	10.7344	62.7031	47.3594
Length   19, alignment  0, acc len  6:	11.1406	51.9219	46.0312
Length   20, alignment  0, acc len  6:	11.2656	59.5625	46.9219
Length   21, alignment  0, acc len  6:	11.4062	55.8594	49.9219
Length   22, alignment  0, acc len  6:	11.5938	52.7188	45.8594
Length   23, alignment  0, acc len  6:	11.4688	54.75	50.5469
Length   24, alignment  0, acc len  6:	12.1562	62.25	52.4531
Length   25, alignment  0, acc len  6:	12	68.0156	54.1875
Length   26, alignment  0, acc len  6:	12.2656	82.6406	66.4688
Length   27, alignment  0, acc len  6:	12.8125	90.2656	66.0312
Length   28, alignment  0, acc len  6:	12.9062	78.8594	72.4062
Length   29, alignment  0, acc len  6:	12.9688	83.9375	71.0938
Length   30, alignment  0, acc len  6:	13.3594	85.1406	73.8438
Length   31, alignment  0, acc len  6:	13.0156	103.453	75.75
Length   32, alignment  0, acc len  6:	13.8125	89.0938	74.375
Length   33, alignment  0, acc len  6:	13.6719	84	71.3594
Length   34, alignment  0, acc len  6:	13.9531	86.3281	72.4062
Length   35, alignment  0, acc len  6:	14.2812	103.281	85.5625
Length   36, alignment  0, acc len  6:	18.2812	106.953	74.2188
Length   37, alignment  0, acc len  6:	18.5625	122.781	90.9844
Length   38, alignment  0, acc len  6:	18.6875	110.125	79.6406
Length   39, alignment  0, acc len  6:	18.7656	94.5156	80.5625
Length   40, alignment  0, acc len  6:	18.9219	105.875	82.3594
Length   41, alignment  0, acc len  6:	19.2656	136.25	93.8594
Length   42, alignment  0, acc len  6:	19.5781	116.047	93.6875
Length   43, alignment  0, acc len  6:	19.625	117.359	99.7656
Length   44, alignment  0, acc len  6:	19.5781	126.562	96.3281
Length   45, alignment  0, acc len  6:	19.8281	108.828	92.5156
Length   46, alignment  0, acc len  6:	20.4531	123.656	109.047
Length   47, alignment  0, acc len  6:	20.4844	122.047	96.375
Length   48, alignment  0, acc len  6:	20.5156	140.359	105.641
Length   49, alignment  0, acc len  6:	20.7656	124.969	109.172
Length   50, alignment  0, acc len  6:	20.9062	145.344	112.75
Length   51, alignment  0, acc len  6:	21.1406	147.031	110.766
Length   52, alignment  0, acc len  6:	21.2812	152.125	114.344
Length   53, alignment  0, acc len  6:	21.5	183.828	139.953
Length   54, alignment  0, acc len  6:	21.9844	142.391	117.031
Length   55, alignment  0, acc len  6:	22.0156	142.906	117.156
Length   56, alignment  0, acc len  6:	22.0938	163.609	118.438
Length   57, alignment  0, acc len  6:	22.2656	134.391	122.031
Length   58, alignment  0, acc len  6:	22.6562	171.625	141.891
Length   59, alignment  0, acc len  6:	22.7344	172.531	135.797
Length   60, alignment  0, acc len  6:	22.9219	172.938	130.5
Length   61, alignment  0, acc len  6:	23.0781	179.766	127.906
Length   62, alignment  0, acc len  6:	23.1562	167.531	135.203
Length   63, alignment  0, acc len  6:	23.9531	158.656	135.547

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation
  2016-03-30 19:57         ` Adhemerval Zanella
@ 2016-03-30 21:45           ` Tulio Magno Quites Machado Filho
  0 siblings, 0 replies; 22+ messages in thread
From: Tulio Magno Quites Machado Filho @ 2016-03-30 21:45 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha

Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> On 30-03-2016 15:14, Tulio Magno Quites Machado Filho wrote:
>> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>> 
>>> On 30-03-2016 10:14, Tulio Magno Quites Machado Filho wrote:
>>>> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>>>>
>>>>> 	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
>>>>> 	* sysdeps/powerpc/powerpc64/strpbrk.S: Likewise.
>>>>
>>>> But the removal of these 2 brings some serious performance degradation.
>>>> I think we should keep them for now.
>>>
>>> Do you mean regression on POWER7 only or for POWER8 as well? 
>> 
>> Both.
>> 
>>> And what do you mean by 'serious'?
>> 
>> The new strcspn C implementation spent 8x the time of the powerpc64 assembly.
>> While strpbrk spent ~12x the time.
>> Using the benchtests.
>> 
>
> This is definitely not what I am seeing in the environment I am using
> (powerpc64le, POWER8E (raw), 3.5GHz, --with-cpu=power7, CFLAGS=-O3,
> gcc 4.8.4).
> I am sending you the benchtests data in attachment.

I'm sorry for the unnecessary noise.
I found the problem here between the chair and the keyboard.  :-D

The whole patch LGTM now.

Thanks!

-- 
Tulio Magno

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] Improve generic strcspn performance
  2016-03-28 15:20 ` [PATCH 1/4] Improve generic strcspn performance Adhemerval Zanella
  2016-03-29 13:02   ` [PATCH 2/4] Improve generic strspn performance Wilco Dijkstra
  2016-03-30 17:47   ` [PATCH 1/4] Improve generic strcspn performance Richard Henderson
@ 2016-03-31 17:00   ` Richard Henderson
  2016-04-01 20:44     ` Roland McGrath
  2 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2016-03-31 17:00 UTC (permalink / raw)
  To: Adhemerval Zanella, libc-alpha; +Cc: Wilco Dijkstra

On 03/28/2016 08:19 AM, Adhemerval Zanella wrote:
> +  s = (unsigned char *) ((size_t)s & ~3);

Nit: s/size_t/uintptr_t/.

It's the same type for all supported targets,
but the spelling says what you mean.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] Improve generic strcspn performance
  2016-03-31 17:00   ` Richard Henderson
@ 2016-04-01 20:44     ` Roland McGrath
  0 siblings, 0 replies; 22+ messages in thread
From: Roland McGrath @ 2016-04-01 20:44 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Adhemerval Zanella, libc-alpha, Wilco Dijkstra

> On 03/28/2016 08:19 AM, Adhemerval Zanella wrote:
> > +  s = (unsigned char *) ((size_t)s & ~3);
> 
> Nit: s/size_t/uintptr_t/.
> 
> It's the same type for all supported targets,
> but the spelling says what you mean.

libc-internal.h has PTR_ALIGN_DOWN for this.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/4] Improve generic strspn performance
  2016-03-31 14:01 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
@ 2016-03-31 14:01 ` Adhemerval Zanella
  0 siblings, 0 replies; 22+ messages in thread
From: Adhemerval Zanella @ 2016-03-31 14:01 UTC (permalink / raw)
  To: libc-alpha

As with strcspn, this patch improves strspn performance using a much
faster algorithm.  It first constructs a 256-entry table based on
the accept string and then uses it as a lookup table for the
input string.  As with the strcspn optimization, it is generally at least
10 times faster than the existing implementation on bench-strspn
on a few AArch64 implementations.

Also the string/bits/string2.h inlines no longer make sense, as the current
implementation already implements most of the optimizations.

Tested on x86_64, i686, and aarch64.

	* string/strspn.c (strspn): Rewrite function.
	* string/bits/string2.h (strspn): Use __builtin_strspn.
	(__strspn_c1): Remove inline function.
	(__strspn_c2): Likewise.
	(__strspn_c3): Likewise.
	* string/string-inlines.c
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c1): Add
	compatibility symbol.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c2):
	Likewise.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c3):
	Likewise.
---
 ChangeLog               | 15 ++++++++++
 string/bits/string2.h   | 74 ++-----------------------------------------------
 string/string-inlines.c | 36 ++++++++++++++++++++++++
 string/strspn.c         | 54 ++++++++++++++++++++++++++----------
 4 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/string/bits/string2.h b/string/bits/string2.h
index a8df0db..75a66a1 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -914,78 +914,10 @@ __stpcpy_small (char *__dest,
 
 /* Return the length of the initial segment of S which
    consists entirely of characters in ACCEPT.  */
-#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strspn
-#  if __GNUC_PREREQ (3, 2)
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strspn (s, accept)					      \
-	 : ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	    ? ((void) (s), (size_t) 0)					      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __strspn_c1 (s, __a0)					      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strspn_c2 (s, __a0, __a1)				      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strspn_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strspn (s, accept))))))		      \
-      : __builtin_strspn (s, accept)); })
-#  else
-#   define strspn(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0')		      \
-	 ? ((void) (s), (size_t) 0)					      \
-	 : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')		      \
-	    ? __strspn_c1 (s, __a0)					      \
-	    : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-	       ? __strspn_c2 (s, __a0, __a1)				      \
-	       : (((const char *) (accept))[3] == '\0'			      \
-		  ? __strspn_c3 (s, __a0, __a1, __a2)			      \
-		  : strspn (s, accept)))))				      \
-      : strspn (s, accept)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strspn
+# if __GNUC_PREREQ (3, 2)
+#  define strspn(s, accept) __builtin_strspn (s, accept)
 # endif
-
-__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept);
-__STRING_INLINE size_t
-__strspn_c1 (const char *__s, int __accept)
-{
-  size_t __result = 0;
-  /* Please note that __accept never can be '\0'.  */
-  while (__s[__result] == __accept)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1,
-				    int __accept2);
-__STRING_INLINE size_t
-__strspn_c2 (const char *__s, int __accept1, int __accept2)
-{
-  size_t __result = 0;
-  /* Please note that __accept1 and __accept2 never can be '\0'.  */
-  while (__s[__result] == __accept1 || __s[__result] == __accept2)
-    ++__result;
-  return __result;
-}
-
-__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1,
-				    int __accept2, int __accept3);
-__STRING_INLINE size_t
-__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
-  size_t __result = 0;
-  /* Please note that __accept1 to __accept3 never can be '\0'.  */
-  while (__s[__result] == __accept1 || __s[__result] == __accept2
-	 || __s[__result] == __accept3)
-    ++__result;
-  return __result;
-}
 #endif
 
 
diff --git a/string/string-inlines.c b/string/string-inlines.c
index 83bdd6c..754b315 100644
--- a/string/string-inlines.c
+++ b/string/string-inlines.c
@@ -71,4 +71,40 @@ __old_strcspn_c3 (const char *__s, int __reject1, int __reject2,
   return __result;
 }
 compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1);
+
+size_t
+__old_strspn_c1 (const char *__s, int __accept)
+{
+  size_t __result = 0;
+  /* Please note that __accept never can be '\0'.  */
+  while (__s[__result] == __accept)
+    ++__result;
+  return __result;
+}
+compat_symbol (libc, __old_strspn_c1, __strspn_c1, GLIBC_2_1_1);
+
+size_t
+__old_strspn_c2 (const char *__s, int __accept1, int __accept2)
+{
+  size_t __result = 0;
+  /* Please note that __accept1 and __accept2 never can be '\0'.  */
+  while (__s[__result] == __accept1 || __s[__result] == __accept2)
+    ++__result;
+  return __result;
+}
+compat_symbol (libc, __old_strspn_c2, __strspn_c2, GLIBC_2_1_1);
+
+size_t
+__old_strspn_c3 (const char *__s, int __accept1, int __accept2,
+		 int __accept3)
+{
+  size_t __result = 0;
+  /* Please note that __accept1 to __accept3 never can be '\0'.  */
+  while (__s[__result] == __accept1 || __s[__result] == __accept2
+	 || __s[__result] == __accept3)
+    ++__result;
+  return __result;
+}
+compat_symbol (libc, __old_strspn_c3, __strspn_c3, GLIBC_2_1_1);
+
 #endif
diff --git a/string/strspn.c b/string/strspn.c
index f0635c1..30f7747 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -25,23 +25,49 @@
 /* Return the length of the maximum initial segment
    of S which contains only characters in ACCEPT.  */
 size_t
-STRSPN (const char *s, const char *accept)
+STRSPN (const char *str, const char *accept)
 {
-  const char *p;
-  const char *a;
-  size_t count = 0;
-
-  for (p = s; *p != '\0'; ++p)
+  if (accept[0] == '\0')
+    return 0;
+  if (__glibc_unlikely (accept[1] == '\0'))
     {
-      for (a = accept; *a != '\0'; ++a)
-	if (*p == *a)
-	  break;
-      if (*a == '\0')
-	return count;
-      else
-	++count;
+      const char *a = str;
+      for (; *str == *accept; str++);
+      return str - a;
     }
 
-  return count;
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  unsigned char table[256];
+  unsigned char *p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  unsigned char *s = (unsigned char*) accept;
+  /* Different from strcspn it does not add the NULL on the table
+     so can avoid check if str[i] is NULL, since table['\0'] will
+     be 0 and thus stopping the loop check.  */
+  do
+    p[*s++] = 1;
+  while (*s);
+
+  s = (unsigned char*) str;
+  if (!p[s[0]]) return 0;
+  if (!p[s[1]]) return 1;
+  if (!p[s[2]]) return 2;
+  if (!p[s[3]]) return 3;
+
+  s = (unsigned char *) ((size_t)(s) & ~3);
+  unsigned int c0, c1, c2, c3;
+  do {
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
+  } while ((c0 & c1 & c2 & c3) != 0);
+
+  size_t count = s - (unsigned char *) str;
+  return (c0 & c1) == 0 ? count + c0 : count + c2 + 2;
 }
 libc_hidden_builtin_def (strspn)
-- 
1.9.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2016-04-01 20:44 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-28 15:20 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
2016-03-28 15:20 ` [PATCH 3/4] Improve generic strpbrk performance Adhemerval Zanella
2016-03-28 15:20 ` [PATCH 1/4] Improve generic strcspn performance Adhemerval Zanella
2016-03-29 13:02   ` [PATCH 2/4] Improve generic strspn performance Wilco Dijkstra
2016-03-29 14:08     ` Adhemerval Zanella
2016-03-30 17:47   ` [PATCH 1/4] Improve generic strcspn performance Richard Henderson
2016-03-30 18:01     ` Wilco Dijkstra
2016-03-30 18:24       ` Adhemerval Zanella
2016-03-30 18:46       ` Richard Henderson
2016-03-31 17:00   ` Richard Henderson
2016-04-01 20:44     ` Roland McGrath
2016-03-28 15:20 ` [PATCH 4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation Adhemerval Zanella
2016-03-28 16:10   ` Paul E. Murphy
2016-03-28 17:56     ` Adhemerval Zanella
2016-03-30 13:14   ` Tulio Magno Quites Machado Filho
2016-03-30 17:06     ` Adhemerval Zanella
2016-03-30 18:14       ` Tulio Magno Quites Machado Filho
2016-03-30 19:57         ` Adhemerval Zanella
2016-03-30 21:45           ` Tulio Magno Quites Machado Filho
2016-03-28 15:20 ` [PATCH 2/4] Improve generic strspn performance Adhemerval Zanella
2016-03-29 20:32   ` Tulio Magno Quites Machado Filho
2016-03-31 14:01 [PATCH 0/4] Improve generic strspn/strcspn/strpbrk Adhemerval Zanella
2016-03-31 14:01 ` [PATCH 2/4] Improve generic strspn performance Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).