From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 15750 invoked by alias); 28 Mar 2016 15:20:10 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Received: (qmail 14787 invoked by uid 89); 28 Mar 2016 15:20:09 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.6 required=5.0 tests=BAYES_00,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 spammy=memsets, c0, Different, sk:libc_hi X-HELO: mail-qk0-f178.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references; bh=xeEa5mhN3C8Hl5xJ8tUmzDgH1s+dnJ/muTJVn+nEjY4=; b=ZPwr6mckK0pCLk51kame1Ce6g2pvA5vhx8eTeYFDLQQnGv95FGKlXp+Rb3uw9ySxTM b0znXrNZGbKHPKEZrGLVBb5H7+1x83+aEjPB/TUWLA2lN4cx441wUEXv3krlp9O7Us4R OdM4cMjm15+i1lZHu84cYsJyZZtDf6iyHSRn0sq8k2Aik9o0S8o3GnGTlYYo9Zw4LDAm /wBrr+UltMJq/4HkLekiR0530YDlGrDwt2vYDhe3CZTwB+sUHMWwr90NORXVYyu4HSvI eBlAc+wLIMXcskrfBmuoGyVSUoi18wnH+ClH2DjXsaykm4kX+E5WChWP45VeQ8tu1mAs SlnQ== X-Gm-Message-State: AD7BkJI55ygyqEMfzhE55+sj/lzwnV3gYhcuSAC5nlhCuDDzzlwmNAa/Ejb7HweNmKI8ctn+ X-Received: by 10.13.208.198 with SMTP id s189mr15081849ywd.130.1459178399252; Mon, 28 Mar 2016 08:19:59 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH 2/4] Improve generic strspn performance Date: Mon, 28 Mar 2016 15:20:00 -0000 Message-Id: <1459178389-14133-3-git-send-email-adhemerval.zanella@linaro.org> In-Reply-To: <1459178389-14133-1-git-send-email-adhemerval.zanella@linaro.org> References: <1459178389-14133-1-git-send-email-adhemerval.zanella@linaro.org> X-SW-Source: 2016-03/txt/msg00663.txt.bz2 As for strcspn, this patch improves strspn performance using a much faster algorithm. 
It first constructs a 256-entry table based on the accept string and then uses it as a lookup table for the input string. As for strcspn optimization, it is generally at least 10 times faster than the existing implementation on bench-strspn on a few AArch64 implementations. Also the string/bits/string2.h inlines no longer make sense, as the current implementation already implements most of the optimizations. Tested on x86_64, i686, and aarch64. * string/strspn.c (strspn): Rewrite function. * string/bits/string2.h (strspn): Use __builtin_strspn. --- ChangeLog | 5 +++++ string/bits/string2.h | 41 ++++++------------------------------- string/strspn.c | 56 +++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 52 insertions(+), 50 deletions(-) diff --git a/string/bits/string2.h b/string/bits/string2.h index 1b87686..a1684eb 100644 --- a/string/bits/string2.h +++ b/string/bits/string2.h @@ -952,43 +952,14 @@ __strcspn_c3 (const char *__s, int __reject1, int __reject2, /* Return the length of the initial segment of S which consists entirely of characters in ACCEPT. */ -#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES -# ifndef _HAVE_STRING_ARCH_strspn -# if __GNUC_PREREQ (3, 2) -# define strspn(s, accept) \ - __extension__ \ - ({ char __a0, __a1, __a2; \ - (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ - ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ - ? __builtin_strspn (s, accept) \ - : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ - ? ((void) (s), (size_t) 0) \ - : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ - ? __strspn_c1 (s, __a0) \ - : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ - ? __strspn_c2 (s, __a0, __a1) \ - : (((const char *) (accept))[3] == '\0' \ - ? 
__strspn_c3 (s, __a0, __a1, __a2) \ - : __builtin_strspn (s, accept)))))) \ - : __builtin_strspn (s, accept)); }) -# else -# define strspn(s, accept) \ - __extension__ \ - ({ char __a0, __a1, __a2; \ - (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ - ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ - ? ((void) (s), (size_t) 0) \ - : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ - ? __strspn_c1 (s, __a0) \ - : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ - ? __strspn_c2 (s, __a0, __a1) \ - : (((const char *) (accept))[3] == '\0' \ - ? __strspn_c3 (s, __a0, __a1, __a2) \ - : strspn (s, accept))))) \ - : strspn (s, accept)); }) -# endif +#ifndef _HAVE_STRING_ARCH_strspn +# if __GNUC_PREREQ (3, 2) +# define strspn(s, accept) __builtin_strspn (s, accept) # endif +/* The inline functions are not used from GLIBC 2.24 and forward, however + they are required to provide the symbols through string-inlines.c + (if inlining is not possible for compatibility reasons). */ __STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); __STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept) diff --git a/string/strspn.c b/string/strspn.c index f0635c1..0547f41 100644 --- a/string/strspn.c +++ b/string/strspn.c @@ -25,23 +25,49 @@ /* Return the length of the maximum initial segment of S which contains only characters in ACCEPT. */ size_t -STRSPN (const char *s, const char *accept) +STRSPN (const char *str, const char *accept) { - const char *p; - const char *a; - size_t count = 0; - - for (p = s; *p != '\0'; ++p) - { - for (a = accept; *a != '\0'; ++a) - if (*p == *a) - break; - if (*a == '\0') - return count; - else - ++count; + if (accept[0] == '\0') + return 0; + if (accept[1] == '\0') + { + const char *a = str; + for (; *str == *accept; str++); + return str - a; } - return count; + /* Use multiple small memsets to enable inlining on most targets. 
*/ + unsigned char table[256]; + unsigned char *p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); + + unsigned char *s = (unsigned char*) accept; + /* Different from strcspn it does not add the NULL on the table + so can avoid check if str[i] is NULL, since table['\0'] will + be 0 and thus stopping the loop check. */ + do + p[*s++] = 1; + while (*s); + + s = (unsigned char*) str; + if (!p[s[0]]) return 0; + if (!p[s[1]]) return 1; + if (!p[s[2]]) return 2; + if (!p[s[3]]) return 3; + + s = (unsigned char *) ((size_t)(s) & ~3); + unsigned int c0, c1, c2, c3; + do { + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; + } while ((c0 && c1 && c2 && c3) == 1); + + size_t count = s - (unsigned char *) str; + return (c0 && c1) == 0 ? count - !c0 + 1 : count - !c2 + 3; } libc_hidden_builtin_def (strspn) -- 1.9.1