public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Sunil Pandey <skpgkp2@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc/release/2.34/master] x86: Optimize strcspn and strpbrk in strcspn-c.c
Date: Tue, 17 May 2022 03:20:10 +0000 (GMT)	[thread overview]
Message-ID: <20220517032010.B09393857354@sourceware.org> (raw)

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=0ae1006967eef11909fbed0f6ecef2f260b133d3

commit 0ae1006967eef11909fbed0f6ecef2f260b133d3
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Wed Mar 23 16:57:22 2022 -0500

    x86: Optimize strcspn and strpbrk in strcspn-c.c
    
    Use _mm_cmpeq_epi8 and _mm_movemask_epi8 to get strlen instead of
    _mm_cmpistri. Also change offset to unsigned to avoid unnecessary
    sign extensions.
    
    geometric_mean(N=20) of all benchmarks that dont fallback on
    sse2/strlen; New / Original: .928
    
    All string/memory tests pass.
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
    
    (cherry picked from commit 30d627d477d7255345a4b713cf352ac32d644d61)

Diff:
---
 sysdeps/x86_64/multiarch/strcspn-c.c | 83 ++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 46 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index c56ddbd22f..2436b6dcd9 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -85,83 +85,74 @@ STRCSPN_SSE42 (const char *s, const char *a)
     RETURN (NULL, strlen (s));
 
   const char *aligned;
-  __m128i mask;
-  int offset = (int) ((size_t) a & 15);
+  __m128i mask, maskz, zero;
+  unsigned int maskz_bits;
+  unsigned int offset = (unsigned int) ((size_t) a & 15);
+  zero = _mm_set1_epi8 (0);
   if (offset != 0)
     {
       /* Load masks.  */
       aligned = (const char *) ((size_t) a & -16L);
       __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-
-      mask = __m128i_shift_right (mask0, offset);
+      maskz = _mm_cmpeq_epi8 (mask0, zero);
 
       /* Find where the NULL terminator is.  */
-      int length = _mm_cmpistri (mask, mask, 0x3a);
-      if (length == 16 - offset)
-	{
-	  /* There is no NULL terminator.  */
-	  __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
-	  int index = _mm_cmpistri (mask1, mask1, 0x3a);
-	  length += index;
-
-	  /* Don't use SSE4.2 if the length of A > 16.  */
-	  if (length > 16)
-	    return STRCSPN_SSE2 (s, a);
-
-	  if (index != 0)
-	    {
-	      /* Combine mask0 and mask1.  We could play games with
-		 palignr, but frankly this data should be in L1 now
-		 so do the merge via an unaligned load.  */
-	      mask = _mm_loadu_si128 ((__m128i *) a);
-	    }
-	}
+      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+      if (maskz_bits != 0)
+        {
+          mask = __m128i_shift_right (mask0, offset);
+          offset = (unsigned int) ((size_t) s & 15);
+          if (offset)
+            goto start_unaligned;
+
+          aligned = s;
+          goto start_loop;
+        }
     }
-  else
-    {
-      /* A is aligned.  */
-      mask = _mm_load_si128 ((__m128i *) a);
 
-      /* Find where the NULL terminator is.  */
-      int length = _mm_cmpistri (mask, mask, 0x3a);
-      if (length == 16)
-	{
-	  /* There is no NULL terminator.  Don't use SSE4.2 if the length
-	     of A > 16.  */
-	  if (a[16] != 0)
-	    return STRCSPN_SSE2 (s, a);
-	}
+  /* A is aligned.  */
+  mask = _mm_loadu_si128 ((__m128i *) a);
+  /* Find where the NULL terminator is.  */
+  maskz = _mm_cmpeq_epi8 (mask, zero);
+  maskz_bits = _mm_movemask_epi8 (maskz);
+  if (maskz_bits == 0)
+    {
+      /* There is no NULL terminator.  Don't use SSE4.2 if the length
+         of A > 16.  */
+      if (a[16] != 0)
+        return STRCSPN_SSE2 (s, a);
     }
 
-  offset = (int) ((size_t) s & 15);
+  aligned = s;
+  offset = (unsigned int) ((size_t) s & 15);
   if (offset != 0)
     {
+    start_unaligned:
       /* Check partial string.  */
       aligned = (const char *) ((size_t) s & -16L);
       __m128i value = _mm_load_si128 ((__m128i *) aligned);
 
       value = __m128i_shift_right (value, offset);
 
-      int length = _mm_cmpistri (mask, value, 0x2);
+      unsigned int length = _mm_cmpistri (mask, value, 0x2);
       /* No need to check ZFlag since ZFlag is always 1.  */
-      int cflag = _mm_cmpistrc (mask, value, 0x2);
+      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
       if (cflag)
 	RETURN ((char *) (s + length), length);
       /* Find where the NULL terminator is.  */
-      int index = _mm_cmpistri (value, value, 0x3a);
+      unsigned int index = _mm_cmpistri (value, value, 0x3a);
       if (index < 16 - offset)
 	RETURN (NULL, index);
       aligned += 16;
     }
-  else
-    aligned = s;
 
+start_loop:
   while (1)
     {
       __m128i value = _mm_load_si128 ((__m128i *) aligned);
-      int index = _mm_cmpistri (mask, value, 0x2);
-      int cflag = _mm_cmpistrc (mask, value, 0x2);
-      int zflag = _mm_cmpistrz (mask, value, 0x2);
+      unsigned int index = _mm_cmpistri (mask, value, 0x2);
+      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
       if (cflag)
 	RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
       if (zflag)


                 reply	other threads:[~2022-05-17  3:20 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220517032010.B09393857354@sourceware.org \
    --to=skpgkp2@sourceware.org \
    --cc=glibc-cvs@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).