public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: James Tirta Halim <tirtajames45@gmail.com>
To: wilco.dijkstra@arm.com
Cc: libc-alpha@sourceware.org, tirtajames45@gmail.com
Subject: [PATCH] strcasestr: try to find non-alpha char in NEEDLE
Date: Tue, 28 Nov 2023 21:01:37 +0700	[thread overview]
Message-ID: <20231128140137.81955-1-tirtajames45@gmail.com> (raw)
In-Reply-To: <PAWPR08MB89829F6BE1B835956E61AEE983EEA@PAWPR08MB8982.eurprd08.prod.outlook.com>

---
 string/strcasestr.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/string/strcasestr.c b/string/strcasestr.c
index 2f6b4f8641..65eae2f047 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -54,7 +54,6 @@
 #define STRCASESTR __strcasestr
 #endif
 
-
 /* Find the first occurrence of NEEDLE in HAYSTACK, using
    case-insensitive comparison.  This function gives unspecified
    results in multibyte locales.  */
@@ -63,18 +62,42 @@ STRCASESTR (const char *haystack, const char *needle)
 {
   size_t needle_len; /* Length of NEEDLE.  */
   size_t haystack_len; /* Known minimum length of HAYSTACK.  */
+  const char *h, *n;
 
   /* Handle empty NEEDLE special case.  */
   if (needle[0] == '\0')
     return (char *) haystack;
 
-  /* Ensure HAYSTACK length is at least as long as NEEDLE length.
-     Since a match may occur early on in a huge HAYSTACK, use strnlen
-     and read ahead a few cachelines for improved performance.  */
-  needle_len = strlen (needle);
-  haystack_len = __strnlen (haystack, needle_len + 256);
-  if (haystack_len < needle_len)
+  /* Try to find a non-alphabetic character in NEEDLE to pass to
+     strchr() while checking if HAYSTACK is as long as NEEDLE.  */
+  for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n);
+  if (__glibc_unlikely (*h == '\0'))
     return NULL;
+  if (*n) {
+    size_t shift;
+    shift = n - needle;
+    haystack = strchr (h + shift, *n);
+    if (__glibc_unlikely (haystack == NULL))
+      return NULL;
+    haystack -= shift;
+    /* Check if we have an early match. */
+    for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n);
+    if (*n == '\0')
+      return (char *)haystack;
+    if (__glibc_unlikely (*h == '\0'))
+      return NULL;
+    if ((size_t) (n - needle) > shift)
+      shift = n - needle;
+    /* Since a match may occur early on in a huge HAYSTACK, use strnlen
+       and read ahead a few cachelines for improved performance.  */
+    needle_len = shift + strlen (needle + shift);
+    haystack_len = shift + __strnlen (h + shift, 256);
+    if (__glibc_unlikely (haystack_len < needle_len))
+      return NULL;
+  } else {
+    needle_len = n - needle;
+    haystack_len = needle_len + __strnlen (haystack + needle_len, 256);
+  }
 
   /* Perform the search.  Abstract memory is considered to be an array
      of 'unsigned char' values, not an array of 'char' values.  See
-- 
2.43.0


  parent reply	other threads:[~2023-11-28 14:01 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-07 22:14 [PATCH] strcasestr: check if ne[0] is in hs with strchr or strpbrk as does strstr Wilco Dijkstra
2023-10-14  8:32 ` James Tirta Halim
2023-10-16 12:59   ` Adhemerval Zanella Netto
2023-10-16 13:52     ` Wilco Dijkstra
2023-10-16 16:56       ` Noah Goldstein
2023-10-17  9:57         ` Wilco Dijkstra
2023-10-14  8:56 ` [PATCH 1/2] " James Tirta Halim
2023-11-28 14:01 ` James Tirta Halim [this message]
2023-12-04 14:44   ` [PATCH] strcasestr: try to find non-alpha char in NEEDLE Carlos O'Donell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231128140137.81955-1-tirtajames45@gmail.com \
    --to=tirtajames45@gmail.com \
    --cc=libc-alpha@sourceware.org \
    --cc=wilco.dijkstra@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).