public inbox for libc-alpha@sourceware.org
* [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c
@ 2023-12-11 17:33 James Tirta Halim
  2023-12-11 17:44 ` Sunil Pandey
  0 siblings, 1 reply; 9+ messages in thread
From: James Tirta Halim @ 2023-12-11 17:33 UTC (permalink / raw)
  To: libc-alpha; +Cc: James Tirta Halim

---
 sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c

diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
new file mode 100644
index 0000000000..b0cced73aa
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
@@ -0,0 +1,55 @@
+#include <immintrin.h>
+#include <string.h>
+#include <inttypes.h>
+#include <libc-pointer-arith.h>
+
+void *
+__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
+{
+  if (ne_len == 1)
+    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
+  if (__glibc_unlikely (ne_len == 0))
+    return (void *) hs;
+  if (__glibc_unlikely (hs_len == ne_len))
+    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
+  if (__glibc_unlikely (hs_len < ne_len))
+    return NULL;
+  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
+  const unsigned char *h = (const unsigned char *) hs;
+  const unsigned char *n = (const unsigned char *) ne;
+  const unsigned char *const end = h + hs_len - ne_len;
+  const int c1 = *(n + 1);
+  n += 2, ne_len -= 2;
+  __m256i hv;
+  uint32_t i, m;
+  if (!PTR_IS_ALIGNED (h)) {
+    hv = _mm256_loadu_si256 ((const __m256i *) h);
+    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+    for (; m; m = _blsr_u32 (m)) {
+      i = _tzcnt_u32 (m);
+      if (__glibc_unlikely (h + i > end))
+        return NULL;
+      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
+        return (char *) h + i;
+    }
+    h += sizeof (__m256i);
+    if (__glibc_unlikely (h > end))
+      return NULL;
+    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
+  }
+  for (;;) {
+    hv = _mm256_load_si256 ((const __m256i *) h);
+    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+    for (; m; m = _blsr_u32 (m)) {
+      i = _tzcnt_u32 (m);
+      if (__glibc_unlikely (h + i > end))
+        return NULL;
+      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
+        return (char *) h + i;
+    }
+    h += sizeof (__m256i);
+    if (__glibc_unlikely (h > end))
+      return NULL;
+  }
+  return NULL;
+}
-- 
2.43.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c
  2023-12-11 17:33 [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c James Tirta Halim
@ 2023-12-11 17:44 ` Sunil Pandey
  2023-12-11 18:11   ` Noah Goldstein
  0 siblings, 1 reply; 9+ messages in thread
From: Sunil Pandey @ 2023-12-11 17:44 UTC (permalink / raw)
  To: James Tirta Halim; +Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 2610 bytes --]

Can you please provide some context/comment on this patch?

--Sunil

On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim <tirtajames45@gmail.com>
wrote:

> ---
>  sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
>  1 file changed, 55 insertions(+)
>  create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>
> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c
> b/sysdeps/x86_64/multiarch/memmem-avx2.c
> new file mode 100644
> index 0000000000..b0cced73aa
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
> @@ -0,0 +1,55 @@
> +#include <immintrin.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <libc-pointer-arith.h>
> +
> +void *
> +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t
> ne_len)
> +{
> +  if (ne_len == 1)
> +    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
> +  if (__glibc_unlikely (ne_len == 0))
> +    return (void *) hs;
> +  if (__glibc_unlikely (hs_len == ne_len))
> +    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
> +  if (__glibc_unlikely (hs_len < ne_len))
> +    return NULL;
> +  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
> +  const unsigned char *h = (const unsigned char *) hs;
> +  const unsigned char *n = (const unsigned char *) ne;
> +  const unsigned char *const end = h + hs_len - ne_len;
> +  const int c1 = *(n + 1);
> +  n += 2, ne_len -= 2;
> +  __m256i hv;
> +  uint32_t i, m;
> +  if (!PTR_IS_ALIGNED (h)) {
> +    hv = _mm256_loadu_si256 ((const __m256i *) h);
> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    for (; m; m = _blsr_u32 (m)) {
> +      i = _tzcnt_u32 (m);
> +      if (__glibc_unlikely (h + i > end))
> +        return NULL;
> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> +        return (char *) h + i;
> +    }
> +    h += sizeof (__m256i);
> +    if (__glibc_unlikely (h > end))
> +      return NULL;
> +    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
> +  }
> +  for (;;) {
> +    hv = _mm256_load_si256 ((const __m256i *) h);
> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    for (; m; m = _blsr_u32 (m)) {
> +      i = _tzcnt_u32 (m);
> +      if (__glibc_unlikely (h + i > end))
> +        return NULL;
> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> +        return (char *) h + i;
> +    }
> +    h += sizeof (__m256i);
> +    if (__glibc_unlikely (h > end))
> +      return NULL;
> +  }
> +  return NULL;
> +}
> --
> 2.43.0
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c
  2023-12-11 17:44 ` Sunil Pandey
@ 2023-12-11 18:11   ` Noah Goldstein
  2023-12-15 17:03     ` James Tirta Halim
  0 siblings, 1 reply; 9+ messages in thread
From: Noah Goldstein @ 2023-12-11 18:11 UTC (permalink / raw)
  To: Sunil Pandey; +Cc: James Tirta Halim, libc-alpha

On Mon, Dec 11, 2023 at 11:45 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
> Can you please provide some context/comment on this patch?
>
Likewise performance data.
> --Sunil
>
> On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim <tirtajames45@gmail.com> wrote:
>>
>> ---
>>  sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
>>  1 file changed, 55 insertions(+)
>>  create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>>
>> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
>> new file mode 100644
>> index 0000000000..b0cced73aa
>> --- /dev/null
>> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
>> @@ -0,0 +1,55 @@
>> +#include <immintrin.h>
>> +#include <string.h>
>> +#include <inttypes.h>
>> +#include <libc-pointer-arith.h>
>> +
>> +void *
>> +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
>> +{
>> +  if (ne_len == 1)
>> +    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
>> +  if (__glibc_unlikely (ne_len == 0))
>> +    return (void *) hs;
>> +  if (__glibc_unlikely (hs_len == ne_len))
>> +    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
>> +  if (__glibc_unlikely (hs_len < ne_len))
>> +    return NULL;
>> +  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
>> +  const unsigned char *h = (const unsigned char *) hs;
>> +  const unsigned char *n = (const unsigned char *) ne;
>> +  const unsigned char *const end = h + hs_len - ne_len;
>> +  const int c1 = *(n + 1);
>> +  n += 2, ne_len -= 2;
>> +  __m256i hv;
>> +  uint32_t i, m;
>> +  if (!PTR_IS_ALIGNED (h)) {
>> +    hv = _mm256_loadu_si256 ((const __m256i *) h);
>> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
>> +    for (; m; m = _blsr_u32 (m)) {
>> +      i = _tzcnt_u32 (m);
>> +      if (__glibc_unlikely (h + i > end))
>> +        return NULL;
>> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
>> +        return (char *) h + i;
>> +    }
>> +    h += sizeof (__m256i);
>> +    if (__glibc_unlikely (h > end))
>> +      return NULL;
>> +    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
>> +  }
>> +  for (;;) {
>> +    hv = _mm256_load_si256 ((const __m256i *) h);
>> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
>> +    for (; m; m = _blsr_u32 (m)) {
>> +      i = _tzcnt_u32 (m);
>> +      if (__glibc_unlikely (h + i > end))
>> +        return NULL;
>> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
>> +        return (char *) h + i;
>> +    }
>> +    h += sizeof (__m256i);
>> +    if (__glibc_unlikely (h > end))
>> +      return NULL;
>> +  }
>> +  return NULL;
>> +}
>> --
>> 2.43.0
>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c
  2023-12-11 18:11   ` Noah Goldstein
@ 2023-12-15 17:03     ` James Tirta Halim
  2023-12-15 19:53       ` Carlos O'Donell
  0 siblings, 1 reply; 9+ messages in thread
From: James Tirta Halim @ 2023-12-15 17:03 UTC (permalink / raw)
  To: goldstein.w.n; +Cc: libc-alpha, skpgkp2, tirtajames45

Find the rarest byte in NE. Do a naive loop until HS is aligned. Once aligned, find
the parts of HS that match the rare byte and the byte after it, shift
back to the position of HS that should match NE, and do a memcmp.

Average timings (Core i5 8400):
__memmem_avx2	basic_memmem	twoway_memmem	memmem
1342.942864	19100.87074	3335.335377	2745.971856

Passes make check.
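
For illustration only (not part of the patch), a minimal scalar sketch of
the rare-byte idea described above; the helper name, its signature, and the
way SHIFT is supplied are hypothetical:

#include <stddef.h>
#include <string.h>

/* Scalar sketch: scan HS at the offset where NE's rarest byte would land;
   on a byte hit, step back by SHIFT to the candidate start and verify the
   whole needle with memcmp.  Assumes hs_len >= ne_len >= 1 and
   shift < ne_len.  */
static void *
memmem_rare_byte_sketch (const unsigned char *hs, size_t hs_len,
                         const unsigned char *ne, size_t ne_len,
                         size_t shift)
{
  const unsigned char *const end = hs + hs_len - ne_len;
  const unsigned char *h;
  for (h = hs + shift; h - shift <= end; ++h)
    if (*h == ne[shift] && memcmp (h - shift, ne, ne_len) == 0)
      return (void *) (h - shift);
  return NULL;
}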

---
 sysdeps/x86_64/multiarch/memmem-avx2.c | 83 ++++++++++++++++----------
 1 file changed, 50 insertions(+), 33 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
index b0cced73aa..524d0fe45f 100644
--- a/sysdeps/x86_64/multiarch/memmem-avx2.c
+++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
@@ -3,53 +3,70 @@
 #include <inttypes.h>
 #include <libc-pointer-arith.h>
 
+static inline void *
+__find_rarest_byte (const void *ne,
+                      size_t n)
+{
+  static const unsigned char rarebyte_table[256] = { 0, 1, 13, 56, 59, 60, 61, 62, 63, 232, 248, 2, 158, 4, 5, 6, 7, 8, 9, 10, 14, 20, 26, 29, 37, 46, 52, 53, 54, 55, 57, 58, 255, 172, 242, 193, 162, 174, 178, 182, 218, 219, 212, 180, 249, 197, 221, 210, 253, 231, 230, 224, 225, 226, 227, 223, 222, 220, 176, 213, 184, 229, 188, 164, 159, 209, 181, 203, 189, 216, 196, 192, 185, 205, 161, 168, 215, 187, 211, 194, 195, 165, 206, 204, 214, 198, 173, 179, 175, 183, 167, 202, 239, 201, 160, 241, 163, 246, 233, 238, 240, 254, 237, 208, 234, 250, 169, 186, 236, 217, 245, 243, 228, 170, 247, 244, 251, 235, 199, 200, 252, 207, 177, 191, 171, 190, 166, 3, 140, 134, 124, 126, 86, 128, 95, 117, 114, 93, 81, 87, 132, 96, 112, 97, 103, 82, 139, 89, 98, 88, 119, 74, 156, 115, 104, 75, 120, 106, 76, 155, 90, 122, 107, 125, 152, 145, 136, 137, 101, 116, 102, 108, 99, 141, 77, 78, 118, 79, 109, 100, 150, 73, 94, 72, 121, 151, 113, 135, 110, 105, 83, 91, 11, 12, 64, 149, 146, 111, 65, 69, 66, 15, 16, 17, 18, 19, 130, 92, 144, 123, 21, 22, 23, 24, 131, 133, 127, 142, 25, 70, 129, 27, 28, 67, 153, 84, 143, 138, 147, 157, 148, 68, 71, 30, 31, 32, 33, 34, 35, 36, 154, 38, 39, 40, 41, 42, 80, 43, 44, 45, 47, 48, 85, 49, 50, 51 };
+  const unsigned char *rare = (const unsigned char *)  ne;
+  const unsigned char *p = (const unsigned char *)  ne;
+  int c_rare = rarebyte_table[*rare];
+  int c;
+  for (; n--; ++p)
+    {
+    c = rarebyte_table[*p];
+    if (c < c_rare) {
+      rare = p;
+      c_rare = c;
+    }
+    }
+  return (void *) rare;
+}
+
 void *
-__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
+__memmem_avx2 (const void *hs,
+              size_t hs_len,
+              const void *ne,
+              size_t ne_len)
 {
   if (ne_len == 1)
     return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
   if (__glibc_unlikely (ne_len == 0))
     return (void *) hs;
-  if (__glibc_unlikely (hs_len == ne_len))
-    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
   if (__glibc_unlikely (hs_len < ne_len))
     return NULL;
-  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
   const unsigned char *h = (const unsigned char *) hs;
-  const unsigned char *n = (const unsigned char *) ne;
   const unsigned char *const end = h + hs_len - ne_len;
-  const int c1 = *(n + 1);
-  n += 2, ne_len -= 2;
-  __m256i hv;
-  uint32_t i, m;
-  if (!PTR_IS_ALIGNED (h)) {
-    hv = _mm256_loadu_si256 ((const __m256i *) h);
-    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
-    for (; m; m = _blsr_u32 (m)) {
-      i = _tzcnt_u32 (m);
-      if (__glibc_unlikely (h + i > end))
-        return NULL;
-      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
-        return (char *) h + i;
-    }
-    h += sizeof (__m256i);
-    if (__glibc_unlikely (h > end))
+  size_t shift = PTR_DIFF (__find_rarest_byte (ne, ne_len), ne);
+  if (shift == ne_len - 1)
+    --shift;
+  h += shift;
+  for (; !PTR_IS_ALIGNED (h, sizeof (__m256i)); ++h)
+    {
+    if (__glibc_unlikely (h - shift > end))
       return NULL;
-    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
-  }
-  for (;;) {
+    if (*h == *((unsigned char *) ne + shift) && !memcmp (h - shift, ne, ne_len))
+      return (void *) (h - shift);
+    }
+  const __m256i nv = _mm256_set1_epi8 (*((char *) ne + shift));
+  const __m256i nv1 = _mm256_set1_epi8 (*((char *) ne + shift + 1));
+  __m256i hv, hv1;
+  uint32_t i, hm0, hm1, m;
+  for (; h - shift <= end; h += sizeof (__m256i)) {
     hv = _mm256_load_si256 ((const __m256i *) h);
-    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
-    for (; m; m = _blsr_u32 (m)) {
+    hv1 = _mm256_loadu_si256 ((const __m256i *) (h + 1));
+    hm0 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+    hm1 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv1, nv1));
+    m = hm0 & hm1;
+    while (m)
+      {
       i = _tzcnt_u32 (m);
-      if (__glibc_unlikely (h + i > end))
+      m = _blsr_u32 (m);
+      if (__glibc_unlikely (h + i - shift > end))
         return NULL;
-      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
-        return (char *) h + i;
-    }
-    h += sizeof (__m256i);
-    if (__glibc_unlikely (h > end))
-      return NULL;
+      if (!memcmp (h + i - shift, ne, ne_len))
+        return (char *) h + i - shift;
+      }
   }
   return NULL;
 }
-- 
2.43.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c
  2023-12-15 17:03     ` James Tirta Halim
@ 2023-12-15 19:53       ` Carlos O'Donell
  2023-12-16  4:33         ` [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: " James Tirta Halim
  0 siblings, 1 reply; 9+ messages in thread
From: Carlos O'Donell @ 2023-12-15 19:53 UTC (permalink / raw)
  To: James Tirta Halim, goldstein.w.n; +Cc: libc-alpha, skpgkp2

On 12/15/23 12:03, James Tirta Halim wrote:
> Find the rarest byte in NE. Do a naive loop until HS is aligned. Once aligned, find
> the parts of HS that match the rare byte and the byte after it, shift
> back to the position of HS that should match NE, and do a memcmp.

Patch fails pre-commit CI -- Doesn't apply.

https://patchwork.sourceware.org/project/glibc/patch/20231215170315.1806024-1-tirtajames45@gmail.com/

This looks like it depends on the up-thread patch.

Please send patches as a series, e.g. git format-patch HEAD~1; then use git send-email.

Please review the contribution checklist:
https://sourceware.org/glibc/wiki/Contribution%20checklist

Please review Copyright and license:
https://sourceware.org/glibc/wiki/Contribution%20checklist#Copyright_and_license

This patch needs either DCO or assignment.

> Average timings (Core i5 8400):
> __memmem_avx2	basic_memmem	twoway_memmem	memmem
> 1342.942864	19100.87074	3335.335377	2745.971856
> 
> Passes make check.
> 
> ---
>  sysdeps/x86_64/multiarch/memmem-avx2.c | 83 ++++++++++++++++----------
>  1 file changed, 50 insertions(+), 33 deletions(-)
> 
> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
> index b0cced73aa..524d0fe45f 100644
> --- a/sysdeps/x86_64/multiarch/memmem-avx2.c
> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
> @@ -3,53 +3,70 @@
>  #include <inttypes.h>
>  #include <libc-pointer-arith.h>
>  
> +static inline void *
> +__find_rarest_byte (const void *ne,
> +                      size_t n)
> +{
> +  static const unsigned char rarebyte_table[256] = { 0, 1, 13, 56, 59, 60, 61, 62, 63, 232, 248, 2, 158, 4, 5, 6, 7, 8, 9, 10, 14, 20, 26, 29, 37, 46, 52, 53, 54, 55, 57, 58, 255, 172, 242, 193, 162, 174, 178, 182, 218, 219, 212, 180, 249, 197, 221, 210, 253, 231, 230, 224, 225, 226, 227, 223, 222, 220, 176, 213, 184, 229, 188, 164, 159, 209, 181, 203, 189, 216, 196, 192, 185, 205, 161, 168, 215, 187, 211, 194, 195, 165, 206, 204, 214, 198, 173, 179, 175, 183, 167, 202, 239, 201, 160, 241, 163, 246, 233, 238, 240, 254, 237, 208, 234, 250, 169, 186, 236, 217, 245, 243, 228, 170, 247, 244, 251, 235, 199, 200, 252, 207, 177, 191, 171, 190, 166, 3, 140, 134, 124, 126, 86, 128, 95, 117, 114, 93, 81, 87, 132, 96, 112, 97, 103, 82, 139, 89, 98, 88, 119, 74, 156, 115, 104, 75, 120, 106, 76, 155, 90, 122, 107, 125, 152, 145, 136, 137, 101, 116, 102, 108, 99, 141, 77, 78, 118, 79, 109, 100, 150, 73, 94, 72, 121, 151, 113, 135, 110, 105, 83, 91, 11, 12, 64, 149, 146, 111, 65, 69, 66, 15, 16, 17, 18, 19, 130, 92, 144, 123, 21, 22, 23, 24, 131, 133, 127, 142, 25, 70, 129, 27, 28, 67, 153, 84, 143, 138, 147, 157, 148, 68, 71, 30, 31, 32, 33, 34, 35, 36, 154, 38, 39, 40, 41, 42, 80, 43, 44, 45, 47, 48, 85, 49, 50, 51 };
> +  const unsigned char *rare = (const unsigned char *)  ne;
> +  const unsigned char *p = (const unsigned char *)  ne;
> +  int c_rare = rarebyte_table[*rare];
> +  int c;
> +  for (; n--; ++p)
> +    {
> +    c = rarebyte_table[*p];
> +    if (c < c_rare) {
> +      rare = p;
> +      c_rare = c;
> +    }
> +    }
> +  return (void *) rare;
> +}
> +
>  void *
> -__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
> +__memmem_avx2 (const void *hs,
> +              size_t hs_len,
> +              const void *ne,
> +              size_t ne_len)
>  {
>    if (ne_len == 1)
>      return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
>    if (__glibc_unlikely (ne_len == 0))
>      return (void *) hs;
> -  if (__glibc_unlikely (hs_len == ne_len))
> -    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
>    if (__glibc_unlikely (hs_len < ne_len))
>      return NULL;
> -  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
>    const unsigned char *h = (const unsigned char *) hs;
> -  const unsigned char *n = (const unsigned char *) ne;
>    const unsigned char *const end = h + hs_len - ne_len;
> -  const int c1 = *(n + 1);
> -  n += 2, ne_len -= 2;
> -  __m256i hv;
> -  uint32_t i, m;
> -  if (!PTR_IS_ALIGNED (h)) {
> -    hv = _mm256_loadu_si256 ((const __m256i *) h);
> -    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> -    for (; m; m = _blsr_u32 (m)) {
> -      i = _tzcnt_u32 (m);
> -      if (__glibc_unlikely (h + i > end))
> -        return NULL;
> -      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> -        return (char *) h + i;
> -    }
> -    h += sizeof (__m256i);
> -    if (__glibc_unlikely (h > end))
> +  size_t shift = PTR_DIFF (__find_rarest_byte (ne, ne_len), ne);
> +  if (shift == ne_len - 1)
> +    --shift;
> +  h += shift;
> +  for (; !PTR_IS_ALIGNED (h, sizeof (__m256i)); ++h)
> +    {
> +    if (__glibc_unlikely (h - shift > end))
>        return NULL;
> -    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
> -  }
> -  for (;;) {
> +    if (*h == *((unsigned char *) ne + shift) && !memcmp (h - shift, ne, ne_len))
> +      return (void *) (h - shift);
> +    }
> +  const __m256i nv = _mm256_set1_epi8 (*((char *) ne + shift));
> +  const __m256i nv1 = _mm256_set1_epi8 (*((char *) ne + shift + 1));
> +  __m256i hv, hv1;
> +  uint32_t i, hm0, hm1, m;
> +  for (; h - shift <= end; h += sizeof (__m256i)) {
>      hv = _mm256_load_si256 ((const __m256i *) h);
> -    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> -    for (; m; m = _blsr_u32 (m)) {
> +    hv1 = _mm256_loadu_si256 ((const __m256i *) (h + 1));
> +    hm0 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    hm1 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv1, nv1));
> +    m = hm0 & hm1;
> +    while (m)
> +      {
>        i = _tzcnt_u32 (m);
> -      if (__glibc_unlikely (h + i > end))
> +      m = _blsr_u32 (m);
> +      if (__glibc_unlikely (h + i - shift > end))
>          return NULL;
> -      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> -        return (char *) h + i;
> -    }
> -    h += sizeof (__m256i);
> -    if (__glibc_unlikely (h > end))
> -      return NULL;
> +      if (!memcmp (h + i - shift, ne, ne_len))
> +        return (char *) h + i - shift;
> +      }
>    }
>    return NULL;
>  }

-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: add memmem-avx2.c
  2023-12-15 19:53       ` Carlos O'Donell
@ 2023-12-16  4:33         ` James Tirta Halim
  2023-12-18 14:12           ` Carlos O'Donell
  2023-12-18 17:48           ` Noah Goldstein
  0 siblings, 2 replies; 9+ messages in thread
From: James Tirta Halim @ 2023-12-16  4:33 UTC (permalink / raw)
  To: carlos; +Cc: goldstein.w.n, libc-alpha, skpgkp2, tirtajames45

Find the rarest byte in NE. Do a naive loop until HS is aligned. Once aligned, find
the parts of HS that match the rare byte and the byte after it, shift
back to the position of HS that should match NE, and do a memcmp.

Average timings (Core i5 8400):
__memmem_avx2   basic_memmem    twoway_memmem   memmem
1342.942864     19100.87074     3335.335377     2745.971856
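
As an aside (illustration only, mirroring __find_rarest_byte in the patch
below rather than replacing it), the shift is simply the offset of the
lowest-ranked byte of NE according to a 256-entry rarity table; a minimal
sketch with the table passed in and its contents elided:

#include <stddef.h>

/* Sketch of the rare-byte selection: rank[b] is the rarity rank of byte
   value b (lower means rarer).  Returns the offset of the lowest-ranked
   byte of NE, which the search then uses as the shift.  Assumes
   ne_len >= 1.  */
static size_t
rarest_byte_offset_sketch (const unsigned char *ne, size_t ne_len,
                           const unsigned char rank[256])
{
  size_t best = 0, i;
  for (i = 1; i < ne_len; i++)
    if (rank[ne[i]] < rank[ne[best]])
      best = i;
  return best;
}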

---
 sysdeps/x86_64/multiarch/memmem-avx2.c | 72 ++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c

diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
new file mode 100644
index 0000000000..524d0fe45f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
@@ -0,0 +1,72 @@
+#include <immintrin.h>
+#include <string.h>
+#include <inttypes.h>
+#include <libc-pointer-arith.h>
+
+static inline void *
+__find_rarest_byte (const void *ne,
+                      size_t n)
+{
+  static const unsigned char rarebyte_table[256] = { 0, 1, 13, 56, 59, 60, 61, 62, 63, 232, 248, 2, 158, 4, 5, 6, 7, 8, 9, 10, 14, 20, 26, 29, 37, 46, 52, 53, 54, 55, 57, 58, 255, 172, 242, 193, 162, 174, 178, 182, 218, 219, 212, 180, 249, 197, 221, 210, 253, 231, 230, 224, 225, 226, 227, 223, 222, 220, 176, 213, 184, 229, 188, 164, 159, 209, 181, 203, 189, 216, 196, 192, 185, 205, 161, 168, 215, 187, 211, 194, 195, 165, 206, 204, 214, 198, 173, 179, 175, 183, 167, 202, 239, 201, 160, 241, 163, 246, 233, 238, 240, 254, 237, 208, 234, 250, 169, 186, 236, 217, 245, 243, 228, 170, 247, 244, 251, 235, 199, 200, 252, 207, 177, 191, 171, 190, 166, 3, 140, 134, 124, 126, 86, 128, 95, 117, 114, 93, 81, 87, 132, 96, 112, 97, 103, 82, 139, 89, 98, 88, 119, 74, 156, 115, 104, 75, 120, 106, 76, 155, 90, 122, 107, 125, 152, 145, 136, 137, 101, 116, 102, 108, 99, 141, 77, 78, 118, 79, 109, 100, 150, 73, 94, 72, 121, 151, 113, 135, 110, 105, 83, 91, 11, 12, 64, 149, 146, 111, 65, 69, 66, 15, 16, 17, 18, 19, 130, 92, 144, 123, 21, 22, 23, 24, 131, 133, 127, 142, 25, 70, 129, 27, 28, 67, 153, 84, 143, 138, 147, 157, 148, 68, 71, 30, 31, 32, 33, 34, 35, 36, 154, 38, 39, 40, 41, 42, 80, 43, 44, 45, 47, 48, 85, 49, 50, 51 };
+  const unsigned char *rare = (const unsigned char *)  ne;
+  const unsigned char *p = (const unsigned char *)  ne;
+  int c_rare = rarebyte_table[*rare];
+  int c;
+  for (; n--; ++p)
+    {
+    c = rarebyte_table[*p];
+    if (c < c_rare) {
+      rare = p;
+      c_rare = c;
+    }
+    }
+  return (void *) rare;
+}
+
+void *
+__memmem_avx2 (const void *hs,
+              size_t hs_len,
+              const void *ne,
+              size_t ne_len)
+{
+  if (ne_len == 1)
+    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
+  if (__glibc_unlikely (ne_len == 0))
+    return (void *) hs;
+  if (__glibc_unlikely (hs_len < ne_len))
+    return NULL;
+  const unsigned char *h = (const unsigned char *) hs;
+  const unsigned char *const end = h + hs_len - ne_len;
+  size_t shift = PTR_DIFF (__find_rarest_byte (ne, ne_len), ne);
+  if (shift == ne_len - 1)
+    --shift;
+  h += shift;
+  for (; !PTR_IS_ALIGNED (h, sizeof (__m256i)); ++h)
+    {
+    if (__glibc_unlikely (h - shift > end))
+      return NULL;
+    if (*h == *((unsigned char *) ne + shift) && !memcmp (h - shift, ne, ne_len))
+      return (void *) (h - shift);
+    }
+  const __m256i nv = _mm256_set1_epi8 (*((char *) ne + shift));
+  const __m256i nv1 = _mm256_set1_epi8 (*((char *) ne + shift + 1));
+  __m256i hv, hv1;
+  uint32_t i, hm0, hm1, m;
+  for (; h - shift <= end; h += sizeof (__m256i)) {
+    hv = _mm256_load_si256 ((const __m256i *) h);
+    hv1 = _mm256_loadu_si256 ((const __m256i *) (h + 1));
+    hm0 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+    hm1 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv1, nv1));
+    m = hm0 & hm1;
+    while (m)
+      {
+      i = _tzcnt_u32 (m);
+      m = _blsr_u32 (m);
+      if (__glibc_unlikely (h + i - shift > end))
+        return NULL;
+      if (!memcmp (h + i - shift, ne, ne_len))
+        return (char *) h + i - shift;
+      }
+  }
+  return NULL;
+}
-- 
2.43.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: add memmem-avx2.c
  2023-12-16  4:33         ` [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: " James Tirta Halim
@ 2023-12-18 14:12           ` Carlos O'Donell
  2023-12-18 17:48           ` Noah Goldstein
  1 sibling, 0 replies; 9+ messages in thread
From: Carlos O'Donell @ 2023-12-18 14:12 UTC (permalink / raw)
  To: James Tirta Halim, goldstein.w.n; +Cc: libc-alpha, skpgkp2

On 12/15/23 23:33, James Tirta Halim wrote:
> Find the rarest byte in NE. Do a naive loop until HS is aligned. Once aligned, find
> the parts of HS that match the rare byte and the byte after it, shift
> back to the position of HS that should match NE, and do a memcmp.

James,

Please clarify your assignment status or provide DCO for these changes.

Please review the contribution checklist here:
https://sourceware.org/glibc/wiki/Contribution%20checklist


> Average timings (Core i5 8400):
> __memmem_avx2   basic_memmem    twoway_memmem   memmem
> 1342.942864     19100.87074     3335.335377     2745.971856
> 
> ---
>  sysdeps/x86_64/multiarch/memmem-avx2.c | 72 ++++++++++++++++++++++++++
>  1 file changed, 72 insertions(+)
>  create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
> 
> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
> new file mode 100644
> index 0000000000..524d0fe45f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
> @@ -0,0 +1,72 @@
> +#include <immintrin.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <libc-pointer-arith.h>
> +
> +static inline void *
> +__find_rarest_byte (const void *ne,
> +                      size_t n)
> +{
> +  static const unsigned char rarebyte_table[256] = { 0, 1, 13, 56, 59, 60, 61, 62, 63, 232, 248, 2, 158, 4, 5, 6, 7, 8, 9, 10, 14, 20, 26, 29, 37, 46, 52, 53, 54, 55, 57, 58, 255, 172, 242, 193, 162, 174, 178, 182, 218, 219, 212, 180, 249, 197, 221, 210, 253, 231, 230, 224, 225, 226, 227, 223, 222, 220, 176, 213, 184, 229, 188, 164, 159, 209, 181, 203, 189, 216, 196, 192, 185, 205, 161, 168, 215, 187, 211, 194, 195, 165, 206, 204, 214, 198, 173, 179, 175, 183, 167, 202, 239, 201, 160, 241, 163, 246, 233, 238, 240, 254, 237, 208, 234, 250, 169, 186, 236, 217, 245, 243, 228, 170, 247, 244, 251, 235, 199, 200, 252, 207, 177, 191, 171, 190, 166, 3, 140, 134, 124, 126, 86, 128, 95, 117, 114, 93, 81, 87, 132, 96, 112, 97, 103, 82, 139, 89, 98, 88, 119, 74, 156, 115, 104, 75, 120, 106, 76, 155, 90, 122, 107, 125, 152, 145, 136, 137, 101, 116, 102, 108, 99, 141, 77, 78, 118, 79, 109, 100, 150, 73, 94, 72, 121, 151, 113, 135, 110, 105, 83, 91, 11, 12, 64, 149, 146, 111, 65, 69, 66, 15, 16, 17, 18, 19, 130, 92, 144, 123, 21, 22, 23, 24, 131, 133, 127, 142, 25, 70, 129, 27, 28, 67, 153, 84, 143, 138, 147, 157, 148, 68, 71, 30, 31, 32, 33, 34, 35, 36, 154, 38, 39, 40, 41, 42, 80, 43, 44, 45, 47, 48, 85, 49, 50, 51 };
> +  const unsigned char *rare = (const unsigned char *)  ne;
> +  const unsigned char *p = (const unsigned char *)  ne;
> +  int c_rare = rarebyte_table[*rare];
> +  int c;
> +  for (; n--; ++p)
> +    {
> +    c = rarebyte_table[*p];
> +    if (c < c_rare) {
> +      rare = p;
> +      c_rare = c;
> +    }
> +    }
> +  return (void *) rare;
> +}
> +
> +void *
> +__memmem_avx2 (const void *hs,
> +              size_t hs_len,
> +              const void *ne,
> +              size_t ne_len)
> +{
> +  if (ne_len == 1)
> +    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
> +  if (__glibc_unlikely (ne_len == 0))
> +    return (void *) hs;
> +  if (__glibc_unlikely (hs_len < ne_len))
> +    return NULL;
> +  const unsigned char *h = (const unsigned char *) hs;
> +  const unsigned char *const end = h + hs_len - ne_len;
> +  size_t shift = PTR_DIFF (__find_rarest_byte (ne, ne_len), ne);
> +  if (shift == ne_len - 1)
> +    --shift;
> +  h += shift;
> +  for (; !PTR_IS_ALIGNED (h, sizeof (__m256i)); ++h)
> +    {
> +    if (__glibc_unlikely (h - shift > end))
> +      return NULL;
> +    if (*h == *((unsigned char *) ne + shift) && !memcmp (h - shift, ne, ne_len))
> +      return (void *) (h - shift);
> +    }
> +  const __m256i nv = _mm256_set1_epi8 (*((char *) ne + shift));
> +  const __m256i nv1 = _mm256_set1_epi8 (*((char *) ne + shift + 1));
> +  __m256i hv, hv1;
> +  uint32_t i, hm0, hm1, m;
> +  for (; h - shift <= end; h += sizeof (__m256i)) {
> +    hv = _mm256_load_si256 ((const __m256i *) h);
> +    hv1 = _mm256_loadu_si256 ((const __m256i *) (h + 1));
> +    hm0 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    hm1 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv1, nv1));
> +    m = hm0 & hm1;
> +    while (m)
> +      {
> +      i = _tzcnt_u32 (m);
> +      m = _blsr_u32 (m);
> +      if (__glibc_unlikely (h + i - shift > end))
> +        return NULL;
> +      if (!memcmp (h + i - shift, ne, ne_len))
> +        return (char *) h + i - shift;
> +      }
> +  }
> +  return NULL;
> +}

-- 
Cheers,
Carlos.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: add memmem-avx2.c
  2023-12-16  4:33         ` [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: " James Tirta Halim
  2023-12-18 14:12           ` Carlos O'Donell
@ 2023-12-18 17:48           ` Noah Goldstein
  2023-12-18 23:08             ` James
  1 sibling, 1 reply; 9+ messages in thread
From: Noah Goldstein @ 2023-12-18 17:48 UTC (permalink / raw)
  To: James Tirta Halim; +Cc: carlos, libc-alpha, skpgkp2

On Fri, Dec 15, 2023 at 10:37 PM James Tirta Halim
<tirtajames45@gmail.com> wrote:
>
> Find the rarest byte in NE. Do a naive loop until HS is aligned. Once aligned, find
> the parts of HS that match the rare byte and the byte after it, shift
> back to the position of HS that should match NE, and do a memcmp.
>
> Average timings (Core i5 8400):
> __memmem_avx2   basic_memmem    twoway_memmem   memmem
> 1342.942864     19100.87074     3335.335377     2745.971856
Can you attach the .out result file?
>
> ---
>  sysdeps/x86_64/multiarch/memmem-avx2.c | 72 ++++++++++++++++++++++++++
>  1 file changed, 72 insertions(+)
>  create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>
> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
> new file mode 100644
> index 0000000000..524d0fe45f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
> @@ -0,0 +1,72 @@
> +#include <immintrin.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <libc-pointer-arith.h>
> +
> +static inline void *
> +__find_rarest_byte (const void *ne,
> +                      size_t n)
> +{
> +  static const unsigned char rarebyte_table[256] = { 0, 1, 13, 56, 59, 60, 61, 62, 63, 232, 248, 2, 158, 4, 5, 6, 7, 8, 9, 10, 14, 20, 26, 29, 37, 46, 52, 53, 54, 55, 57, 58, 255, 172, 242, 193, 162, 174, 178, 182, 218, 219, 212, 180, 249, 197, 221, 210, 253, 231, 230, 224, 225, 226, 227, 223, 222, 220, 176, 213, 184, 229, 188, 164, 159, 209, 181, 203, 189, 216, 196, 192, 185, 205, 161, 168, 215, 187, 211, 194, 195, 165, 206, 204, 214, 198, 173, 179, 175, 183, 167, 202, 239, 201, 160, 241, 163, 246, 233, 238, 240, 254, 237, 208, 234, 250, 169, 186, 236, 217, 245, 243, 228, 170, 247, 244, 251, 235, 199, 200, 252, 207, 177, 191, 171, 190, 166, 3, 140, 134, 124, 126, 86, 128, 95, 117, 114, 93, 81, 87, 132, 96, 112, 97, 103, 82, 139, 89, 98, 88, 119, 74, 156, 115, 104, 75, 120, 106, 76, 155, 90, 122, 107, 125, 152, 145, 136, 137, 101, 116, 102, 108, 99, 141, 77, 78, 118, 79, 109, 100, 150, 73, 94, 72, 121, 151, 113, 135, 110, 105, 83, 91, 11, 12, 64, 149, 146, 111, 65, 69, 66, 15, 16, 17, 18, 19, 130, 92, 144, 123, 21, 22, 23, 24, 131, 133, 127, 142, 25, 70, 129, 27, 28, 67, 153, 84, 143, 138, 147, 157, 148, 68, 71, 30, 31, 32, 33, 34, 35, 36, 154, 38, 39, 40, 41, 42, 80, 43, 44, 45, 47, 48, 85, 49, 50, 51 };
Can you add a comment explaining how this table was generated / what it is?
> +  const unsigned char *rare = (const unsigned char *)  ne;
> +  const unsigned char *p = (const unsigned char *)  ne;
> +  int c_rare = rarebyte_table[*rare];
> +  int c;
> +  for (; n--; ++p)
> +    {
> +    c = rarebyte_table[*p];
> +    if (c < c_rare) {
> +      rare = p;
> +      c_rare = c;
> +    }
> +    }
> +  return (void *) rare;
> +}
> +
> +void *
> +__memmem_avx2 (const void *hs,
> +              size_t hs_len,
> +              const void *ne,
> +              size_t ne_len)
> +{
> +  if (ne_len == 1)
> +    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
> +  if (__glibc_unlikely (ne_len == 0))
> +    return (void *) hs;
> +  if (__glibc_unlikely (hs_len < ne_len))
> +    return NULL;
> +  const unsigned char *h = (const unsigned char *) hs;
> +  const unsigned char *const end = h + hs_len - ne_len;
> +  size_t shift = PTR_DIFF (__find_rarest_byte (ne, ne_len), ne);
> +  if (shift == ne_len - 1)
> +    --shift;
> +  h += shift;
> +  for (; !PTR_IS_ALIGNED (h, sizeof (__m256i)); ++h)
> +    {
> +    if (__glibc_unlikely (h - shift > end))
> +      return NULL;
> +    if (*h == *((unsigned char *) ne + shift) && !memcmp (h - shift, ne, ne_len))
This should be `__memcmp`, or you could directly use `__memcmpeq_avx2`
(probably the fastest here).
Likewise below.
> +      return (void *) (h - shift);
> +    }
> +  const __m256i nv = _mm256_set1_epi8 (*((char *) ne + shift));
> +  const __m256i nv1 = _mm256_set1_epi8 (*((char *) ne + shift + 1));
> +  __m256i hv, hv1;
> +  uint32_t i, hm0, hm1, m;
> +  for (; h - shift <= end; h += sizeof (__m256i)) {
> +    hv = _mm256_load_si256 ((const __m256i *) h);
> +    hv1 = _mm256_loadu_si256 ((const __m256i *) (h + 1));
> +    hm0 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    hm1 = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv1, nv1));
I think faster is:
m = _mm256_movemask_epi8 (_mm256_and_si256 (_mm256_cmpeq_epi8 (hv, nv),
_mm256_cmpeq_epi8 (hv1, nv1)));
> +    m = hm0 & hm1;
> +    while (m)
> +      {
> +      i = _tzcnt_u32 (m);
> +      m = _blsr_u32 (m);
> +      if (__glibc_unlikely (h + i - shift > end))
> +        return NULL;
> +      if (!memcmp (h + i - shift, ne, ne_len))
> +        return (char *) h + i - shift;
> +      }
> +  }
> +  return NULL;
> +}
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: add memmem-avx2.c
  2023-12-18 17:48           ` Noah Goldstein
@ 2023-12-18 23:08             ` James
  0 siblings, 0 replies; 9+ messages in thread
From: James @ 2023-12-18 23:08 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: carlos, libc-alpha, skpgkp2


[-- Attachment #1.1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: bench-memmem.out --]
[-- Type: application/x-wine-extension-out, Size: 54387 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-12-18 23:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-11 17:33 [PATCH] sysdeps/memmem-avx2.c: add memmem-avx2.c James Tirta Halim
2023-12-11 17:44 ` Sunil Pandey
2023-12-11 18:11   ` Noah Goldstein
2023-12-15 17:03     ` James Tirta Halim
2023-12-15 19:53       ` Carlos O'Donell
2023-12-16  4:33         ` [PATCH] sysdeps/x86_64/multiarch/memmem-avx2.c: " James Tirta Halim
2023-12-18 14:12           ` Carlos O'Donell
2023-12-18 17:48           ` Noah Goldstein
2023-12-18 23:08             ` James
