Can you please provide some context/comment on this patch? --Sunil On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim wrote: > --- > sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++ > 1 file changed, 55 insertions(+) > create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c > > diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c > b/sysdeps/x86_64/multiarch/memmem-avx2.c > new file mode 100644 > index 0000000000..b0cced73aa > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c > @@ -0,0 +1,55 @@ > +#include <immintrin.h> > +#include <inttypes.h> > +#include <string.h> > +#include <libc-pointer-arith.h> > + > +void * > +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t > ne_len) > +{ > + if (ne_len == 1) > + return (void *) memchr (hs, *(unsigned char *) ne, hs_len); > + if (__glibc_unlikely (ne_len == 0)) > + return (void *) hs; > + if (__glibc_unlikely (hs_len == ne_len)) > + return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL; > + if (__glibc_unlikely (hs_len < ne_len)) > + return NULL; > + const __m256i nv = _mm256_set1_epi8 (*(char *) ne); > + const unsigned char *h = (const unsigned char *) hs; > + const unsigned char *n = (const unsigned char *) ne; > + const unsigned char *const end = h + hs_len - ne_len; > + const int c1 = *(n + 1); > + n += 2, ne_len -= 2; > + __m256i hv; > + uint32_t i, m; > + if (!PTR_IS_ALIGNED (h)) { > + hv = _mm256_loadu_si256 ((const __m256i *) h); > + m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv)); > + for (; m; m = _blsr_u32 (m)) { > + i = _tzcnt_u32 (m); > + if (__glibc_unlikely (h + i > end)) > + return NULL; > + if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len)) > + return (char *) h + i; > + } > + h += sizeof (__m256i); > + if (__glibc_unlikely (h > end)) > + return NULL; > + h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i)); > + } > + for (;;) { > + hv = _mm256_load_si256 ((const __m256i *) h); > + m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv)); > + for (; m; m = _blsr_u32 (m)) { 
> + i = _tzcnt_u32 (m); > + if (__glibc_unlikely (h + i > end)) > + return NULL; > + if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len)) > + return (char *) h + i; > + } > + h += sizeof (__m256i); > + if (__glibc_unlikely (h > end)) > + return NULL; > + } > + return NULL; > +} > -- > 2.43.0 > >