From: "H.J. Lu" <hjl.tools@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>,
"Carlos O'Donell" <carlos@systemhalted.org>
Subject: Re: [PATCH v1 23/23] x86: Remove AVX str{n}casecmp
Date: Thu, 24 Mar 2022 12:04:51 -0700 [thread overview]
Message-ID: <CAMe9rOpzEL=V1OmUFJuScNetUc3mgMqYeqcqiD9aK+tBTN_sxQ@mail.gmail.com> (raw)
In-Reply-To: <20220323215734.3927131-23-goldstein.w.n@gmail.com>
On Wed, Mar 23, 2022 at 3:03 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The rationale is:
>
> 1. SSE42 has nearly identical logic so any benefit is minimal (3.4%
> regression on Tigerlake using SSE42 versus AVX across the
> benchtest suite).
> 2. AVX2 version covers the majority of targets that previously
> preferred it.
> 3. The targets where AVX would still be best (SnB and IVB) are
> becoming outdated.
>
> All in all, the code size savings are worth it.
>
> All string/memory tests pass.
> ---
> Geometric Mean N=40 runs; All functions page aligned
> length, align1, align2, max_char, AVX Time / SSE42 Time
> 1, 1, 1, 127, 0.928
> 2, 2, 2, 127, 0.934
> 3, 3, 3, 127, 0.975
> 4, 4, 4, 127, 0.96
> 5, 5, 5, 127, 0.935
> 6, 6, 6, 127, 0.929
> 7, 7, 7, 127, 0.959
> 8, 0, 0, 127, 0.955
> 9, 1, 1, 127, 0.944
> 10, 2, 2, 127, 0.975
> 11, 3, 3, 127, 0.935
> 12, 4, 4, 127, 0.931
> 13, 5, 5, 127, 0.926
> 14, 6, 6, 127, 0.901
> 15, 7, 7, 127, 0.951
> 4, 0, 0, 127, 0.958
> 4, 0, 0, 254, 0.956
> 8, 0, 0, 254, 0.977
> 16, 0, 0, 127, 0.955
> 16, 0, 0, 254, 0.953
> 32, 0, 0, 127, 0.943
> 32, 0, 0, 254, 0.941
> 64, 0, 0, 127, 0.941
> 64, 0, 0, 254, 0.955
> 128, 0, 0, 127, 0.972
> 128, 0, 0, 254, 0.975
> 256, 0, 0, 127, 0.996
> 256, 0, 0, 254, 0.993
> 512, 0, 0, 127, 0.992
> 512, 0, 0, 254, 0.986
> 1024, 0, 0, 127, 0.994
> 1024, 0, 0, 254, 0.993
> 16, 1, 2, 127, 0.933
> 16, 2, 1, 254, 0.953
> 32, 2, 4, 127, 0.927
> 32, 4, 2, 254, 0.986
> 64, 3, 6, 127, 0.991
> 64, 6, 3, 254, 1.014
> 128, 4, 0, 127, 1.001
> 128, 0, 4, 254, 0.991
> 256, 5, 2, 127, 1.011
> 256, 2, 5, 254, 1.013
> 512, 6, 4, 127, 1.056
> 512, 4, 6, 254, 0.916
> 1024, 7, 6, 127, 1.059
> 1024, 6, 7, 254, 1.043
>
> sysdeps/x86_64/multiarch/Makefile | 2 -
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 -
> sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 4 -
> sysdeps/x86_64/multiarch/strcasecmp_l-avx.S | 22 --
> sysdeps/x86_64/multiarch/strcmp-sse42.S | 240 +++++++++-----------
> sysdeps/x86_64/multiarch/strncase_l-avx.S | 22 --
> 6 files changed, 105 insertions(+), 197 deletions(-)
> delete mode 100644 sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
> delete mode 100644 sysdeps/x86_64/multiarch/strncase_l-avx.S
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 35d80dc2ff..6507d1b7fa 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -54,7 +54,6 @@ sysdep_routines += \
> stpncpy-evex \
> stpncpy-sse2-unaligned \
> stpncpy-ssse3 \
> - strcasecmp_l-avx \
> strcasecmp_l-avx2 \
> strcasecmp_l-avx2-rtm \
> strcasecmp_l-evex \
> @@ -95,7 +94,6 @@ sysdep_routines += \
> strlen-avx2-rtm \
> strlen-evex \
> strlen-sse2 \
> - strncase_l-avx \
> strncase_l-avx2 \
> strncase_l-avx2-rtm \
> strncase_l-evex \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index f1a4d3dac2..40cc6cc49e 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -447,9 +447,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> (CPU_FEATURE_USABLE (AVX2)
> && CPU_FEATURE_USABLE (RTM)),
> __strcasecmp_avx2_rtm)
> - IFUNC_IMPL_ADD (array, i, strcasecmp,
> - CPU_FEATURE_USABLE (AVX),
> - __strcasecmp_avx)
> IFUNC_IMPL_ADD (array, i, strcasecmp,
> CPU_FEATURE_USABLE (SSE4_2),
> __strcasecmp_sse42)
> @@ -471,9 +468,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> (CPU_FEATURE_USABLE (AVX2)
> && CPU_FEATURE_USABLE (RTM)),
> __strcasecmp_l_avx2_rtm)
> - IFUNC_IMPL_ADD (array, i, strcasecmp_l,
> - CPU_FEATURE_USABLE (AVX),
> - __strcasecmp_l_avx)
> IFUNC_IMPL_ADD (array, i, strcasecmp_l,
> CPU_FEATURE_USABLE (SSE4_2),
> __strcasecmp_l_sse42)
> @@ -609,9 +603,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> (CPU_FEATURE_USABLE (AVX2)
> && CPU_FEATURE_USABLE (RTM)),
> __strncasecmp_avx2_rtm)
> - IFUNC_IMPL_ADD (array, i, strncasecmp,
> - CPU_FEATURE_USABLE (AVX),
> - __strncasecmp_avx)
> IFUNC_IMPL_ADD (array, i, strncasecmp,
> CPU_FEATURE_USABLE (SSE4_2),
> __strncasecmp_sse42)
> @@ -634,9 +625,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> (CPU_FEATURE_USABLE (AVX2)
> && CPU_FEATURE_USABLE (RTM)),
> __strncasecmp_l_avx2_rtm)
> - IFUNC_IMPL_ADD (array, i, strncasecmp_l,
> - CPU_FEATURE_USABLE (AVX),
> - __strncasecmp_l_avx)
> IFUNC_IMPL_ADD (array, i, strncasecmp_l,
> CPU_FEATURE_USABLE (SSE4_2),
> __strncasecmp_l_sse42)
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> index bf0d146e7f..766539c241 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> @@ -22,7 +22,6 @@
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
> @@ -46,9 +45,6 @@ IFUNC_SELECTOR (void)
> return OPTIMIZE (avx2);
> }
>
> - if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
> - return OPTIMIZE (avx);
> -
> if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
> && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
> return OPTIMIZE (sse42);
> diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
> deleted file mode 100644
> index 7ec7c21b5a..0000000000
> --- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* strcasecmp_l optimized with AVX.
> - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <https://www.gnu.org/licenses/>. */
> -
> -#define STRCMP_SSE42 __strcasecmp_l_avx
> -#define USE_AVX 1
> -#define USE_AS_STRCASECMP_L
> -#include "strcmp-sse42.S"
> diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
> index 7805ae9d41..a9178ad25c 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
> @@ -41,13 +41,8 @@
> # define UPDATE_STRNCMP_COUNTER
> #endif
>
> -#ifdef USE_AVX
> -# define SECTION avx
> -# define GLABEL(l) l##_avx
> -#else
> -# define SECTION sse4.2
> -# define GLABEL(l) l##_sse42
> -#endif
> +#define SECTION sse4.2
> +#define GLABEL(l) l##_sse42
>
> #define LABEL(l) .L##l
>
> @@ -105,21 +100,7 @@ END (GLABEL(__strncasecmp))
> #endif
>
>
> -#ifdef USE_AVX
> -# define movdqa vmovdqa
> -# define movdqu vmovdqu
> -# define pmovmskb vpmovmskb
> -# define pcmpistri vpcmpistri
> -# define psubb vpsubb
> -# define pcmpeqb vpcmpeqb
> -# define psrldq vpsrldq
> -# define pslldq vpslldq
> -# define palignr vpalignr
> -# define pxor vpxor
> -# define D(arg) arg, arg
> -#else
> -# define D(arg) arg
> -#endif
> +#define arg arg
>
> STRCMP_SSE42:
> cfi_startproc
> @@ -191,18 +172,7 @@ LABEL(case_add):
> movdqu (%rdi), %xmm1
> movdqu (%rsi), %xmm2
> #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
> -# ifdef USE_AVX
> -# define TOLOWER(reg1, reg2) \
> - vpaddb LCASE_MIN_reg, reg1, %xmm7; \
> - vpaddb LCASE_MIN_reg, reg2, %xmm8; \
> - vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \
> - vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \
> - vpandn CASE_ADD_reg, %xmm7, %xmm7; \
> - vpandn CASE_ADD_reg, %xmm8, %xmm8; \
> - vpaddb %xmm7, reg1, reg1; \
> - vpaddb %xmm8, reg2, reg2
> -# else
> -# define TOLOWER(reg1, reg2) \
> +# define TOLOWER(reg1, reg2) \
> movdqa LCASE_MIN_reg, %xmm7; \
> movdqa LCASE_MIN_reg, %xmm8; \
> paddb reg1, %xmm7; \
> @@ -213,15 +183,15 @@ LABEL(case_add):
> pandn CASE_ADD_reg, %xmm8; \
> paddb %xmm7, reg1; \
> paddb %xmm8, reg2
> -# endif
> +
> TOLOWER (%xmm1, %xmm2)
> #else
> # define TOLOWER(reg1, reg2)
> #endif
> - pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */
> - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
> - pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */
> - psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
> + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
> + pcmpeqb %xmm1, %xmm0 /* Any null chars? */
> + pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
> + psubb %xmm0, %xmm1 /* packed sub of comparison results*/
> pmovmskb %xmm1, %edx
> sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
> jnz LABEL(less16bytes)/* If not, find different value or null char */
> @@ -245,7 +215,7 @@ LABEL(crosscache):
> xor %r8d, %r8d
> and $0xf, %ecx /* offset of rsi */
> and $0xf, %eax /* offset of rdi */
> - pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */
> + pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
> cmp %eax, %ecx
> je LABEL(ashr_0) /* rsi and rdi relative offset same */
> ja LABEL(bigger)
> @@ -259,7 +229,7 @@ LABEL(bigger):
> sub %rcx, %r9
> lea LABEL(unaligned_table)(%rip), %r10
> movslq (%r10, %r9,4), %r9
> - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
> + pcmpeqb %xmm1, %xmm0 /* Any null chars? */
> lea (%r10, %r9), %r10
> _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
>
> @@ -272,15 +242,15 @@ LABEL(bigger):
> LABEL(ashr_0):
>
> movdqa (%rsi), %xmm1
> - pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
> + pcmpeqb %xmm1, %xmm0 /* Any null chars? */
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> - pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */
> + pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
> #else
> movdqa (%rdi), %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */
> + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
> #endif
> - psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
> + psubb %xmm0, %xmm1 /* packed sub of comparison results*/
> pmovmskb %xmm1, %r9d
> shr %cl, %edx /* adjust 0xffff for offset */
> shr %cl, %r9d /* adjust for 16-byte offset */
> @@ -360,10 +330,10 @@ LABEL(ashr_0_exit_use):
> */
> .p2align 4
> LABEL(ashr_1):
> - pslldq $15, D(%xmm2) /* shift first string to align with second */
> + pslldq $15, %xmm2 /* shift first string to align with second */
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */
> - psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/
> + pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
> + psubb %xmm0, %xmm2 /* packed sub of comparison results*/
> pmovmskb %xmm2, %r9d
> shr %cl, %edx /* adjust 0xffff for offset */
> shr %cl, %r9d /* adjust for 16-byte offset */
> @@ -391,7 +361,7 @@ LABEL(loop_ashr_1_use):
>
> LABEL(nibble_ashr_1_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $1, -16(%rdi, %rdx), D(%xmm0)
> + palignr $1, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -410,7 +380,7 @@ LABEL(nibble_ashr_1_restart_use):
> jg LABEL(nibble_ashr_1_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $1, -16(%rdi, %rdx), D(%xmm0)
> + palignr $1, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -430,7 +400,7 @@ LABEL(nibble_ashr_1_restart_use):
> LABEL(nibble_ashr_1_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $1, D(%xmm0)
> + psrldq $1, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -448,10 +418,10 @@ LABEL(nibble_ashr_1_use):
> */
> .p2align 4
> LABEL(ashr_2):
> - pslldq $14, D(%xmm2)
> + pslldq $14, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -479,7 +449,7 @@ LABEL(loop_ashr_2_use):
>
> LABEL(nibble_ashr_2_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $2, -16(%rdi, %rdx), D(%xmm0)
> + palignr $2, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -498,7 +468,7 @@ LABEL(nibble_ashr_2_restart_use):
> jg LABEL(nibble_ashr_2_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $2, -16(%rdi, %rdx), D(%xmm0)
> + palignr $2, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -518,7 +488,7 @@ LABEL(nibble_ashr_2_restart_use):
> LABEL(nibble_ashr_2_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $2, D(%xmm0)
> + psrldq $2, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -536,10 +506,10 @@ LABEL(nibble_ashr_2_use):
> */
> .p2align 4
> LABEL(ashr_3):
> - pslldq $13, D(%xmm2)
> + pslldq $13, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -567,7 +537,7 @@ LABEL(loop_ashr_3_use):
>
> LABEL(nibble_ashr_3_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $3, -16(%rdi, %rdx), D(%xmm0)
> + palignr $3, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -586,7 +556,7 @@ LABEL(nibble_ashr_3_restart_use):
> jg LABEL(nibble_ashr_3_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $3, -16(%rdi, %rdx), D(%xmm0)
> + palignr $3, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -606,7 +576,7 @@ LABEL(nibble_ashr_3_restart_use):
> LABEL(nibble_ashr_3_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $3, D(%xmm0)
> + psrldq $3, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -624,10 +594,10 @@ LABEL(nibble_ashr_3_use):
> */
> .p2align 4
> LABEL(ashr_4):
> - pslldq $12, D(%xmm2)
> + pslldq $12, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -656,7 +626,7 @@ LABEL(loop_ashr_4_use):
>
> LABEL(nibble_ashr_4_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $4, -16(%rdi, %rdx), D(%xmm0)
> + palignr $4, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -675,7 +645,7 @@ LABEL(nibble_ashr_4_restart_use):
> jg LABEL(nibble_ashr_4_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $4, -16(%rdi, %rdx), D(%xmm0)
> + palignr $4, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -695,7 +665,7 @@ LABEL(nibble_ashr_4_restart_use):
> LABEL(nibble_ashr_4_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $4, D(%xmm0)
> + psrldq $4, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -713,10 +683,10 @@ LABEL(nibble_ashr_4_use):
> */
> .p2align 4
> LABEL(ashr_5):
> - pslldq $11, D(%xmm2)
> + pslldq $11, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -745,7 +715,7 @@ LABEL(loop_ashr_5_use):
>
> LABEL(nibble_ashr_5_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $5, -16(%rdi, %rdx), D(%xmm0)
> + palignr $5, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -765,7 +735,7 @@ LABEL(nibble_ashr_5_restart_use):
>
> movdqa (%rdi, %rdx), %xmm0
>
> - palignr $5, -16(%rdi, %rdx), D(%xmm0)
> + palignr $5, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -785,7 +755,7 @@ LABEL(nibble_ashr_5_restart_use):
> LABEL(nibble_ashr_5_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $5, D(%xmm0)
> + psrldq $5, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -803,10 +773,10 @@ LABEL(nibble_ashr_5_use):
> */
> .p2align 4
> LABEL(ashr_6):
> - pslldq $10, D(%xmm2)
> + pslldq $10, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -835,7 +805,7 @@ LABEL(loop_ashr_6_use):
>
> LABEL(nibble_ashr_6_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $6, -16(%rdi, %rdx), D(%xmm0)
> + palignr $6, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -854,7 +824,7 @@ LABEL(nibble_ashr_6_restart_use):
> jg LABEL(nibble_ashr_6_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $6, -16(%rdi, %rdx), D(%xmm0)
> + palignr $6, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -874,7 +844,7 @@ LABEL(nibble_ashr_6_restart_use):
> LABEL(nibble_ashr_6_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $6, D(%xmm0)
> + psrldq $6, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -892,10 +862,10 @@ LABEL(nibble_ashr_6_use):
> */
> .p2align 4
> LABEL(ashr_7):
> - pslldq $9, D(%xmm2)
> + pslldq $9, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -924,7 +894,7 @@ LABEL(loop_ashr_7_use):
>
> LABEL(nibble_ashr_7_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $7, -16(%rdi, %rdx), D(%xmm0)
> + palignr $7, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -943,7 +913,7 @@ LABEL(nibble_ashr_7_restart_use):
> jg LABEL(nibble_ashr_7_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $7, -16(%rdi, %rdx), D(%xmm0)
> + palignr $7, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a,(%rsi,%rdx), %xmm0
> #else
> @@ -963,7 +933,7 @@ LABEL(nibble_ashr_7_restart_use):
> LABEL(nibble_ashr_7_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $7, D(%xmm0)
> + psrldq $7, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -981,10 +951,10 @@ LABEL(nibble_ashr_7_use):
> */
> .p2align 4
> LABEL(ashr_8):
> - pslldq $8, D(%xmm2)
> + pslldq $8, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1013,7 +983,7 @@ LABEL(loop_ashr_8_use):
>
> LABEL(nibble_ashr_8_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $8, -16(%rdi, %rdx), D(%xmm0)
> + palignr $8, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1032,7 +1002,7 @@ LABEL(nibble_ashr_8_restart_use):
> jg LABEL(nibble_ashr_8_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $8, -16(%rdi, %rdx), D(%xmm0)
> + palignr $8, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1052,7 +1022,7 @@ LABEL(nibble_ashr_8_restart_use):
> LABEL(nibble_ashr_8_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $8, D(%xmm0)
> + psrldq $8, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1070,10 +1040,10 @@ LABEL(nibble_ashr_8_use):
> */
> .p2align 4
> LABEL(ashr_9):
> - pslldq $7, D(%xmm2)
> + pslldq $7, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1103,7 +1073,7 @@ LABEL(loop_ashr_9_use):
> LABEL(nibble_ashr_9_restart_use):
> movdqa (%rdi, %rdx), %xmm0
>
> - palignr $9, -16(%rdi, %rdx), D(%xmm0)
> + palignr $9, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1122,7 +1092,7 @@ LABEL(nibble_ashr_9_restart_use):
> jg LABEL(nibble_ashr_9_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $9, -16(%rdi, %rdx), D(%xmm0)
> + palignr $9, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1142,7 +1112,7 @@ LABEL(nibble_ashr_9_restart_use):
> LABEL(nibble_ashr_9_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $9, D(%xmm0)
> + psrldq $9, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1160,10 +1130,10 @@ LABEL(nibble_ashr_9_use):
> */
> .p2align 4
> LABEL(ashr_10):
> - pslldq $6, D(%xmm2)
> + pslldq $6, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1192,7 +1162,7 @@ LABEL(loop_ashr_10_use):
>
> LABEL(nibble_ashr_10_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $10, -16(%rdi, %rdx), D(%xmm0)
> + palignr $10, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1211,7 +1181,7 @@ LABEL(nibble_ashr_10_restart_use):
> jg LABEL(nibble_ashr_10_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $10, -16(%rdi, %rdx), D(%xmm0)
> + palignr $10, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1231,7 +1201,7 @@ LABEL(nibble_ashr_10_restart_use):
> LABEL(nibble_ashr_10_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $10, D(%xmm0)
> + psrldq $10, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1249,10 +1219,10 @@ LABEL(nibble_ashr_10_use):
> */
> .p2align 4
> LABEL(ashr_11):
> - pslldq $5, D(%xmm2)
> + pslldq $5, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1281,7 +1251,7 @@ LABEL(loop_ashr_11_use):
>
> LABEL(nibble_ashr_11_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $11, -16(%rdi, %rdx), D(%xmm0)
> + palignr $11, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1300,7 +1270,7 @@ LABEL(nibble_ashr_11_restart_use):
> jg LABEL(nibble_ashr_11_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $11, -16(%rdi, %rdx), D(%xmm0)
> + palignr $11, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1320,7 +1290,7 @@ LABEL(nibble_ashr_11_restart_use):
> LABEL(nibble_ashr_11_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $11, D(%xmm0)
> + psrldq $11, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1338,10 +1308,10 @@ LABEL(nibble_ashr_11_use):
> */
> .p2align 4
> LABEL(ashr_12):
> - pslldq $4, D(%xmm2)
> + pslldq $4, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1370,7 +1340,7 @@ LABEL(loop_ashr_12_use):
>
> LABEL(nibble_ashr_12_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $12, -16(%rdi, %rdx), D(%xmm0)
> + palignr $12, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1389,7 +1359,7 @@ LABEL(nibble_ashr_12_restart_use):
> jg LABEL(nibble_ashr_12_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $12, -16(%rdi, %rdx), D(%xmm0)
> + palignr $12, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1409,7 +1379,7 @@ LABEL(nibble_ashr_12_restart_use):
> LABEL(nibble_ashr_12_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $12, D(%xmm0)
> + psrldq $12, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1427,10 +1397,10 @@ LABEL(nibble_ashr_12_use):
> */
> .p2align 4
> LABEL(ashr_13):
> - pslldq $3, D(%xmm2)
> + pslldq $3, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1460,7 +1430,7 @@ LABEL(loop_ashr_13_use):
>
> LABEL(nibble_ashr_13_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $13, -16(%rdi, %rdx), D(%xmm0)
> + palignr $13, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1479,7 +1449,7 @@ LABEL(nibble_ashr_13_restart_use):
> jg LABEL(nibble_ashr_13_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $13, -16(%rdi, %rdx), D(%xmm0)
> + palignr $13, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1499,7 +1469,7 @@ LABEL(nibble_ashr_13_restart_use):
> LABEL(nibble_ashr_13_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $13, D(%xmm0)
> + psrldq $13, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1517,10 +1487,10 @@ LABEL(nibble_ashr_13_use):
> */
> .p2align 4
> LABEL(ashr_14):
> - pslldq $2, D(%xmm2)
> + pslldq $2, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1550,7 +1520,7 @@ LABEL(loop_ashr_14_use):
>
> LABEL(nibble_ashr_14_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $14, -16(%rdi, %rdx), D(%xmm0)
> + palignr $14, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1569,7 +1539,7 @@ LABEL(nibble_ashr_14_restart_use):
> jg LABEL(nibble_ashr_14_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $14, -16(%rdi, %rdx), D(%xmm0)
> + palignr $14, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1589,7 +1559,7 @@ LABEL(nibble_ashr_14_restart_use):
> LABEL(nibble_ashr_14_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $14, D(%xmm0)
> + psrldq $14, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> @@ -1607,10 +1577,10 @@ LABEL(nibble_ashr_14_use):
> */
> .p2align 4
> LABEL(ashr_15):
> - pslldq $1, D(%xmm2)
> + pslldq $1, %xmm2
> TOLOWER (%xmm1, %xmm2)
> - pcmpeqb %xmm1, D(%xmm2)
> - psubb %xmm0, D(%xmm2)
> + pcmpeqb %xmm1, %xmm2
> + psubb %xmm0, %xmm2
> pmovmskb %xmm2, %r9d
> shr %cl, %edx
> shr %cl, %r9d
> @@ -1642,7 +1612,7 @@ LABEL(loop_ashr_15_use):
>
> LABEL(nibble_ashr_15_restart_use):
> movdqa (%rdi, %rdx), %xmm0
> - palignr $15, -16(%rdi, %rdx), D(%xmm0)
> + palignr $15, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1661,7 +1631,7 @@ LABEL(nibble_ashr_15_restart_use):
> jg LABEL(nibble_ashr_15_use)
>
> movdqa (%rdi, %rdx), %xmm0
> - palignr $15, -16(%rdi, %rdx), D(%xmm0)
> + palignr $15, -16(%rdi, %rdx), %xmm0
> #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
> pcmpistri $0x1a, (%rsi,%rdx), %xmm0
> #else
> @@ -1681,7 +1651,7 @@ LABEL(nibble_ashr_15_restart_use):
> LABEL(nibble_ashr_15_use):
> sub $0x1000, %r10
> movdqa -16(%rdi, %rdx), %xmm0
> - psrldq $15, D(%xmm0)
> + psrldq $15, %xmm0
> pcmpistri $0x3a,%xmm0, %xmm0
> #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
> cmp %r11, %rcx
> diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
> deleted file mode 100644
> index b51b86d223..0000000000
> --- a/sysdeps/x86_64/multiarch/strncase_l-avx.S
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* strncasecmp_l optimized with AVX.
> - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <https://www.gnu.org/licenses/>. */
> -
> -#define STRCMP_SSE42 __strncasecmp_l_avx
> -#define USE_AVX 1
> -#define USE_AS_STRNCASECMP_L
> -#include "strcmp-sse42.S"
> --
> 2.25.1
>
LGTM.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Thanks.
--
H.J.
next prev parent reply other threads:[~2022-03-24 19:05 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-23 21:57 [PATCH v1 01/23] benchtests: Use json-lib in bench-strchr.c Noah Goldstein
2022-03-23 21:57 ` [PATCH v1 02/23] benchtests: Add random benchmark " Noah Goldstein
2022-03-24 18:44 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 03/23] x86: Code cleanup in strchr-avx2 and comment justifying branch Noah Goldstein
2022-03-24 18:53 ` H.J. Lu
2022-03-24 19:20 ` Noah Goldstein
2022-03-24 19:36 ` H.J. Lu
2022-05-12 19:31 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 04/23] x86: Code cleanup in strchr-evex " Noah Goldstein
2022-03-24 18:54 ` H.J. Lu
2022-05-12 19:32 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 05/23] benchtests: Use json-lib in bench-strpbrk.c Noah Goldstein
2022-03-24 18:54 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 06/23] benchtests: Use json-lib in bench-strspn.c Noah Goldstein
2022-03-24 18:54 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 07/23] x86: Optimize strcspn and strpbrk in strcspn-c.c Noah Goldstein
2022-03-24 18:55 ` H.J. Lu
2022-05-12 19:34 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 08/23] x86: Optimize strspn in strspn-c.c Noah Goldstein
2022-03-24 18:56 ` H.J. Lu
2022-05-12 19:39 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 09/23] x86: Remove strcspn-sse2.S and use the generic implementation Noah Goldstein
2022-03-24 18:57 ` H.J. Lu
2022-05-12 19:40 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 10/23] x86: Remove strpbrk-sse2.S " Noah Goldstein
2022-03-24 18:57 ` H.J. Lu
2022-05-12 19:41 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 11/23] x86: Remove strspn-sse2.S " Noah Goldstein
2022-03-24 18:57 ` H.J. Lu
2022-05-12 19:42 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 12/23] x86: Fix fallback for wcsncmp_avx2 in strcmp-avx2.S [BZ #28896] Noah Goldstein
2022-03-24 18:59 ` H.J. Lu
2022-03-24 19:18 ` Noah Goldstein
2022-03-24 19:34 ` H.J. Lu
2022-03-24 19:39 ` Noah Goldstein
2022-03-24 20:50 ` [PATCH v2 12/31] " Noah Goldstein
2022-03-24 21:26 ` H.J. Lu
2022-03-24 21:43 ` Noah Goldstein
2022-03-24 21:58 ` H.J. Lu
2022-05-04 6:05 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 13/23] benchtests: Use json-lib in bench-strcasecmp.c Noah Goldstein
2022-03-24 19:00 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 14/23] benchtests: Use json-lib in bench-strncasecmp.c Noah Goldstein
2022-03-24 19:00 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 15/23] string: Expand page cross tests in test-strcasecmp.c Noah Goldstein
2022-03-24 19:01 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 16/23] string: Expand page cross tests in test-strncasecmp.c Noah Goldstein
2022-03-24 19:01 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 17/23] x86: Optimize str{n}casecmp TOLOWER logic in strcmp.S Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-05-12 19:44 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 18/23] x86: Optimize str{n}casecmp TOLOWER logic in strcmp-sse42.S Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-05-12 19:45 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 19/23] string: Expand page cross test cases in test-strcmp.c Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 20/23] string: Expand page cross test cases in test-strncmp.c Noah Goldstein
2022-03-24 19:02 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 21/23] x86: Add AVX2 optimized str{n}casecmp Noah Goldstein
2022-03-24 19:03 ` H.J. Lu
2022-03-24 22:41 ` [PATCH v3 " Noah Goldstein
2022-03-24 22:41 ` [PATCH v3 22/23] x86: Add EVEX " Noah Goldstein
2022-03-24 23:56 ` [PATCH v4 21/23] x86: Add AVX2 " Noah Goldstein
2022-03-24 23:56 ` [PATCH v4 22/23] x86: Add EVEX " Noah Goldstein
2022-03-25 18:15 ` H.J. Lu
2022-03-25 18:18 ` Noah Goldstein
2022-05-12 19:47 ` Sunil Pandey
2022-05-12 19:52 ` Sunil Pandey
2022-03-25 18:14 ` [PATCH v4 21/23] x86: Add AVX2 " H.J. Lu
2022-05-12 19:52 ` Sunil Pandey
2022-03-23 21:57 ` [PATCH v1 22/23] x86: Add EVEX " Noah Goldstein
2022-03-24 19:04 ` H.J. Lu
2022-03-23 21:57 ` [PATCH v1 23/23] x86: Remove AVX str{n}casecmp Noah Goldstein
2022-03-24 19:04 ` H.J. Lu [this message]
2022-05-12 19:54 ` Sunil Pandey
2022-03-24 18:43 ` [PATCH v1 01/23] benchtests: Use json-lib in bench-strchr.c H.J. Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAMe9rOpzEL=V1OmUFJuScNetUc3mgMqYeqcqiD9aK+tBTN_sxQ@mail.gmail.com' \
--to=hjl.tools@gmail.com \
--cc=carlos@systemhalted.org \
--cc=goldstein.w.n@gmail.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).