public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: libc-alpha@sourceware.org, carlos@systemhalted.org
Subject: Re: [PATCH v1 5/6] x86: Remove str{n}cat-ssse3
Date: Fri, 25 Mar 2022 12:57:15 -0700	[thread overview]
Message-ID: <CAMe9rOpcfLQc5YUKFbofBVADv1W8CuVymCyp5tfE--+Zbtet0g@mail.gmail.com> (raw)
In-Reply-To: <20220325183625.1170867-5-goldstein.w.n@gmail.com>

On Fri, Mar 25, 2022 at 11:36 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> With SSE2, SSE4.1, AVX2, and EVEX versions, very few targets prefer
> SSSE3. As a result, it's no longer worth the code size cost.
> ---
>  sysdeps/x86_64/multiarch/Makefile          |   2 -
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c |   4 -
>  sysdeps/x86_64/multiarch/ifunc-strcpy.h    |   4 -
>  sysdeps/x86_64/multiarch/strcat-ssse3.S    | 866 ---------------------
>  sysdeps/x86_64/multiarch/strncat-ssse3.S   |   3 -
>  5 files changed, 879 deletions(-)
>  delete mode 100644 sysdeps/x86_64/multiarch/strcat-ssse3.S
>  delete mode 100644 sysdeps/x86_64/multiarch/strncat-ssse3.S
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 323be3b969..a2ebc06c5f 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -59,7 +59,6 @@ sysdep_routines += \
>    strcat-evex \
>    strcat-sse2 \
>    strcat-sse2-unaligned \
> -  strcat-ssse3 \
>    strchr-avx2 \
>    strchr-avx2-rtm \
>    strchr-evex \
> @@ -97,7 +96,6 @@ sysdep_routines += \
>    strncat-c \
>    strncat-evex \
>    strncat-sse2-unaligned \
> -  strncat-ssse3 \
>    strncmp-avx2 \
>    strncmp-avx2-rtm \
>    strncmp-evex \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index d6852ab365..4133ed7e43 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -471,8 +471,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               (CPU_FEATURE_USABLE (AVX512VL)
>                                && CPU_FEATURE_USABLE (AVX512BW)),
>                               __strcat_evex)
> -             IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (SSSE3),
> -                             __strcat_ssse3)
>               IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
>               IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
>
> @@ -620,8 +618,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               (CPU_FEATURE_USABLE (AVX512VL)
>                                && CPU_FEATURE_USABLE (AVX512BW)),
>                               __strncat_evex)
> -             IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (SSSE3),
> -                             __strncat_ssse3)
>               IFUNC_IMPL_ADD (array, i, strncat, 1,
>                               __strncat_sse2_unaligned)
>               IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> index 5bece38f78..a15afa44e9 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> @@ -23,7 +23,6 @@
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
>    attribute_hidden;
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
> @@ -50,8 +49,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
>      return OPTIMIZE (sse2_unaligned);
>
> -  if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> -    return OPTIMIZE (ssse3);
> -
>    return OPTIMIZE (sse2);
>  }
> diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
> deleted file mode 100644
> index 9f39e4fcd1..0000000000
> --- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
> +++ /dev/null
> @@ -1,866 +0,0 @@
> -/* strcat with SSSE3
> -   Copyright (C) 2011-2022 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -
> -# ifndef STRCAT
> -#  define STRCAT  __strcat_ssse3
> -# endif
> -
> -# define USE_AS_STRCAT
> -
> -.text
> -ENTRY (STRCAT)
> -# ifdef USE_AS_STRNCAT
> -       mov     %rdx, %r8
> -# endif
> -
> -
> -/* Inline corresponding strlen file, temporary until new strcpy
> -   implementation gets merged.  */
> -
> -       xor     %eax, %eax
> -       cmpb    $0, (%rdi)
> -       jz      L(exit_tail0)
> -       cmpb    $0, 1(%rdi)
> -       jz      L(exit_tail1)
> -       cmpb    $0, 2(%rdi)
> -       jz      L(exit_tail2)
> -       cmpb    $0, 3(%rdi)
> -       jz      L(exit_tail3)
> -
> -       cmpb    $0, 4(%rdi)
> -       jz      L(exit_tail4)
> -       cmpb    $0, 5(%rdi)
> -       jz      L(exit_tail5)
> -       cmpb    $0, 6(%rdi)
> -       jz      L(exit_tail6)
> -       cmpb    $0, 7(%rdi)
> -       jz      L(exit_tail7)
> -
> -       cmpb    $0, 8(%rdi)
> -       jz      L(exit_tail8)
> -       cmpb    $0, 9(%rdi)
> -       jz      L(exit_tail9)
> -       cmpb    $0, 10(%rdi)
> -       jz      L(exit_tail10)
> -       cmpb    $0, 11(%rdi)
> -       jz      L(exit_tail11)
> -
> -       cmpb    $0, 12(%rdi)
> -       jz      L(exit_tail12)
> -       cmpb    $0, 13(%rdi)
> -       jz      L(exit_tail13)
> -       cmpb    $0, 14(%rdi)
> -       jz      L(exit_tail14)
> -       cmpb    $0, 15(%rdi)
> -       jz      L(exit_tail15)
> -       pxor    %xmm0, %xmm0
> -       lea     16(%rdi), %rcx
> -       lea     16(%rdi), %rax
> -       and     $-16, %rax
> -
> -       pcmpeqb (%rax), %xmm0
> -       pmovmskb %xmm0, %edx
> -       pxor    %xmm1, %xmm1
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm1
> -       pmovmskb %xmm1, %edx
> -       pxor    %xmm2, %xmm2
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm2
> -       pmovmskb %xmm2, %edx
> -       pxor    %xmm3, %xmm3
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm3
> -       pmovmskb %xmm3, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm0
> -       pmovmskb %xmm0, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm1
> -       pmovmskb %xmm1, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm2
> -       pmovmskb %xmm2, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm3
> -       pmovmskb %xmm3, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm0
> -       pmovmskb %xmm0, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm1
> -       pmovmskb %xmm1, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm2
> -       pmovmskb %xmm2, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm3
> -       pmovmskb %xmm3, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm0
> -       pmovmskb %xmm0, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm1
> -       pmovmskb %xmm1, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm2
> -       pmovmskb %xmm2, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       pcmpeqb (%rax), %xmm3
> -       pmovmskb %xmm3, %edx
> -       test    %edx, %edx
> -       lea     16(%rax), %rax
> -       jnz     L(exit)
> -
> -       and     $-0x40, %rax
> -
> -       .p2align 4
> -L(aligned_64):
> -       pcmpeqb (%rax), %xmm0
> -       pcmpeqb 16(%rax), %xmm1
> -       pcmpeqb 32(%rax), %xmm2
> -       pcmpeqb 48(%rax), %xmm3
> -       pmovmskb %xmm0, %edx
> -       pmovmskb %xmm1, %r11d
> -       pmovmskb %xmm2, %r10d
> -       pmovmskb %xmm3, %r9d
> -       or      %edx, %r9d
> -       or      %r11d, %r9d
> -       or      %r10d, %r9d
> -       lea     64(%rax), %rax
> -       jz      L(aligned_64)
> -
> -       test    %edx, %edx
> -       jnz     L(aligned_64_exit_16)
> -       test    %r11d, %r11d
> -       jnz     L(aligned_64_exit_32)
> -       test    %r10d, %r10d
> -       jnz     L(aligned_64_exit_48)
> -
> -L(aligned_64_exit_64):
> -       pmovmskb %xmm3, %edx
> -       jmp     L(exit)
> -
> -L(aligned_64_exit_48):
> -       lea     -16(%rax), %rax
> -       mov     %r10d, %edx
> -       jmp     L(exit)
> -
> -L(aligned_64_exit_32):
> -       lea     -32(%rax), %rax
> -       mov     %r11d, %edx
> -       jmp     L(exit)
> -
> -L(aligned_64_exit_16):
> -       lea     -48(%rax), %rax
> -
> -L(exit):
> -       sub     %rcx, %rax
> -       test    %dl, %dl
> -       jz      L(exit_high)
> -       test    $0x01, %dl
> -       jnz     L(exit_tail0)
> -
> -       test    $0x02, %dl
> -       jnz     L(exit_tail1)
> -
> -       test    $0x04, %dl
> -       jnz     L(exit_tail2)
> -
> -       test    $0x08, %dl
> -       jnz     L(exit_tail3)
> -
> -       test    $0x10, %dl
> -       jnz     L(exit_tail4)
> -
> -       test    $0x20, %dl
> -       jnz     L(exit_tail5)
> -
> -       test    $0x40, %dl
> -       jnz     L(exit_tail6)
> -       add     $7, %eax
> -L(exit_tail0):
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_high):
> -       add     $8, %eax
> -       test    $0x01, %dh
> -       jnz     L(exit_tail0)
> -
> -       test    $0x02, %dh
> -       jnz     L(exit_tail1)
> -
> -       test    $0x04, %dh
> -       jnz     L(exit_tail2)
> -
> -       test    $0x08, %dh
> -       jnz     L(exit_tail3)
> -
> -       test    $0x10, %dh
> -       jnz     L(exit_tail4)
> -
> -       test    $0x20, %dh
> -       jnz     L(exit_tail5)
> -
> -       test    $0x40, %dh
> -       jnz     L(exit_tail6)
> -       add     $7, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail1):
> -       add     $1, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail2):
> -       add     $2, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail3):
> -       add     $3, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail4):
> -       add     $4, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail5):
> -       add     $5, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail6):
> -       add     $6, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail7):
> -       add     $7, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail8):
> -       add     $8, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail9):
> -       add     $9, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail10):
> -       add     $10, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail11):
> -       add     $11, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail12):
> -       add     $12, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail13):
> -       add     $13, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail14):
> -       add     $14, %eax
> -       jmp     L(StartStrcpyPart)
> -
> -       .p2align 4
> -L(exit_tail15):
> -       add     $15, %eax
> -
> -       .p2align 4
> -L(StartStrcpyPart):
> -       mov     %rsi, %rcx
> -       lea     (%rdi, %rax), %rdx
> -# ifdef USE_AS_STRNCAT
> -       test    %r8, %r8
> -       jz      L(StrncatExit0)
> -       cmp     $8, %r8
> -       jbe     L(StrncatExit8Bytes)
> -# endif
> -       cmpb    $0, (%rcx)
> -       jz      L(Exit1)
> -       cmpb    $0, 1(%rcx)
> -       jz      L(Exit2)
> -       cmpb    $0, 2(%rcx)
> -       jz      L(Exit3)
> -       cmpb    $0, 3(%rcx)
> -       jz      L(Exit4)
> -       cmpb    $0, 4(%rcx)
> -       jz      L(Exit5)
> -       cmpb    $0, 5(%rcx)
> -       jz      L(Exit6)
> -       cmpb    $0, 6(%rcx)
> -       jz      L(Exit7)
> -       cmpb    $0, 7(%rcx)
> -       jz      L(Exit8)
> -       cmpb    $0, 8(%rcx)
> -       jz      L(Exit9)
> -# ifdef USE_AS_STRNCAT
> -       cmp     $16, %r8
> -       jb      L(StrncatExit15Bytes)
> -# endif
> -       cmpb    $0, 9(%rcx)
> -       jz      L(Exit10)
> -       cmpb    $0, 10(%rcx)
> -       jz      L(Exit11)
> -       cmpb    $0, 11(%rcx)
> -       jz      L(Exit12)
> -       cmpb    $0, 12(%rcx)
> -       jz      L(Exit13)
> -       cmpb    $0, 13(%rcx)
> -       jz      L(Exit14)
> -       cmpb    $0, 14(%rcx)
> -       jz      L(Exit15)
> -       cmpb    $0, 15(%rcx)
> -       jz      L(Exit16)
> -# ifdef USE_AS_STRNCAT
> -       cmp     $16, %r8
> -       je      L(StrncatExit16)
> -#  define USE_AS_STRNCPY
> -# endif
> -
> -# include "strcpy-ssse3.S"
> -
> -       .p2align 4
> -L(CopyFrom1To16Bytes):
> -       add     %rsi, %rdx
> -       add     %rsi, %rcx
> -
> -       test    %al, %al
> -       jz      L(ExitHigh)
> -       test    $0x01, %al
> -       jnz     L(Exit1)
> -       test    $0x02, %al
> -       jnz     L(Exit2)
> -       test    $0x04, %al
> -       jnz     L(Exit3)
> -       test    $0x08, %al
> -       jnz     L(Exit4)
> -       test    $0x10, %al
> -       jnz     L(Exit5)
> -       test    $0x20, %al
> -       jnz     L(Exit6)
> -       test    $0x40, %al
> -       jnz     L(Exit7)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(ExitHigh):
> -       test    $0x01, %ah
> -       jnz     L(Exit9)
> -       test    $0x02, %ah
> -       jnz     L(Exit10)
> -       test    $0x04, %ah
> -       jnz     L(Exit11)
> -       test    $0x08, %ah
> -       jnz     L(Exit12)
> -       test    $0x10, %ah
> -       jnz     L(Exit13)
> -       test    $0x20, %ah
> -       jnz     L(Exit14)
> -       test    $0x40, %ah
> -       jnz     L(Exit15)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  8(%rcx), %xmm1
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  %xmm1, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit1):
> -       xor     %ah, %ah
> -       movb    %ah, 1(%rdx)
> -L(Exit1):
> -       movb    (%rcx), %al
> -       movb    %al, (%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit2):
> -       xor     %ah, %ah
> -       movb    %ah, 2(%rdx)
> -L(Exit2):
> -       movw    (%rcx), %ax
> -       movw    %ax, (%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit3):
> -       xor     %ah, %ah
> -       movb    %ah, 3(%rdx)
> -L(Exit3):
> -       movw    (%rcx), %ax
> -       movw    %ax, (%rdx)
> -       movb    2(%rcx), %al
> -       movb    %al, 2(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit4):
> -       xor     %ah, %ah
> -       movb    %ah, 4(%rdx)
> -L(Exit4):
> -       mov     (%rcx), %eax
> -       mov     %eax, (%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit5):
> -       xor     %ah, %ah
> -       movb    %ah, 5(%rdx)
> -L(Exit5):
> -       mov     (%rcx), %eax
> -       mov     %eax, (%rdx)
> -       movb    4(%rcx), %al
> -       movb    %al, 4(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit6):
> -       xor     %ah, %ah
> -       movb    %ah, 6(%rdx)
> -L(Exit6):
> -       mov     (%rcx), %eax
> -       mov     %eax, (%rdx)
> -       movw    4(%rcx), %ax
> -       movw    %ax, 4(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit7):
> -       xor     %ah, %ah
> -       movb    %ah, 7(%rdx)
> -L(Exit7):
> -       mov     (%rcx), %eax
> -       mov     %eax, (%rdx)
> -       mov     3(%rcx), %eax
> -       mov     %eax, 3(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit8):
> -       xor     %ah, %ah
> -       movb    %ah, 8(%rdx)
> -L(Exit8):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit9):
> -       xor     %ah, %ah
> -       movb    %ah, 9(%rdx)
> -L(Exit9):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movb    8(%rcx), %al
> -       movb    %al, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit10):
> -       xor     %ah, %ah
> -       movb    %ah, 10(%rdx)
> -L(Exit10):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movw    8(%rcx), %ax
> -       movw    %ax, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit11):
> -       xor     %ah, %ah
> -       movb    %ah, 11(%rdx)
> -L(Exit11):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       mov     7(%rcx), %eax
> -       mov     %eax, 7(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit12):
> -       xor     %ah, %ah
> -       movb    %ah, 12(%rdx)
> -L(Exit12):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       mov     8(%rcx), %eax
> -       mov     %eax, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit13):
> -       xor     %ah, %ah
> -       movb    %ah, 13(%rdx)
> -L(Exit13):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  5(%rcx), %xmm1
> -       movlpd  %xmm1, 5(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit14):
> -       xor     %ah, %ah
> -       movb    %ah, 14(%rdx)
> -L(Exit14):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  6(%rcx), %xmm1
> -       movlpd  %xmm1, 6(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit15):
> -       xor     %ah, %ah
> -       movb    %ah, 15(%rdx)
> -L(Exit15):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  7(%rcx), %xmm1
> -       movlpd  %xmm1, 7(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit16):
> -       xor     %ah, %ah
> -       movb    %ah, 16(%rdx)
> -L(Exit16):
> -       movlpd  (%rcx), %xmm0
> -       movlpd  8(%rcx), %xmm1
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  %xmm1, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -# ifdef USE_AS_STRNCPY
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesCase2):
> -       add     $16, %r8
> -       add     %rsi, %rcx
> -       lea     (%rsi, %rdx), %rsi
> -       lea     -9(%r8), %rdx
> -       and     $1<<7, %dh
> -       or      %al, %dh
> -       test    %dh, %dh
> -       lea     (%rsi), %rdx
> -       jz      L(ExitHighCase2)
> -
> -       test    $0x01, %al
> -       jnz     L(Exit1)
> -       cmp     $1, %r8
> -       je      L(StrncatExit1)
> -       test    $0x02, %al
> -       jnz     L(Exit2)
> -       cmp     $2, %r8
> -       je      L(StrncatExit2)
> -       test    $0x04, %al
> -       jnz     L(Exit3)
> -       cmp     $3, %r8
> -       je      L(StrncatExit3)
> -       test    $0x08, %al
> -       jnz     L(Exit4)
> -       cmp     $4, %r8
> -       je      L(StrncatExit4)
> -       test    $0x10, %al
> -       jnz     L(Exit5)
> -       cmp     $5, %r8
> -       je      L(StrncatExit5)
> -       test    $0x20, %al
> -       jnz     L(Exit6)
> -       cmp     $6, %r8
> -       je      L(StrncatExit6)
> -       test    $0x40, %al
> -       jnz     L(Exit7)
> -       cmp     $7, %r8
> -       je      L(StrncatExit7)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       lea     7(%rdx), %rax
> -       cmpb    $1, (%rax)
> -       sbb     $-1, %rax
> -       xor     %cl, %cl
> -       movb    %cl, (%rax)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(ExitHighCase2):
> -       test    $0x01, %ah
> -       jnz     L(Exit9)
> -       cmp     $9, %r8
> -       je      L(StrncatExit9)
> -       test    $0x02, %ah
> -       jnz     L(Exit10)
> -       cmp     $10, %r8
> -       je      L(StrncatExit10)
> -       test    $0x04, %ah
> -       jnz     L(Exit11)
> -       cmp     $11, %r8
> -       je      L(StrncatExit11)
> -       test    $0x8, %ah
> -       jnz     L(Exit12)
> -       cmp     $12, %r8
> -       je      L(StrncatExit12)
> -       test    $0x10, %ah
> -       jnz     L(Exit13)
> -       cmp     $13, %r8
> -       je      L(StrncatExit13)
> -       test    $0x20, %ah
> -       jnz     L(Exit14)
> -       cmp     $14, %r8
> -       je      L(StrncatExit14)
> -       test    $0x40, %ah
> -       jnz     L(Exit15)
> -       cmp     $15, %r8
> -       je      L(StrncatExit15)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  8(%rcx), %xmm1
> -       movlpd  %xmm1, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -L(CopyFrom1To16BytesCase2OrCase3):
> -       test    %rax, %rax
> -       jnz     L(CopyFrom1To16BytesCase2)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesCase3):
> -       add     $16, %r8
> -       add     %rsi, %rdx
> -       add     %rsi, %rcx
> -
> -       cmp     $8, %r8
> -       ja      L(ExitHighCase3)
> -       cmp     $1, %r8
> -       je      L(StrncatExit1)
> -       cmp     $2, %r8
> -       je      L(StrncatExit2)
> -       cmp     $3, %r8
> -       je      L(StrncatExit3)
> -       cmp     $4, %r8
> -       je      L(StrncatExit4)
> -       cmp     $5, %r8
> -       je      L(StrncatExit5)
> -       cmp     $6, %r8
> -       je      L(StrncatExit6)
> -       cmp     $7, %r8
> -       je      L(StrncatExit7)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       xor     %ah, %ah
> -       movb    %ah, 8(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(ExitHighCase3):
> -       cmp     $9, %r8
> -       je      L(StrncatExit9)
> -       cmp     $10, %r8
> -       je      L(StrncatExit10)
> -       cmp     $11, %r8
> -       je      L(StrncatExit11)
> -       cmp     $12, %r8
> -       je      L(StrncatExit12)
> -       cmp     $13, %r8
> -       je      L(StrncatExit13)
> -       cmp     $14, %r8
> -       je      L(StrncatExit14)
> -       cmp     $15, %r8
> -       je      L(StrncatExit15)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  8(%rcx), %xmm1
> -       movlpd  %xmm1, 8(%rdx)
> -       xor     %ah, %ah
> -       movb    %ah, 16(%rdx)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit0):
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit15Bytes):
> -       cmp     $9, %r8
> -       je      L(StrncatExit9)
> -       cmpb    $0, 9(%rcx)
> -       jz      L(Exit10)
> -       cmp     $10, %r8
> -       je      L(StrncatExit10)
> -       cmpb    $0, 10(%rcx)
> -       jz      L(Exit11)
> -       cmp     $11, %r8
> -       je      L(StrncatExit11)
> -       cmpb    $0, 11(%rcx)
> -       jz      L(Exit12)
> -       cmp     $12, %r8
> -       je      L(StrncatExit12)
> -       cmpb    $0, 12(%rcx)
> -       jz      L(Exit13)
> -       cmp     $13, %r8
> -       je      L(StrncatExit13)
> -       cmpb    $0, 13(%rcx)
> -       jz      L(Exit14)
> -       cmp     $14, %r8
> -       je      L(StrncatExit14)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       movlpd  7(%rcx), %xmm1
> -       movlpd  %xmm1, 7(%rdx)
> -       lea     14(%rdx), %rax
> -       cmpb    $1, (%rax)
> -       sbb     $-1, %rax
> -       xor     %cl, %cl
> -       movb    %cl, (%rax)
> -       mov     %rdi, %rax
> -       ret
> -
> -       .p2align 4
> -L(StrncatExit8Bytes):
> -       cmpb    $0, (%rcx)
> -       jz      L(Exit1)
> -       cmp     $1, %r8
> -       je      L(StrncatExit1)
> -       cmpb    $0, 1(%rcx)
> -       jz      L(Exit2)
> -       cmp     $2, %r8
> -       je      L(StrncatExit2)
> -       cmpb    $0, 2(%rcx)
> -       jz      L(Exit3)
> -       cmp     $3, %r8
> -       je      L(StrncatExit3)
> -       cmpb    $0, 3(%rcx)
> -       jz      L(Exit4)
> -       cmp     $4, %r8
> -       je      L(StrncatExit4)
> -       cmpb    $0, 4(%rcx)
> -       jz      L(Exit5)
> -       cmp     $5, %r8
> -       je      L(StrncatExit5)
> -       cmpb    $0, 5(%rcx)
> -       jz      L(Exit6)
> -       cmp     $6, %r8
> -       je      L(StrncatExit6)
> -       cmpb    $0, 6(%rcx)
> -       jz      L(Exit7)
> -       cmp     $7, %r8
> -       je      L(StrncatExit7)
> -       movlpd  (%rcx), %xmm0
> -       movlpd  %xmm0, (%rdx)
> -       lea     7(%rdx), %rax
> -       cmpb    $1, (%rax)
> -       sbb     $-1, %rax
> -       xor     %cl, %cl
> -       movb    %cl, (%rax)
> -       mov     %rdi, %rax
> -       ret
> -
> -# endif
> -END (STRCAT)
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/strncat-ssse3.S b/sysdeps/x86_64/multiarch/strncat-ssse3.S
> deleted file mode 100644
> index 6c45ff3ec7..0000000000
> --- a/sysdeps/x86_64/multiarch/strncat-ssse3.S
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -#define USE_AS_STRNCAT
> -#define STRCAT __strncat_ssse3
> -#include "strcat-ssse3.S"
> --
> 2.25.1
>

LGTM.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>

Thanks.

-- 
H.J.

  reply	other threads:[~2022-03-25 19:57 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-25 18:36 [PATCH v1 1/6] x86: Remove {w}memcmp-ssse3 Noah Goldstein
2022-03-25 18:36 ` [PATCH v1 2/6] x86: Remove str{n}{case}cmp-ssse3 Noah Goldstein
2022-03-25 19:55   ` H.J. Lu
2022-03-25 20:44   ` [PATCH v2 1/6] x86: Remove {w}memcmp-ssse3 Noah Goldstein
2022-03-25 20:44     ` [PATCH v2 2/6] x86: Remove str{n}{case}cmp-ssse3 Noah Goldstein
2022-03-25 20:44     ` [PATCH v2 3/6] x86: Remove mem{move|cpy}-ssse3 Noah Goldstein
2022-04-10  0:57       ` [PATCH v4 6/6] x86: Reduce code size of mem{move|pcpy|cpy}-ssse3 Noah Goldstein
2022-03-25 20:44     ` [PATCH v2 4/6] x86: Remove mem{move|cpy}-ssse3-back Noah Goldstein
2022-03-25 20:44     ` [PATCH v2 5/6] x86: Remove str{n}cat-ssse3 Noah Goldstein
2022-03-25 20:44     ` [PATCH v2 6/6] x86: Remove str{p}{n}cpy-ssse3 Noah Goldstein
2022-04-10  0:42   ` [PATCH v3 1/6] " Noah Goldstein
2022-04-10  0:48     ` Noah Goldstein
2022-04-10  0:42   ` [PATCH v3 2/6] x86: Remove mem{move|cpy}-ssse3-back Noah Goldstein
2022-04-10  0:48     ` Noah Goldstein
2022-04-10  0:42   ` [PATCH v3 3/6] x86: Reduce code size of Remove mem{move|pcpy|cpy}-ssse3 Noah Goldstein
2022-04-10  0:48     ` Noah Goldstein
2022-04-10  0:42   ` [PATCH v3 4/6] x86: Optimize memcmp SSE2 in memcmp.S Noah Goldstein
2022-04-10  0:48     ` Noah Goldstein
2022-04-10  0:42   ` [PATCH v3 5/6] x86: Remove memcmp-sse4.S Noah Goldstein
2022-04-10  0:48     ` Noah Goldstein
2022-04-10  0:42   ` [PATCH v3 6/6] x86: Cleanup page cross code in memcmp-avx2-movbe.S Noah Goldstein
2022-04-10  0:48     ` Noah Goldstein
2022-04-10  0:54   ` [PATCH v4 1/6] x86: Remove {w}memcmp-ssse3 Noah Goldstein
2022-04-10  0:54     ` [PATCH v4 2/6] x86: Remove str{n}{case}cmp-ssse3 Noah Goldstein
2022-04-10  0:54     ` [PATCH v4 3/6] x86: Remove str{n}cat-ssse3 Noah Goldstein
2022-04-10  0:54     ` [PATCH v4 4/6] x86: Remove str{p}{n}cpy-ssse3 Noah Goldstein
2022-04-10  0:54     ` [PATCH v4 5/6] x86: Remove mem{move|cpy}-ssse3-back Noah Goldstein
2022-04-14 16:47   ` [PATCH v5 1/6] x86: Remove {w}memcmp-ssse3 Noah Goldstein
2022-04-14 16:47     ` [PATCH v5 2/6] x86: Remove str{n}{case}cmp-ssse3 Noah Goldstein
2022-04-14 18:05       ` H.J. Lu
2022-04-14 16:47     ` [PATCH v5 3/6] x86: Remove str{n}cat-ssse3 Noah Goldstein
2022-04-14 18:06       ` H.J. Lu
2022-04-14 16:47     ` [PATCH v5 4/6] x86: Remove str{p}{n}cpy-ssse3 Noah Goldstein
2022-04-14 18:10       ` H.J. Lu
2022-04-14 16:47     ` [PATCH v5 5/6] x86: Remove mem{move|cpy}-ssse3-back Noah Goldstein
2022-04-14 18:13       ` H.J. Lu
2022-04-14 16:47     ` [PATCH v5 6/6] x86: Reduce code size of mem{move|pcpy|cpy}-ssse3 Noah Goldstein
2022-04-14 18:04     ` [PATCH v5 1/6] x86: Remove {w}memcmp-ssse3 H.J. Lu
2022-03-25 18:36 ` [PATCH v1 3/6] x86: Remove mem{move|cpy}-ssse3 Noah Goldstein
2022-03-25 19:56   ` H.J. Lu
2022-03-25 18:36 ` [PATCH v1 4/6] x86: Remove mem{move|cpy}-ssse3-back Noah Goldstein
2022-03-25 19:56   ` H.J. Lu
2022-03-25 18:36 ` [PATCH v1 5/6] x86: Remove str{n}cat-ssse3 Noah Goldstein
2022-03-25 19:57   ` H.J. Lu [this message]
2022-03-25 18:36 ` [PATCH v1 6/6] x86: Remove str{p}{n}cpy-ssse3 Noah Goldstein
2022-03-25 19:57   ` H.J. Lu
2022-03-25 19:54 ` [PATCH v1 1/6] x86: Remove {w}memcmp-ssse3 H.J. Lu
2022-03-25 20:34 ` Andreas Schwab
2022-03-25 20:40   ` Noah Goldstein

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMe9rOpcfLQc5YUKFbofBVADv1W8CuVymCyp5tfE--+Zbtet0g@mail.gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=carlos@systemhalted.org \
    --cc=goldstein.w.n@gmail.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).