public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: Adhemerval Zanella <adhemerval.zanella@linaro.org>,
	 GNU C Library <libc-alpha@sourceware.org>
Subject: Re: [PATCH] x86_64: Remove bzero optimization
Date: Fri, 13 May 2022 12:50:44 -0700	[thread overview]
Message-ID: <CAMe9rOpS2LVzRsidjYjWkiJFu7KO2OEQX0AZuRp=c5sJMSmnkw@mail.gmail.com> (raw)
In-Reply-To: <CAFUsyf+C4DZs5NYsOfNHSZi4FZG-rPNg-Di4pXJ=K8h81piaZw@mail.gmail.com>

On Fri, May 13, 2022 at 7:55 AM Noah Goldstein via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Fri, May 13, 2022 at 7:42 AM Adhemerval Zanella via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
> >
> > Both symbols are marked as legacy in POSIX.1-2001 and were removed in
> > POSIX.1-2008, although the prototypes are defined for _GNU_SOURCE
> > or _DEFAULT_SOURCE.
> >
> > GCC also replaces bcopy with memmove and bzero with memset in its default
> > configuration (to actually get a bzero libc call, the code must omit
> > the string.h inclusion and be built with -fno-builtin), so it is
> > highly unlikely that programs are actually calling the libc bzero symbol.
> >
> > On a recent Linux distro (Ubuntu 22.04), there are no bzero calls
> > in the installed binaries.
> >
> >   $ cat count_bstring.sh
> >   #!/bin/bash
> >
> >   files=`IFS=':';for i in $PATH; do test -d "$i" && find "$i" -maxdepth 1 -executable -type f; done`
> >   total=0
> >   for file in $files; do
> >     symbols=`objdump -R $file 2>&1`
> >     if [ $? -eq 0 ]; then
> >       ncalls=`echo $symbols | grep -w $1 | wc -l`
> >       ((total=total+ncalls))
> >       if [ $ncalls -gt 0 ]; then
> >         echo "$file: $ncalls"
> >       fi
> >     fi
> >   done
> >   echo "TOTAL=$total"
> >   $ ./count_bstring.sh bzero
> >   TOTAL=0
> >
> > Checked on x86_64-linux-gnu.
> > ---
> >  sysdeps/x86_64/bzero.S                        |   1 -
> >  sysdeps/x86_64/memset.S                       |  10 +-
> >  sysdeps/x86_64/multiarch/Makefile             |   1 -
> >  sysdeps/x86_64/multiarch/bzero.c              | 106 ------------------
> >  sysdeps/x86_64/multiarch/ifunc-impl-list.c    |  42 -------
> >  .../memset-avx2-unaligned-erms-rtm.S          |   1 -
> >  .../multiarch/memset-avx2-unaligned-erms.S    |   6 -
> >  .../multiarch/memset-avx512-unaligned-erms.S  |   3 -
> >  .../multiarch/memset-evex-unaligned-erms.S    |   3 -
> >  .../multiarch/memset-sse2-unaligned-erms.S    |   1 -
> >  .../multiarch/memset-vec-unaligned-erms.S     |  56 +--------
> >  11 files changed, 2 insertions(+), 228 deletions(-)
> >  delete mode 100644 sysdeps/x86_64/bzero.S
> >  delete mode 100644 sysdeps/x86_64/multiarch/bzero.c
> >
> > diff --git a/sysdeps/x86_64/bzero.S b/sysdeps/x86_64/bzero.S
> > deleted file mode 100644
> > index f96d567fd8..0000000000
> > --- a/sysdeps/x86_64/bzero.S
> > +++ /dev/null
> > @@ -1 +0,0 @@
> > -/* Implemented in memset.S.  */
> > diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
> > index af26e9cedc..a6eea61a4d 100644
> > --- a/sysdeps/x86_64/memset.S
> > +++ b/sysdeps/x86_64/memset.S
> > @@ -1,4 +1,4 @@
> > -/* memset/bzero -- set memory area to CH/0
> > +/* memset -- set memory area to CH/0
> >     Optimized version for x86-64.
> >     Copyright (C) 2002-2022 Free Software Foundation, Inc.
> >     This file is part of the GNU C Library.
> > @@ -35,9 +35,6 @@
> >    punpcklwd %xmm0, %xmm0; \
> >    pshufd $0, %xmm0, %xmm0
> >
> > -# define BZERO_ZERO_VEC0() \
> > -  pxor %xmm0, %xmm0
> > -
> >  # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> >    movd d, %xmm0; \
> >    pshufd $0, %xmm0, %xmm0; \
> > @@ -56,10 +53,6 @@
> >  # define MEMSET_SYMBOL(p,s)    memset
> >  #endif
> >
> > -#ifndef BZERO_SYMBOL
> > -# define BZERO_SYMBOL(p,s)     __bzero
> > -#endif
> > -
> >  #ifndef WMEMSET_SYMBOL
> >  # define WMEMSET_CHK_SYMBOL(p,s) p
> >  # define WMEMSET_SYMBOL(p,s)   __wmemset
> > @@ -70,7 +63,6 @@
> >  libc_hidden_builtin_def (memset)
> >
> >  #if IS_IN (libc)
> > -weak_alias (__bzero, bzero)
> >  libc_hidden_def (__wmemset)
> >  weak_alias (__wmemset, wmemset)
> >  libc_hidden_weak (wmemset)
> > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> > index 0400ea332b..f3ab5e0928 100644
> > --- a/sysdeps/x86_64/multiarch/Makefile
> > +++ b/sysdeps/x86_64/multiarch/Makefile
> > @@ -1,7 +1,6 @@
> >  ifeq ($(subdir),string)
> >
> >  sysdep_routines += \
> > -  bzero \
> >    memchr-avx2 \
> >    memchr-avx2-rtm \
> >    memchr-evex \
> > diff --git a/sysdeps/x86_64/multiarch/bzero.c b/sysdeps/x86_64/multiarch/bzero.c
> > deleted file mode 100644
> > index 58a14b2c33..0000000000
> > --- a/sysdeps/x86_64/multiarch/bzero.c
> > +++ /dev/null
> > @@ -1,106 +0,0 @@
> > -/* Multiple versions of bzero.
> > -   All versions must be listed in ifunc-impl-list.c.
> > -   Copyright (C) 2022 Free Software Foundation, Inc.
> > -   This file is part of the GNU C Library.
> > -
> > -   The GNU C Library is free software; you can redistribute it and/or
> > -   modify it under the terms of the GNU Lesser General Public
> > -   License as published by the Free Software Foundation; either
> > -   version 2.1 of the License, or (at your option) any later version.
> > -
> > -   The GNU C Library is distributed in the hope that it will be useful,
> > -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > -   Lesser General Public License for more details.
> > -
> > -   You should have received a copy of the GNU Lesser General Public
> > -   License along with the GNU C Library; if not, see
> > -   <https://www.gnu.org/licenses/>.  */
> > -
> > -/* Define multiple versions only for the definition in libc.  */
> > -#if IS_IN (libc)
> > -# define __bzero __redirect___bzero
> > -# include <string.h>
> > -# undef __bzero
> > -
> > -# define SYMBOL_NAME __bzero
> > -# include <init-arch.h>
> > -
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (sse2_unaligned)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (sse2_unaligned_erms)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned) attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_erms)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_rtm)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx2_unaligned_erms_rtm)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (evex_unaligned)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (evex_unaligned_erms)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx512_unaligned)
> > -  attribute_hidden;
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE1 (avx512_unaligned_erms)
> > -  attribute_hidden;
> > -
> > -static inline void *
> > -IFUNC_SELECTOR (void)
> > -{
> > -  const struct cpu_features* cpu_features = __get_cpu_features ();
> > -
> > -  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
> > -      && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
> > -    {
> > -      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > -          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > -          && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > -       {
> > -         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
> > -           return OPTIMIZE1 (avx512_unaligned_erms);
> > -
> > -         return OPTIMIZE1 (avx512_unaligned);
> > -       }
> > -    }
> > -
> > -  if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
> > -    {
> > -      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > -          && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > -          && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > -       {
> > -         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
> > -           return OPTIMIZE1 (evex_unaligned_erms);
> > -
> > -         return OPTIMIZE1 (evex_unaligned);
> > -       }
> > -
> > -      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> > -       {
> > -         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
> > -           return OPTIMIZE1 (avx2_unaligned_erms_rtm);
> > -
> > -         return OPTIMIZE1 (avx2_unaligned_rtm);
> > -       }
> > -
> > -      if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
> > -       {
> > -         if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
> > -           return OPTIMIZE1 (avx2_unaligned_erms);
> > -
> > -         return OPTIMIZE1 (avx2_unaligned);
> > -       }
> > -    }
> > -
> > -  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
> > -    return OPTIMIZE1 (sse2_unaligned_erms);
> > -
> > -  return OPTIMIZE1 (sse2_unaligned);
> > -}
> > -
> > -libc_ifunc_redirected (__redirect___bzero, __bzero, IFUNC_SELECTOR ());
> > -
> > -weak_alias (__bzero, bzero)
> > -#endif
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index a8afcf81bb..7218095430 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -291,48 +291,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                               __memset_avx512_no_vzeroupper)
> >              )
> >
> > -  /* Support sysdeps/x86_64/multiarch/bzero.c.  */
> > -  IFUNC_IMPL (i, name, bzero,
> > -             IFUNC_IMPL_ADD (array, i, bzero, 1,
> > -                             __bzero_sse2_unaligned)
> > -             IFUNC_IMPL_ADD (array, i, bzero, 1,
> > -                             __bzero_sse2_unaligned_erms)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             CPU_FEATURE_USABLE (AVX2),
> > -                             __bzero_avx2_unaligned)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             CPU_FEATURE_USABLE (AVX2),
> > -                             __bzero_avx2_unaligned_erms)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             (CPU_FEATURE_USABLE (AVX2)
> > -                              && CPU_FEATURE_USABLE (RTM)),
> > -                             __bzero_avx2_unaligned_rtm)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             (CPU_FEATURE_USABLE (AVX2)
> > -                              && CPU_FEATURE_USABLE (RTM)),
> > -                             __bzero_avx2_unaligned_erms_rtm)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > -                              && CPU_FEATURE_USABLE (BMI2)),
> > -                             __bzero_evex_unaligned)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > -                              && CPU_FEATURE_USABLE (BMI2)),
> > -                             __bzero_evex_unaligned_erms)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > -                              && CPU_FEATURE_USABLE (BMI2)),
> > -                             __bzero_avx512_unaligned_erms)
> > -             IFUNC_IMPL_ADD (array, i, bzero,
> > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > -                              && CPU_FEATURE_USABLE (BMI2)),
> > -                             __bzero_avx512_unaligned)
> > -            )
> > -
> >    /* Support sysdeps/x86_64/multiarch/rawmemchr.c.  */
> >    IFUNC_IMPL (i, name, rawmemchr,
> >               IFUNC_IMPL_ADD (array, i, rawmemchr,
> > diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
> > index 5a5ee6f672..8ac3e479bb 100644
> > --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
> > +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
> > @@ -5,7 +5,6 @@
> >
> >  #define SECTION(p) p##.avx.rtm
> >  #define MEMSET_SYMBOL(p,s)     p##_avx2_##s##_rtm
> > -#define BZERO_SYMBOL(p,s)      p##_avx2_##s##_rtm
> >  #define WMEMSET_SYMBOL(p,s)    p##_avx2_##s##_rtm
> >
> >  #include "memset-avx2-unaligned-erms.S"
> > diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> > index a093a2831f..c0bf2875d0 100644
> > --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> > +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> > @@ -14,9 +14,6 @@
> >    vmovd d, %xmm0; \
> >    movq r, %rax;
> >
> > -# define BZERO_ZERO_VEC0() \
> > -  vpxor %xmm0, %xmm0, %xmm0
> > -
> >  # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> >    MEMSET_SET_VEC0_AND_SET_RETURN(d, r)
> >
> > @@ -32,9 +29,6 @@
> >  # ifndef MEMSET_SYMBOL
> >  #  define MEMSET_SYMBOL(p,s)   p##_avx2_##s
> >  # endif
> > -# ifndef BZERO_SYMBOL
> > -#  define BZERO_SYMBOL(p,s)    p##_avx2_##s
> > -# endif
> >  # ifndef WMEMSET_SYMBOL
> >  #  define WMEMSET_SYMBOL(p,s)  p##_avx2_##s
> >  # endif
> > diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
> > index 727c92133a..5241216a77 100644
> > --- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
> > +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
> > @@ -19,9 +19,6 @@
> >    vpbroadcastb d, %VEC0; \
> >    movq r, %rax
> >
> > -# define BZERO_ZERO_VEC0() \
> > -  vpxorq %XMM0, %XMM0, %XMM0
> > -
> >  # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> >    vpbroadcastd d, %VEC0; \
> >    movq r, %rax
> > diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> > index 5d8fa78f05..6370021506 100644
> > --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> > +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> > @@ -19,9 +19,6 @@
> >    vpbroadcastb d, %VEC0; \
> >    movq r, %rax
> >
> > -# define BZERO_ZERO_VEC0() \
> > -  vpxorq %XMM0, %XMM0, %XMM0
> > -
> >  # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> >    vpbroadcastd d, %VEC0; \
> >    movq r, %rax
> > diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> > index d52d170804..3d92f6993a 100644
> > --- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> > +++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> > @@ -22,7 +22,6 @@
> >
> >  #if IS_IN (libc)
> >  # define MEMSET_SYMBOL(p,s)    p##_sse2_##s
> > -# define BZERO_SYMBOL(p,s)     MEMSET_SYMBOL (p, s)
> >  # define WMEMSET_SYMBOL(p,s)   p##_sse2_##s
> >
> >  # ifdef SHARED
> > diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
> > index 785fee1d57..aa78fbb620 100644
> > --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
> > +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
> > @@ -1,4 +1,4 @@
> > -/* memset/bzero with unaligned store and rep stosb
> > +/* memset with unaligned store and rep stosb
> >     Copyright (C) 2016-2022 Free Software Foundation, Inc.
> >     This file is part of the GNU C Library.
> >
> > @@ -26,10 +26,6 @@
> >
> >  #include <sysdep.h>
> >
> > -#ifndef BZERO_SYMBOL
> > -# define BZERO_SYMBOL(p,s)             MEMSET_SYMBOL (p, s)
> > -#endif
> > -
> >  #ifndef MEMSET_CHK_SYMBOL
> >  # define MEMSET_CHK_SYMBOL(p,s)                MEMSET_SYMBOL(p, s)
> >  #endif
> > @@ -134,31 +130,6 @@ ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
> >  END (WMEMSET_SYMBOL (__wmemset, unaligned))
> >  #endif
> >
> > -ENTRY (BZERO_SYMBOL(__bzero, unaligned))
> > -#if VEC_SIZE > 16
> > -       BZERO_ZERO_VEC0 ()
> > -#endif
> > -       mov     %RDI_LP, %RAX_LP
> > -       mov     %RSI_LP, %RDX_LP
> > -#ifndef USE_LESS_VEC_MASK_STORE
> > -       xorl    %esi, %esi
> > -#endif
> > -       cmp     $VEC_SIZE, %RDX_LP
> > -       jb      L(less_vec_no_vdup)
> > -#ifdef USE_LESS_VEC_MASK_STORE
> > -       xorl    %esi, %esi
> > -#endif
> > -#if VEC_SIZE <= 16
> > -       BZERO_ZERO_VEC0 ()
> > -#endif
> > -       cmp     $(VEC_SIZE * 2), %RDX_LP
> > -       ja      L(more_2x_vec)
> > -       /* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
> > -       VMOVU   %VEC(0), (%rdi)
> > -       VMOVU   %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx)
> > -       VZEROUPPER_RETURN
> > -END (BZERO_SYMBOL(__bzero, unaligned))
> > -
> >  #if defined SHARED && IS_IN (libc)
> >  ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
> >         cmp     %RDX_LP, %RCX_LP
> > @@ -216,31 +187,6 @@ END (__memset_erms)
> >  END (MEMSET_SYMBOL (__memset, erms))
> >  # endif
> >
> > -ENTRY_P2ALIGN (BZERO_SYMBOL(__bzero, unaligned_erms), 6)
> > -# if VEC_SIZE > 16
> > -       BZERO_ZERO_VEC0 ()
> > -# endif
> > -       mov     %RDI_LP, %RAX_LP
> > -       mov     %RSI_LP, %RDX_LP
> > -# ifndef USE_LESS_VEC_MASK_STORE
> > -       xorl    %esi, %esi
> > -# endif
> > -       cmp     $VEC_SIZE, %RDX_LP
> > -       jb      L(less_vec_no_vdup)
>
> This label 'L(less_vec_no_vdup)' is no longer used. Can you
> remove it from the rest of memset-vec-unaligned-erms.S?
> > -# ifdef USE_LESS_VEC_MASK_STORE
> > -       xorl    %esi, %esi
> > -# endif
> > -# if VEC_SIZE <= 16
> > -       BZERO_ZERO_VEC0 ()
> > -# endif
> > -       cmp     $(VEC_SIZE * 2), %RDX_LP
> > -       ja      L(stosb_more_2x_vec)
> > -       /* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
> > -       VMOVU   %VEC(0), (%rdi)
> > -       VMOVU   %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx)
> > -       VZEROUPPER_RETURN
> > -END (BZERO_SYMBOL(__bzero, unaligned_erms))
> > -
> >  # if defined SHARED && IS_IN (libc)
> >  ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
> >         cmp     %RDX_LP, %RCX_LP
> > --
> > 2.34.1
> >
>
> I'm in favor of this.
>
> Should we keep the old method of just swapping params that was removed in:
>
> commit 3d9f171bfb5325bd5f427e9fc386453358c6e840
> Author: H.J. Lu <hjl.tools@gmail.com>
> Date:   Mon Feb 7 05:55:15 2022 -0800
>
>     x86-64: Optimize bzero
>
> ```
> - .section SECTION(.text),"ax",@progbits
> -#if VEC_SIZE == 16 && IS_IN (libc)
> -ENTRY (__bzero)
> - mov %RDI_LP, %RAX_LP /* Set return value.  */
> - mov %RSI_LP, %RDX_LP /* Set n.  */
> - xorl %esi, %esi
> - pxor %XMM0, %XMM0
> - jmp L(entry_from_bzero)
> -END (__bzero)
> -weak_alias (__bzero, bzero)
> ```
> ?
>
> Old software that hasn't been recompiled will have a bit of a regression
> going through the generic impl and it only costs 16 bytes of code.

This old code hardcoded bzero to the SSE2 memset.  The generic one
will use memset (0), which can use YMM or ZMM stores.  I don't think
there will be much of a regression.

-- 
H.J.

  reply	other threads:[~2022-05-13 19:51 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-13 12:40 Adhemerval Zanella
2022-05-13 14:54 ` Noah Goldstein
2022-05-13 19:50   ` H.J. Lu [this message]
2022-05-13 23:13     ` Noah Goldstein
2022-05-14  0:42       ` H.J. Lu
2022-05-14 23:51 ` Fangrui Song
2022-05-16 12:35   ` Adhemerval Zanella
2022-05-16 13:29     ` Wilco Dijkstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAMe9rOpS2LVzRsidjYjWkiJFu7KO2OEQX0AZuRp=c5sJMSmnkw@mail.gmail.com' \
    --to=hjl.tools@gmail.com \
    --cc=adhemerval.zanella@linaro.org \
    --cc=goldstein.w.n@gmail.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).