public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: libc-alpha@sourceware.org, carlos@systemhalted.org
Subject: Re: [PATCH v5 4/4] x86: Add avx2 optimized functions for the wchar_t strcpy family
Date: Tue, 8 Nov 2022 19:01:45 -0800	[thread overview]
Message-ID: <Y2sYGc5QPVMItEVS@gmail.com> (raw)
In-Reply-To: <20221109013841.3707572-4-goldstein.w.n@gmail.com>

On Tue, Nov 08, 2022 at 05:38:41PM -0800, Noah Goldstein wrote:
> Implemented:
>     wcscat-avx2  (+ 744 bytes)
>     wcscpy-avx2  (+ 539 bytes)
>     wcpcpy-avx2  (+ 577 bytes)
>     wcsncpy-avx2 (+1108 bytes)
>     wcpncpy-avx2 (+1214 bytes)
>     wcsncat-avx2 (+1085 bytes)
> 
> Performance Changes:
>     Times are from N = 10 runs of the benchmark suite and are reported
>     as geometric mean of all ratios of New Implementation / Best Old
>     Implementation. Best Old Implementation was determined with the
>     highest ISA implementation.
> 
>     wcscat-avx2     -> 0.975
>     wcscpy-avx2     -> 0.591
>     wcpcpy-avx2     -> 0.698
>     wcsncpy-avx2    -> 0.730
>     wcpncpy-avx2    -> 0.711
>     wcsncat-avx2    -> 0.954
> 
> Code Size Changes:
>     This change increases the size of libc.so by ~5.5kb. For
>     reference the patch optimizing the normal strcpy family functions
>     decreases libc.so by ~5.2kb.
> 
> Full check passes on x86-64 and build succeeds for all ISA levels w/
> and w/o multiarch.
> ---
>  sysdeps/x86_64/multiarch/Makefile          |  6 +++++
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 28 ++++++++++++++++++++--
>  sysdeps/x86_64/multiarch/ifunc-wcs.h       |  7 ++++++
>  sysdeps/x86_64/multiarch/wcpcpy-avx2.S     |  8 +++++++
>  sysdeps/x86_64/multiarch/wcpcpy-generic.c  |  2 +-
>  sysdeps/x86_64/multiarch/wcpncpy-avx2.S    |  8 +++++++
>  sysdeps/x86_64/multiarch/wcpncpy-generic.c |  2 +-
>  sysdeps/x86_64/multiarch/wcscat-avx2.S     | 10 ++++++++
>  sysdeps/x86_64/multiarch/wcscat-generic.c  |  2 +-
>  sysdeps/x86_64/multiarch/wcscpy-avx2.S     |  7 ++++++
>  sysdeps/x86_64/multiarch/wcscpy-generic.c  |  2 +-
>  sysdeps/x86_64/multiarch/wcscpy.c          |  5 ++++
>  sysdeps/x86_64/multiarch/wcsncat-avx2.S    |  9 +++++++
>  sysdeps/x86_64/multiarch/wcsncat-generic.c |  2 +-
>  sysdeps/x86_64/multiarch/wcsncpy-avx2.S    |  7 ++++++
>  sysdeps/x86_64/multiarch/wcsncpy-generic.c |  2 +-
>  sysdeps/x86_64/wcpcpy-generic.c            |  2 +-
>  sysdeps/x86_64/wcpcpy.S                    |  3 ++-
>  sysdeps/x86_64/wcpncpy-generic.c           |  2 +-
>  sysdeps/x86_64/wcpncpy.S                   |  3 ++-
>  sysdeps/x86_64/wcscat-generic.c            |  2 +-
>  sysdeps/x86_64/wcscat.S                    |  3 ++-
>  sysdeps/x86_64/wcscpy.S                    |  1 +
>  sysdeps/x86_64/wcsncat-generic.c           |  2 +-
>  sysdeps/x86_64/wcsncat.S                   |  3 ++-
>  sysdeps/x86_64/wcsncpy-generic.c           |  2 +-
>  sysdeps/x86_64/wcsncpy.S                   |  3 ++-
>  27 files changed, 115 insertions(+), 18 deletions(-)
>  create mode 100644 sysdeps/x86_64/multiarch/wcpcpy-avx2.S
>  create mode 100644 sysdeps/x86_64/multiarch/wcpncpy-avx2.S
>  create mode 100644 sysdeps/x86_64/multiarch/wcscat-avx2.S
>  create mode 100644 sysdeps/x86_64/multiarch/wcscpy-avx2.S
>  create mode 100644 sysdeps/x86_64/multiarch/wcsncat-avx2.S
>  create mode 100644 sysdeps/x86_64/multiarch/wcsncpy-avx2.S
> 
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index d6e01940c3..e1e894c963 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -131,10 +131,13 @@ endif
>  
>  ifeq ($(subdir),wcsmbs)
>  sysdep_routines += \
> +  wcpcpy-avx2 \
>    wcpcpy-evex \
>    wcpcpy-generic \
> +  wcpncpy-avx2 \
>    wcpncpy-evex \
>    wcpncpy-generic \
> +  wcscat-avx2 \
>    wcscat-evex \
>    wcscat-generic \
>    wcschr-avx2 \
> @@ -146,6 +149,7 @@ sysdep_routines += \
>    wcscmp-avx2-rtm \
>    wcscmp-evex \
>    wcscmp-sse2 \
> +  wcscpy-avx2 \
>    wcscpy-evex \
>    wcscpy-generic \
>    wcscpy-ssse3 \
> @@ -155,11 +159,13 @@ sysdep_routines += \
>    wcslen-evex512 \
>    wcslen-sse2 \
>    wcslen-sse4_1 \
> +  wcsncat-avx2 \
>    wcsncat-evex \
>    wcsncat-generic \
>    wcsncmp-avx2 \
>    wcsncmp-avx2-rtm \
>    wcsncmp-evex \
> +  wcsncpy-avx2 \
>    wcsncpy-evex \
>    wcsncpy-generic \
>    wcsnlen-avx2 \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index c908d6c158..0c15dfebfd 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -907,6 +907,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  				      && CPU_FEATURE_USABLE (BMI2)),
>  				     __wcscpy_evex)
>  	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcscpy,
> +				     (CPU_FEATURE_USABLE (AVX2)
> +				      && CPU_FEATURE_USABLE (BMI2)),
> +				     __wcscpy_avx2)
> +	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcscpy,
>  				     CPU_FEATURE_USABLE (SSSE3),
>  				     __wcscpy_ssse3)
>  	      X86_IFUNC_IMPL_ADD_V1 (array, i, wcscpy,
> @@ -920,7 +924,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  				      && CPU_FEATURE_USABLE (AVX512BW)
>  				      && CPU_FEATURE_USABLE (BMI2)),
>  				     __wcsncpy_evex)
> -	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcpncpy,
> +	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncpy,
> +				     (CPU_FEATURE_USABLE (AVX2)
> +				      && CPU_FEATURE_USABLE (BMI2)),
> +				     __wcsncpy_avx2)
> +	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy,
>  				     1,
>  				     __wcsncpy_generic))
>  
> @@ -932,6 +940,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  				      && CPU_FEATURE_USABLE (BMI2)),
>  				     __wcpcpy_evex)
>  	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcpcpy,
> +				     (CPU_FEATURE_USABLE (AVX2)
> +				      && CPU_FEATURE_USABLE (BMI2)),
> +				     __wcpcpy_avx2)
> +	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcpcpy,
>  				     1,
>  				     __wcpcpy_generic))
>  
> @@ -942,7 +954,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  				      && CPU_FEATURE_USABLE (AVX512BW)
>  				      && CPU_FEATURE_USABLE (BMI2)),
>  				     __wcpncpy_evex)
> -	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncpy,
> +	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcpncpy,
> +				     (CPU_FEATURE_USABLE (AVX2)
> +				      && CPU_FEATURE_USABLE (BMI2)),
> +				     __wcpncpy_avx2)
> +	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy,
>  				     1,
>  				     __wcpncpy_generic))
>  
> @@ -954,6 +970,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  				      && CPU_FEATURE_USABLE (BMI2)),
>  				     __wcscat_evex)
>  	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcscat,
> +				     (CPU_FEATURE_USABLE (AVX2)
> +				      && CPU_FEATURE_USABLE (BMI2)),
> +				     __wcscat_avx2)
> +	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcscat,
>  				     1,
>  				     __wcscat_generic))
>  
> @@ -965,6 +985,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  				      && CPU_FEATURE_USABLE (BMI2)),
>  				     __wcsncat_evex)
>  	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncat,
> +				     (CPU_FEATURE_USABLE (AVX2)
> +				      && CPU_FEATURE_USABLE (BMI2)),
> +				     __wcsncat_avx2)
> +	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncat,
>  				     1,
>  				     __wcsncat_generic))
>  
> diff --git a/sysdeps/x86_64/multiarch/ifunc-wcs.h b/sysdeps/x86_64/multiarch/ifunc-wcs.h
> index 1d2a63458b..51194e620e 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-wcs.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-wcs.h
> @@ -27,6 +27,8 @@
>  
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
>  
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> +
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
>  
>  static inline void *
> @@ -42,6 +44,11 @@ IFUNC_SELECTOR (void)
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>  	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
>  	return OPTIMIZE (evex);
> +
> +      if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> +				       Prefer_No_VZEROUPPER, !))
> +	return OPTIMIZE (avx2);
> +
>      }
>  
>    return OPTIMIZE (GENERIC);
> diff --git a/sysdeps/x86_64/multiarch/wcpcpy-avx2.S b/sysdeps/x86_64/multiarch/wcpcpy-avx2.S
> new file mode 100644
> index 0000000000..0fffd912d3
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcpcpy-avx2.S
> @@ -0,0 +1,8 @@
> +#ifndef WCPCPY
> +# define WCPCPY	__wcpcpy_avx2
> +#endif
> +
> +#define USE_AS_STPCPY
> +#define USE_AS_WCSCPY
> +#define STRCPY	WCPCPY
> +#include "strcpy-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcpcpy-generic.c b/sysdeps/x86_64/multiarch/wcpcpy-generic.c
> index 6039196a3e..0ba29b081f 100644
> --- a/sysdeps/x86_64/multiarch/wcpcpy-generic.c
> +++ b/sysdeps/x86_64/multiarch/wcpcpy-generic.c
> @@ -19,7 +19,7 @@
>  /* We always need to build this implementation as strspn-sse4 needs to
>     be able to fallback to it.  */
>  #include <isa-level.h>
> -#if ISA_SHOULD_BUILD (3)
> +#if ISA_SHOULD_BUILD (2)
>  
>  # define WCPCPY __wcpcpy_generic
>  # include <wcsmbs/wcpcpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcpncpy-avx2.S b/sysdeps/x86_64/multiarch/wcpncpy-avx2.S
> new file mode 100644
> index 0000000000..b7e594f7b7
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcpncpy-avx2.S
> @@ -0,0 +1,8 @@
> +#ifndef WCPNCPY
> +# define WCPNCPY	__wcpncpy_avx2
> +#endif
> +
> +#define USE_AS_WCSCPY
> +#define USE_AS_STPCPY
> +#define STRNCPY	WCPNCPY
> +#include "strncpy-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcpncpy-generic.c b/sysdeps/x86_64/multiarch/wcpncpy-generic.c
> index de8d34320e..4aab4ecdd2 100644
> --- a/sysdeps/x86_64/multiarch/wcpncpy-generic.c
> +++ b/sysdeps/x86_64/multiarch/wcpncpy-generic.c
> @@ -19,7 +19,7 @@
>  /* We always need to build this implementation as strspn-sse4 needs to
>     be able to fallback to it.  */
>  #include <isa-level.h>
> -#if ISA_SHOULD_BUILD (3)
> +#if ISA_SHOULD_BUILD (2)
>  
>  # define WCPNCPY __wcpncpy_generic
>  # include <wcsmbs/wcpncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscat-avx2.S b/sysdeps/x86_64/multiarch/wcscat-avx2.S
> new file mode 100644
> index 0000000000..a20f23c09d
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcscat-avx2.S
> @@ -0,0 +1,10 @@
> +#ifndef WCSCAT
> +# define WCSCAT	__wcscat_avx2
> +#endif
> +
> +#define USE_AS_WCSCPY
> +#define USE_AS_STRCAT
> +
> +#define STRCPY	WCSCAT
> +
> +#include "strcpy-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcscat-generic.c b/sysdeps/x86_64/multiarch/wcscat-generic.c
> index d86b4d5c00..6476f85bbb 100644
> --- a/sysdeps/x86_64/multiarch/wcscat-generic.c
> +++ b/sysdeps/x86_64/multiarch/wcscat-generic.c
> @@ -19,7 +19,7 @@
>  /* We always need to build this implementation as strspn-sse4 needs to
>     be able to fallback to it.  */
>  #include <isa-level.h>
> -#if ISA_SHOULD_BUILD (3)
> +#if ISA_SHOULD_BUILD (2)
>  
>  # define WCSCAT __wcscat_generic
>  # include <wcsmbs/wcscat.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-avx2.S b/sysdeps/x86_64/multiarch/wcscpy-avx2.S
> new file mode 100644
> index 0000000000..6bc509da07
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcscpy-avx2.S
> @@ -0,0 +1,7 @@
> +#ifndef WCSCPY
> +# define WCSCPY	__wcscpy_avx2
> +#endif
> +
> +#define USE_AS_WCSCPY
> +#define STRCPY	WCSCPY
> +#include "strcpy-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c
> index 4a1fffae4b..600d606c45 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy-generic.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c
> @@ -18,7 +18,7 @@
>  
>  
>  #include <isa-level.h>
> -#if ISA_SHOULD_BUILD (3)
> +#if ISA_SHOULD_BUILD (2)
>  
>  # define WCSCPY  __wcscpy_generic
>  # include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> index 9ad77da8ac..e204059873 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> @@ -28,6 +28,8 @@
>  
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
>  
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> +
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
>  
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> @@ -44,6 +46,9 @@ IFUNC_SELECTOR (void)
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>  	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
>  	return OPTIMIZE (evex);
> +
> +      if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER, !))
> +	return OPTIMIZE (avx2);
>      }
>  
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> diff --git a/sysdeps/x86_64/multiarch/wcsncat-avx2.S b/sysdeps/x86_64/multiarch/wcsncat-avx2.S
> new file mode 100644
> index 0000000000..a72105b7e9
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcsncat-avx2.S
> @@ -0,0 +1,9 @@
> +#ifndef WCSNCAT
> +# define WCSNCAT	__wcsncat_avx2
> +#endif
> +
> +#define USE_AS_WCSCPY
> +#define USE_AS_STRCAT
> +
> +#define STRNCAT	WCSNCAT
> +#include "strncat-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcsncat-generic.c b/sysdeps/x86_64/multiarch/wcsncat-generic.c
> index 4b55cb40bc..9ced02b35e 100644
> --- a/sysdeps/x86_64/multiarch/wcsncat-generic.c
> +++ b/sysdeps/x86_64/multiarch/wcsncat-generic.c
> @@ -19,7 +19,7 @@
>  /* We always need to build this implementation as strspn-sse4 needs to
>     be able to fallback to it.  */
>  #include <isa-level.h>
> -#if ISA_SHOULD_BUILD (3)
> +#if ISA_SHOULD_BUILD (2)
>  
>  # define WCSNCAT __wcsncat_generic
>  # include <wcsmbs/wcsncat.c>
> diff --git a/sysdeps/x86_64/multiarch/wcsncpy-avx2.S b/sysdeps/x86_64/multiarch/wcsncpy-avx2.S
> new file mode 100644
> index 0000000000..3a1a8a372c
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcsncpy-avx2.S
> @@ -0,0 +1,7 @@
> +#ifndef WCSNCPY
> +# define WCSNCPY	__wcsncpy_avx2
> +#endif
> +
> +#define USE_AS_WCSCPY
> +#define STRNCPY	WCSNCPY
> +#include "strncpy-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcsncpy-generic.c b/sysdeps/x86_64/multiarch/wcsncpy-generic.c
> index d0e8a86605..693521713b 100644
> --- a/sysdeps/x86_64/multiarch/wcsncpy-generic.c
> +++ b/sysdeps/x86_64/multiarch/wcsncpy-generic.c
> @@ -19,7 +19,7 @@
>  /* We always need to build this implementation as strspn-sse4 needs to
>     be able to fallback to it.  */
>  #include <isa-level.h>
> -#if ISA_SHOULD_BUILD (3)
> +#if ISA_SHOULD_BUILD (2)
>  
>  # define WCSNCPY __wcsncpy_generic
>  # include <wcsmbs/wcsncpy.c>
> diff --git a/sysdeps/x86_64/wcpcpy-generic.c b/sysdeps/x86_64/wcpcpy-generic.c
> index 3ddc98872f..4ab6182cd9 100644
> --- a/sysdeps/x86_64/wcpcpy-generic.c
> +++ b/sysdeps/x86_64/wcpcpy-generic.c
> @@ -24,7 +24,7 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL <= 3
> +#if MINIMUM_X86_ISA_LEVEL <= 2
>  
>  # include <wcsmbs/wcpcpy.c>
>  
> diff --git a/sysdeps/x86_64/wcpcpy.S b/sysdeps/x86_64/wcpcpy.S
> index 4e4fca71eb..e64af6977f 100644
> --- a/sysdeps/x86_64/wcpcpy.S
> +++ b/sysdeps/x86_64/wcpcpy.S
> @@ -24,11 +24,12 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL >= 4
> +#if MINIMUM_X86_ISA_LEVEL >= 3
>  
>  # define WCPCPY	__wcpcpy
>  
>  # define DEFAULT_IMPL_V4	"multiarch/wcpcpy-evex.S"
> +# define DEFAULT_IMPL_V3	"multiarch/wcpcpy-avx2.S"
>  /* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
>     should never be used from here.  */
>  # define DEFAULT_IMPL_V1	"ERROR -- Invalid ISA IMPL"
> diff --git a/sysdeps/x86_64/wcpncpy-generic.c b/sysdeps/x86_64/wcpncpy-generic.c
> index 0c76e5614c..18c0377d35 100644
> --- a/sysdeps/x86_64/wcpncpy-generic.c
> +++ b/sysdeps/x86_64/wcpncpy-generic.c
> @@ -24,7 +24,7 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL <= 3
> +#if MINIMUM_X86_ISA_LEVEL <= 2
>  
>  # include <wcsmbs/wcpncpy.c>
>  
> diff --git a/sysdeps/x86_64/wcpncpy.S b/sysdeps/x86_64/wcpncpy.S
> index b4e531473e..0e0f432fbb 100644
> --- a/sysdeps/x86_64/wcpncpy.S
> +++ b/sysdeps/x86_64/wcpncpy.S
> @@ -24,11 +24,12 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL >= 4
> +#if MINIMUM_X86_ISA_LEVEL >= 3
>  
>  # define WCPNCPY	__wcpncpy
>  
>  # define DEFAULT_IMPL_V4	"multiarch/wcpncpy-evex.S"
> +# define DEFAULT_IMPL_V3	"multiarch/wcpncpy-avx2.S"
>  /* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
>     should never be used from here.  */
>  # define DEFAULT_IMPL_V1	"ERROR -- Invalid ISA IMPL"
> diff --git a/sysdeps/x86_64/wcscat-generic.c b/sysdeps/x86_64/wcscat-generic.c
> index 512d0e4d43..639ceac523 100644
> --- a/sysdeps/x86_64/wcscat-generic.c
> +++ b/sysdeps/x86_64/wcscat-generic.c
> @@ -24,7 +24,7 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL <= 3
> +#if MINIMUM_X86_ISA_LEVEL <= 2
>  
>  # include <wcsmbs/wcscat.c>
>  
> diff --git a/sysdeps/x86_64/wcscat.S b/sysdeps/x86_64/wcscat.S
> index ee8360b6e8..06130f58f9 100644
> --- a/sysdeps/x86_64/wcscat.S
> +++ b/sysdeps/x86_64/wcscat.S
> @@ -24,11 +24,12 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL >= 4
> +#if MINIMUM_X86_ISA_LEVEL >= 3
>  
>  # define WCSCAT	__wcscat
>  
>  # define DEFAULT_IMPL_V4	"multiarch/wcscat-evex.S"
> +# define DEFAULT_IMPL_V3	"multiarch/wcscat-avx2.S"
>  /* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
>     should never be used from here.  */
>  # define DEFAULT_IMPL_V1	"ERROR -- Invalid ISA IMPL"
> diff --git a/sysdeps/x86_64/wcscpy.S b/sysdeps/x86_64/wcscpy.S
> index e403579961..4a859585a6 100644
> --- a/sysdeps/x86_64/wcscpy.S
> +++ b/sysdeps/x86_64/wcscpy.S
> @@ -29,6 +29,7 @@
>  # define WCSCPY	__wcscpy
>  
>  # define DEFAULT_IMPL_V4	"multiarch/wcscpy-evex.S"
> +# define DEFAULT_IMPL_V3	"multiarch/wcscpy-avx2.S"
>  # define DEFAULT_IMPL_V2	"multiarch/wcscpy-ssse3.S"
>  /* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
>     should never be used from here.  */
> diff --git a/sysdeps/x86_64/wcsncat-generic.c b/sysdeps/x86_64/wcsncat-generic.c
> index 86e20d9028..57bdd9b7cf 100644
> --- a/sysdeps/x86_64/wcsncat-generic.c
> +++ b/sysdeps/x86_64/wcsncat-generic.c
> @@ -24,7 +24,7 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL <= 3
> +#if MINIMUM_X86_ISA_LEVEL <= 2
>  
>  # include <wcsmbs/wcsncat.c>
>  
> diff --git a/sysdeps/x86_64/wcsncat.S b/sysdeps/x86_64/wcsncat.S
> index 090055a1b8..e1d8609651 100644
> --- a/sysdeps/x86_64/wcsncat.S
> +++ b/sysdeps/x86_64/wcsncat.S
> @@ -24,11 +24,12 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL >= 4
> +#if MINIMUM_X86_ISA_LEVEL >= 3
>  
>  # define WCSNCAT	wcsncat
>  
>  # define DEFAULT_IMPL_V4	"multiarch/wcsncat-evex.S"
> +# define DEFAULT_IMPL_V3	"multiarch/wcsncat-avx2.S"
>  /* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
>     should never be used from here.  */
>  # define DEFAULT_IMPL_V1	"ERROR -- Invalid ISA IMPL"
> diff --git a/sysdeps/x86_64/wcsncpy-generic.c b/sysdeps/x86_64/wcsncpy-generic.c
> index 0f0ee65b65..4dcbd8ac7f 100644
> --- a/sysdeps/x86_64/wcsncpy-generic.c
> +++ b/sysdeps/x86_64/wcsncpy-generic.c
> @@ -24,7 +24,7 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL <= 3
> +#if MINIMUM_X86_ISA_LEVEL <= 2
>  
>  # include <wcsmbs/wcsncpy.c>
>  
> diff --git a/sysdeps/x86_64/wcsncpy.S b/sysdeps/x86_64/wcsncpy.S
> index 32eaf1163b..f305b5eb9b 100644
> --- a/sysdeps/x86_64/wcsncpy.S
> +++ b/sysdeps/x86_64/wcsncpy.S
> @@ -24,11 +24,12 @@
>  
>  #include <isa-level.h>
>  
> -#if MINIMUM_X86_ISA_LEVEL >= 4
> +#if MINIMUM_X86_ISA_LEVEL >= 3
>  
>  # define WCSNCPY	__wcsncpy
>  
>  # define DEFAULT_IMPL_V4	"multiarch/wcsncpy-evex.S"
> +# define DEFAULT_IMPL_V3	"multiarch/wcsncpy-avx2.S"
>  /* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
>     should never be used from here.  */
>  # define DEFAULT_IMPL_V1	"ERROR -- Invalid ISA IMPL"
> -- 
> 2.34.1
> 

LGTM.

Thanks.

H.J.

  reply	other threads:[~2022-11-09  3:01 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-03  8:53 [PATCH v1 1/4] benchtests: Make str{n}{cat|cpy} benchmarks output json Noah Goldstein
2022-11-03  8:53 ` [PATCH v1 2/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-evex functions Noah Goldstein
2022-11-03  8:55   ` Noah Goldstein
2022-11-04 23:04   ` [PATCH v4 1/4] " Noah Goldstein
2022-11-04 23:04     ` [PATCH v4 2/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-avx2 functions Noah Goldstein
2022-11-04 23:04     ` [PATCH v4 3/4] x86: Add evex optimized functions for the wchar_t strcpy family Noah Goldstein
2022-11-04 23:04     ` [PATCH v4 4/4] x86: Add avx2 " Noah Goldstein
2022-11-04 23:34     ` [PATCH v4 1/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-evex functions H.J. Lu
2022-11-09  1:38   ` [PATCH v5 " Noah Goldstein
2022-11-09  1:38     ` [PATCH v5 2/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-avx2 functions Noah Goldstein
2022-11-09  3:00       ` H.J. Lu
2022-11-09  1:38     ` [PATCH v5 3/4] x86: Add evex optimized functions for the wchar_t strcpy family Noah Goldstein
2022-11-09  3:01       ` H.J. Lu
2022-11-09  1:38     ` [PATCH v5 4/4] x86: Add avx2 " Noah Goldstein
2022-11-09  3:01       ` H.J. Lu [this message]
2022-11-09  3:00     ` [PATCH v5 1/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-evex functions H.J. Lu
2022-11-03  8:53 ` [PATCH v1 3/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-avx2 functions Noah Goldstein
2022-11-03  8:55   ` Noah Goldstein
2022-11-03  8:53 ` [PATCH v1 4/4] x86: Add optimized functions for the wide-character strcpy family Noah Goldstein
2022-11-03  9:06 ` [PATCH v1 1/4] benchtests: Make str{n}{cat|cpy} benchmarks output json Noah Goldstein
2022-11-04  8:20 ` [PATCH v2 " Noah Goldstein
2022-11-04  8:20   ` [PATCH v2 2/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-evex functions Noah Goldstein
2022-11-04 16:33     ` H.J. Lu
2022-11-04 20:20       ` Noah Goldstein
2022-11-04  8:20   ` [PATCH v2 3/4] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-avx2 functions Noah Goldstein
2022-11-04 16:45     ` H.J. Lu
2022-11-04 20:21       ` Noah Goldstein
2022-11-04  8:20   ` [PATCH v2 4/4] x86: Add optimized functions for the wide-character strcpy family Noah Goldstein
2022-11-04 16:47     ` H.J. Lu
2022-11-04 20:22       ` Noah Goldstein
2022-11-04 16:26   ` [PATCH v2 1/4] benchtests: Make str{n}{cat|cpy} benchmarks output json H.J. Lu
2022-11-04 20:13 ` [PATCH v3 1/5] " Noah Goldstein
2022-11-04 20:13   ` [PATCH v3 2/5] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-evex functions Noah Goldstein
2022-11-04 21:46     ` H.J. Lu
2022-11-04 22:27       ` Noah Goldstein
2022-11-04 22:47         ` H.J. Lu
2022-11-04 23:06           ` Noah Goldstein
2022-11-04 20:13   ` [PATCH v3 3/5] x86: Optimize and shrink st{r|p}{n}{cat|cpy}-avx2 functions Noah Goldstein
2022-11-04 20:13   ` [PATCH v3 4/5] x86: Add evex optimized functions for the wchar_t strcpy family Noah Goldstein
2022-11-04 20:13   ` [PATCH v3 5/5] x86: Add avx2 " Noah Goldstein
2022-11-04 21:01   ` [PATCH v3 1/5] benchtests: Make str{n}{cat|cpy} benchmarks output json H.J. Lu
2022-11-04 21:24     ` Noah Goldstein

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y2sYGc5QPVMItEVS@gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=carlos@systemhalted.org \
    --cc=goldstein.w.n@gmail.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).