From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1039) id 4583B393BC2F; Fri, 28 Jan 2022 02:21:31 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4583B393BC2F Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: H.J. Lu To: glibc-cvs@sourceware.org Subject: [glibc/release/2.29/master] x86-64: Add wcslen optimize for sse4.1 X-Act-Checkin: glibc X-Git-Author: Noah Goldstein X-Git-Refname: refs/heads/release/2.29/master X-Git-Oldrev: ccf4e0edde3eb1411cb306dfcc4d0af4fdb867b7 X-Git-Newrev: 25999674499fc0b38a9c7f97a0f58f3c1df7fb2d Message-Id: <20220128022131.4583B393BC2F@sourceware.org> Date: Fri, 28 Jan 2022 02:21:31 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 28 Jan 2022 02:21:31 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=25999674499fc0b38a9c7f97a0f58f3c1df7fb2d commit 25999674499fc0b38a9c7f97a0f58f3c1df7fb2d Author: Noah Goldstein Date: Wed Jun 23 01:19:34 2021 -0400 x86-64: Add wcslen optimize for sse4.1 No bug. This commit adds the ifunc / build infrastructure necessary for wcslen to prefer the sse4.1 implementation in strlen-vec.S. test-wcslen.c is passing. Signed-off-by: Noah Goldstein Reviewed-by: H.J. 
Lu (cherry picked from commit 6f573a27b6c8b4236445810a44660612323f5a73) Diff: --- sysdeps/x86_64/multiarch/Makefile | 4 +-- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 3 ++ sysdeps/x86_64/multiarch/ifunc-wcslen.h | 52 ++++++++++++++++++++++++++++++ sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 4 +++ sysdeps/x86_64/multiarch/wcslen.c | 2 +- sysdeps/x86_64/multiarch/wcsnlen.c | 34 +------------------ 6 files changed, 63 insertions(+), 36 deletions(-) diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index cf73790380..da1446d731 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -97,8 +97,8 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ wcscpy-ssse3 wcscpy-c \ wcschr-sse2 wcschr-avx2 \ wcsrchr-sse2 wcsrchr-avx2 \ - wcsnlen-sse4_1 wcsnlen-c \ - wcslen-sse2 wcslen-avx2 wcsnlen-avx2 \ + wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \ + wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \ wcschr-avx2-rtm \ wcscmp-avx2-rtm \ wcslen-avx2-rtm \ diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 9ef118a146..e57fb42af3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -657,6 +657,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && HAS_ARCH_FEATURE (AVX512BW_Usable) && HAS_CPU_FEATURE (BMI2)), __wcslen_evex) + IFUNC_IMPL_ADD (array, i, wcsnlen, + CPU_FEATURE_USABLE (SSE4_1), + __wcsnlen_sse4_1) IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) /* Support sysdeps/x86_64/multiarch/wcsnlen.c. */ diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h new file mode 100644 index 0000000000..564cc8cbec --- /dev/null +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -0,0 +1,52 @@ +/* Common definition for ifunc selections for wcslen and wcsnlen + All versions must be listed in ifunc-impl-list.c. 
+ Copyright (C) 2017-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4_1); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S new file mode 100644 index 0000000000..7e62621afc --- /dev/null +++ 
b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S @@ -0,0 +1,4 @@ +#define AS_WCSLEN +#define strlen __wcslen_sse4_1 + +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c index c23ce457d2..13070fdcf4 100644 --- a/sysdeps/x86_64/multiarch/wcslen.c +++ b/sysdeps/x86_64/multiarch/wcslen.c @@ -24,7 +24,7 @@ # undef __wcslen # define SYMBOL_NAME wcslen -# include "ifunc-avx2.h" +# include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ()); weak_alias (__wcslen, wcslen); diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c index 9f7983f2a5..f1b6bc87b4 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen.c +++ b/sysdeps/x86_64/multiarch/wcsnlen.c @@ -24,39 +24,7 @@ # undef __wcsnlen # define SYMBOL_NAME wcsnlen -# include <init-arch.h> - -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; - -static inline void * -IFUNC_SELECTOR (void) -{ - const struct cpu_features* cpu_features = __get_cpu_features (); - - if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - { - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) - && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) - && CPU_FEATURES_CPU_P (cpu_features, BMI2)) - return OPTIMIZE (evex); - - if (CPU_FEATURES_CPU_P (cpu_features, RTM)) - return OPTIMIZE (avx2_rtm); - - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx2); - } - - if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) - return OPTIMIZE (sse4_1); - - return OPTIMIZE (sse2); -} +# include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); 
weak_alias (__wcsnlen, wcsnlen);