public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Noah Goldstein <goldstein.w.n@gmail.com>
To: libc-alpha@sourceware.org
Subject: [PATCH v1 3/5] x86: Add wcsrchr optimized with SSE4_1 in wcsrchr-sse4_1.S
Date: Wed, 20 Apr 2022 22:14:09 -0500	[thread overview]
Message-ID: <20220421031410.2142238-3-goldstein.w.n@gmail.com> (raw)
In-Reply-To: <20220421031410.2142238-1-goldstein.w.n@gmail.com>

wcsrchr-sse2 can't use `pminud`, which can speed up the main loop:

len, align, pos, seek,   max_char, freq, New Time / Old Time
256,     1,  64,   23,       1273,    1,               1.082
256,     1,  64,   23, 2147483647,    1,               1.076
256,    15,  64,   23,       1273,    1,               1.061
256,    15,  64,   23, 2147483647,    1,               1.075
256,     2,  64,   23,       1273,    1,               1.108
256,     2,  64,   23, 2147483647,    1,               1.109
256,    30,  64,   23,       1273,    1,               1.072
256,    30,  64,   23, 2147483647,    1,               1.077
256,     3,  64,   23,       1273,    1,               1.108
256,     3,  64,   23, 2147483647,    1,               1.103
256,    45,  64,   23,       1273,    1,               1.076
256,    45,  64,   23, 2147483647,    1,               1.079
256,     4,  64,   23,       1273,    1,               1.119
256,     4,  64,   23, 2147483647,    1,               1.112
256,    60,  64,   23,       1273,    1,               1.117
256,    60,  64,   23, 2147483647,    1,               1.112
256,     5,  64,   23,       1273,    1,                1.21
256,     5,  64,   23, 2147483647,    1,               1.194
256,    75,  64,   23,       1273,    1,               1.055
256,    75,  64,   23, 2147483647,    1,               1.045
256,     6,  64,   23,       1273,    1,               1.264
256,     6,  64,   23, 2147483647,    1,                 1.3
256,    90,  64,   23,       1273,    1,               1.022
256,    90,  64,   23, 2147483647,    1,               1.026
256,     7,  64,   23,       1273,    1,               1.316
256,     7,  64,   23, 2147483647,    1,               1.325

Overall this leads to a 5% performance improvement in the benchmark
suite.

Full xcheck passes on x86_64 with and without multiarch enabled.
---
 sysdeps/x86_64/multiarch/Makefile          |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c |  3 +++
 sysdeps/x86_64/multiarch/wcsrchr-sse4_1.S  | 21 +++++++++++++++++++++
 sysdeps/x86_64/multiarch/wcsrchr.c         |  3 ++-
 4 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-sse4_1.S

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 0400ea332b..5ad7bc8c25 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -154,6 +154,7 @@ sysdep_routines += \
   wcsrchr-avx2-rtm \
   wcsrchr-evex \
   wcsrchr-sse2 \
+  wcsrchr-sse4_1 \
   wmemchr-avx2 \
   wmemchr-avx2-rtm \
   wmemchr-evex \
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a8afcf81bb..1cbb6938c8 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -685,6 +685,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			       && CPU_FEATURE_USABLE (AVX512BW)
 			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsrchr_evex)
+	      IFUNC_IMPL_ADD (array, i, wcsrchr,
+			      CPU_FEATURE_USABLE (SSE4_1),
+			      __wcsrchr_sse4_1)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
 
   /* Support sysdeps/x86_64/multiarch/wcscmp.c.  */
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse4_1.S b/sysdeps/x86_64/multiarch/wcsrchr-sse4_1.S
new file mode 100644
index 0000000000..34b92d28eb
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-sse4_1.S
@@ -0,0 +1,21 @@
+/* wcsrchr optimized with SSE4.1.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define USE_AS_WCSRCHR	1
+#define STRRCHR	__wcsrchr_sse4_1
+#include "../strrchr.S"
diff --git a/sysdeps/x86_64/multiarch/wcsrchr.c b/sysdeps/x86_64/multiarch/wcsrchr.c
index 8b30c06f2e..eb18038eec 100644
--- a/sysdeps/x86_64/multiarch/wcsrchr.c
+++ b/sysdeps/x86_64/multiarch/wcsrchr.c
@@ -23,7 +23,8 @@
 # undef wcsrchr
 
 # define SYMBOL_NAME wcsrchr
-# include "ifunc-avx2.h"
+
+# include "ifunc-wcslen.h"
 
 libc_ifunc_redirected (__redirect_wcsrchr, wcsrchr, IFUNC_SELECTOR ());
 #endif
-- 
2.25.1


  parent reply	other threads:[~2022-04-21  3:14 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-21  3:14 [PATCH v1 1/5] benchtests: Improve bench-strrchr Noah Goldstein
2022-04-21  3:14 ` [PATCH v1 2/5] x86: Optimize {str|wcs}rchr-sse2 Noah Goldstein
2022-04-21 20:26   ` H.J. Lu
2022-04-21 20:57     ` Noah Goldstein
2022-04-21 21:48       ` H.J. Lu
2022-04-21 22:23         ` Noah Goldstein
2022-04-21  3:14 ` Noah Goldstein [this message]
2022-04-21  3:14 ` [PATCH v1 4/5] x86: Optimize {str|wcs}rchr-avx2 Noah Goldstein
2022-04-21  3:14 ` [PATCH v1 5/5] x86: Optimize {str|wcs}rchr-evex Noah Goldstein
2022-04-21 20:12 ` [PATCH v1 1/5] benchtests: Improve bench-strrchr H.J. Lu
2022-04-21 22:07   ` Noah Goldstein
2022-04-21 23:49     ` H.J. Lu
2022-04-22  1:11       ` Noah Goldstein
2022-04-21 22:22 ` [PATCH v2 1/4] " Noah Goldstein
2022-04-21 22:22   ` [PATCH v2 2/4] x86: Optimize {str|wcs}rchr-sse2 Noah Goldstein
2022-04-21 23:46     ` H.J. Lu
2022-04-22  1:54       ` Noah Goldstein
2022-04-21 22:22   ` [PATCH v2 3/4] x86: Optimize {str|wcs}rchr-avx2 Noah Goldstein
2022-04-21 22:22   ` [PATCH v2 4/4] x86: Optimize {str|wcs}rchr-evex Noah Goldstein
2022-04-21 23:59     ` H.J. Lu
2022-04-22  1:53       ` Noah Goldstein
2022-04-22  1:52 ` [PATCH v3 1/4] benchtests: Improve bench-strrchr Noah Goldstein
2022-04-22  1:52   ` [PATCH v3 2/4] x86: Optimize {str|wcs}rchr-sse2 Noah Goldstein
2022-04-22 19:06     ` H.J. Lu
2022-05-12 20:13       ` Sunil Pandey
2022-04-22  1:52   ` [PATCH v3 3/4] x86: Optimize {str|wcs}rchr-avx2 Noah Goldstein
2022-04-22 19:03     ` H.J. Lu
2022-05-12 20:14       ` Sunil Pandey
2022-07-20 15:33         ` Noah Goldstein
2022-04-22  1:52   ` [PATCH v3 4/4] x86: Optimize {str|wcs}rchr-evex Noah Goldstein
2022-04-22 19:04     ` H.J. Lu
2022-05-12 20:16       ` Sunil Pandey
2022-04-22 18:29   ` [PATCH v3 1/4] benchtests: Improve bench-strrchr H.J. Lu
2022-04-22 19:12     ` Noah Goldstein
2022-04-22 19:11 ` [PATCH v4 " Noah Goldstein
2022-04-23  1:53   ` H.J. Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220421031410.2142238-3-goldstein.w.n@gmail.com \
    --to=goldstein.w.n@gmail.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).