From: dengjianbo <dengjianbo@loongson.cn>
To: libc-alpha@sourceware.org
Cc: adhemerval.zanella@linaro.org, xry111@xry111.site,
caiyinyu@loongson.cn, xuchenghua@loongson.cn,
huangpei@loongson.cn, dengjianbo <dengjianbo@loongson.cn>
Subject: [PATCH 1/3] Loongarch: Add ifunc support for strnlen{aligned, lsx, lasx}
Date: Tue, 22 Aug 2023 10:11:16 +0800 [thread overview]
Message-ID: <20230822021118.3489949-2-dengjianbo@loongson.cn> (raw)
In-Reply-To: <20230822021118.3489949-1-dengjianbo@loongson.cn>
Based on the glibc microbenchmark, strnlen-aligned implementation could
reduce the runtime more than 10%, strnlen-lsx implementation could reduce
the runtime about 50%-78%, strnlen-lasx implementation could reduce the
runtime about 50%-88%.
---
sysdeps/loongarch/lp64/multiarch/Makefile | 3 +
.../lp64/multiarch/ifunc-impl-list.c | 8 ++
.../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +++++++
.../lp64/multiarch/strnlen-aligned.S | 102 ++++++++++++++++++
.../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++++++++++++++++
.../loongarch/lp64/multiarch/strnlen-lsx.S | 89 +++++++++++++++
sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 +++++++
7 files changed, 382 insertions(+)
create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index afa5104174..c4dd3143d1 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -3,6 +3,9 @@ sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
+ strnlen-aligned \
+ strnlen-lsx \
+ strnlen-lasx \
strchr-aligned \
strchr-lsx \
strchr-lasx \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 25eb96b061..7cec0b7724 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -38,6 +38,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
+ IFUNC_IMPL (i, name, strnlen,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx)
+ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned)
+ )
+
IFUNC_IMPL (i, name, strchr,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
new file mode 100644
index 0000000000..5cf8981021
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h
@@ -0,0 +1,41 @@
+/* Common definition for strnlen ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
new file mode 100644
index 0000000000..b900430a5d
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -0,0 +1,102 @@
+/* Optimized strnlen implementation using basic Loongarch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRNLEN __strnlen_aligned
+#else
+# define STRNLEN __strnlen
+#endif
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(out)
+ lu12i.w a2, 0x01010
+ andi t1, a0, 0x7
+ move t4, a0
+
+ bstrins.d a0, zero, 2, 0
+ ori a2, a2, 0x101
+ li.w t0, -1
+ ld.d t2, a0, 0
+
+ slli.d t3, t1, 3
+ bstrins.d a2, a2, 63, 32
+ li.w t5, 8
+ slli.d a3, a2, 7
+
+ sub.w t1, t5, t1
+ sll.d t0, t0, t3
+ orn t2, t2, t0
+ sub.d t0, t2, a2
+
+
+ andn t3, a3, t2
+ and t0, t0, t3
+ bnez t0, L(count_pos)
+ sub.d t5, a1, t1
+
+ bgeu t1, a1, L(out)
+ addi.d a0, a0, 8
+L(loop):
+ ld.d t2, a0, 0
+ sub.d t0, t2, a2
+
+ andn t1, a3, t2
+ sltui t6, t5, 9
+ and t0, t0, t1
+ or t7, t0, t6
+
+ bnez t7, L(count_pos)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t0, t2, a2
+
+
+ andn t1, a3, t2
+ sltui t6, t5, 17
+ and t0, t0, t1
+ addi.d t5, t5, -16
+
+ or t7, t0, t6
+ beqz t7, L(loop)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t1, t0
+
+ sub.d a0, a0, t4
+ srli.d t1, t1, 3
+ add.d a0, t1, a0
+ sltu t0, a0, a1
+
+ masknez t1, a1, t0
+ maskeqz a0, a0, t0
+ or a0, a0, t1
+ jr ra
+
+
+L(out):
+ move a0, a1
+ jr ra
+END(STRNLEN)
+
+weak_alias (STRNLEN, strnlen)
+libc_hidden_builtin_def (STRNLEN)
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
new file mode 100644
index 0000000000..2c03d3d9b4
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S
@@ -0,0 +1,100 @@
+/* Optimized strnlen implementation using loongarch LASX instructions
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNLEN __strnlen_lasx
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(ret0)
+ andi t1, a0, 0x3f
+ li.d t3, 65
+ sub.d a2, a0, t1
+
+ xvld xr0, a2, 0
+ xvld xr1, a2, 32
+ sub.d t1, t3, t1
+ move a3, a0
+
+ sltu t1, a1, t1
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+
+
+ movfr2gr.d t0, fa0
+ sra.d t0, t0, a0
+ orn t1, t1, t0
+ bnez t1, L(end)
+
+ add.d a4, a0, a1
+ move a0, a2
+ addi.d a4, a4, -1
+ bstrins.d a4, zero, 5, 0
+
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+ addi.d a0, a0, 64
+ beq a0, a4, L(out)
+
+ xvmin.bu xr2, xr0, xr1
+ xvsetanyeqz.b fcc0, xr2
+ bceqz fcc0, L(loop)
+L(out):
+ xvmsknz.b xr0, xr0
+
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+L(end):
+ sub.d a0, a0, a3
+
+ cto.d t0, t0
+ add.d a0, a0, t0
+ sltu t1, a0, a1
+ masknez t0, a1, t1
+
+ maskeqz t1, a0, t1
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+
+
+ jr ra
+END(STRNLEN)
+
+libc_hidden_def (STRNLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
new file mode 100644
index 0000000000..b769a89584
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S
@@ -0,0 +1,89 @@
+/* Optimized strnlen implementation using loongarch LSX instructions
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STRNLEN __strnlen_lsx
+
+LEAF(STRNLEN, 6)
+ beqz a1, L(ret0)
+ andi t1, a0, 0x1f
+ li.d t3, 33
+ sub.d a2, a0, t1
+
+ vld vr0, a2, 0
+ vld vr1, a2, 16
+ sub.d t1, t3, t1
+ move a3, a0
+
+ sltu t1, a1, t1
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a0
+ orn t1, t1, t0
+ bnez t1, L(end)
+
+
+ add.d a4, a0, a1
+ move a0, a2
+ addi.d a4, a4, -1
+ bstrins.d a4, zero, 4, 0
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ beq a0, a4, L(out)
+
+ vmin.bu vr2, vr0, vr1
+ vsetanyeqz.b fcc0, vr2
+ bceqz fcc0, L(loop)
+L(out):
+ vmsknz.b vr0, vr0
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+L(end):
+ sub.d a0, a0, a3
+
+
+ cto.w t0, t0
+ add.d a0, a0, t0
+ sltu t1, a0, a1
+ masknez t0, a1, t1
+
+ maskeqz t1, a0, t1
+ or a0, t0, t1
+ jr ra
+L(ret0):
+ move a0, zero
+
+ jr ra
+END(STRNLEN)
+
+libc_hidden_builtin_def (STRNLEN)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c
new file mode 100644
index 0000000000..38b7a25a7a
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strnlen.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strnlen __redirect_strnlen
+# define __strnlen __redirect___strnlen
+# include <string.h>
+# undef __strnlen
+# undef strnlen
+
+# define SYMBOL_NAME strnlen
+# include "ifunc-strnlen.h"
+
+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ());
+weak_alias (__strnlen, strnlen);
+# ifdef SHARED
+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen)
+ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen);
+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen)
+ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen);
+# endif
+#endif
--
2.40.0
next prev parent reply other threads:[~2023-08-22 2:11 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-22 2:11 [PATCH 0/3] Add ifunc support for str{nlen, cmp, ncmp} dengjianbo
2023-08-22 2:11 ` dengjianbo [this message]
2023-08-22 2:11 ` [PATCH 2/3] Loongarch: Add ifunc support for strcmp{aligned, lsx} dengjianbo
2023-08-22 2:11 ` [PATCH 3/3] Loongarch: Add ifunc support for strncmp{aligned, lsx} dengjianbo
2023-08-22 3:56 ` Richard Henderson
2023-08-22 6:37 ` dengjianbo
2023-08-22 11:13 ` Xi Ruoyao
2023-08-22 11:23 ` Xi Ruoyao
2023-08-23 7:25 ` dengjianbo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230822021118.3489949-2-dengjianbo@loongson.cn \
--to=dengjianbo@loongson.cn \
--cc=adhemerval.zanella@linaro.org \
--cc=caiyinyu@loongson.cn \
--cc=huangpei@loongson.cn \
--cc=libc-alpha@sourceware.org \
--cc=xry111@xry111.site \
--cc=xuchenghua@loongson.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).