From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7891) id 94CB5385C301; Tue, 29 Aug 2023 04:33:15 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 94CB5385C301 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1693283595; bh=H9zsUmNXg8Uy3zkLlU75l1yKyjBiSRszU9eesbs33KU=; h=From:To:Subject:Date:From; b=gXgw5JcrMAIQTmh9mvw2wH5ssdyMSPSn7UWEHo8nxKJv1qrGh5yjWHRnHd0Dafdw0 YbGef1XQfwiV2kqptVApWutmiPCFevnF9vEGyPTi4AB7QYWIKiSuw2cvtQY3BszNK0 65DAnP7yoHqkMutp2I9hqtsi/ZndjOUmvxtM45Oo= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Yinyu Cai To: glibc-cvs@sourceware.org Subject: [glibc] LoongArch: Add ifunc support for rawmemchr{aligned, lsx, lasx} X-Act-Checkin: glibc X-Git-Author: dengjianbo X-Git-Refname: refs/heads/master X-Git-Oldrev: 3efa26749e4d28768558330353dc15c6f325ed4e X-Git-Newrev: f8664fe2155eb5ddc22272bac72ab26368735718 Message-Id: <20230829043315.94CB5385C301@sourceware.org> Date: Tue, 29 Aug 2023 04:33:15 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=f8664fe2155eb5ddc22272bac72ab26368735718 commit f8664fe2155eb5ddc22272bac72ab26368735718 Author: dengjianbo Date: Mon Aug 28 10:08:35 2023 +0800 LoongArch: Add ifunc support for rawmemchr{aligned, lsx, lasx} According to the glibc rawmemchr microbenchmark, a few cases tested with char '\0' experience performance degradation because the lasx and lsx versions don't handle '\0' separately. Overall, the rawmemchr-lasx implementation could reduce the runtime by about 40%-80%, the rawmemchr-lsx implementation could reduce the runtime by about 40%-66%, and the rawmemchr-aligned implementation could reduce the runtime by about 20%-40%. 
Diff: --- sysdeps/loongarch/lp64/multiarch/Makefile | 3 + sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c | 8 ++ sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h | 40 +++++++ .../loongarch/lp64/multiarch/rawmemchr-aligned.S | 124 +++++++++++++++++++++ sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++++++++++++++ sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++++++++++++ sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 ++++++ 7 files changed, 365 insertions(+) diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile index 5d7ae7ae73..64416b025a 100644 --- a/sysdeps/loongarch/lp64/multiarch/Makefile +++ b/sysdeps/loongarch/lp64/multiarch/Makefile @@ -21,5 +21,8 @@ sysdep_routines += \ memmove-unaligned \ memmove-lsx \ memmove-lasx \ + rawmemchr-aligned \ + rawmemchr-lsx \ + rawmemchr-lasx \ # sysdep_routines endif diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c index c8ba87bd81..3db9af1460 100644 --- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c @@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) ) + IFUNC_IMPL (i, name, rawmemchr, +#if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) + IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) +#endif + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) + ) + return i; } diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h new file mode 100644 index 0000000000..a7bb4cf9ea --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h @@ -0,0 +1,40 @@ +/* Common definition for rawmemchr ifunc selections. + All versions must be listed in ifunc-impl-list.c. 
+ Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#if !defined __loongarch_soft_float +extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; +#endif +extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ +#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + return OPTIMIZE (lasx); + else if (SUPPORT_LSX) + return OPTIMIZE (lsx); + else +#endif + return OPTIMIZE (aligned); +} diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S new file mode 100644 index 0000000000..9c7155ae82 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S @@ -0,0 +1,124 @@ +/* Optimized rawmemchr implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) +# define RAWMEMCHR_NAME __rawmemchr_aligned +#else +# define RAWMEMCHR_NAME __rawmemchr +#endif + +LEAF(RAWMEMCHR_NAME, 6) + andi t1, a0, 0x7 + bstrins.d a0, zero, 2, 0 + lu12i.w a2, 0x01010 + bstrins.d a1, a1, 15, 8 + + ld.d t0, a0, 0 + slli.d t1, t1, 3 + ori a2, a2, 0x101 + bstrins.d a1, a1, 31, 16 + + li.w t8, -1 + bstrins.d a1, a1, 63, 32 + bstrins.d a2, a2, 63, 32 + sll.d t2, t8, t1 + + sll.d t3, a1, t1 + orn t0, t0, t2 + slli.d a3, a2, 7 + beqz a1, L(find_zero) + + xor t0, t0, t3 + sub.d t1, t0, a2 + andn t2, a3, t0 + and t3, t1, t2 + + bnez t3, L(count_pos) + addi.d a0, a0, 8 + +L(loop): + ld.d t0, a0, 0 + xor t0, t0, a1 + + sub.d t1, t0, a2 + andn t2, a3, t0 + and t3, t1, t2 + bnez t3, L(count_pos) + + ld.d t0, a0, 8 + addi.d a0, a0, 16 + xor t0, t0, a1 + sub.d t1, t0, a2 + + andn t2, a3, t0 + and t3, t1, t2 + beqz t3, L(loop) + addi.d a0, a0, -8 +L(count_pos): + ctz.d t0, t3 + srli.d t0, t0, 3 + add.d a0, a0, t0 + jr ra + +L(loop_7bit): + ld.d t0, a0, 0 +L(find_zero): + sub.d t1, t0, a2 + and t2, t1, a3 + bnez t2, L(more_check) + + ld.d t0, a0, 8 + addi.d a0, a0, 16 + sub.d t1, t0, a2 + and t2, t1, a3 + + beqz t2, L(loop_7bit) + addi.d a0, a0, -8 + +L(more_check): + andn t2, a3, t0 + and t3, t1, t2 + bnez t3, L(count_pos) + addi.d a0, a0, 8 + +L(loop_8bit): + ld.d t0, a0, 0 + + sub.d t1, t0, a2 + andn t2, a3, t0 + and t3, t1, t2 + bnez t3, L(count_pos) + + ld.d t0, a0, 8 + addi.d a0, a0, 16 + sub.d t1, t0, a2 + + andn t2, a3, t0 + and t3, t1, t2 + beqz t3, L(loop_8bit) + + addi.d a0, a0, -8 + b L(count_pos) + 
+END(RAWMEMCHR_NAME) + +libc_hidden_builtin_def (__rawmemchr) diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S new file mode 100644 index 0000000000..be2eb59dbe --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S @@ -0,0 +1,82 @@ +/* Optimized rawmemchr implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . 
*/ + +#include +#include +#include + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# define RAWMEMCHR __rawmemchr_lasx + +LEAF(RAWMEMCHR, 6) + move a2, a0 + bstrins.d a0, zero, 5, 0 + xvld xr0, a0, 0 + xvld xr1, a0, 32 + + xvreplgr2vr.b xr2, a1 + xvseq.b xr0, xr0, xr2 + xvseq.b xr1, xr1, xr2 + xvmsknz.b xr0, xr0 + + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + xvpickve.w xr4, xr1, 4 + vilvl.h vr0, vr3, vr0 + + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 + sra.d t0, t0, a2 + + + beqz t0, L(loop) + ctz.d t0, t0 + add.d a0, a2, t0 + jr ra + +L(loop): + xvld xr0, a0, 64 + xvld xr1, a0, 96 + addi.d a0, a0, 64 + xvseq.b xr0, xr0, xr2 + + xvseq.b xr1, xr1, xr2 + xvmax.bu xr3, xr0, xr1 + xvseteqz.v fcc0, xr3 + bcnez fcc0, L(loop) + + xvmsknz.b xr0, xr0 + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + xvpickve.w xr4, xr1, 4 + + + vilvl.h vr0, vr3, vr0 + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 + + ctz.d t0, t0 + add.d a0, a0, t0 + jr ra +END(RAWMEMCHR) + +libc_hidden_builtin_def (RAWMEMCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S new file mode 100644 index 0000000000..2f6fe024dc --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S @@ -0,0 +1,71 @@ +/* Optimized rawmemchr implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include +#include +#include + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# define RAWMEMCHR __rawmemchr_lsx + +LEAF(RAWMEMCHR, 6) + move a2, a0 + bstrins.d a0, zero, 4, 0 + vld vr0, a0, 0 + vld vr1, a0, 16 + + vreplgr2vr.b vr2, a1 + vseq.b vr0, vr0, vr2 + vseq.b vr1, vr1, vr2 + vmsknz.b vr0, vr0 + + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + sra.w t0, t0, a2 + + beqz t0, L(loop) + ctz.w t0, t0 + add.d a0, a2, t0 + jr ra + + +L(loop): + vld vr0, a0, 32 + vld vr1, a0, 48 + addi.d a0, a0, 32 + vseq.b vr0, vr0, vr2 + + vseq.b vr1, vr1, vr2 + vmax.bu vr3, vr0, vr1 + vseteqz.v fcc0, vr3 + bcnez fcc0, L(loop) + + vmsknz.b vr0, vr0 + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + + ctz.w t0, t0 + add.d a0, a0, t0 + jr ra +END(RAWMEMCHR) + +libc_hidden_builtin_def (RAWMEMCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c new file mode 100644 index 0000000000..89c7ffff8f --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of rawmemchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) +# define rawmemchr __redirect_rawmemchr +# define __rawmemchr __redirect___rawmemchr +# include +# undef rawmemchr +# undef __rawmemchr + +# define SYMBOL_NAME rawmemchr +# include "ifunc-rawmemchr.h" + +libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, + IFUNC_SELECTOR ()); +weak_alias (__rawmemchr, rawmemchr) +# ifdef SHARED +__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) + __attribute__((visibility ("hidden"))); +# endif +#endif