From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1791) id 7BBF138493CA; Fri, 13 Jan 2023 20:03:35 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7BBF138493CA DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1673640215; bh=SL6vGWQdZ75BCY6JWFea9wGHwqVC/R0K9dSHtT/esVU=; h=From:To:Subject:Date:From; b=lCLFcqMqsgVDemmUlFtXkbGzb57WVqq8j3DPr+0XYNIS3nnKnl7aXdL0sW8SGRW1f DVoIQGHpV5mrZhah0izmma8cUHkUx00BeULZ/X44tRQ2GQSynz5iFSTb1bFfb+pdws ZVuAw95NhRly4YBaNbvXJGrByvS3Bj2dbgZ2aCXE= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Adhemerval Zanella To: glibc-cvs@sourceware.org Subject: [glibc/azanella/generic-strings] hppa: Add string-fzb.h and string-fzi.h X-Act-Checkin: glibc X-Git-Author: Richard Henderson X-Git-Refname: refs/heads/azanella/generic-strings X-Git-Oldrev: 363e8fb14cf06c35706bf03945ffcdcbdf8e84e6 X-Git-Newrev: bd1edea1835fb2ae0d9925019818b8bc05517209 Message-Id: <20230113200335.7BBF138493CA@sourceware.org> Date: Fri, 13 Jan 2023 20:03:35 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=bd1edea1835fb2ae0d9925019818b8bc05517209 commit bd1edea1835fb2ae0d9925019818b8bc05517209 Author: Richard Henderson Date: Tue Jan 10 18:01:02 2023 -0300 hppa: Add string-fzb.h and string-fzi.h Use UXOR,SBZ to test for a zero byte within a word. While we can get semi-decent code out of asm-goto, we would do slightly better with a compiler builtin. For index_zero et al, sequential testing of bytes is less expensive than any tricks that involve a count-leading-zeros insn that we don't have. Checked on hppa-linux-gnu. Diff: --- sysdeps/hppa/string-fzb.h | 70 ++++++++++++++++++++++++++ sysdeps/hppa/string-fzc.h | 124 ++++++++++++++++++++++++++++++++++++++++++++++ sysdeps/hppa/string-fzi.h | 63 +++++++++++++++++++++++ 3 files changed, 257 insertions(+) diff --git a/sysdeps/hppa/string-fzb.h b/sysdeps/hppa/string-fzb.h new file mode 100644 index 0000000000..865e548492 --- /dev/null +++ b/sysdeps/hppa/string-fzb.h @@ -0,0 +1,70 @@ +/* Zero byte detection, boolean. HPPA version. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STRING_FZB_H +#define _STRING_FZB_H 1 + +#include +#include + +/* Determine if any byte within X is zero. This is a pure boolean test. */ + +static __always_inline _Bool +has_zero (op_t x) +{ + _Static_assert (sizeof (op_t) == 4, "64-bit not supported"); + + /* It's more useful to expose a control transfer to the compiler + than to expose a proper boolean result. */ + asm goto ("uxor,sbz %%r0,%0,%%r0\n\t" + "b,n %l1" : : "r"(x) : : nbz); + return 1; + nbz: + return 0; +} + +/* Likewise, but for byte equality between X1 and X2. */ + +static __always_inline _Bool +has_eq (op_t x1, op_t x2) +{ + _Static_assert (sizeof (op_t) == 4, "64-bit not supported"); + + asm goto ("uxor,sbz %0,%1,%%r0\n\t" + "b,n %l2" : : "r"(x1), "r"(x2) : : nbz); + return 1; + nbz: + return 0; +} + +/* Likewise, but for zeros in X1 and equal bytes between X1 and X2. */ + +static __always_inline _Bool +has_zero_eq (op_t x1, op_t x2) +{ + _Static_assert (sizeof (op_t) == 4, "64-bit not supported"); + + asm goto ("uxor,sbz %%r0,%0,%%r0\n\t" + "uxor,nbz %0,%1,%%r0\n\t" + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz); + return 0; + sbz: + return 1; +} + +#endif /* _STRING_FZB_H */ diff --git a/sysdeps/hppa/string-fzc.h b/sysdeps/hppa/string-fzc.h new file mode 100644 index 0000000000..8ff3416b41 --- /dev/null +++ b/sysdeps/hppa/string-fzc.h @@ -0,0 +1,124 @@ +/* string-fzc.h -- zero byte detection with indexes. HPPA version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STRING_FZC_H +#define _STRING_FZC_H 1 + +#include + +_Static_assert (sizeof (op_t) == 4, "64-bit not supported"); + +/* Given a word X that is known to contain a zero byte, return the + index of the first such within the long in memory order. */ +static __always_inline unsigned int +index_first_zero (op_t x) +{ + unsigned int ret; + + /* Since we have no clz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,<> %1,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,<> %1,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,<> %1,7,8,%%r0\n\t" + "ldi 0,%0" + : "=r"(ret) : "r"(x), "0"(3)); + + return ret; +} + +/* Similarly, but perform the search for byte equality between X1 and X2. */ +static __always_inline unsigned int +index_first_eq (op_t x1, op_t x2) +{ + return index_first_zero (x1 ^ x2); +} + +/* Similarly, but perform the search for zero within X1 or + equality between X1 and X2. */ +static __always_inline unsigned int +index_first_zero_eq (op_t x1, op_t x2) +{ + unsigned int ret; + + /* Since we have no clz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,= %1,23,8,%%r0\n\t" + "extrw,u,<> %2,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,= %1,15,8,%%r0\n\t" + "extrw,u,<> %2,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,= %1,7,8,%%r0\n\t" + "extrw,u,<> %2,7,8,%%r0\n\t" + "ldi 0,%0" + : "=r"(ret) : "r"(x1), "r"(x1 ^ x2), "0"(3)); + + return ret; +} + +/* Similarly, but perform the search for zero within X1 or + inequality between X1 and X2. */ +static __always_inline unsigned int +index_first_zero_ne (op_t x1, op_t x2) +{ + unsigned int ret; + + /* Since we have no clz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,<> %2,23,8,%%r0\n\t" + "extrw,u,<> %1,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,<> %2,15,8,%%r0\n\t" + "extrw,u,<> %1,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,<> %2,7,8,%%r0\n\t" + "extrw,u,<> %1,7,8,%%r0\n\t" + "ldi 0,%0" + : "=r"(ret) : "r"(x1), "r"(x1 ^ x2), "0"(3)); + + return ret; +} + +/* Similarly, but search for the last zero within X. */ +static __always_inline unsigned int +index_last_zero (op_t x) +{ + unsigned int ret; + + /* Since we have no ctz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,<> %1,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,<> %1,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,<> %1,31,8,%%r0\n\t" + "ldi 3,%0" + : "=r"(ret) : "r"(x), "0"(0)); + + return ret; +} + +static __always_inline unsigned int +index_last_eq (op_t x1, op_t x2) +{ + return index_last_zero (x1 ^ x2); +} + +#endif /* _STRING_FZC_H */ diff --git a/sysdeps/hppa/string-fzi.h b/sysdeps/hppa/string-fzi.h new file mode 100644 index 0000000000..b560fa1a89 --- /dev/null +++ b/sysdeps/hppa/string-fzi.h @@ -0,0 +1,63 @@ +/* string-fzi.h -- zero byte indexes. HPPA version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STRING_FZI_H +#define _STRING_FZI_H 1 + +#include +#include + +_Static_assert (sizeof (op_t) == 4, "64-bit not supported"); + +static __always_inline unsigned int +index_first (find_t c) +{ + unsigned int ret; + + /* Since we have no clz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,= %1,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,= %1,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,= %1,7,8,%%r0\n\t" + "ldi 0,%0" + : "=r"(ret) : "r"(c), "0"(3)); + + return ret; +} + +static __always_inline unsigned int +index_last (find_t c) +{ + unsigned int ret; + + /* Since we have no ctz insn, direct tests of the bytes is faster + than loading up the constants to do the masking. */ + asm ("extrw,u,= %1,15,8,%%r0\n\t" + "ldi 1,%0\n\t" + "extrw,u,= %1,23,8,%%r0\n\t" + "ldi 2,%0\n\t" + "extrw,u,= %1,31,8,%%r0\n\t" + "ldi 3,%0" + : "=r"(ret) : "r"(c), "0"(0)); + + return ret; +} + +#endif /* _STRING_FZI_H */