public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
To: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Cc: 'GNU C Library' <libc-alpha@sourceware.org>
Subject: Re: [PATCH] AArch64: Improve strlen_asimd
Date: Fri, 13 Jan 2023 12:25:35 +0000	[thread overview]
Message-ID: <Y8FNvxktEcu8WVwc@arm.com> (raw)
In-Reply-To: <PAWPR08MB8982BE5A8C1635DBCB89700D83FD9@PAWPR08MB8982.eurprd08.prod.outlook.com>

The 01/12/2023 15:51, Wilco Dijkstra wrote:
> Use shrn for the mask, merge tst+bne into cbnz, and tweak code alignment.
> Performance improves slightly as a result.  Passes regress.
> 

I would prefer to commit this and the other string function optimization
patches now rather than delay them to the next release, so that we can start
using and widely testing them sooner (we can fix and backport any perf regressions).

Please commit it, thanks.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>


> ---
> 
> diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
> index ca6ab96ecf2de45def79539facd8e0b86f4edc95..490439491d19c3f14b0228f42248bc8aa6e9e8bd 100644
> --- a/sysdeps/aarch64/multiarch/strlen_asimd.S
> +++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
> @@ -48,6 +48,7 @@
>  #define tmp    x2
>  #define tmpw   w2
>  #define synd   x3
> +#define syndw  w3
>  #define shift  x4
> 
>  /* For the first 32 bytes, NUL detection works on the principle that
> @@ -87,7 +88,6 @@
> 
>  ENTRY (__strlen_asimd)
>         PTR_ARG (0)
> -
>         and     tmp1, srcin, MIN_PAGE_SIZE - 1
>         cmp     tmp1, MIN_PAGE_SIZE - 32
>         b.hi    L(page_cross)
> @@ -123,7 +123,6 @@ ENTRY (__strlen_asimd)
>         add     len, len, tmp1, lsr 3
>         ret
> 
> -       .p2align 3
>         /* Look for a NUL byte at offset 16..31 in the string.  */
>  L(bytes16_31):
>         ldp     data1, data2, [srcin, 16]
> @@ -151,6 +150,7 @@ L(bytes16_31):
>         add     len, len, tmp1, lsr 3
>         ret
> 
> +       nop
>  L(loop_entry):
>         bic     src, srcin, 31
> 
> @@ -166,18 +166,12 @@ L(loop):
>         /* Low 32 bits of synd are non-zero if a NUL was found in datav1.  */
>         cmeq    maskv.16b, datav1.16b, 0
>         sub     len, src, srcin
> -       tst     synd, 0xffffffff
> -       b.ne    1f
> +       cbnz    syndw, 1f
>         cmeq    maskv.16b, datav2.16b, 0
>         add     len, len, 16
>  1:
>         /* Generate a bitmask and compute correct byte offset.  */
> -#ifdef __AARCH64EB__
> -       bic     maskv.8h, 0xf0
> -#else
> -       bic     maskv.8h, 0x0f, lsl 8
> -#endif
> -       umaxp   maskv.16b, maskv.16b, maskv.16b
> +       shrn    maskv.8b, maskv.8h, 4
>         fmov    synd, maskd
>  #ifndef __AARCH64EB__
>         rbit    synd, synd
> @@ -186,8 +180,6 @@ L(loop):
>         add     len, len, tmp, lsr 2
>         ret
> 
> -        .p2align 4
> -
>  L(page_cross):
>         bic     src, srcin, 31
>         mov     tmpw, 0x0c03

  reply	other threads:[~2023-01-13 12:26 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-12 15:51 Wilco Dijkstra
2023-01-13 12:25 ` Szabolcs Nagy [this message]
2023-01-16 22:16   ` Carlos O'Donell
2023-01-17 16:37     ` Wilco Dijkstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y8FNvxktEcu8WVwc@arm.com \
    --to=szabolcs.nagy@arm.com \
    --cc=Wilco.Dijkstra@arm.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).