From: Sunil Pandey <skpgkp2@gmail.com>
To: Noah Goldstein <goldstein.w.n@gmail.com>
Cc: libc-alpha@sourceware.org
Subject: Re: [PATCH v10 2/6] x86: Update memrchr to use new VEC macros
Date: Fri, 14 Oct 2022 20:44:56 -0700 [thread overview]
Message-ID: <CAMAf5_c42vvv42RaOO7Nbdy+-p9+OrK88xR_HMea5O6oOTvjOA@mail.gmail.com> (raw)
In-Reply-To: <20221015030030.204172-2-goldstein.w.n@gmail.com>
On Fri, Oct 14, 2022 at 8:01 PM Noah Goldstein via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> Replace %VEC(n) -> %VMM(n)
>
> This commit does not change libc.so
>
> Tested build on x86-64
> ---
> sysdeps/x86_64/multiarch/memrchr-evex.S | 42 ++++++++++++-------------
> 1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S
> index ea3a0a0a60..550b328c5a 100644
> --- a/sysdeps/x86_64/multiarch/memrchr-evex.S
> +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S
> @@ -21,7 +21,7 @@
> #if ISA_SHOULD_BUILD (4)
>
> # include <sysdep.h>
> -# include "evex256-vecs.h"
> +# include "x86-evex256-vecs.h"
> # if VEC_SIZE != 32
> # error "VEC_SIZE != 32 unimplemented"
> # endif
> @@ -31,7 +31,7 @@
> # endif
>
> # define PAGE_SIZE 4096
> -# define VECMATCH VEC(0)
> +# define VMMMATCH VMM(0)
>
> .section SECTION(.text), "ax", @progbits
> ENTRY_P2ALIGN(MEMRCHR, 6)
> @@ -47,7 +47,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6)
> correct page cross check and 2) it correctly sets up end ptr to be
> subtract by lzcnt aligned. */
> leaq -1(%rdi, %rdx), %rax
> - vpbroadcastb %esi, %VECMATCH
> + vpbroadcastb %esi, %VMMMATCH
>
> /* Check if we can load 1x VEC without crossing a page. */
> testl $(PAGE_SIZE - VEC_SIZE), %eax
> @@ -55,7 +55,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6)
>
> /* Don't use rax for pointer here because EVEX has better encoding with
> offset % VEC_SIZE == 0. */
> - vpcmpb $0, -(VEC_SIZE)(%rdi, %rdx), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE)(%rdi, %rdx), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> /* Fall through for rdx (len) <= VEC_SIZE (expect small sizes). */
> @@ -96,7 +96,7 @@ L(more_1x_vec):
> movq %rax, %rdx
>
> /* Need no matter what. */
> - vpcmpb $0, -(VEC_SIZE)(%rax), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> subq %rdi, %rdx
> @@ -115,7 +115,7 @@ L(last_2x_vec):
>
> /* Don't use rax for pointer here because EVEX has better encoding with
> offset % VEC_SIZE == 0. */
> - vpcmpb $0, -(VEC_SIZE * 2)(%rdi, %rdx), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE * 2)(%rdi, %rdx), %VMMMATCH, %k0
> kmovd %k0, %ecx
> /* NB: 64-bit lzcnt. This will naturally add 32 to position. */
> lzcntq %rcx, %rcx
> @@ -131,7 +131,7 @@ L(last_2x_vec):
> L(page_cross):
> movq %rax, %rsi
> andq $-VEC_SIZE, %rsi
> - vpcmpb $0, (%rsi), %VECMATCH, %k0
> + vpcmpb $0, (%rsi), %VMMMATCH, %k0
> kmovd %k0, %r8d
> /* Shift out negative alignment (because we are starting from endptr and
> working backwards). */
> @@ -165,13 +165,13 @@ L(more_2x_vec):
> testl %ecx, %ecx
> jnz L(ret_vec_x0_dec)
>
> - vpcmpb $0, -(VEC_SIZE * 2)(%rax), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE * 2)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
> testl %ecx, %ecx
> jnz L(ret_vec_x1)
>
> /* Need no matter what. */
> - vpcmpb $0, -(VEC_SIZE * 3)(%rax), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE * 3)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> subq $(VEC_SIZE * 4), %rdx
> @@ -185,7 +185,7 @@ L(last_vec):
>
>
> /* Need no matter what. */
> - vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
> lzcntl %ecx, %ecx
> subq $(VEC_SIZE * 3 + 1), %rax
> @@ -220,7 +220,7 @@ L(more_4x_vec):
> testl %ecx, %ecx
> jnz L(ret_vec_x2)
>
> - vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0
> + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> testl %ecx, %ecx
> @@ -243,17 +243,17 @@ L(more_4x_vec):
> L(loop_4x_vec):
> /* Store 1 where not-equals and 0 where equals in k1 (used to mask later
> on). */
> - vpcmpb $4, (VEC_SIZE * 3)(%rax), %VECMATCH, %k1
> + vpcmpb $4, (VEC_SIZE * 3)(%rax), %VMMMATCH, %k1
>
> /* VEC(2/3) will have zero-byte where we found a CHAR. */
> - vpxorq (VEC_SIZE * 2)(%rax), %VECMATCH, %VEC(2)
> - vpxorq (VEC_SIZE * 1)(%rax), %VECMATCH, %VEC(3)
> - vpcmpb $0, (VEC_SIZE * 0)(%rax), %VECMATCH, %k4
> + vpxorq (VEC_SIZE * 2)(%rax), %VMMMATCH, %VMM(2)
> + vpxorq (VEC_SIZE * 1)(%rax), %VMMMATCH, %VMM(3)
> + vpcmpb $0, (VEC_SIZE * 0)(%rax), %VMMMATCH, %k4
>
> /* Combine VEC(2/3) with min and maskz with k1 (k1 has zero bit where
> CHAR is found and VEC(2/3) have zero-byte where CHAR is found). */
> - vpminub %VEC(2), %VEC(3), %VEC(3){%k1}{z}
> - vptestnmb %VEC(3), %VEC(3), %k2
> + vpminub %VMM(2), %VMM(3), %VMM(3){%k1}{z}
> + vptestnmb %VMM(3), %VMM(3), %k2
>
> /* Any 1s and we found CHAR. */
> kortestd %k2, %k4
> @@ -270,7 +270,7 @@ L(loop_4x_vec):
> L(last_4x_vec):
>
> /* Used no matter what. */
> - vpcmpb $0, (VEC_SIZE * -1)(%rax), %VECMATCH, %k0
> + vpcmpb $0, (VEC_SIZE * -1)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> cmpl $(VEC_SIZE * 2), %edx
> @@ -280,14 +280,14 @@ L(last_4x_vec):
> jnz L(ret_vec_x0_dec)
>
>
> - vpcmpb $0, (VEC_SIZE * -2)(%rax), %VECMATCH, %k0
> + vpcmpb $0, (VEC_SIZE * -2)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> testl %ecx, %ecx
> jnz L(ret_vec_x1)
>
> /* Used no matter what. */
> - vpcmpb $0, (VEC_SIZE * -3)(%rax), %VECMATCH, %k0
> + vpcmpb $0, (VEC_SIZE * -3)(%rax), %VMMMATCH, %k0
> kmovd %k0, %ecx
>
> cmpl $(VEC_SIZE * 3), %edx
> @@ -309,7 +309,7 @@ L(loop_end):
> testl %ecx, %ecx
> jnz L(ret_vec_x0_end)
>
> - vptestnmb %VEC(2), %VEC(2), %k0
> + vptestnmb %VMM(2), %VMM(2), %k0
> kmovd %k0, %ecx
> testl %ecx, %ecx
> jnz L(ret_vec_x1_end)
> --
> 2.34.1
>
LGTM
--Sunil
next prev parent reply other threads:[~2022-10-15 3:45 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-14 16:40 [PATCH v1 1/3] x86: Update evex256/512 vec macros Noah Goldstein
2022-10-14 16:40 ` [PATCH v1 2/3] x86: Add macros for GPRs / mask insn based on VEC_SIZE Noah Goldstein
2022-10-14 18:02 ` H.J. Lu
2022-10-14 18:26 ` Noah Goldstein
2022-10-14 18:35 ` H.J. Lu
2022-10-14 18:38 ` Noah Goldstein
2022-10-14 18:53 ` H.J. Lu
2022-10-14 19:00 ` Noah Goldstein
2022-10-14 19:13 ` H.J. Lu
2022-10-14 19:15 ` Noah Goldstein
2022-10-14 16:40 ` [PATCH v1 3/3] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-14 17:31 ` [PATCH v1 1/3] x86: Update evex256/512 vec macros H.J. Lu
2022-10-14 18:01 ` [PATCH v2 " Noah Goldstein
2022-10-14 18:01 ` [PATCH v2 2/3] x86: Add macros for GPRs / mask insn based on VEC_SIZE Noah Goldstein
2022-10-14 18:01 ` [PATCH v2 3/3] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-14 18:22 ` [PATCH v3 1/3] x86: Update evex256/512 vec macros Noah Goldstein
2022-10-14 18:22 ` [PATCH v3 2/3] x86: Add macros for GPRs / mask insn based on VEC_SIZE Noah Goldstein
2022-10-14 18:22 ` [PATCH v3 3/3] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-14 18:41 ` [PATCH v4 1/3] x86: Update evex256/512 vec macros Noah Goldstein
2022-10-14 18:41 ` [PATCH v4 2/3] x86: Add macros for GPRs / mask insn based on VEC_SIZE Noah Goldstein
2022-10-14 18:41 ` [PATCH v4 3/3] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-14 21:14 ` [PATCH v5 1/3] x86: Update evex256/512 vec macros Noah Goldstein
2022-10-14 21:15 ` [PATCH v5 2/3] x86: Add macros for GPRs / mask insn based on VEC_SIZE Noah Goldstein
2022-10-14 21:28 ` H.J. Lu
2022-10-14 22:01 ` Noah Goldstein
2022-10-14 22:05 ` H.J. Lu
2022-10-14 22:27 ` Noah Goldstein
2022-10-14 22:41 ` H.J. Lu
2022-10-14 23:15 ` Noah Goldstein
2022-10-14 23:22 ` H.J. Lu
2022-10-14 23:25 ` Noah Goldstein
2022-10-14 21:15 ` [PATCH v5 3/3] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 1/7] x86: Update and move evex256/512 vec macros Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 2/7] x86: Add macros for GPRs / mask insn based on VEC_SIZE Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 3/7] x86: Update memrchr to use new VEC macros Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 4/7] x86: Remove now unused vec header macros Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 5/7] x86: Update memmove to use new VEC macros Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 6/7] x86: Update memset " Noah Goldstein
2022-10-14 22:39 ` [PATCH v6 7/7] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-15 0:06 ` [PATCH v8 1/6] x86: Update VEC macros to complete API for evex/evex512 impls Noah Goldstein
2022-10-15 0:06 ` [PATCH v8 2/6] x86: Update memrchr to use new VEC macros Noah Goldstein
2022-10-15 0:06 ` [PATCH v8 3/6] x86: Update memmove " Noah Goldstein
2022-10-15 0:06 ` [PATCH v8 4/6] x86: Update memset " Noah Goldstein
2022-10-15 0:06 ` [PATCH v8 5/6] x86: Remove now unused vec header macros Noah Goldstein
2022-10-15 0:06 ` [PATCH v8 6/6] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-15 0:12 ` [PATCH v8 1/6] x86: Update VEC macros to complete API for evex/evex512 impls H.J. Lu
2022-10-15 0:20 ` Noah Goldstein
2022-10-15 0:20 ` [PATCH v9 " Noah Goldstein
2022-10-15 0:20 ` [PATCH v9 2/6] x86: Update memrchr to use new VEC macros Noah Goldstein
2022-10-15 2:48 ` H.J. Lu
2022-10-15 0:20 ` [PATCH v9 3/6] x86: Update memmove " Noah Goldstein
2022-10-15 2:52 ` H.J. Lu
2022-10-15 2:57 ` Noah Goldstein
2022-10-15 0:20 ` [PATCH v9 4/6] x86: Update memset " Noah Goldstein
2022-10-15 2:53 ` H.J. Lu
2022-10-15 0:20 ` [PATCH v9 5/6] x86: Remove now unused vec header macros Noah Goldstein
2022-10-15 2:56 ` H.J. Lu
2022-10-15 0:21 ` [PATCH v9 6/6] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-15 2:58 ` H.J. Lu
2022-10-15 2:45 ` [PATCH v9 1/6] x86: Update VEC macros to complete API for evex/evex512 impls H.J. Lu
2022-10-15 3:00 ` [PATCH v10 " Noah Goldstein
2022-10-15 3:00 ` [PATCH v10 2/6] x86: Update memrchr to use new VEC macros Noah Goldstein
2022-10-15 3:44 ` Sunil Pandey [this message]
2022-10-15 3:00 ` [PATCH v10 3/6] x86: Update memmove " Noah Goldstein
2022-10-15 3:43 ` Sunil Pandey
2022-10-15 3:00 ` [PATCH v10 4/6] x86: Update memset " Noah Goldstein
2022-10-15 3:42 ` Sunil Pandey
2022-10-15 3:00 ` [PATCH v10 5/6] x86: Remove now unused vec header macros Noah Goldstein
2022-10-15 3:39 ` Sunil Pandey
2022-10-15 3:00 ` [PATCH v10 6/6] x86: Update strlen-evex-base to use new reg/vec macros Noah Goldstein
2022-10-15 3:48 ` Sunil Pandey
2022-10-15 3:37 ` [PATCH v10 1/6] x86: Update VEC macros to complete API for evex/evex512 impls Sunil Pandey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAMAf5_c42vvv42RaOO7Nbdy+-p9+OrK88xR_HMea5O6oOTvjOA@mail.gmail.com \
--to=skpgkp2@gmail.com \
--cc=goldstein.w.n@gmail.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).