* [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2
@ 2022-10-20 2:13 Noah Goldstein
2022-10-20 2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20 2:13 UTC (permalink / raw)
To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos
`testb` saves a bit of code size if the imm-operand can be encoded in
1 byte.
Tested on x86-64.
---
sysdeps/x86_64/multiarch/strcmp-avx2.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 4c01d664e8..a24f886a27 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -214,7 +214,7 @@ STRCMP:
# else
mov (%LOCALE_REG), %RAX_LP
# endif
- testl $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
+ testb $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
jne STRCASECMP_L_NONASCII
leaq _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE
# endif
--
2.34.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2
2022-10-20 2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
@ 2022-10-20 2:13 ` Noah Goldstein
2022-10-20 17:03 ` H.J. Lu
2022-10-20 2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20 2:13 UTC (permalink / raw)
To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos
`testb` saves a bit of code size if the imm-operand can be encoded in
1 byte.
Tested on x86-64.
---
sysdeps/x86_64/multiarch/strcmp-sse2.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S
index 3c69fc1df1..a975c924d8 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S
@@ -104,7 +104,7 @@ ENTRY (STRCMP)
# else
mov (%rdx), %RAX_LP
# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strcasecmp_l_nonascii
# elif defined USE_AS_STRNCASECMP_L
/* We have to fall back on the C implementation for locales
@@ -114,7 +114,7 @@ ENTRY (STRCMP)
# else
mov (%rcx), %RAX_LP
# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strncasecmp_l_nonascii
# endif
--
2.34.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42
2022-10-20 2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
2022-10-20 2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
@ 2022-10-20 2:13 ` Noah Goldstein
2022-10-20 17:03 ` H.J. Lu
2022-10-20 2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
2022-10-20 17:02 ` [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 H.J. Lu
3 siblings, 1 reply; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20 2:13 UTC (permalink / raw)
To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos
`testb` saves a bit of code size if the imm-operand can be encoded in
1 byte.
Tested on x86-64.
---
sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
index dc6fc90e14..6b540ff894 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
@@ -119,7 +119,7 @@ STRCMP:
# else
mov (%rdx), %RAX_LP
# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strcasecmp_l_nonascii
# endif
# ifdef USE_AS_STRNCASECMP_L
@@ -130,7 +130,7 @@ STRCMP:
# else
mov (%rcx), %RAX_LP
# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strncasecmp_l_nonascii
# endif
--
2.34.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms
2022-10-20 2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
2022-10-20 2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
2022-10-20 2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
@ 2022-10-20 2:13 ` Noah Goldstein
2022-10-20 17:03 ` H.J. Lu
2022-10-20 17:02 ` [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 H.J. Lu
3 siblings, 1 reply; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20 2:13 UTC (permalink / raw)
To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos
`testb` saves a bit of code size if the imm-operand can be encoded in
1 byte.
Tested on x86-64.
---
sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 5b758cae5e..f6d7f3e88b 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -616,7 +616,11 @@ L(movsb):
jae L(large_memcpy_2x_check)
# if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB
/* Only avoid short movsb if CPU has FSRM. */
+# if X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB < 256
+ testb $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+# else
testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+# endif
jz L(skip_short_movsb_check)
# if AVOID_SHORT_DISTANCE_REP_MOVSB
/* Avoid "rep movsb" if RCX, the distance between source and
--
2.34.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2
2022-10-20 2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
` (2 preceding siblings ...)
2022-10-20 2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
@ 2022-10-20 17:02 ` H.J. Lu
3 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:02 UTC (permalink / raw)
To: Noah Goldstein; +Cc: libc-alpha, carlos
On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded in
> 1 byte.
>
> Tested on x86-64.
> ---
> sysdeps/x86_64/multiarch/strcmp-avx2.S | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> index 4c01d664e8..a24f886a27 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> @@ -214,7 +214,7 @@ STRCMP:
> # else
> mov (%LOCALE_REG), %RAX_LP
> # endif
> - testl $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
> + testb $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
> jne STRCASECMP_L_NONASCII
> leaq _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE
> # endif
> --
> 2.34.1
>
LGTM.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2
2022-10-20 2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
@ 2022-10-20 17:03 ` H.J. Lu
0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:03 UTC (permalink / raw)
To: Noah Goldstein; +Cc: libc-alpha, carlos
On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded in
> 1 byte.
>
> Tested on x86-64.
> ---
> sysdeps/x86_64/multiarch/strcmp-sse2.S | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S
> index 3c69fc1df1..a975c924d8 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-sse2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S
> @@ -104,7 +104,7 @@ ENTRY (STRCMP)
> # else
> mov (%rdx), %RAX_LP
> # endif
> - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> + testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> jne __strcasecmp_l_nonascii
> # elif defined USE_AS_STRNCASECMP_L
> /* We have to fall back on the C implementation for locales
> @@ -114,7 +114,7 @@ ENTRY (STRCMP)
> # else
> mov (%rcx), %RAX_LP
> # endif
> - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> + testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> jne __strncasecmp_l_nonascii
> # endif
>
> --
> 2.34.1
>
LGTM.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42
2022-10-20 2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
@ 2022-10-20 17:03 ` H.J. Lu
0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:03 UTC (permalink / raw)
To: Noah Goldstein; +Cc: libc-alpha, carlos
On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded in
> 1 byte.
>
> Tested on x86-64.
> ---
> sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
> index dc6fc90e14..6b540ff894 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
> @@ -119,7 +119,7 @@ STRCMP:
> # else
> mov (%rdx), %RAX_LP
> # endif
> - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> + testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> jne __strcasecmp_l_nonascii
> # endif
> # ifdef USE_AS_STRNCASECMP_L
> @@ -130,7 +130,7 @@ STRCMP:
> # else
> mov (%rcx), %RAX_LP
> # endif
> - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> + testb $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> jne __strncasecmp_l_nonascii
> # endif
>
> --
> 2.34.1
>
LGTM.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms
2022-10-20 2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
@ 2022-10-20 17:03 ` H.J. Lu
0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:03 UTC (permalink / raw)
To: Noah Goldstein; +Cc: libc-alpha, carlos
On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded in
> 1 byte.
>
> Tested on x86-64.
> ---
> sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> index 5b758cae5e..f6d7f3e88b 100644
> --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> @@ -616,7 +616,11 @@ L(movsb):
> jae L(large_memcpy_2x_check)
> # if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB
> /* Only avoid short movsb if CPU has FSRM. */
> +# if X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB < 256
> + testb $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
> +# else
> testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
> +# endif
> jz L(skip_short_movsb_check)
> # if AVOID_SHORT_DISTANCE_REP_MOVSB
> /* Avoid "rep movsb" if RCX, the distance between source and
> --
> 2.34.1
>
LGTM.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-10-20 17:04 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-20 2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
2022-10-20 2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
2022-10-20 17:03 ` H.J. Lu
2022-10-20 2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
2022-10-20 17:03 ` H.J. Lu
2022-10-20 2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
2022-10-20 17:03 ` H.J. Lu
2022-10-20 17:02 ` [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).