public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2
@ 2022-10-20  2:13 Noah Goldstein
  2022-10-20  2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20  2:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

`testb` saves a bit of code size if the imm-operand can be encoded
in 1 byte.

Tested on x86-64.
---
 sysdeps/x86_64/multiarch/strcmp-avx2.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 4c01d664e8..a24f886a27 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -214,7 +214,7 @@ STRCMP:
 #  else
 	mov	(%LOCALE_REG), %RAX_LP
 #  endif
-	testl	$1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
+	testb	$1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
 	jne	STRCASECMP_L_NONASCII
 	leaq	_nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE
 # endif
-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2
  2022-10-20  2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
@ 2022-10-20  2:13 ` Noah Goldstein
  2022-10-20 17:03   ` H.J. Lu
  2022-10-20  2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20  2:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

`testb` saves a bit of code size if the imm-operand can be encoded
in 1 byte.

Tested on x86-64.
---
 sysdeps/x86_64/multiarch/strcmp-sse2.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S
index 3c69fc1df1..a975c924d8 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S
@@ -104,7 +104,7 @@ ENTRY (STRCMP)
 #  else
 	mov	(%rdx), %RAX_LP
 #  endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+	testb	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
 	jne	__strcasecmp_l_nonascii
 # elif defined USE_AS_STRNCASECMP_L
 	/* We have to fall back on the C implementation for locales
@@ -114,7 +114,7 @@ ENTRY (STRCMP)
 #  else
 	mov	(%rcx), %RAX_LP
 #  endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+	testb	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
 	jne	__strncasecmp_l_nonascii
 # endif
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42
  2022-10-20  2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
  2022-10-20  2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
@ 2022-10-20  2:13 ` Noah Goldstein
  2022-10-20 17:03   ` H.J. Lu
  2022-10-20  2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
  2022-10-20 17:02 ` [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 H.J. Lu
  3 siblings, 1 reply; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20  2:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

`testb` saves a bit of code size if the imm-operand can be encoded
in 1 byte.

Tested on x86-64.
---
 sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
index dc6fc90e14..6b540ff894 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
@@ -119,7 +119,7 @@ STRCMP:
 #  else
 	mov	(%rdx), %RAX_LP
 #  endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+	testb	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
 	jne	__strcasecmp_l_nonascii
 # endif
 # ifdef USE_AS_STRNCASECMP_L
@@ -130,7 +130,7 @@ STRCMP:
 #  else
 	mov	(%rcx), %RAX_LP
 #  endif
-	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+	testb	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
 	jne	__strncasecmp_l_nonascii
 # endif
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms
  2022-10-20  2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
  2022-10-20  2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
  2022-10-20  2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
@ 2022-10-20  2:13 ` Noah Goldstein
  2022-10-20 17:03   ` H.J. Lu
  2022-10-20 17:02 ` [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 H.J. Lu
  3 siblings, 1 reply; 8+ messages in thread
From: Noah Goldstein @ 2022-10-20  2:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

`testb` saves a bit of code size if the imm-operand can be encoded
in 1 byte.

Tested on x86-64.
---
 sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 5b758cae5e..f6d7f3e88b 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -616,7 +616,11 @@ L(movsb):
 	jae	L(large_memcpy_2x_check)
 # if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB
 	/* Only avoid short movsb if CPU has FSRM.  */
+#  if X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB < 256
+	testb	$X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+#  else
 	testl	$X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+#  endif
 	jz	L(skip_short_movsb_check)
 #  if AVOID_SHORT_DISTANCE_REP_MOVSB
 	/* Avoid "rep movsb" if RCX, the distance between source and
-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2
  2022-10-20  2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
                   ` (2 preceding siblings ...)
  2022-10-20  2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
@ 2022-10-20 17:02 ` H.J. Lu
  3 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:02 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded
> in 1 byte.
>
> Tested on x86-64.
> ---
>  sysdeps/x86_64/multiarch/strcmp-avx2.S | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> index 4c01d664e8..a24f886a27 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> @@ -214,7 +214,7 @@ STRCMP:
>  #  else
>         mov     (%LOCALE_REG), %RAX_LP
>  #  endif
> -       testl   $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
> +       testb   $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
>         jne     STRCASECMP_L_NONASCII
>         leaq    _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE
>  # endif
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2
  2022-10-20  2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
@ 2022-10-20 17:03   ` H.J. Lu
  0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:03 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded
> in 1 byte.
>
> Tested on x86-64.
> ---
>  sysdeps/x86_64/multiarch/strcmp-sse2.S | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S
> index 3c69fc1df1..a975c924d8 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-sse2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S
> @@ -104,7 +104,7 @@ ENTRY (STRCMP)
>  #  else
>         mov     (%rdx), %RAX_LP
>  #  endif
> -       testl   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> +       testb   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
>         jne     __strcasecmp_l_nonascii
>  # elif defined USE_AS_STRNCASECMP_L
>         /* We have to fall back on the C implementation for locales
> @@ -114,7 +114,7 @@ ENTRY (STRCMP)
>  #  else
>         mov     (%rcx), %RAX_LP
>  #  endif
> -       testl   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> +       testb   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
>         jne     __strncasecmp_l_nonascii
>  # endif
>
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42
  2022-10-20  2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
@ 2022-10-20 17:03   ` H.J. Lu
  0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:03 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded
> in 1 byte.
>
> Tested on x86-64.
> ---
>  sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
> index dc6fc90e14..6b540ff894 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
> @@ -119,7 +119,7 @@ STRCMP:
>  #  else
>         mov     (%rdx), %RAX_LP
>  #  endif
> -       testl   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> +       testb   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
>         jne     __strcasecmp_l_nonascii
>  # endif
>  # ifdef USE_AS_STRNCASECMP_L
> @@ -130,7 +130,7 @@ STRCMP:
>  #  else
>         mov     (%rcx), %RAX_LP
>  #  endif
> -       testl   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
> +       testb   $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
>         jne     __strncasecmp_l_nonascii
>  # endif
>
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms
  2022-10-20  2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
@ 2022-10-20 17:03   ` H.J. Lu
  0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2022-10-20 17:03 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Wed, Oct 19, 2022 at 7:14 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> `testb` saves a bit of code size if the imm-operand can be encoded
> in 1 byte.
>
> Tested on x86-64.
> ---
>  sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S | 4 ++++
>  1 file changed, 4 insertions(+)
>
> diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> index 5b758cae5e..f6d7f3e88b 100644
> --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> @@ -616,7 +616,11 @@ L(movsb):
>         jae     L(large_memcpy_2x_check)
>  # if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB
>         /* Only avoid short movsb if CPU has FSRM.  */
> +#  if X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB < 256
> +       testb   $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
> +#  else
>         testl   $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
> +#  endif
>         jz      L(skip_short_movsb_check)
>  #  if AVOID_SHORT_DISTANCE_REP_MOVSB
>         /* Avoid "rep movsb" if RCX, the distance between source and
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-10-20 17:04 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-20  2:13 [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 Noah Goldstein
2022-10-20  2:13 ` [PATCH v1 2/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse2 Noah Goldstein
2022-10-20 17:03   ` H.J. Lu
2022-10-20  2:13 ` [PATCH v1 3/4] x86: Use `testb` for case-locale check in str{n}casecmp-sse42 Noah Goldstein
2022-10-20 17:03   ` H.J. Lu
2022-10-20  2:13 ` [PATCH v1 4/4] x86: Use `testb` for FSRM check in memmove-vec-unaligned-erms Noah Goldstein
2022-10-20 17:03   ` H.J. Lu
2022-10-20 17:02 ` [PATCH v1 1/4] x86: Use `testb` for case-locale check in str{n}casecmp-avx2 H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).