* [PATCH] Fix x86_64 memchr for large input sizes
@ 2016-12-15 20:38 Adhemerval Zanella
2016-12-20 14:03 ` Adhemerval Zanella
0 siblings, 1 reply; 4+ messages in thread
From: Adhemerval Zanella @ 2016-12-15 20:38 UTC (permalink / raw)
To: libc-alpha
For input pointers whose value modulo 64 lies in [49,63], the current
optimized memchr for x86_64 performs a pointer addition which might
overflow when the search char is not found in the rest of the 64-byte
block:
* sysdeps/x86_64/memchr.S
77 .p2align 4
78 L(unaligned_no_match):
79 add %rcx, %rdx
This adds (uintptr_t)s % 16 to n in %rdx.
80 sub $16, %rdx
81 jbe L(return_null)
This patch fixes it with saturating arithmetic that clamps the pointer
value to UINTPTR_MAX when the addition overflows. This is similar to
the fix for the powerpc64/power7 version [1] and relies on the same
test-memchr.c changes.
Checked on x86_64-linux-gnu and powerpc64-linux-gnu.
[1] https://sourceware.org/ml/libc-alpha/2016-12/msg00576.html
[BZ# 19387]
* sysdeps/x86_64/memchr.S (memchr): Avoid overflow in pointer
addition.
* string/test-memchr.c (do_test): Remove alignment limitation.
(test_main): Add test that trigger BZ# 19387.
---
ChangeLog | 6 ++++++
string/test-memchr.c | 9 ++++-----
sysdeps/x86_64/memchr.S | 8 ++++++++
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/string/test-memchr.c b/string/test-memchr.c
index 7431386..6901711 100644
--- a/string/test-memchr.c
+++ b/string/test-memchr.c
@@ -76,7 +76,6 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char)
size_t i;
CHAR *result;
- align &= 7;
if ((align + len) * sizeof (CHAR) >= page_size)
return;
@@ -194,12 +193,12 @@ test_main (void)
do_test (i, 64, 256, SIZE_MAX, 0);
}
- for (i = 1; i < 16; ++i)
+ for (i = 1; i < 64; ++i)
{
- for (j = 1; j < 16; j++)
+ for (j = 1; j < 64; j++)
{
- do_test (0, 16 - j, 32, SIZE_MAX, 23);
- do_test (i, 16 - j, 32, SIZE_MAX, 23);
+ do_test (0, 64 - j, 64, SIZE_MAX, 23);
+ do_test (i, 64 - j, 64, SIZE_MAX, 23);
}
}
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 132eacb..b140de1 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -76,7 +76,15 @@ L(crosscache):
.p2align 4
L(unaligned_no_match):
+ /* Calculate the last acceptable address and check for possible
+ addition overflow by using saturated math:
+ rdx = rcx + rdx
+ rdx |= -(rdx < rcx) */
add %rcx, %rdx
+ sbb %eax, %eax
+ movslq %eax, %rax
+ orq %rdx, %rax
+ mov %rax, %rdx
sub $16, %rdx
jbe L(return_null)
add $16, %rdi
--
2.7.4
* Re: [PATCH] Fix x86_64 memchr for large input sizes
2016-12-15 20:38 [PATCH] Fix x86_64 memchr for large input sizes Adhemerval Zanella
@ 2016-12-20 14:03 ` Adhemerval Zanella
2016-12-20 17:07 ` Richard Henderson
0 siblings, 1 reply; 4+ messages in thread
From: Adhemerval Zanella @ 2016-12-20 14:03 UTC (permalink / raw)
To: libc-alpha
Ping.
On 15/12/2016 18:38, Adhemerval Zanella wrote:
> For input pointers whose value modulo 64 lies in [49,63], the current
> optimized memchr for x86_64 performs a pointer addition which might
> overflow when the search char is not found in the rest of the 64-byte
> block:
>
> * sysdeps/x86_64/memchr.S
>
> 77 .p2align 4
> 78 L(unaligned_no_match):
> 79 add %rcx, %rdx
>
> This adds (uintptr_t)s % 16 to n in %rdx.
>
> 80 sub $16, %rdx
> 81 jbe L(return_null)
>
> This patch fixes it with saturating arithmetic that clamps the pointer
> value to UINTPTR_MAX when the addition overflows. This is similar to
> the fix for the powerpc64/power7 version [1] and relies on the same
> test-memchr.c changes.
>
> Checked on x86_64-linux-gnu and powerpc64-linux-gnu.
>
> [1] https://sourceware.org/ml/libc-alpha/2016-12/msg00576.html
>
> [BZ# 19387]
> * sysdeps/x86_64/memchr.S (memchr): Avoid overflow in pointer
> addition.
> * string/test-memchr.c (do_test): Remove alignment limitation.
> (test_main): Add test that trigger BZ# 19387.
> ---
> ChangeLog | 6 ++++++
> string/test-memchr.c | 9 ++++-----
> sysdeps/x86_64/memchr.S | 8 ++++++++
> 3 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/string/test-memchr.c b/string/test-memchr.c
> index 7431386..6901711 100644
> --- a/string/test-memchr.c
> +++ b/string/test-memchr.c
> @@ -76,7 +76,6 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char)
> size_t i;
> CHAR *result;
>
> - align &= 7;
> if ((align + len) * sizeof (CHAR) >= page_size)
> return;
>
> @@ -194,12 +193,12 @@ test_main (void)
> do_test (i, 64, 256, SIZE_MAX, 0);
> }
>
> - for (i = 1; i < 16; ++i)
> + for (i = 1; i < 64; ++i)
> {
> - for (j = 1; j < 16; j++)
> + for (j = 1; j < 64; j++)
> {
> - do_test (0, 16 - j, 32, SIZE_MAX, 23);
> - do_test (i, 16 - j, 32, SIZE_MAX, 23);
> + do_test (0, 64 - j, 64, SIZE_MAX, 23);
> + do_test (i, 64 - j, 64, SIZE_MAX, 23);
> }
> }
>
> diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
> index 132eacb..b140de1 100644
> --- a/sysdeps/x86_64/memchr.S
> +++ b/sysdeps/x86_64/memchr.S
> @@ -76,7 +76,15 @@ L(crosscache):
>
> .p2align 4
> L(unaligned_no_match):
> + /* Calculate the last acceptable address and check for possible
> + addition overflow by using saturated math:
> + rdx = rcx + rdx
> + rdx |= -(rdx < rcx) */
> add %rcx, %rdx
> + sbb %eax, %eax
> + movslq %eax, %rax
> + orq %rdx, %rax
> + mov %rax, %rdx
> sub $16, %rdx
> jbe L(return_null)
> add $16, %rdi
>
* Re: [PATCH] Fix x86_64 memchr for large input sizes
2016-12-20 14:03 ` Adhemerval Zanella
@ 2016-12-20 17:07 ` Richard Henderson
2016-12-20 21:41 ` Adhemerval Zanella
0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2016-12-20 17:07 UTC (permalink / raw)
To: Adhemerval Zanella, libc-alpha
On 12/20/2016 06:03 AM, Adhemerval Zanella wrote:
> + sbb %eax, %eax
> + movslq %eax, %rax
This should be "sbb %rax, %rax" in one step.
> + orq %rdx, %rax
> + mov %rax, %rdx
Given that the comment says you wanted the result in %rdx,
why not "or %rax, %rdx"?
r~
* Re: [PATCH] Fix x86_64 memchr for large input sizes
2016-12-20 17:07 ` Richard Henderson
@ 2016-12-20 21:41 ` Adhemerval Zanella
0 siblings, 0 replies; 4+ messages in thread
From: Adhemerval Zanella @ 2016-12-20 21:41 UTC (permalink / raw)
To: Richard Henderson, libc-alpha
On 20/12/2016 15:07, Richard Henderson wrote:
> On 12/20/2016 06:03 AM, Adhemerval Zanella wrote:
>> + sbb %eax, %eax
>> + movslq %eax, %rax
>
> This should be "sbb %rax, %rax" in one step.
>
>> + orq %rdx, %rax
>> + mov %rax, %rdx
>
> Given that the comment says you wanted the result in %rdx,
> why not "or %rax, %rdx"?
Indeed, it seems the four instructions can be simplified to just:
sbb %rax, %rax
or %rax, %rdx
Thanks for the advice.