public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] S390: Optimize __memset_z196.
@ 2020-06-19 13:51 Stefan Liebler
  2020-06-25  8:18 ` Stefan Liebler
  0 siblings, 1 reply; 3+ messages in thread
From: Stefan Liebler @ 2020-06-19 13:51 UTC (permalink / raw)
  To: libc-alpha; +Cc: Stefan Liebler

It turned out that a 256b-mvc instruction which depends on the
result of a previous 256b-mvc instruction is counterproductive.
Therefore this patch adjusts the 256b-loop by storing the
first byte with stc and setting the remaining 255b with mvc.
Now the 255b-mvc instruction depends on the stc instruction.
---
 sysdeps/s390/memset-z900.S | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
index ca3eac0522..1e0c334156 100644
--- a/sysdeps/s390/memset-z900.S
+++ b/sysdeps/s390/memset-z900.S
@@ -157,28 +157,27 @@ ENTRY(MEMSET_Z196)
 # if !defined __s390x__
 	llgfr	%r4,%r4
 # endif /* !defined __s390x__  */
-	ltgr    %r4,%r4
-	je      .L_Z196_4
+	clgfi	%r4,1
+	jl	.L_Z196_4	    # n == 0
 	stc     %r3,0(%r2)
+	je      .L_Z196_4	    # n == 1
+	aghi	%r4,-2
 	lgr     %r1,%r2
-	cghi    %r4,1
-	je      .L_Z196_4
-	aghi    %r4,-2
-	srlg    %r5,%r4,8
-	ltgr    %r5,%r5
-	jne     .L_Z196_1
+	risbg	%r5,%r4,8,128+63,56 # r5 = n / 256
+	jne     .L_Z196_1	    # Jump away if r5 != 0
 .L_Z196_3:
 	exrl    %r4,.L_Z196_17
 .L_Z196_4:
 	br      %r14
 .L_Z196_1:
 	cgfi	%r5,1048576
-	jh	__memset_mvcle	   # Switch to mvcle for >256MB
+	jh	__memset_mvcle	    # Switch to mvcle for >256MB
 .L_Z196_2:
 	pfd     2,1024(%r1)
-	mvc     1(256,%r1),0(%r1)
+	mvc     1(255,%r1),0(%r1)
 	aghi    %r5,-1
 	la      %r1,256(%r1)
+	stc     %r3,0(%r1)
 	jne     .L_Z196_2
 	j       .L_Z196_3
 .L_Z196_17:
-- 
2.25.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] S390: Optimize __memset_z196.
  2020-06-19 13:51 [PATCH] S390: Optimize __memset_z196 Stefan Liebler
@ 2020-06-25  8:18 ` Stefan Liebler
  2020-06-26  7:47   ` Stefan Liebler
  0 siblings, 1 reply; 3+ messages in thread
From: Stefan Liebler @ 2020-06-25  8:18 UTC (permalink / raw)
  To: GNU C Library

Just for information: if nobody objects, I'll commit this patch tomorrow.

On 6/19/20 3:51 PM, Stefan Liebler wrote:
> It turned out that a 256b-mvc instruction which depends on the
> result of a previous 256b-mvc instruction is counterproductive.
> Therefore this patch adjusts the 256b-loop by storing the
> first byte with stc and setting the remaining 255b with mvc.
> Now the 255b-mvc instruction depends on the stc instruction.
> ---
>  sysdeps/s390/memset-z900.S | 19 +++++++++----------
>  1 file changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
> index ca3eac0522..1e0c334156 100644
> --- a/sysdeps/s390/memset-z900.S
> +++ b/sysdeps/s390/memset-z900.S
> @@ -157,28 +157,27 @@ ENTRY(MEMSET_Z196)
>  # if !defined __s390x__
>  	llgfr	%r4,%r4
>  # endif /* !defined __s390x__  */
> -	ltgr    %r4,%r4
> -	je      .L_Z196_4
> +	clgfi	%r4,1
> +	jl	.L_Z196_4	    # n == 0
>  	stc     %r3,0(%r2)
> +	je      .L_Z196_4	    # n == 1
> +	aghi	%r4,-2
>  	lgr     %r1,%r2
> -	cghi    %r4,1
> -	je      .L_Z196_4
> -	aghi    %r4,-2
> -	srlg    %r5,%r4,8
> -	ltgr    %r5,%r5
> -	jne     .L_Z196_1
> +	risbg	%r5,%r4,8,128+63,56 # r5 = n / 256
> +	jne     .L_Z196_1	    # Jump away if r5 != 0
>  .L_Z196_3:
>  	exrl    %r4,.L_Z196_17
>  .L_Z196_4:
>  	br      %r14
>  .L_Z196_1:
>  	cgfi	%r5,1048576
> -	jh	__memset_mvcle	   # Switch to mvcle for >256MB
> +	jh	__memset_mvcle	    # Switch to mvcle for >256MB
>  .L_Z196_2:
>  	pfd     2,1024(%r1)
> -	mvc     1(256,%r1),0(%r1)
> +	mvc     1(255,%r1),0(%r1)
>  	aghi    %r5,-1
>  	la      %r1,256(%r1)
> +	stc     %r3,0(%r1)
>  	jne     .L_Z196_2
>  	j       .L_Z196_3
>  .L_Z196_17:
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] S390: Optimize __memset_z196.
  2020-06-25  8:18 ` Stefan Liebler
@ 2020-06-26  7:47   ` Stefan Liebler
  0 siblings, 0 replies; 3+ messages in thread
From: Stefan Liebler @ 2020-06-26  7:47 UTC (permalink / raw)
  To: libc-alpha

committed

On 6/25/20 10:18 AM, Stefan Liebler via Libc-alpha wrote:
> Just for information: if nobody objects, I'll commit this patch tomorrow.
> 
> On 6/19/20 3:51 PM, Stefan Liebler wrote:
>> It turned out that a 256b-mvc instruction which depends on the
>> result of a previous 256b-mvc instruction is counterproductive.
>> Therefore this patch adjusts the 256b-loop by storing the
>> first byte with stc and setting the remaining 255b with mvc.
>> Now the 255b-mvc instruction depends on the stc instruction.
>> ---
>>  sysdeps/s390/memset-z900.S | 19 +++++++++----------
>>  1 file changed, 9 insertions(+), 10 deletions(-)
>>
>> diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
>> index ca3eac0522..1e0c334156 100644
>> --- a/sysdeps/s390/memset-z900.S
>> +++ b/sysdeps/s390/memset-z900.S
>> @@ -157,28 +157,27 @@ ENTRY(MEMSET_Z196)
>>  # if !defined __s390x__
>>  	llgfr	%r4,%r4
>>  # endif /* !defined __s390x__  */
>> -	ltgr    %r4,%r4
>> -	je      .L_Z196_4
>> +	clgfi	%r4,1
>> +	jl	.L_Z196_4	    # n == 0
>>  	stc     %r3,0(%r2)
>> +	je      .L_Z196_4	    # n == 1
>> +	aghi	%r4,-2
>>  	lgr     %r1,%r2
>> -	cghi    %r4,1
>> -	je      .L_Z196_4
>> -	aghi    %r4,-2
>> -	srlg    %r5,%r4,8
>> -	ltgr    %r5,%r5
>> -	jne     .L_Z196_1
>> +	risbg	%r5,%r4,8,128+63,56 # r5 = n / 256
>> +	jne     .L_Z196_1	    # Jump away if r5 != 0
>>  .L_Z196_3:
>>  	exrl    %r4,.L_Z196_17
>>  .L_Z196_4:
>>  	br      %r14
>>  .L_Z196_1:
>>  	cgfi	%r5,1048576
>> -	jh	__memset_mvcle	   # Switch to mvcle for >256MB
>> +	jh	__memset_mvcle	    # Switch to mvcle for >256MB
>>  .L_Z196_2:
>>  	pfd     2,1024(%r1)
>> -	mvc     1(256,%r1),0(%r1)
>> +	mvc     1(255,%r1),0(%r1)
>>  	aghi    %r5,-1
>>  	la      %r1,256(%r1)
>> +	stc     %r3,0(%r1)
>>  	jne     .L_Z196_2
>>  	j       .L_Z196_3
>>  .L_Z196_17:
>>
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-06-26  7:47 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-19 13:51 [PATCH] S390: Optimize __memset_z196 Stefan Liebler
2020-06-25  8:18 ` Stefan Liebler
2020-06-26  7:47   ` Stefan Liebler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).