public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: MAILER-DAEMON (Mail Delivery System)
To: libc-alpha@sourceware.org
Subject: Undelivered Mail Returned to Sender
Date: Tue, 10 Aug 2021 11:39:28 +0200 (CEST)	[thread overview]
Message-ID: <20210810093928.2317E3ACC12@fx601.security-mail.net> (raw)

[-- Attachment #1: Notification --]
[-- Type: text/plain, Size: 593 bytes --]

This is the mail system at host fx601.security-mail.net.

I'm sorry to have to inform you that your message could not
be delivered to one or more recipients. It's attached below.

For further assistance, please send mail to postmaster.

If you do so, please include this problem report. You can
delete your own text from the attached returned message.

                   The mail system

<mpoulhies@kalray.eu>: host zimbra2.kalray.eu[195.135.97.26] said: 550 5.1.1
    <mpoulhies@kalray.eu>: Recipient address rejected: User unknown in virtual
    mailbox table (in reply to RCPT TO command)

[-- Attachment #2: Delivery report --]
[-- Type: message/delivery-status, Size: 464 bytes --]

[-- Attachment #3: Undelivered Message --]
[-- Type: message/rfc822, Size: 17779 bytes --]

From: Szabolcs Nagy via Libc-alpha <libc-alpha@sourceware.org>
To: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Cc: 'GNU C Library' <libc-alpha@sourceware.org>
Subject: Re: [PATCH v4 2/5] AArch64: Improve A64FX memset for large sizes
Date: Tue, 10 Aug 2021 10:38:31 +0100
Message-ID: <20210810093830.GD20410@arm.com>

The 08/09/2021 16:17, Wilco Dijkstra via Libc-alpha wrote:
> v4: Slightly tweak alignment code
> 
> Improve performance of large memsets. Simplify alignment code. For zero memsets, use DC ZVA,
> which almost doubles performance. For non-zero memsets, use the unroll8 loop, which is about 10% faster.


This is OK to commit.

You should keep

Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com>

in the commit message if there are only minor tweaks or no changes.

> 
> ---
> 
> diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
> index cf3d402ef681a9d98964d1751537945692a1ae68..6bc8ef5e0c84dbb59a57d114ae6ec8e3fa3822ad 100644
> --- a/sysdeps/aarch64/multiarch/memset_a64fx.S
> +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
> @@ -27,14 +27,11 @@
>   */
>  
>  #define L1_SIZE		(64*1024)	// L1 64KB
> -#define L2_SIZE         (8*1024*1024)	// L2 8MB - 1MB
> +#define L2_SIZE         (8*1024*1024)	// L2 8MB
>  #define CACHE_LINE_SIZE	256
>  #define PF_DIST_L1	(CACHE_LINE_SIZE * 16)	// Prefetch distance L1
> -#define ZF_DIST		(CACHE_LINE_SIZE * 21)	// Zerofill distance
> -#define rest		x8
> +#define rest		x2
>  #define vector_length	x9
> -#define vl_remainder	x10	// vector_length remainder
> -#define cl_remainder	x11	// CACHE_LINE_SIZE remainder
>  
>  #if HAVE_AARCH64_SVE_ASM
>  # if IS_IN (libc)
> @@ -42,14 +39,6 @@
>  
>  	.arch armv8.2-a+sve
>  
> -	.macro dc_zva times
> -	dc	zva, tmp1
> -	add	tmp1, tmp1, CACHE_LINE_SIZE
> -	.if \times-1
> -	dc_zva "(\times-1)"
> -	.endif
> -	.endm
> -
>  	.macro st1b_unroll first=0, last=7
>  	st1b	z0.b, p0, [dst, \first, mul vl]
>  	.if \last-\first
> @@ -188,54 +177,30 @@ L(L1_prefetch): // if rest >= L1_SIZE
>  	cbnz	rest, L(unroll32)
>  	ret
>  
> -L(L2):
> -	// align dst address at vector_length byte boundary
> -	sub	tmp1, vector_length, 1
> -	ands	tmp2, dst, tmp1
> -	// if vl_remainder == 0
> -	b.eq	1f
> -	sub	vl_remainder, vector_length, tmp2
> -	// process remainder until the first vector_length boundary
> -	whilelt	p2.b, xzr, vl_remainder
> -	st1b	z0.b, p2, [dst]
> -	add	dst, dst, vl_remainder
> -	sub	rest, rest, vl_remainder
> -	// align dstin address at CACHE_LINE_SIZE byte boundary
> -1:	mov	tmp1, CACHE_LINE_SIZE
> -	ands	tmp2, dst, CACHE_LINE_SIZE - 1
> -	// if cl_remainder == 0
> -	b.eq	L(L2_dc_zva)
> -	sub	cl_remainder, tmp1, tmp2
> -	// process remainder until the first CACHE_LINE_SIZE boundary
> -	mov	tmp1, xzr       // index
> -2:	whilelt	p2.b, tmp1, cl_remainder
> -	st1b	z0.b, p2, [dst, tmp1]
> -	incb	tmp1
> -	cmp	tmp1, cl_remainder
> -	b.lo	2b
> -	add	dst, dst, cl_remainder
> -	sub	rest, rest, cl_remainder
> -
> -L(L2_dc_zva):
> -	// zero fill
> -	mov	tmp1, dst
> -	dc_zva	(ZF_DIST / CACHE_LINE_SIZE) - 1
> -	mov	zva_len, ZF_DIST
> -	add	tmp1, zva_len, CACHE_LINE_SIZE * 2
> -	// unroll
> +	// count >= L2_SIZE
>  	.p2align 3
> -1:	st1b_unroll 0, 3
> -	add	tmp2, dst, zva_len
> -	dc	 zva, tmp2
> -	st1b_unroll 4, 7
> -	add	tmp2, tmp2, CACHE_LINE_SIZE
> -	dc	zva, tmp2
> -	add	dst, dst, CACHE_LINE_SIZE * 2
> -	sub	rest, rest, CACHE_LINE_SIZE * 2
> -	cmp	rest, tmp1	// ZF_DIST + CACHE_LINE_SIZE * 2
> -	b.ge	1b
> -	cbnz	rest, L(unroll8)
> -	ret
> +L(L2):
> +	tst	valw, 255
> +	b.ne	L(unroll8)
> +	// align dst to CACHE_LINE_SIZE byte boundary
> +	and	tmp2, dst, CACHE_LINE_SIZE - 1
> +	st1b	z0.b, p0, [dst, 0, mul vl]
> +	st1b	z0.b, p0, [dst, 1, mul vl]
> +	st1b	z0.b, p0, [dst, 2, mul vl]
> +	st1b	z0.b, p0, [dst, 3, mul vl]
> +	sub	dst, dst, tmp2
> +	add	count, count, tmp2
> +
> +	// clear cachelines using DC ZVA
> +	sub	count, count, CACHE_LINE_SIZE * 2
> +	.p2align 4
> +1:	add	dst, dst, CACHE_LINE_SIZE
> +	dc	zva, dst
> +	subs	count, count, CACHE_LINE_SIZE
> +	b.hi	1b
> +	add	count, count, CACHE_LINE_SIZE
> +	add	dst, dst, CACHE_LINE_SIZE
> +	b	L(last)
>  
>  END (MEMSET)
>  libc_hidden_builtin_def (MEMSET)
> 

-- 


To declare a filtering error, please use the following link: https://www.security-mail.net/reporter.php?mid=1101e.6112494e.4d6f3.0&r=mpoulhies%40kalray.eu&s=libc-alpha-bounces%2Bmpoulhies%3Dkalray.eu%40sourceware.org&o=Re%3A+%5BPATCH+v4+2%2F5%5D+AArch64%3A+Improve+A64FX+memset+for+large+sizes&verdict=C&c=7c105351eee0d1e6a135ee26aa59282aa1476ba2

             reply	other threads:[~2021-08-10  9:39 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-10  9:39 MAILER-DAEMON [this message]
  -- strict thread matches above, loose matches on Subject: below --
2021-08-11  0:54 MAILER-DAEMON
2021-08-10 22:34 MAILER-DAEMON
2021-08-10 22:19 MAILER-DAEMON
2021-08-10 21:15 MAILER-DAEMON
2021-08-10 21:09 MAILER-DAEMON
2021-08-10 21:03 MAILER-DAEMON
2021-08-10 20:11 MAILER-DAEMON
2021-08-10 19:50 MAILER-DAEMON
2021-08-10 18:12 MAILER-DAEMON
2021-08-10 18:04 MAILER-DAEMON
2021-08-10 18:03 MAILER-DAEMON
2021-08-10 17:48 MAILER-DAEMON
2021-08-10 17:41 MAILER-DAEMON
2021-08-10 17:39 MAILER-DAEMON
2021-08-10 15:42 MAILER-DAEMON
2021-08-10 14:39 MAILER-DAEMON
2021-08-10 13:49 MAILER-DAEMON
2021-08-10 13:34 MAILER-DAEMON
2021-08-10 13:21 MAILER-DAEMON
2021-08-10 13:02 MAILER-DAEMON
2021-08-10 11:20 MAILER-DAEMON
2021-08-10  9:45 MAILER-DAEMON
2021-08-10  9:44 MAILER-DAEMON
2021-08-10  9:41 MAILER-DAEMON
2021-08-10  9:37 MAILER-DAEMON
     [not found] <4CwXgY5nCWzFr7@mailbackend.panix.com>
     [not found] ` <9531c4c9-2354-4c87-4453-b492afec846f@redhat.com>
2020-12-16  0:42   ` Zack Weinberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210810093928.2317E3ACC12@fx601.security-mail.net \
    --to=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions for how to
clone and mirror all data and code used for this inbox, as well as
the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).