From: MAILER-DAEMON (Mail Delivery System)
To: libc-alpha@sourceware.org
Subject: Undelivered Mail Returned to Sender
Date: Tue, 10 Aug 2021 11:39:28 +0200 (CEST) [thread overview]
Message-ID: <20210810093928.2317E3ACC12@fx601.security-mail.net> (raw)
[-- Attachment #1: Notification --]
[-- Type: text/plain, Size: 593 bytes --]
This is the mail system at host fx601.security-mail.net.
I'm sorry to have to inform you that your message could not
be delivered to one or more recipients. It's attached below.
For further assistance, please send mail to postmaster.
If you do so, please include this problem report. You can
delete your own text from the attached returned message.
The mail system
<mpoulhies@kalray.eu>: host zimbra2.kalray.eu[195.135.97.26] said: 550 5.1.1
<mpoulhies@kalray.eu>: Recipient address rejected: User unknown in virtual
mailbox table (in reply to RCPT TO command)
[-- Attachment #2: Delivery report --]
[-- Type: message/delivery-status, Size: 464 bytes --]
[-- Attachment #3: Undelivered Message --]
[-- Type: message/rfc822, Size: 17779 bytes --]
From: Szabolcs Nagy via Libc-alpha <libc-alpha@sourceware.org>
To: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Cc: 'GNU C Library' <libc-alpha@sourceware.org>
Subject: Re: [PATCH v4 2/5] AArch64: Improve A64FX memset for large sizes
Date: Tue, 10 Aug 2021 10:38:31 +0100
Message-ID: <20210810093830.GD20410@arm.com>
The 08/09/2021 16:17, Wilco Dijkstra via Libc-alpha wrote:
> v4: Slightly tweak alignment code
>
> Improve performance of large memsets. Simplify alignment code. For zero memset use DC ZVA,
> which almost doubles performance. For non-zero memsets use the unroll8 loop, which is about 10% faster.
This is OK to commit.
You should keep
Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com>
in the commit message if there are only minor tweaks or no changes.
>
> ---
>
> diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
> index cf3d402ef681a9d98964d1751537945692a1ae68..6bc8ef5e0c84dbb59a57d114ae6ec8e3fa3822ad 100644
> --- a/sysdeps/aarch64/multiarch/memset_a64fx.S
> +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
> @@ -27,14 +27,11 @@
> */
>
> #define L1_SIZE (64*1024) // L1 64KB
> -#define L2_SIZE (8*1024*1024) // L2 8MB - 1MB
> +#define L2_SIZE (8*1024*1024) // L2 8MB
> #define CACHE_LINE_SIZE 256
> #define PF_DIST_L1 (CACHE_LINE_SIZE * 16) // Prefetch distance L1
> -#define ZF_DIST (CACHE_LINE_SIZE * 21) // Zerofill distance
> -#define rest x8
> +#define rest x2
> #define vector_length x9
> -#define vl_remainder x10 // vector_length remainder
> -#define cl_remainder x11 // CACHE_LINE_SIZE remainder
>
> #if HAVE_AARCH64_SVE_ASM
> # if IS_IN (libc)
> @@ -42,14 +39,6 @@
>
> .arch armv8.2-a+sve
>
> - .macro dc_zva times
> - dc zva, tmp1
> - add tmp1, tmp1, CACHE_LINE_SIZE
> - .if \times-1
> - dc_zva "(\times-1)"
> - .endif
> - .endm
> -
> .macro st1b_unroll first=0, last=7
> st1b z0.b, p0, [dst, \first, mul vl]
> .if \last-\first
> @@ -188,54 +177,30 @@ L(L1_prefetch): // if rest >= L1_SIZE
> cbnz rest, L(unroll32)
> ret
>
> -L(L2):
> - // align dst address at vector_length byte boundary
> - sub tmp1, vector_length, 1
> - ands tmp2, dst, tmp1
> - // if vl_remainder == 0
> - b.eq 1f
> - sub vl_remainder, vector_length, tmp2
> - // process remainder until the first vector_length boundary
> - whilelt p2.b, xzr, vl_remainder
> - st1b z0.b, p2, [dst]
> - add dst, dst, vl_remainder
> - sub rest, rest, vl_remainder
> - // align dstin address at CACHE_LINE_SIZE byte boundary
> -1: mov tmp1, CACHE_LINE_SIZE
> - ands tmp2, dst, CACHE_LINE_SIZE - 1
> - // if cl_remainder == 0
> - b.eq L(L2_dc_zva)
> - sub cl_remainder, tmp1, tmp2
> - // process remainder until the first CACHE_LINE_SIZE boundary
> - mov tmp1, xzr // index
> -2: whilelt p2.b, tmp1, cl_remainder
> - st1b z0.b, p2, [dst, tmp1]
> - incb tmp1
> - cmp tmp1, cl_remainder
> - b.lo 2b
> - add dst, dst, cl_remainder
> - sub rest, rest, cl_remainder
> -
> -L(L2_dc_zva):
> - // zero fill
> - mov tmp1, dst
> - dc_zva (ZF_DIST / CACHE_LINE_SIZE) - 1
> - mov zva_len, ZF_DIST
> - add tmp1, zva_len, CACHE_LINE_SIZE * 2
> - // unroll
> + // count >= L2_SIZE
> .p2align 3
> -1: st1b_unroll 0, 3
> - add tmp2, dst, zva_len
> - dc zva, tmp2
> - st1b_unroll 4, 7
> - add tmp2, tmp2, CACHE_LINE_SIZE
> - dc zva, tmp2
> - add dst, dst, CACHE_LINE_SIZE * 2
> - sub rest, rest, CACHE_LINE_SIZE * 2
> - cmp rest, tmp1 // ZF_DIST + CACHE_LINE_SIZE * 2
> - b.ge 1b
> - cbnz rest, L(unroll8)
> - ret
> +L(L2):
> + tst valw, 255
> + b.ne L(unroll8)
> + // align dst to CACHE_LINE_SIZE byte boundary
> + and tmp2, dst, CACHE_LINE_SIZE - 1
> + st1b z0.b, p0, [dst, 0, mul vl]
> + st1b z0.b, p0, [dst, 1, mul vl]
> + st1b z0.b, p0, [dst, 2, mul vl]
> + st1b z0.b, p0, [dst, 3, mul vl]
> + sub dst, dst, tmp2
> + add count, count, tmp2
> +
> + // clear cachelines using DC ZVA
> + sub count, count, CACHE_LINE_SIZE * 2
> + .p2align 4
> +1: add dst, dst, CACHE_LINE_SIZE
> + dc zva, dst
> + subs count, count, CACHE_LINE_SIZE
> + b.hi 1b
> + add count, count, CACHE_LINE_SIZE
> + add dst, dst, CACHE_LINE_SIZE
> + b L(last)
>
> END (MEMSET)
> libc_hidden_builtin_def (MEMSET)
>
--
To declare a filtering error, please use the following link : https://www.security-mail.net/reporter.php?mid=1101e.6112494e.4d6f3.0&r=mpoulhies%40kalray.eu&s=libc-alpha-bounces%2Bmpoulhies%3Dkalray.eu%40sourceware.org&o=Re%3A+%5BPATCH+v4+2%2F5%5D+AArch64%3A+Improve+A64FX+memset+for+large+sizes&verdict=C&c=7c105351eee0d1e6a135ee26aa59282aa1476ba2
next reply other threads:[~2021-08-10 9:39 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-10 9:39 MAILER-DAEMON [this message]
-- strict thread matches above, loose matches on Subject: below --
2021-08-11 0:54 MAILER-DAEMON
2021-08-10 22:34 MAILER-DAEMON
2021-08-10 22:19 MAILER-DAEMON
2021-08-10 21:15 MAILER-DAEMON
2021-08-10 21:09 MAILER-DAEMON
2021-08-10 21:03 MAILER-DAEMON
2021-08-10 20:11 MAILER-DAEMON
2021-08-10 19:50 MAILER-DAEMON
2021-08-10 18:12 MAILER-DAEMON
2021-08-10 18:04 MAILER-DAEMON
2021-08-10 18:03 MAILER-DAEMON
2021-08-10 17:48 MAILER-DAEMON
2021-08-10 17:41 MAILER-DAEMON
2021-08-10 17:39 MAILER-DAEMON
2021-08-10 15:42 MAILER-DAEMON
2021-08-10 14:39 MAILER-DAEMON
2021-08-10 13:49 MAILER-DAEMON
2021-08-10 13:34 MAILER-DAEMON
2021-08-10 13:21 MAILER-DAEMON
2021-08-10 13:02 MAILER-DAEMON
2021-08-10 11:20 MAILER-DAEMON
2021-08-10 9:45 MAILER-DAEMON
2021-08-10 9:44 MAILER-DAEMON
2021-08-10 9:41 MAILER-DAEMON
2021-08-10 9:37 MAILER-DAEMON
[not found] <4CwXgY5nCWzFr7@mailbackend.panix.com>
[not found] ` <9531c4c9-2354-4c87-4453-b492afec846f@redhat.com>
2020-12-16 0:42 ` Zack Weinberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210810093928.2317E3ACC12@fx601.security-mail.net \
--to=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).