public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
To: libc-alpha@sourceware.org, Richard.Earnshaw@arm.com,
	DJ Delorie <dj@redhat.com>
Subject: [PATCH 15/16] aarch64: Optimize __libc_mtag_tag_region
Date: Thu,  4 Mar 2021 16:34:39 +0000	[thread overview]
Message-ID: <ed1ca33bfea25741d7fb4d52d4ca233db3ab5418.1614874816.git.szabolcs.nagy@arm.com> (raw)
In-Reply-To: <cover.1614874816.git.szabolcs.nagy@arm.com>

This is a target hook for memory tagging; the original was a naive
implementation. The optimized version relies on "dc gva" to tag 64
bytes at a time for large allocations and optimizes small cases without
adding too many branches. This was not benchmarked on a real CPU, but it
is expected to be faster than the naive implementation.
---
 sysdeps/aarch64/__mtag_tag_region.S | 98 +++++++++++++++++++++++------
 1 file changed, 80 insertions(+), 18 deletions(-)

diff --git a/sysdeps/aarch64/__mtag_tag_region.S b/sysdeps/aarch64/__mtag_tag_region.S
index 9a8a3ffb60..cae0c8f121 100644
--- a/sysdeps/aarch64/__mtag_tag_region.S
+++ b/sysdeps/aarch64/__mtag_tag_region.S
@@ -20,32 +20,94 @@
 
 #ifdef USE_MTAG
 
-/* Use the same register names and assignments as memset.  */
-
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
 	.arch armv8.5-a
 	.arch_extension memtag
 
-/* NB, only supported on variants with 64-bit pointers.  */
+#define dstin	x0
+#define count	x1
+#define dst	x2
+#define dstend	x3
+#define tmp	x4
+#define zva_val	x4
+
+ENTRY (__libc_mtag_tag_region)
+	PTR_ARG (0)
+	SIZE_ARG (1)
+
+	add	dstend, dstin, count
 
-/* FIXME: This is a minimal implementation.  We could do better than
-   this for larger values of COUNT.  */
+	cmp	count, 96
+	b.hi	L(set_long)
 
-#define dstin x0
-#define count x1
-#define dst   x2
+	tbnz	count, 6, L(set96)
 
-ENTRY_ALIGN(__libc_mtag_tag_region, 6)
+	/* Set 0, 16, 32, or 48 bytes.  */
+	lsr	tmp, count, 5
+	add	tmp, dstin, tmp, lsl 4
+	cbz     count, L(end)
+	stg	dstin, [dstin]
+	stg	dstin, [tmp]
+	stg	dstin, [dstend, -16]
+L(end):
+	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	st2g	dstin, [dstin]
+	st2g	dstin, [dstin, 32]
+	st2g	dstin, [dstend, -32]
+	ret
 
-	mov	dst, dstin
-L(loop):
-	stg	dst, [dst], #16
-	subs	count, count, 16
-	bne	L(loop)
-#if 0
-	/* This is not currently needed, since for now we are only called
-	   to tag memory that is taggable.  */
-	ldg	dstin, [dstin] // Recover the tag created (might be untagged).
+	.p2align 4
+	/* Size is > 96 bytes.  */
+L(set_long):
+	cmp	count, 160
+	b.lo	L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+	mrs	zva_val, dczid_el0
+	and	zva_val, zva_val, 31
+	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+	b.ne	L(no_zva)
 #endif
+	st2g	dstin, [dstin]
+	st2g	dstin, [dstin, 32]
+	bic	dst, dstin, 63
+	sub	count, dstend, dst	/* Count is now 64 too large.  */
+	sub	count, count, 128	/* Adjust count and bias for loop.  */
+
+	.p2align 4
+L(zva_loop):
+	add	dst, dst, 64
+	dc	gva, dst
+	subs	count, count, 64
+	b.hi	L(zva_loop)
+	st2g	dstin, [dstend, -64]
+	st2g	dstin, [dstend, -32]
 	ret
+
+L(no_zva):
+	sub	dst, dstin, 32		/* Dst is biased by -32.  */
+	sub	count, count, 64	/* Adjust count for loop.  */
+L(no_zva_loop):
+	st2g	dstin, [dst, 32]
+	st2g	dstin, [dst, 64]!
+	subs	count, count, 64
+	b.hi	L(no_zva_loop)
+	st2g	dstin, [dstend, -64]
+	st2g	dstin, [dstend, -32]
+	ret
+
 END (__libc_mtag_tag_region)
 #endif /* USE_MTAG */
-- 
2.17.1


  parent reply	other threads:[~2021-03-04 16:34 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-04 16:30 [PATCH 00/16] memory tagging improvements Szabolcs Nagy
2021-03-04 16:30 ` [PATCH 01/16] malloc: Fix a realloc crash with heap tagging [BZ 27468] Szabolcs Nagy
2021-03-05  0:15   ` DJ Delorie
2021-03-05 12:01     ` Szabolcs Nagy
2021-03-05 18:42       ` DJ Delorie
2021-03-05 20:51   ` DJ Delorie
2021-03-04 16:30 ` [PATCH 02/16] Remove PR_TAGGED_ADDR_ENABLE from sys/prctl.h Szabolcs Nagy
2021-03-26 11:29   ` Szabolcs Nagy
2021-04-13  8:37     ` Szabolcs Nagy
2021-04-13 21:32       ` DJ Delorie
2021-03-04 16:31 ` [PATCH 03/16] malloc: Move MTAG_MMAP_FLAGS definition Szabolcs Nagy
2021-03-05  1:07   ` DJ Delorie
2021-03-04 16:31 ` [PATCH 04/16] malloc: Simplify __mtag_tag_new_usable Szabolcs Nagy
2021-03-05  0:20   ` DJ Delorie
2021-03-05 12:24     ` Szabolcs Nagy
2021-03-05 18:52   ` DJ Delorie
2021-03-04 16:31 ` [PATCH 05/16] malloc: Avoid taggig mmaped memory on free Szabolcs Nagy
2021-03-05  1:01   ` DJ Delorie
2021-03-04 16:31 ` [PATCH 06/16] malloc: Ensure the generic mtag hooks are not used Szabolcs Nagy
2021-03-05  1:05   ` DJ Delorie
2021-03-05 12:44     ` Szabolcs Nagy
2021-03-05 20:30   ` DJ Delorie
2021-03-04 16:32 ` [PATCH 07/16] malloc: Refactor TAG_ macros to avoid indirection Szabolcs Nagy
2021-03-05  0:28   ` DJ Delorie
2021-03-04 16:32 ` [PATCH 08/16] malloc: Use global flag instead of function pointer dispatch for mtag Szabolcs Nagy
2021-03-05  0:46   ` DJ Delorie
2021-03-05 12:53     ` Szabolcs Nagy
2021-03-04 16:32 ` [PATCH 09/16] malloc: Only support zeroing and not arbitrary memset with mtag Szabolcs Nagy
2021-03-05  0:49   ` DJ Delorie
2021-03-04 16:33 ` [PATCH 10/16] malloc: Change calloc when tagging is disabled Szabolcs Nagy
2021-03-05  1:06   ` DJ Delorie
2021-03-04 16:33 ` [PATCH 11/16] malloc: Use branches instead of mtag_granule_mask Szabolcs Nagy
2021-03-05 21:00   ` DJ Delorie
2021-03-04 16:33 ` [PATCH 12/16] malloc: Use mtag_enabled instead of USE_MTAG Szabolcs Nagy
2021-03-05  0:56   ` DJ Delorie
2021-03-04 16:34 ` [PATCH 13/16] aarch64: inline __libc_mtag_address_get_tag Szabolcs Nagy
2021-03-04 16:34 ` [PATCH 14/16] aarch64: inline __libc_mtag_new_tag Szabolcs Nagy
2021-03-04 16:34 ` Szabolcs Nagy [this message]
2021-03-04 16:34 ` [PATCH 16/16] aarch64: Optimize __libc_mtag_tag_zero_region Szabolcs Nagy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ed1ca33bfea25741d7fb4d52d4ca233db3ab5418.1614874816.git.szabolcs.nagy@arm.com \
    --to=szabolcs.nagy@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=dj@redhat.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).