public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/nsz/mtag] aarch64: Optimize __libc_mtag_tag_zero_region
@ 2021-03-04 16:26 Szabolcs Nagy
0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-04 16:26 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=90002cbd72c900bbceef86622a6b92e7c1611642
commit 90002cbd72c900bbceef86622a6b92e7c1611642
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue Feb 9 17:59:11 2021 +0000
aarch64: Optimize __libc_mtag_tag_zero_region
This is a target hook for memory tagging; the original was a naive
implementation. It uses the same algorithm as __libc_mtag_tag_region,
but with instructions that also zero the memory. This was not
benchmarked on a real CPU, but it is expected to be faster than the
naive implementation.
Diff:
---
sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++++++++------
1 file changed, 80 insertions(+), 16 deletions(-)
diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S
index 74d398bba5..7d955fbd0c 100644
--- a/sysdeps/aarch64/__mtag_tag_zero_region.S
+++ b/sysdeps/aarch64/__mtag_tag_zero_region.S
@@ -20,30 +20,94 @@
#ifdef USE_MTAG
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
.arch armv8.5-a
.arch_extension memtag
-/* NB, only supported on variants with 64-bit pointers. */
+#define dstin x0
+#define count x1
+#define dst x2
+#define dstend x3
+#define tmp x4
+#define zva_val x4
-/* FIXME: This is a minimal implementation. We could do much better than
- this for large values of COUNT. */
+ENTRY (__libc_mtag_tag_zero_region)
+ PTR_ARG (0)
+ SIZE_ARG (1)
-#define dstin x0
-#define count x1
-#define dst x2
+ add dstend, dstin, count
-ENTRY(__libc_mtag_tag_zero_region)
+ cmp count, 96
+ b.hi L(set_long)
- mov dst, dstin
-L(loop):
- stzg dst, [dst], #16
- subs count, count, 16
- bne L(loop)
-#if 0
- /* This is not currently needed, since for now we are only called
- to tag memory that is taggable. */
- ldg dstin, [dstin] // Recover the tag created (might be untagged).
+ tbnz count, 6, L(set96)
+
+ /* Set 0, 16, 32, or 48 bytes. */
+ lsr tmp, count, 5
+ add tmp, dstin, tmp, lsl 4
+ cbz count, L(end)
+ stzg dstin, [dstin]
+ stzg dstin, [tmp]
+ stzg dstin, [dstend, -16]
+L(end):
+ ret
+
+ .p2align 4
+ /* Set 64..96 bytes. Write 64 bytes from the start and
+ 32 bytes from the end. */
+L(set96):
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ stz2g dstin, [dstend, -32]
+ ret
+
+ .p2align 4
+ /* Size is > 96 bytes. */
+L(set_long):
+ cmp count, 160
+ b.lo L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+ mrs zva_val, dczid_el0
+ and zva_val, zva_val, 31
+ cmp zva_val, 4 /* ZVA size is 64 bytes. */
+ b.ne L(no_zva)
#endif
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ bic dst, dstin, 63
+ sub count, dstend, dst /* Count is now 64 too large. */
+ sub count, count, 128 /* Adjust count and bias for loop. */
+
+ .p2align 4
+L(zva_loop):
+ add dst, dst, 64
+ dc gzva, dst
+ subs count, count, 64
+ b.hi L(zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
ret
+
+L(no_zva):
+ sub dst, dstin, 32 /* Dst is biased by -32. */
+ sub count, count, 64 /* Adjust count for loop. */
+L(no_zva_loop):
+ stz2g dstin, [dst, 32]
+ stz2g dstin, [dst, 64]!
+ subs count, count, 64
+ b.hi L(no_zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
+ ret
+
END (__libc_mtag_tag_zero_region)
#endif /* USE_MTAG */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [glibc/nsz/mtag] aarch64: Optimize __libc_mtag_tag_zero_region
@ 2021-03-19 11:58 Szabolcs Nagy
0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-19 11:58 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=601c47c4765e4b8cf921a719cf1d38da3eb97aa2
commit 601c47c4765e4b8cf921a719cf1d38da3eb97aa2
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue Feb 9 17:59:11 2021 +0000
aarch64: Optimize __libc_mtag_tag_zero_region
This is a target hook for memory tagging; the original was a naive
implementation. It uses the same algorithm as __libc_mtag_tag_region,
but with instructions that also zero the memory. This was not
benchmarked on a real CPU, but it is expected to be faster than the
naive implementation.
Diff:
---
sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++++++++------
1 file changed, 80 insertions(+), 16 deletions(-)
diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S
index 74d398bba5..7d955fbd0c 100644
--- a/sysdeps/aarch64/__mtag_tag_zero_region.S
+++ b/sysdeps/aarch64/__mtag_tag_zero_region.S
@@ -20,30 +20,94 @@
#ifdef USE_MTAG
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
.arch armv8.5-a
.arch_extension memtag
-/* NB, only supported on variants with 64-bit pointers. */
+#define dstin x0
+#define count x1
+#define dst x2
+#define dstend x3
+#define tmp x4
+#define zva_val x4
-/* FIXME: This is a minimal implementation. We could do much better than
- this for large values of COUNT. */
+ENTRY (__libc_mtag_tag_zero_region)
+ PTR_ARG (0)
+ SIZE_ARG (1)
-#define dstin x0
-#define count x1
-#define dst x2
+ add dstend, dstin, count
-ENTRY(__libc_mtag_tag_zero_region)
+ cmp count, 96
+ b.hi L(set_long)
- mov dst, dstin
-L(loop):
- stzg dst, [dst], #16
- subs count, count, 16
- bne L(loop)
-#if 0
- /* This is not currently needed, since for now we are only called
- to tag memory that is taggable. */
- ldg dstin, [dstin] // Recover the tag created (might be untagged).
+ tbnz count, 6, L(set96)
+
+ /* Set 0, 16, 32, or 48 bytes. */
+ lsr tmp, count, 5
+ add tmp, dstin, tmp, lsl 4
+ cbz count, L(end)
+ stzg dstin, [dstin]
+ stzg dstin, [tmp]
+ stzg dstin, [dstend, -16]
+L(end):
+ ret
+
+ .p2align 4
+ /* Set 64..96 bytes. Write 64 bytes from the start and
+ 32 bytes from the end. */
+L(set96):
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ stz2g dstin, [dstend, -32]
+ ret
+
+ .p2align 4
+ /* Size is > 96 bytes. */
+L(set_long):
+ cmp count, 160
+ b.lo L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+ mrs zva_val, dczid_el0
+ and zva_val, zva_val, 31
+ cmp zva_val, 4 /* ZVA size is 64 bytes. */
+ b.ne L(no_zva)
#endif
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ bic dst, dstin, 63
+ sub count, dstend, dst /* Count is now 64 too large. */
+ sub count, count, 128 /* Adjust count and bias for loop. */
+
+ .p2align 4
+L(zva_loop):
+ add dst, dst, 64
+ dc gzva, dst
+ subs count, count, 64
+ b.hi L(zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
ret
+
+L(no_zva):
+ sub dst, dstin, 32 /* Dst is biased by -32. */
+ sub count, count, 64 /* Adjust count for loop. */
+L(no_zva_loop):
+ stz2g dstin, [dst, 32]
+ stz2g dstin, [dst, 64]!
+ subs count, count, 64
+ b.hi L(no_zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
+ ret
+
END (__libc_mtag_tag_zero_region)
#endif /* USE_MTAG */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [glibc/nsz/mtag] aarch64: Optimize __libc_mtag_tag_zero_region
@ 2021-03-11 17:41 Szabolcs Nagy
0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-11 17:41 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d6540505dc5912b85f05cecaab6346e11ba93954
commit d6540505dc5912b85f05cecaab6346e11ba93954
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue Feb 9 17:59:11 2021 +0000
aarch64: Optimize __libc_mtag_tag_zero_region
This is a target hook for memory tagging; the original was a naive
implementation. It uses the same algorithm as __libc_mtag_tag_region,
but with instructions that also zero the memory. This was not
benchmarked on a real CPU, but it is expected to be faster than the
naive implementation.
Diff:
---
sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++++++++------
1 file changed, 80 insertions(+), 16 deletions(-)
diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S
index 74d398bba5..7d955fbd0c 100644
--- a/sysdeps/aarch64/__mtag_tag_zero_region.S
+++ b/sysdeps/aarch64/__mtag_tag_zero_region.S
@@ -20,30 +20,94 @@
#ifdef USE_MTAG
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
.arch armv8.5-a
.arch_extension memtag
-/* NB, only supported on variants with 64-bit pointers. */
+#define dstin x0
+#define count x1
+#define dst x2
+#define dstend x3
+#define tmp x4
+#define zva_val x4
-/* FIXME: This is a minimal implementation. We could do much better than
- this for large values of COUNT. */
+ENTRY (__libc_mtag_tag_zero_region)
+ PTR_ARG (0)
+ SIZE_ARG (1)
-#define dstin x0
-#define count x1
-#define dst x2
+ add dstend, dstin, count
-ENTRY(__libc_mtag_tag_zero_region)
+ cmp count, 96
+ b.hi L(set_long)
- mov dst, dstin
-L(loop):
- stzg dst, [dst], #16
- subs count, count, 16
- bne L(loop)
-#if 0
- /* This is not currently needed, since for now we are only called
- to tag memory that is taggable. */
- ldg dstin, [dstin] // Recover the tag created (might be untagged).
+ tbnz count, 6, L(set96)
+
+ /* Set 0, 16, 32, or 48 bytes. */
+ lsr tmp, count, 5
+ add tmp, dstin, tmp, lsl 4
+ cbz count, L(end)
+ stzg dstin, [dstin]
+ stzg dstin, [tmp]
+ stzg dstin, [dstend, -16]
+L(end):
+ ret
+
+ .p2align 4
+ /* Set 64..96 bytes. Write 64 bytes from the start and
+ 32 bytes from the end. */
+L(set96):
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ stz2g dstin, [dstend, -32]
+ ret
+
+ .p2align 4
+ /* Size is > 96 bytes. */
+L(set_long):
+ cmp count, 160
+ b.lo L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+ mrs zva_val, dczid_el0
+ and zva_val, zva_val, 31
+ cmp zva_val, 4 /* ZVA size is 64 bytes. */
+ b.ne L(no_zva)
#endif
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ bic dst, dstin, 63
+ sub count, dstend, dst /* Count is now 64 too large. */
+ sub count, count, 128 /* Adjust count and bias for loop. */
+
+ .p2align 4
+L(zva_loop):
+ add dst, dst, 64
+ dc gzva, dst
+ subs count, count, 64
+ b.hi L(zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
ret
+
+L(no_zva):
+ sub dst, dstin, 32 /* Dst is biased by -32. */
+ sub count, count, 64 /* Adjust count for loop. */
+L(no_zva_loop):
+ stz2g dstin, [dst, 32]
+ stz2g dstin, [dst, 64]!
+ subs count, count, 64
+ b.hi L(no_zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
+ ret
+
END (__libc_mtag_tag_zero_region)
#endif /* USE_MTAG */
^ permalink raw reply [flat|nested] 4+ messages in thread
* [glibc/nsz/mtag] aarch64: Optimize __libc_mtag_tag_zero_region
@ 2021-03-11 17:39 Szabolcs Nagy
0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-11 17:39 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d6540505dc5912b85f05cecaab6346e11ba93954
commit d6540505dc5912b85f05cecaab6346e11ba93954
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue Feb 9 17:59:11 2021 +0000
aarch64: Optimize __libc_mtag_tag_zero_region
This is a target hook for memory tagging; the original was a naive
implementation. It uses the same algorithm as __libc_mtag_tag_region,
but with instructions that also zero the memory. This was not
benchmarked on a real CPU, but it is expected to be faster than the
naive implementation.
Diff:
---
sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++++++++------
1 file changed, 80 insertions(+), 16 deletions(-)
diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S
index 74d398bba5..7d955fbd0c 100644
--- a/sysdeps/aarch64/__mtag_tag_zero_region.S
+++ b/sysdeps/aarch64/__mtag_tag_zero_region.S
@@ -20,30 +20,94 @@
#ifdef USE_MTAG
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
.arch armv8.5-a
.arch_extension memtag
-/* NB, only supported on variants with 64-bit pointers. */
+#define dstin x0
+#define count x1
+#define dst x2
+#define dstend x3
+#define tmp x4
+#define zva_val x4
-/* FIXME: This is a minimal implementation. We could do much better than
- this for large values of COUNT. */
+ENTRY (__libc_mtag_tag_zero_region)
+ PTR_ARG (0)
+ SIZE_ARG (1)
-#define dstin x0
-#define count x1
-#define dst x2
+ add dstend, dstin, count
-ENTRY(__libc_mtag_tag_zero_region)
+ cmp count, 96
+ b.hi L(set_long)
- mov dst, dstin
-L(loop):
- stzg dst, [dst], #16
- subs count, count, 16
- bne L(loop)
-#if 0
- /* This is not currently needed, since for now we are only called
- to tag memory that is taggable. */
- ldg dstin, [dstin] // Recover the tag created (might be untagged).
+ tbnz count, 6, L(set96)
+
+ /* Set 0, 16, 32, or 48 bytes. */
+ lsr tmp, count, 5
+ add tmp, dstin, tmp, lsl 4
+ cbz count, L(end)
+ stzg dstin, [dstin]
+ stzg dstin, [tmp]
+ stzg dstin, [dstend, -16]
+L(end):
+ ret
+
+ .p2align 4
+ /* Set 64..96 bytes. Write 64 bytes from the start and
+ 32 bytes from the end. */
+L(set96):
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ stz2g dstin, [dstend, -32]
+ ret
+
+ .p2align 4
+ /* Size is > 96 bytes. */
+L(set_long):
+ cmp count, 160
+ b.lo L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+ mrs zva_val, dczid_el0
+ and zva_val, zva_val, 31
+ cmp zva_val, 4 /* ZVA size is 64 bytes. */
+ b.ne L(no_zva)
#endif
+ stz2g dstin, [dstin]
+ stz2g dstin, [dstin, 32]
+ bic dst, dstin, 63
+ sub count, dstend, dst /* Count is now 64 too large. */
+ sub count, count, 128 /* Adjust count and bias for loop. */
+
+ .p2align 4
+L(zva_loop):
+ add dst, dst, 64
+ dc gzva, dst
+ subs count, count, 64
+ b.hi L(zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
ret
+
+L(no_zva):
+ sub dst, dstin, 32 /* Dst is biased by -32. */
+ sub count, count, 64 /* Adjust count for loop. */
+L(no_zva_loop):
+ stz2g dstin, [dst, 32]
+ stz2g dstin, [dst, 64]!
+ subs count, count, 64
+ b.hi L(no_zva_loop)
+ stz2g dstin, [dstend, -64]
+ stz2g dstin, [dstend, -32]
+ ret
+
END (__libc_mtag_tag_zero_region)
#endif /* USE_MTAG */
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-03-19 11:58 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-04 16:26 [glibc/nsz/mtag] aarch64: Optimize __libc_mtag_tag_zero_region Szabolcs Nagy
2021-03-11 17:39 Szabolcs Nagy
2021-03-11 17:41 Szabolcs Nagy
2021-03-19 11:58 Szabolcs Nagy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).