From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1944) id 495683851C0B; Fri, 19 Mar 2021 11:58:09 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 495683851C0B Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Szabolcs Nagy To: glibc-cvs@sourceware.org Subject: [glibc/nsz/mtag] aarch64: Optimize __libc_mtag_tag_zero_region X-Act-Checkin: glibc X-Git-Author: Szabolcs Nagy X-Git-Refname: refs/heads/nsz/mtag X-Git-Oldrev: 138ff9a5d15a9dd6fcf7238074f94632d2a05acb X-Git-Newrev: 601c47c4765e4b8cf921a719cf1d38da3eb97aa2 Message-Id: <20210319115809.495683851C0B@sourceware.org> Date: Fri, 19 Mar 2021 11:58:09 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 19 Mar 2021 11:58:09 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=601c47c4765e4b8cf921a719cf1d38da3eb97aa2 commit 601c47c4765e4b8cf921a719cf1d38da3eb97aa2 Author: Szabolcs Nagy Date: Tue Feb 9 17:59:11 2021 +0000 aarch64: Optimize __libc_mtag_tag_zero_region This is a target hook for memory tagging; the original was a naive implementation. The new version uses the same algorithm as __libc_mtag_tag_region, but with instructions that also zero the memory. This was not benchmarked on a real CPU, but it is expected to be faster than the naive implementation. Diff: --- sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S index 74d398bba5..7d955fbd0c 100644 --- a/sysdeps/aarch64/__mtag_tag_zero_region.S +++ b/sysdeps/aarch64/__mtag_tag_zero_region.S @@ -20,30 +20,94 @@ #ifdef USE_MTAG +/* Assumptions: + * + * ARMv8-a, AArch64, MTE, LP64 ABI. 
+ * + * Interface contract: + * Address is 16 byte aligned and size is multiple of 16. + * Returns the passed pointer. + * The memory region may remain untagged if tagging is not enabled. + */ .arch armv8.5-a .arch_extension memtag -/* NB, only supported on variants with 64-bit pointers. */ +#define dstin x0 +#define count x1 +#define dst x2 +#define dstend x3 +#define tmp x4 +#define zva_val x4 -/* FIXME: This is a minimal implementation. We could do much better than - this for large values of COUNT. */ +ENTRY (__libc_mtag_tag_zero_region) + PTR_ARG (0) + SIZE_ARG (1) -#define dstin x0 -#define count x1 -#define dst x2 + add dstend, dstin, count -ENTRY(__libc_mtag_tag_zero_region) + cmp count, 96 + b.hi L(set_long) - mov dst, dstin -L(loop): - stzg dst, [dst], #16 - subs count, count, 16 - bne L(loop) -#if 0 - /* This is not currently needed, since for now we are only called - to tag memory that is taggable. */ - ldg dstin, [dstin] // Recover the tag created (might be untagged). + tbnz count, 6, L(set96) + + /* Set 0, 16, 32, or 48 bytes. */ + lsr tmp, count, 5 + add tmp, dstin, tmp, lsl 4 + cbz count, L(end) + stzg dstin, [dstin] + stzg dstin, [tmp] + stzg dstin, [dstend, -16] +L(end): + ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + stz2g dstin, [dstin] + stz2g dstin, [dstin, 32] + stz2g dstin, [dstend, -32] + ret + + .p2align 4 + /* Size is > 96 bytes. */ +L(set_long): + cmp count, 160 + b.lo L(no_zva) + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(no_zva) #endif + stz2g dstin, [dstin] + stz2g dstin, [dstin, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. 
*/ + + .p2align 4 +L(zva_loop): + add dst, dst, 64 + dc gzva, dst + subs count, count, 64 + b.hi L(zva_loop) + stz2g dstin, [dstend, -64] + stz2g dstin, [dstend, -32] ret + +L(no_zva): + sub dst, dstin, 32 /* Dst is biased by -32. */ + sub count, count, 64 /* Adjust count for loop. */ +L(no_zva_loop): + stz2g dstin, [dst, 32] + stz2g dstin, [dst, 64]! + subs count, count, 64 + b.hi L(no_zva_loop) + stz2g dstin, [dstend, -64] + stz2g dstin, [dstend, -32] + ret + END (__libc_mtag_tag_zero_region) #endif /* USE_MTAG */