From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1944) id 9C5E038582B4; Tue, 2 Jan 2024 17:21:46 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9C5E038582B4 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1704216106; bh=8pTE/8tTiqLevj4f2QYJnaQqcPOyjaeTs1PpovVoa6U=; h=From:To:Subject:Date:From; b=LyLfkrDjN6Q4gaWcOeWI2xt/J+sng0F6U7AiPXnhsVGxrCdeg51d4ncuWBT16i68J dHoye3pqQ2lu0dqQi/Vpb/uxkvxVdnNRW4wnxxNGqGDJ4Qb/l8CO/LwQsEjHZ0CFaD nBzpFHnWsaqDLRT0rbqad78cOGbP7mxerqUS+lyE= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Szabolcs Nagy To: glibc-cvs@sourceware.org Subject: [glibc] aarch64: Add SME runtime support X-Act-Checkin: glibc X-Git-Author: Szabolcs Nagy X-Git-Refname: refs/heads/master X-Git-Oldrev: 67f371e882499ea46eca1b9dc76c98a7c2d06b69 X-Git-Newrev: d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f Message-Id: <20240102172146.9C5E038582B4@sourceware.org> Date: Tue, 2 Jan 2024 17:21:46 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f commit d3c32ae207d4fc3e48bb47ce1b9f2c6cf0f35c4f Author: Szabolcs Nagy Date: Fri Sep 10 16:52:17 2021 +0100 aarch64: Add SME runtime support The runtime support routines for the call ABI of the Scalable Matrix Extension (SME) are mostly in libgcc. Since libc.so cannot depend on libgcc_s.so, libc has its own implementation of __arm_za_disable for libc internal use in longjmp and similar APIs. __libc_arm_za_disable follows the same PCS rules as __arm_za_disable, but it's a hidden symbol so it does not need variant PCS marking. Using __libc_fatal instead of abort because it can print a message and works in ld.so too. But for now we don't need SME routines in ld.so. To check the SME HWCAP in asm, we need the _dl_hwcap2 member offset in _rtld_global_ro in the shared libc.so, while in libc.a the _dl_hwcap2 object is accessed directly. 
Reviewed-by: Adhemerval Zanella

Diff:
---
 sysdeps/aarch64/Makefile                |  10 ++-
 sysdeps/aarch64/__arm_za_disable.S      | 119 ++++++++++++++++++++++++++++++++
 sysdeps/aarch64/rtld-global-offsets.sym |  10 +++
 3 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index 6a9559e5f5..9d8844d9c8 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -48,7 +48,9 @@ endif
 endif
 
 ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
+gen-as-const-headers += \
+  tlsdesc.sym \
+  rtld-global-offsets.sym
 endif
 
 ifeq ($(subdir),gmon)
@@ -62,8 +64,10 @@ endif
 
 ifeq ($(subdir),misc)
 sysdep_headers += sys/ifunc.h
-sysdep_routines += __mtag_tag_zero_region \
-		   __mtag_tag_region
+sysdep_routines += \
+  __mtag_tag_zero_region \
+  __mtag_tag_region \
+  __arm_za_disable
 endif
 
 ifeq ($(subdir),malloc)
diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S
new file mode 100644
index 0000000000..649891ea7f
--- /dev/null
+++ b/sysdeps/aarch64/__arm_za_disable.S
@@ -0,0 +1,119 @@
+/* Libc internal support routine for SME.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+#define HWCAP2_SME_BIT 23
+
+/* Disable ZA.  Call ABI:
+   - Private ZA, streaming-compatible.
+   - x0-x13, x19-x29, sp and fp regs are call preserved.
+   - On return tpidr2_el0 = 0, ZA = 0.
+   - Takes no argument.
+   - Does not return a value.
+   - Can abort on failure (then registers are not preserved).  */
+
+ENTRY (__libc_arm_za_disable)
+
+	/* Check if SME is available.  */
+#ifdef SHARED
+	/* In libc.so.  */
+	adrp	x14, :got:_rtld_global_ro
+	ldr	x14, [x14, :got_lo12:_rtld_global_ro]
+	ldr	x14, [x14, GLRO_DL_HWCAP2_OFFSET]
+#else
+	/* In libc.a, may be PIC.  */
+	adrp	x14, _dl_hwcap2
+	ldr	x14, [x14, :lo12:_dl_hwcap2]
+#endif
+	tbz	x14, HWCAP2_SME_BIT, L(end)
+
+	/* Without a TPIDR2 block there is nothing to save, but ZA must
+	   still be turned off to honour the contract above.  */
+	.inst	0xd53bd0ae  /* mrs	x14, tpidr2_el0  */
+	cbz	x14, L(disable)
+
+	/* Check reserved bytes, abort on unknown extensions.  */
+	ldrh	w15, [x14, 10]
+	ldr	w16, [x14, 12]
+	orr	w15, w15, w16
+	cbnz	w15, L(fail)
+
+	/* A null buffer or a zero slice count means no data to store.  */
+	ldr	x16, [x14]
+	cbz	x16, L(disable)
+	ldrh	w17, [x14, 8]
+	cbz	w17, L(disable)
+
+	/* x14: tpidr2, x15: 0,
+	   x16: za_save_buffer, x17: num_za_save_slices.  */
+
+	/* Store 16 slices per iteration while w15 < num_za_save_slices.  */
+L(save_loop):
+	.inst	0xe1206200  /* str	za[w15, 0], [x16]  */
+	.inst	0xe1206201  /* str	za[w15, 1], [x16, 1, mul vl]  */
+	.inst	0xe1206202  /* str	za[w15, 2], [x16, 2, mul vl]  */
+	.inst	0xe1206203  /* str	za[w15, 3], [x16, 3, mul vl]  */
+	.inst	0xe1206204  /* str	za[w15, 4], [x16, 4, mul vl]  */
+	.inst	0xe1206205  /* str	za[w15, 5], [x16, 5, mul vl]  */
+	.inst	0xe1206206  /* str	za[w15, 6], [x16, 6, mul vl]  */
+	.inst	0xe1206207  /* str	za[w15, 7], [x16, 7, mul vl]  */
+	.inst	0xe1206208  /* str	za[w15, 8], [x16, 8, mul vl]  */
+	.inst	0xe1206209  /* str	za[w15, 9], [x16, 9, mul vl]  */
+	.inst	0xe120620a  /* str	za[w15, 10], [x16, 10, mul vl]  */
+	.inst	0xe120620b  /* str	za[w15, 11], [x16, 11, mul vl]  */
+	.inst	0xe120620c  /* str	za[w15, 12], [x16, 12, mul vl]  */
+	.inst	0xe120620d  /* str	za[w15, 13], [x16, 13, mul vl]  */
+	.inst	0xe120620e  /* str	za[w15, 14], [x16, 14, mul vl]  */
+	.inst	0xe120620f  /* str	za[w15, 15], [x16, 15, mul vl]  */
+	add	w15, w15, 16
+	.inst	0x04305a10  /* addsvl	x16, x16, 16  */
+	cmp	w17, w15
+	bhi	L(save_loop)
+L(disable):
+	.inst	0xd51bd0bf  /* msr	tpidr2_el0, xzr  */
+	.inst	0xd503447f  /* smstop	za  */
+L(end):
+	ret
+L(fail):
+	/* Set up a frame record, save the cntd result in the slot
+	   described as DWARF register 46, then report the failure.  */
+#if HAVE_AARCH64_PAC_RET
+	PACIASP
+	cfi_window_save
+#endif
+	stp	x29, x30, [sp, -32]!
+	cfi_adjust_cfa_offset (32)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+	mov	x29, sp
+	.inst	0x04e0e3f0  /* cntd	x16  */
+	str	x16, [sp, 16]
+	cfi_rel_offset (46, 16)
+	.inst	0xd503467f  /* smstop  */
+	adrp	x0, L(msg)
+	add	x0, x0, :lo12:L(msg)
+	bl	HIDDEN_JUMPTARGET (__libc_fatal)
+END (__libc_arm_za_disable)
+
+	.section .rodata
+	.align 3
+L(msg):
+	.string "FATAL: __libc_arm_za_disable failed.\n"
diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym
new file mode 100644
index 0000000000..23cdaf7d9e
--- /dev/null
+++ b/sysdeps/aarch64/rtld-global-offsets.sym
@@ -0,0 +1,10 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
+
+-- Offsets of _rtld_global_ro in libc.so
+
+GLRO_DL_HWCAP_OFFSET	GLRO_offsetof (dl_hwcap)
+GLRO_DL_HWCAP2_OFFSET	GLRO_offsetof (dl_hwcap2)