* [PATCH v2] LoongArch: Add support for TLS Descriptors
@ 2024-02-29 1:43 mengqinggang
2024-02-29 2:56 ` caiyinyu
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: mengqinggang @ 2024-02-29 1:43 UTC (permalink / raw)
To: libc-alpha
Cc: adhemerval.zanella, xuchenghua, caiyinyu, chenglulu, cailulu,
xry111, i.swmail, maskray, luweining, wanglei, hejinyang,
mengqinggang
This is mostly based on AArch64 and RISC-V implementation.
Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
all vector registers.
---
Changes v1 -> v2:
- Fix vr24-vr31, xr24-xr31 typo.
- Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
- Save and restore fcsr0 in _dl_tlsdesc_dynamic.
v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
elf/elf.h | 2 +
sysdeps/loongarch/Makefile | 6 +
sysdeps/loongarch/dl-link.sym | 1 +
sysdeps/loongarch/dl-machine.h | 60 ++-
sysdeps/loongarch/dl-tls.h | 9 +-
sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
sysdeps/loongarch/dl-tlsdesc.h | 53 +++
sysdeps/loongarch/linkmap.h | 1 +
sysdeps/loongarch/sys/asm.h | 1 +
sysdeps/loongarch/sys/regdef.h | 1 +
sysdeps/loongarch/tlsdesc.c | 39 ++
sysdeps/loongarch/tlsdesc.sym | 19 +
.../unix/sysv/linux/loongarch/localplt.data | 2 +
14 files changed, 625 insertions(+), 3 deletions(-)
create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
create mode 100644 sysdeps/loongarch/tlsdesc.c
create mode 100644 sysdeps/loongarch/tlsdesc.sym
diff --git a/elf/elf.h b/elf/elf.h
index f2206e5c06..eec24ea049 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4237,6 +4237,8 @@ enum
#define R_LARCH_TLS_TPREL32 10
#define R_LARCH_TLS_TPREL64 11
#define R_LARCH_IRELATIVE 12
+#define R_LARCH_TLS_DESC32 13
+#define R_LARCH_TLS_DESC64 14
/* Reserved for future relocs that the dynamic linker must understand. */
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 43d2f583cd..181389e787 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
endif
ifeq ($(subdir),elf)
+sysdep-dl-routines += tlsdesc dl-tlsdesc
gen-as-const-headers += dl-link.sym
endif
+ifeq ($(subdir),csu)
+gen-as-const-headers += tlsdesc.sym
+endif
+
+
# LoongArch's assembler also needs to know about PIC as it changes the
# definition of some assembler macros.
ASFLAGS-.os += $(pic-ccflag)
diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
index b534968e30..fd81ef37d5 100644
--- a/sysdeps/loongarch/dl-link.sym
+++ b/sysdeps/loongarch/dl-link.sym
@@ -1,6 +1,7 @@
#include <stddef.h>
#include <sysdep.h>
#include <link.h>
+#include <dl-tlsdesc.h>
DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index ab81b82d95..8ca6c224f6 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -25,7 +25,7 @@
#include <entry.h>
#include <elf/elf.h>
#include <sys/asm.h>
-#include <dl-tls.h>
+#include <dl-tlsdesc.h>
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
@@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
*addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
break;
+ case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
+ {
+ struct tlsdesc volatile *td =
+ (struct tlsdesc volatile *)addr_field;
+ if (! sym)
+ {
+ td->arg = (void*)reloc->r_addend;
+ td->entry = _dl_tlsdesc_undefweak;
+ }
+ else
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (!TRY_STATIC_TLS (map, sym_map))
+ {
+ td->arg = _dl_make_tlsdesc_dynamic
+ (sym_map, sym->st_value + reloc->r_addend);
+# if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ td->entry = _dl_tlsdesc_dynamic_lasx;
+ else
+ if (SUPPORT_LSX)
+ td->entry = _dl_tlsdesc_dynamic_lsx;
+ else
+# endif
+ td->entry = _dl_tlsdesc_dynamic;
+ }
+ else
+# endif
+ {
+ td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
+ + reloc->r_addend);
+ td->entry = _dl_tlsdesc_return;
+ }
+ }
+ break;
+ }
+
case R_LARCH_COPY:
{
if (sym == NULL)
@@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
else
*reloc_addr = map->l_mach.plt;
}
+ else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
+ {
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+ const ElfW (Sym) *sym = &symtab[symndx];
+ const struct r_found_version *version = NULL;
+
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW (Half) *vernum =
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
+ }
+
+ /* Always initialize TLS descriptors completely, because lazy
+ initialization requires synchronization at every TLS access. */
+ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+ skip_ifunc);
+ }
else
_dl_reloc_bad_type (map, r_type, 1);
}
diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
index 29924b866d..de593c002d 100644
--- a/sysdeps/loongarch/dl-tls.h
+++ b/sysdeps/loongarch/dl-tls.h
@@ -16,6 +16,9 @@
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
+#ifndef _DL_TLS_H
+#define _DL_TLS_H
+
/* Type used for the representation of TLS information in the GOT. */
typedef struct
{
@@ -23,6 +26,8 @@ typedef struct
unsigned long int ti_offset;
} tls_index;
+extern void *__tls_get_addr (tls_index *ti);
+
/* The thread pointer points to the first static TLS block. */
#define TLS_TP_OFFSET 0
@@ -37,10 +42,10 @@ typedef struct
/* Compute the value for a DTPREL reloc. */
#define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
-extern void *__tls_get_addr (tls_index *ti);
-
#define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
#define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
/* Value used for dtv entries for which the allocation is delayed. */
#define TLS_DTV_UNALLOCATED ((void *) -1l)
+
+#endif
diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
new file mode 100644
index 0000000000..0d8c9bb991
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
@@ -0,0 +1,341 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+ LoongArch version.
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifdef USE_LASX
+# define FRAME_SIZE (-((-14 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) /* GPRs + xr0-xr31 + fcsr0 */
+#elif defined USE_LSX
+# define FRAME_SIZE (-((-14 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) /* GPRs + vr0-vr31 + fcsr0 */
+#elif !defined __loongarch_soft_float
+# define FRAME_SIZE (-((-14 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) /* GPRs + fa0-fa7, ft0-ft15 + fcsr0 */
+#else
+# define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) /* GPRs only */
+#endif
+/* Slow-path frame layout: 14 GPR slots, then the FP/vector save area, then fcsr0. */
+#ifdef SHARED
+ /* Handler for dynamic TLS symbols.
+ Prototype:
+ _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+ The second word of the descriptor points to a
+ tlsdesc_dynamic_arg structure.
+
+ Returns the offset between the thread pointer and the
+ object referenced by the argument.
+
+ ptrdiff_t
+ __attribute__ ((__regparm__ (1)))
+ _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+ {
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_DTV);
+ if (__builtin_expect (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val
+ + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return __tls_get_addr (&td->tlsinfo) - __thread_pointer;
+ }
+ */
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_dynamic:
+ /* Save just enough registers to support the fast path. The frame is
+ 32 bytes rather than 24 so sp stays 16-byte aligned for Lslow. */
+ ADDI sp, sp, -32
+ REG_S t0, sp, 0
+ REG_S t1, sp, 8
+ REG_S t2, sp, 16
+
+ REG_L t0, tp, -SIZE_OF_DTV /* t0 = dtv, read from tp - sizeof (tcbhead_t) */
+ REG_L a0, a0, TLSDESC_ARG /* a0 = td = tdp->arg */
+ REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
+ REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */
+ bltu t2, t1, Lslow /* dtv[0].counter < td->gen_count */
+
+ REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
+ slli.d t1, t1, 3 + 1 /* sizeof (dtv_t) == sizeof (void *) * 2 */
+ add.d t1, t1, t0 /* t1 = &dtv[td->tlsinfo.ti_module] */
+ REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
+ li.d t2, TLS_DTV_UNALLOCATED
+ beq t1, t2, Lslow /* pointer.val == TLS_DTV_UNALLOCATED */
+ REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */
+ /* a0 = dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
+ add.d a0, t1, t2
+Lret:
+ sub.d a0, a0, tp /* return the offset relative to tp */
+ REG_L t0, sp, 0
+ REG_L t1, sp, 8
+ REG_L t2, sp, 16
+ ADDI sp, sp, 32
+ RET
+
+Lslow:
+ /* Slow path: call __tls_get_addr. Save the remaining registers that
+ we must treat as caller save and restore them after the call;
+ t0-t2 are already on the fast-path frame. */
+ ADDI sp, sp, -FRAME_SIZE
+ REG_S ra, sp, 0 * SZREG
+ REG_S a1, sp, 1 * SZREG
+ REG_S a2, sp, 2 * SZREG
+ REG_S a3, sp, 3 * SZREG
+ REG_S a4, sp, 4 * SZREG
+ REG_S a5, sp, 5 * SZREG
+ REG_S a6, sp, 6 * SZREG
+ REG_S a7, sp, 7 * SZREG
+ REG_S t3, sp, 8 * SZREG
+ REG_S t4, sp, 9 * SZREG
+ REG_S t5, sp, 10 * SZREG
+ REG_S t6, sp, 11 * SZREG
+ REG_S t7, sp, 12 * SZREG
+ REG_S t8, sp, 13 * SZREG
+
+#ifdef USE_LASX
+ xvst xr0, sp, 14*SZREG + 0*SZXREG
+ xvst xr1, sp, 14*SZREG + 1*SZXREG
+ xvst xr2, sp, 14*SZREG + 2*SZXREG
+ xvst xr3, sp, 14*SZREG + 3*SZXREG
+ xvst xr4, sp, 14*SZREG + 4*SZXREG
+ xvst xr5, sp, 14*SZREG + 5*SZXREG
+ xvst xr6, sp, 14*SZREG + 6*SZXREG
+ xvst xr7, sp, 14*SZREG + 7*SZXREG
+ xvst xr8, sp, 14*SZREG + 8*SZXREG
+ xvst xr9, sp, 14*SZREG + 9*SZXREG
+ xvst xr10, sp, 14*SZREG + 10*SZXREG
+ xvst xr11, sp, 14*SZREG + 11*SZXREG
+ xvst xr12, sp, 14*SZREG + 12*SZXREG
+ xvst xr13, sp, 14*SZREG + 13*SZXREG
+ xvst xr14, sp, 14*SZREG + 14*SZXREG
+ xvst xr15, sp, 14*SZREG + 15*SZXREG
+ xvst xr16, sp, 14*SZREG + 16*SZXREG
+ xvst xr17, sp, 14*SZREG + 17*SZXREG
+ xvst xr18, sp, 14*SZREG + 18*SZXREG
+ xvst xr19, sp, 14*SZREG + 19*SZXREG
+ xvst xr20, sp, 14*SZREG + 20*SZXREG
+ xvst xr21, sp, 14*SZREG + 21*SZXREG
+ xvst xr22, sp, 14*SZREG + 22*SZXREG
+ xvst xr23, sp, 14*SZREG + 23*SZXREG
+ xvst xr24, sp, 14*SZREG + 24*SZXREG
+ xvst xr25, sp, 14*SZREG + 25*SZXREG
+ xvst xr26, sp, 14*SZREG + 26*SZXREG
+ xvst xr27, sp, 14*SZREG + 27*SZXREG
+ xvst xr28, sp, 14*SZREG + 28*SZXREG
+ xvst xr29, sp, 14*SZREG + 29*SZXREG
+ xvst xr30, sp, 14*SZREG + 30*SZXREG
+ xvst xr31, sp, 14*SZREG + 31*SZXREG
+ /* Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of some
+ fields in fcsr0. Its slot comes after the xr0-xr31 save area. */
+ movfcsr2gr t0, fcsr0
+ REG_S t0, sp, 14*SZREG + 32*SZXREG
+#elif defined USE_LSX
+ vst vr0, sp, 14*SZREG + 0*SZVREG
+ vst vr1, sp, 14*SZREG + 1*SZVREG
+ vst vr2, sp, 14*SZREG + 2*SZVREG
+ vst vr3, sp, 14*SZREG + 3*SZVREG
+ vst vr4, sp, 14*SZREG + 4*SZVREG
+ vst vr5, sp, 14*SZREG + 5*SZVREG
+ vst vr6, sp, 14*SZREG + 6*SZVREG
+ vst vr7, sp, 14*SZREG + 7*SZVREG
+ vst vr8, sp, 14*SZREG + 8*SZVREG
+ vst vr9, sp, 14*SZREG + 9*SZVREG
+ vst vr10, sp, 14*SZREG + 10*SZVREG
+ vst vr11, sp, 14*SZREG + 11*SZVREG
+ vst vr12, sp, 14*SZREG + 12*SZVREG
+ vst vr13, sp, 14*SZREG + 13*SZVREG
+ vst vr14, sp, 14*SZREG + 14*SZVREG
+ vst vr15, sp, 14*SZREG + 15*SZVREG
+ vst vr16, sp, 14*SZREG + 16*SZVREG
+ vst vr17, sp, 14*SZREG + 17*SZVREG
+ vst vr18, sp, 14*SZREG + 18*SZVREG
+ vst vr19, sp, 14*SZREG + 19*SZVREG
+ vst vr20, sp, 14*SZREG + 20*SZVREG
+ vst vr21, sp, 14*SZREG + 21*SZVREG
+ vst vr22, sp, 14*SZREG + 22*SZVREG
+ vst vr23, sp, 14*SZREG + 23*SZVREG
+ vst vr24, sp, 14*SZREG + 24*SZVREG
+ vst vr25, sp, 14*SZREG + 25*SZVREG
+ vst vr26, sp, 14*SZREG + 26*SZVREG
+ vst vr27, sp, 14*SZREG + 27*SZVREG
+ vst vr28, sp, 14*SZREG + 28*SZVREG
+ vst vr29, sp, 14*SZREG + 29*SZVREG
+ vst vr30, sp, 14*SZREG + 30*SZVREG
+ vst vr31, sp, 14*SZREG + 31*SZVREG
+ /* Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of some
+ fields in fcsr0. Its slot comes after the vr0-vr31 save area. */
+ movfcsr2gr t0, fcsr0
+ REG_S t0, sp, 14*SZREG + 32*SZVREG
+#elif !defined __loongarch_soft_float
+ FREG_S fa0, sp, 14*SZREG + 0*SZFREG
+ FREG_S fa1, sp, 14*SZREG + 1*SZFREG
+ FREG_S fa2, sp, 14*SZREG + 2*SZFREG
+ FREG_S fa3, sp, 14*SZREG + 3*SZFREG
+ FREG_S fa4, sp, 14*SZREG + 4*SZFREG
+ FREG_S fa5, sp, 14*SZREG + 5*SZFREG
+ FREG_S fa6, sp, 14*SZREG + 6*SZFREG
+ FREG_S fa7, sp, 14*SZREG + 7*SZFREG
+ FREG_S ft0, sp, 14*SZREG + 8*SZFREG
+ FREG_S ft1, sp, 14*SZREG + 9*SZFREG
+ FREG_S ft2, sp, 14*SZREG + 10*SZFREG
+ FREG_S ft3, sp, 14*SZREG + 11*SZFREG
+ FREG_S ft4, sp, 14*SZREG + 12*SZFREG
+ FREG_S ft5, sp, 14*SZREG + 13*SZFREG
+ FREG_S ft6, sp, 14*SZREG + 14*SZFREG
+ FREG_S ft7, sp, 14*SZREG + 15*SZFREG
+ FREG_S ft8, sp, 14*SZREG + 16*SZFREG
+ FREG_S ft9, sp, 14*SZREG + 17*SZFREG
+ FREG_S ft10, sp, 14*SZREG + 18*SZFREG
+ FREG_S ft11, sp, 14*SZREG + 19*SZFREG
+ FREG_S ft12, sp, 14*SZREG + 20*SZFREG
+ FREG_S ft13, sp, 14*SZREG + 21*SZFREG
+ FREG_S ft14, sp, 14*SZREG + 22*SZFREG
+ FREG_S ft15, sp, 14*SZREG + 23*SZFREG
+ /* Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of some
+ fields in fcsr0. Its slot comes after the FP register save area. */
+ movfcsr2gr t0, fcsr0
+ REG_S t0, sp, 14*SZREG + 24*SZFREG
+#endif /* #ifdef USE_LASX */
+
+ bl __tls_get_addr /* a0 = td; tlsinfo is the first member of *td */
+ ADDI a0, a0, -TLS_DTV_OFFSET
+
+ REG_L ra, sp, 0 * SZREG
+ REG_L a1, sp, 1 * SZREG
+ REG_L a2, sp, 2 * SZREG
+ REG_L a3, sp, 3 * SZREG
+ REG_L a4, sp, 4 * SZREG
+ REG_L a5, sp, 5 * SZREG
+ REG_L a6, sp, 6 * SZREG
+ REG_L a7, sp, 7 * SZREG
+ REG_L t3, sp, 8 * SZREG
+ REG_L t4, sp, 9 * SZREG
+ REG_L t5, sp, 10 * SZREG
+ REG_L t6, sp, 11 * SZREG
+ REG_L t7, sp, 12 * SZREG
+ REG_L t8, sp, 13 * SZREG
+
+#ifdef USE_LASX
+ xvld xr0, sp, 14*SZREG + 0*SZXREG
+ xvld xr1, sp, 14*SZREG + 1*SZXREG
+ xvld xr2, sp, 14*SZREG + 2*SZXREG
+ xvld xr3, sp, 14*SZREG + 3*SZXREG
+ xvld xr4, sp, 14*SZREG + 4*SZXREG
+ xvld xr5, sp, 14*SZREG + 5*SZXREG
+ xvld xr6, sp, 14*SZREG + 6*SZXREG
+ xvld xr7, sp, 14*SZREG + 7*SZXREG
+ xvld xr8, sp, 14*SZREG + 8*SZXREG
+ xvld xr9, sp, 14*SZREG + 9*SZXREG
+ xvld xr10, sp, 14*SZREG + 10*SZXREG
+ xvld xr11, sp, 14*SZREG + 11*SZXREG
+ xvld xr12, sp, 14*SZREG + 12*SZXREG
+ xvld xr13, sp, 14*SZREG + 13*SZXREG
+ xvld xr14, sp, 14*SZREG + 14*SZXREG
+ xvld xr15, sp, 14*SZREG + 15*SZXREG
+ xvld xr16, sp, 14*SZREG + 16*SZXREG
+ xvld xr17, sp, 14*SZREG + 17*SZXREG
+ xvld xr18, sp, 14*SZREG + 18*SZXREG
+ xvld xr19, sp, 14*SZREG + 19*SZXREG
+ xvld xr20, sp, 14*SZREG + 20*SZXREG
+ xvld xr21, sp, 14*SZREG + 21*SZXREG
+ xvld xr22, sp, 14*SZREG + 22*SZXREG
+ xvld xr23, sp, 14*SZREG + 23*SZXREG
+ xvld xr24, sp, 14*SZREG + 24*SZXREG
+ xvld xr25, sp, 14*SZREG + 25*SZXREG
+ xvld xr26, sp, 14*SZREG + 26*SZXREG
+ xvld xr27, sp, 14*SZREG + 27*SZXREG
+ xvld xr28, sp, 14*SZREG + 28*SZXREG
+ xvld xr29, sp, 14*SZREG + 29*SZXREG
+ xvld xr30, sp, 14*SZREG + 30*SZXREG
+ xvld xr31, sp, 14*SZREG + 31*SZXREG
+ REG_L t0, sp, 14*SZREG + 32*SZXREG
+ movgr2fcsr fcsr0, t0
+#elif defined USE_LSX
+ vld vr0, sp, 14*SZREG + 0*SZVREG
+ vld vr1, sp, 14*SZREG + 1*SZVREG
+ vld vr2, sp, 14*SZREG + 2*SZVREG
+ vld vr3, sp, 14*SZREG + 3*SZVREG
+ vld vr4, sp, 14*SZREG + 4*SZVREG
+ vld vr5, sp, 14*SZREG + 5*SZVREG
+ vld vr6, sp, 14*SZREG + 6*SZVREG
+ vld vr7, sp, 14*SZREG + 7*SZVREG
+ vld vr8, sp, 14*SZREG + 8*SZVREG
+ vld vr9, sp, 14*SZREG + 9*SZVREG
+ vld vr10, sp, 14*SZREG + 10*SZVREG
+ vld vr11, sp, 14*SZREG + 11*SZVREG
+ vld vr12, sp, 14*SZREG + 12*SZVREG
+ vld vr13, sp, 14*SZREG + 13*SZVREG
+ vld vr14, sp, 14*SZREG + 14*SZVREG
+ vld vr15, sp, 14*SZREG + 15*SZVREG
+ vld vr16, sp, 14*SZREG + 16*SZVREG
+ vld vr17, sp, 14*SZREG + 17*SZVREG
+ vld vr18, sp, 14*SZREG + 18*SZVREG
+ vld vr19, sp, 14*SZREG + 19*SZVREG
+ vld vr20, sp, 14*SZREG + 20*SZVREG
+ vld vr21, sp, 14*SZREG + 21*SZVREG
+ vld vr22, sp, 14*SZREG + 22*SZVREG
+ vld vr23, sp, 14*SZREG + 23*SZVREG
+ vld vr24, sp, 14*SZREG + 24*SZVREG
+ vld vr25, sp, 14*SZREG + 25*SZVREG
+ vld vr26, sp, 14*SZREG + 26*SZVREG
+ vld vr27, sp, 14*SZREG + 27*SZVREG
+ vld vr28, sp, 14*SZREG + 28*SZVREG
+ vld vr29, sp, 14*SZREG + 29*SZVREG
+ vld vr30, sp, 14*SZREG + 30*SZVREG
+ vld vr31, sp, 14*SZREG + 31*SZVREG
+ REG_L t0, sp, 14*SZREG + 32*SZVREG
+ movgr2fcsr fcsr0, t0
+#elif !defined __loongarch_soft_float
+ FREG_L fa0, sp, 14*SZREG + 0*SZFREG
+ FREG_L fa1, sp, 14*SZREG + 1*SZFREG
+ FREG_L fa2, sp, 14*SZREG + 2*SZFREG
+ FREG_L fa3, sp, 14*SZREG + 3*SZFREG
+ FREG_L fa4, sp, 14*SZREG + 4*SZFREG
+ FREG_L fa5, sp, 14*SZREG + 5*SZFREG
+ FREG_L fa6, sp, 14*SZREG + 6*SZFREG
+ FREG_L fa7, sp, 14*SZREG + 7*SZFREG
+ FREG_L ft0, sp, 14*SZREG + 8*SZFREG
+ FREG_L ft1, sp, 14*SZREG + 9*SZFREG
+ FREG_L ft2, sp, 14*SZREG + 10*SZFREG
+ FREG_L ft3, sp, 14*SZREG + 11*SZFREG
+ FREG_L ft4, sp, 14*SZREG + 12*SZFREG
+ FREG_L ft5, sp, 14*SZREG + 13*SZFREG
+ FREG_L ft6, sp, 14*SZREG + 14*SZFREG
+ FREG_L ft7, sp, 14*SZREG + 15*SZFREG
+ FREG_L ft8, sp, 14*SZREG + 16*SZFREG
+ FREG_L ft9, sp, 14*SZREG + 17*SZFREG
+ FREG_L ft10, sp, 14*SZREG + 18*SZFREG
+ FREG_L ft11, sp, 14*SZREG + 19*SZFREG
+ FREG_L ft12, sp, 14*SZREG + 20*SZFREG
+ FREG_L ft13, sp, 14*SZREG + 21*SZFREG
+ FREG_L ft14, sp, 14*SZREG + 22*SZFREG
+ FREG_L ft15, sp, 14*SZREG + 23*SZFREG
+ REG_L t0, sp, 14*SZREG + 24*SZFREG
+ movgr2fcsr fcsr0, t0
+#endif /* #ifdef USE_LASX */
+
+ ADDI sp, sp, FRAME_SIZE
+ b Lret
+ cfi_endproc
+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+#endif /* #ifdef SHARED */
diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
new file mode 100644
index 0000000000..4a17079169
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -0,0 +1,93 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+ LoongArch version.
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+ .text
+
+ /* Compute the thread pointer offset for symbols in the static
+ TLS block. The offset is the same for all threads, so it is
+ simply read back from the second word of the descriptor.
+ Prototype: _dl_tlsdesc_return (tlsdesc *); */
+ .hidden _dl_tlsdesc_return
+ .global _dl_tlsdesc_return
+ .type _dl_tlsdesc_return,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_return:
+ REG_L a0, a0, TLSDESC_ARG /* a0 = tdp->arg */
+ RET
+ cfi_endproc
+ .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+ /* Handler for undefined weak TLS symbols.
+ Prototype:
+ _dl_tlsdesc_undefweak (tlsdesc *);
+
+ The second word of the descriptor contains the addend.
+ Return the addend minus the thread pointer. This ensures
+ that when the caller adds on the thread pointer it gets back
+ the addend. */
+ .hidden _dl_tlsdesc_undefweak
+ .global _dl_tlsdesc_undefweak
+ .type _dl_tlsdesc_undefweak,%function
+ cfi_startproc
+ .align 2
+_dl_tlsdesc_undefweak:
+ REG_L a0, a0, TLSDESC_ARG /* a0 = tdp->arg (the addend) */
+ sub.d a0, a0, tp /* a0 = addend - tp */
+ RET
+ cfi_endproc
+ .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+
+#ifdef SHARED
+/* dl-tlsdesc-dynamic.h is included once per handler variant below. */
+#if !defined __loongarch_soft_float
+/* LASX variant: the function name and its labels are renamed by macro. */
+#define USE_LASX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
+#define Lret Lret_lasx
+#define Lslow Lslow_lasx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef USE_LASX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+/* LSX variant, renamed the same way. */
+#define USE_LSX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
+#define Lret Lret_lsx
+#define Lslow Lslow_lsx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef USE_LSX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+
+#endif /* !defined __loongarch_soft_float */
+/* Base variant: plain _dl_tlsdesc_dynamic with neither USE_LASX nor USE_LSX. */
+#include "dl-tlsdesc-dynamic.h"
+
+#endif /* #ifdef SHARED */
diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
new file mode 100644
index 0000000000..988037a714
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc.h
@@ -0,0 +1,53 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+ LoongArch version.
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_TLSDESC_H
+#define _DL_TLSDESC_H
+
+#include <dl-tls.h>
+
+/* Type used to represent a TLS descriptor in the GOT. */
+struct tlsdesc
+{
+ ptrdiff_t (*entry) (struct tlsdesc *); /* Handler; takes a pointer to a descriptor. */
+ void *arg; /* Handler-specific word: addend, TP offset or tlsdesc_dynamic_arg pointer. */
+};
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+ needs dynamic TLS offsets. */
+struct tlsdesc_dynamic_arg
+{
+ tls_index tlsinfo; /* Module and offset; its address is passed to __tls_get_addr. */
+ size_t gen_count; /* Compared with dtv[0].counter by _dl_tlsdesc_dynamic. */
+};
+
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
+/* The dynamic handlers and their argument allocator are declared for SHARED only. */
+# ifdef SHARED
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+#if !defined __loongarch_soft_float
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
+#endif /* !defined __loongarch_soft_float */
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
+#endif /* SHARED */
+
+#endif
diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
index 4d8737ee7f..9b1773634c 100644
--- a/sysdeps/loongarch/linkmap.h
+++ b/sysdeps/loongarch/linkmap.h
@@ -19,4 +19,5 @@
struct link_map_machine
{
ElfW (Addr) plt; /* Address of .plt. */
+ void *tlsdesc_table; /* Address of TLS descriptor hash table. */
};
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index 51521a7eb4..23c1d12914 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -25,6 +25,7 @@
/* Macros to handle different pointer/register sizes for 32/64-bit code. */
#define SZREG 8
#define SZFREG 8
+#define SZFCSREG 4
#define SZVREG 16
#define SZXREG 32
#define REG_L ld.d
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
index f61ee25b25..80ce3e9c00 100644
--- a/sysdeps/loongarch/sys/regdef.h
+++ b/sysdeps/loongarch/sys/regdef.h
@@ -97,6 +97,7 @@
#define fcc5 $fcc5
#define fcc6 $fcc6
#define fcc7 $fcc7
+#define fcsr0 $fcsr0
#define vr0 $vr0
#define vr1 $vr1
diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
new file mode 100644
index 0000000000..a357e7619f
--- /dev/null
+++ b/sysdeps/loongarch/tlsdesc.c
@@ -0,0 +1,39 @@
+/* Manage TLS descriptors. LoongArch version.
+
+ Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-unmap-segments.h>
+#include <tlsdeschtab.h>
+
+/* Unmap MAP's segments and then, in the shared build, delete the TLS
+ descriptor hash table recorded in MAP's machine data, if any. */
+
+void
+_dl_unmap (struct link_map *map)
+{
+ _dl_unmap_segments (map);
+#ifdef SHARED
+ void *desc_table = map->l_mach.tlsdesc_table;
+ if (desc_table != NULL)
+ htab_delete (desc_table);
+#endif
+}
diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
new file mode 100644
index 0000000000..bcab218631
--- /dev/null
+++ b/sysdeps/loongarch/tlsdesc.sym
@@ -0,0 +1,19 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+TLSDESC_ARG offsetof(struct tlsdesc, arg)
+TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+TCBHEAD_DTV offsetof(tcbhead_t, dtv)
+DTV_COUNTER offsetof(dtv_t, counter)
+TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
+TLS_DTV_OFFSET TLS_DTV_OFFSET
+SIZE_OF_DTV sizeof(tcbhead_t)
diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
index 547b1c1b7f..ec32e6d13f 100644
--- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
+++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
@@ -5,3 +5,5 @@ libc.so: calloc
libc.so: free
libc.so: malloc
libc.so: realloc
+# The dynamic loader needs __tls_get_addr for TLS.
+ld.so: __tls_get_addr
--
2.36.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-02-29 1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
@ 2024-02-29 2:56 ` caiyinyu
2024-03-04 15:42 ` H.J. Lu
2024-03-05 19:29 ` Adhemerval Zanella Netto
2 siblings, 0 replies; 8+ messages in thread
From: caiyinyu @ 2024-02-29 2:56 UTC (permalink / raw)
To: mengqinggang, libc-alpha
Cc: adhemerval.zanella, xuchenghua, chenglulu, cailulu, xry111,
i.swmail, maskray, luweining, wanglei, hejinyang
在 2024/2/29 上午9:43, mengqinggang 写道:
> This is mostly based on AArch64 and RISC-V implementation.
>
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>
> elf/elf.h | 2 +
> sysdeps/loongarch/Makefile | 6 +
> sysdeps/loongarch/dl-link.sym | 1 +
> sysdeps/loongarch/dl-machine.h | 60 ++-
> sysdeps/loongarch/dl-tls.h | 9 +-
> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
> sysdeps/loongarch/linkmap.h | 1 +
> sysdeps/loongarch/sys/asm.h | 1 +
> sysdeps/loongarch/sys/regdef.h | 1 +
> sysdeps/loongarch/tlsdesc.c | 39 ++
> sysdeps/loongarch/tlsdesc.sym | 19 +
> .../unix/sysv/linux/loongarch/localplt.data | 2 +
> 14 files changed, 625 insertions(+), 3 deletions(-)
> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
> create mode 100644 sysdeps/loongarch/tlsdesc.c
> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
> #define R_LARCH_TLS_TPREL32 10
> #define R_LARCH_TLS_TPREL64 11
> #define R_LARCH_IRELATIVE 12
> +#define R_LARCH_TLS_DESC32 13
> +#define R_LARCH_TLS_DESC64 14
>
> /* Reserved for future relocs that the dynamic linker must understand. */
>
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
> endif
>
> ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
> gen-as-const-headers += dl-link.sym
> endif
>
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
> # LoongArch's assembler also needs to know about PIC as it changes the
> # definition of some assembler macros.
> ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
> #include <stddef.h>
> #include <sysdep.h>
> #include <link.h>
> +#include <dl-tlsdesc.h>
>
> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
> #include <entry.h>
> #include <elf/elf.h>
> #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
> #include <dl-static-tls.h>
> #include <dl-machine-rel.h>
>
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
> break;
>
> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> + {
> + struct tlsdesc volatile *td =
> + (struct tlsdesc volatile *)addr_field;
> + if (! sym)
Use sym == NULL instead of ! sym; the same applies to other similar cases.
> + {
> + td->arg = (void*)reloc->r_addend;
> + td->entry = _dl_tlsdesc_undefweak;
> + }
> + else
> + {
> +# ifndef SHARED
> + CHECK_STATIC_TLS (map, sym_map);
> +# else
> + if (!TRY_STATIC_TLS (map, sym_map))
> + {
> + td->arg = _dl_make_tlsdesc_dynamic
> + (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> + if (SUPPORT_LASX)
> + td->entry = _dl_tlsdesc_dynamic_lasx;
> + else
> + if (SUPPORT_LSX)
> + td->entry = _dl_tlsdesc_dynamic_lsx;
> + else
> +# endif
> + td->entry = _dl_tlsdesc_dynamic;
> + }
> + else
> +# endif
> + {
> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> + + reloc->r_addend);
> + td->entry = _dl_tlsdesc_return;
> + }
> + }
> + break;
> + }
> +
> case R_LARCH_COPY:
> {
> if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> else
> *reloc_addr = map->l_mach.plt;
> }
> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
Use __glibc_unlikely/likely instead of __builtin_expect; the same
applies below.
> + {
> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> + const ElfW (Sym) *sym = &symtab[symndx];
> + const struct r_found_version *version = NULL;
> +
> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> + {
> + const ElfW (Half) *vernum =
> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> + version = &map->l_versions[vernum[symndx] & 0x7fff];
> + }
> +
> + /* Always initialize TLS descriptors completely, because lazy
> + initialization requires synchronization at every TLS access. */
> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> + skip_ifunc);
> + }
> else
> _dl_reloc_bad_type (map, r_type, 1);
> }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
> License along with the GNU C Library. If not, see
> <https://www.gnu.org/licenses/>. */
>
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
> /* Type used for the representation of TLS information in the GOT. */
> typedef struct
> {
> @@ -23,6 +26,8 @@ typedef struct
> unsigned long int ti_offset;
> } tls_index;
>
> +extern void *__tls_get_addr (tls_index *ti);
> +
> /* The thread pointer points to the first static TLS block. */
> #define TLS_TP_OFFSET 0
>
> @@ -37,10 +42,10 @@ typedef struct
> /* Compute the value for a DTPREL reloc. */
> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>
> -extern void *__tls_get_addr (tls_index *ti);
> -
> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>
> /* Value used for dtv entries for which the allocation is delayed. */
> #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif
> +
> +#ifdef SHARED
> + /* Handler for dynamic TLS symbols.
> + Prototype:
> + _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> + The second word of the descriptor points to a
> + tlsdesc_dynamic_arg structure.
> +
> + Returns the offset between the thread pointer and the
> + object referenced by the argument.
> +
> + ptrdiff_t
> + __attribute__ ((__regparm__ (1)))
> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> + {
> + struct tlsdesc_dynamic_arg *td = tdp->arg;
> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> + if (__builtin_expect (td->gen_count <= dtv[0].counter
> + && (dtv[td->tlsinfo.ti_module].pointer.val
> + != TLS_DTV_UNALLOCATED),
> + 1))
> + return dtv[td->tlsinfo.ti_module].pointer.val
> + + td->tlsinfo.ti_offset
> + - __thread_pointer;
> +
> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> + }
> + */
> + .hidden _dl_tlsdesc_dynamic
> + .global _dl_tlsdesc_dynamic
> + .type _dl_tlsdesc_dynamic,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_dynamic:
> + /* Save just enough registers to support fast path, if we fall
> + into slow path we will save additional registers. */
> + ADDI sp, sp,-24
> + REG_S t0, sp, 0
> + REG_S t1, sp, 8
> + REG_S t2, sp, 16
> +
> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
> + bltu t2, t1, Lslow
> +
> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> + li.d t2, TLS_DTV_UNALLOCATED
> + beq t1, t2, Lslow
> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> + add.d a0, t1, t2
> +Lret:
> + sub.d a0, a0, tp
> + REG_L t0, sp, 0
> + REG_L t1, sp, 8
> + REG_L t2, sp, 16
> + ADDI sp, sp, 24
> + RET
> +
> +Lslow:
> + /* This is the slow path. We need to call __tls_get_addr() which
> + means we need to save and restore all the register that the
> + callee will trash. */
> +
> + /* Save the remaining registers that we must treat as caller save. */
> + ADDI sp, sp, -FRAME_SIZE
> + REG_S ra, sp, 0 * SZREG
> + REG_S a1, sp, 1 * SZREG
> + REG_S a2, sp, 2 * SZREG
> + REG_S a3, sp, 3 * SZREG
> + REG_S a4, sp, 4 * SZREG
> + REG_S a5, sp, 5 * SZREG
> + REG_S a6, sp, 6 * SZREG
> + REG_S a7, sp, 7 * SZREG
> + REG_S t4, sp, 8 * SZREG
> + REG_S t5, sp, 9 * SZREG
> + REG_S t6, sp, 10 * SZREG
> + REG_S t7, sp, 11 * SZREG
> + REG_S t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> + xvst xr0, sp, 13*SZREG + 0*SZXREG
> + xvst xr1, sp, 13*SZREG + 1*SZXREG
> + xvst xr2, sp, 13*SZREG + 2*SZXREG
> + xvst xr3, sp, 13*SZREG + 3*SZXREG
> + xvst xr4, sp, 13*SZREG + 4*SZXREG
> + xvst xr5, sp, 13*SZREG + 5*SZXREG
> + xvst xr6, sp, 13*SZREG + 6*SZXREG
> + xvst xr7, sp, 13*SZREG + 7*SZXREG
> + xvst xr8, sp, 13*SZREG + 8*SZXREG
> + xvst xr9, sp, 13*SZREG + 9*SZXREG
> + xvst xr10, sp, 13*SZREG + 10*SZXREG
> + xvst xr11, sp, 13*SZREG + 11*SZXREG
> + xvst xr12, sp, 13*SZREG + 12*SZXREG
> + xvst xr13, sp, 13*SZREG + 13*SZXREG
> + xvst xr14, sp, 13*SZREG + 14*SZXREG
> + xvst xr15, sp, 13*SZREG + 15*SZXREG
> + xvst xr16, sp, 13*SZREG + 16*SZXREG
> + xvst xr17, sp, 13*SZREG + 17*SZXREG
> + xvst xr18, sp, 13*SZREG + 18*SZXREG
> + xvst xr19, sp, 13*SZREG + 19*SZXREG
> + xvst xr20, sp, 13*SZREG + 20*SZXREG
> + xvst xr21, sp, 13*SZREG + 21*SZXREG
> + xvst xr22, sp, 13*SZREG + 22*SZXREG
> + xvst xr23, sp, 13*SZREG + 23*SZXREG
> + xvst xr24, sp, 13*SZREG + 24*SZXREG
> + xvst xr25, sp, 13*SZREG + 25*SZXREG
> + xvst xr26, sp, 13*SZREG + 26*SZXREG
> + xvst xr27, sp, 13*SZREG + 27*SZXREG
> + xvst xr28, sp, 13*SZREG + 28*SZXREG
> + xvst xr29, sp, 13*SZREG + 29*SZXREG
> + xvst xr30, sp, 13*SZREG + 30*SZXREG
> + xvst xr31, sp, 13*SZREG + 31*SZXREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> + vst vr0, sp, 13*SZREG + 0*SZVREG
> + vst vr1, sp, 13*SZREG + 1*SZVREG
> + vst vr2, sp, 13*SZREG + 2*SZVREG
> + vst vr3, sp, 13*SZREG + 3*SZVREG
> + vst vr4, sp, 13*SZREG + 4*SZVREG
> + vst vr5, sp, 13*SZREG + 5*SZVREG
> + vst vr6, sp, 13*SZREG + 6*SZVREG
> + vst vr7, sp, 13*SZREG + 7*SZVREG
> + vst vr8, sp, 13*SZREG + 8*SZVREG
> + vst vr9, sp, 13*SZREG + 9*SZVREG
> + vst vr10, sp, 13*SZREG + 10*SZVREG
> + vst vr11, sp, 13*SZREG + 11*SZVREG
> + vst vr12, sp, 13*SZREG + 12*SZVREG
> + vst vr13, sp, 13*SZREG + 13*SZVREG
> + vst vr14, sp, 13*SZREG + 14*SZVREG
> + vst vr15, sp, 13*SZREG + 15*SZVREG
> + vst vr16, sp, 13*SZREG + 16*SZVREG
> + vst vr17, sp, 13*SZREG + 17*SZVREG
> + vst vr18, sp, 13*SZREG + 18*SZVREG
> + vst vr19, sp, 13*SZREG + 19*SZVREG
> + vst vr20, sp, 13*SZREG + 20*SZVREG
> + vst vr21, sp, 13*SZREG + 21*SZVREG
> + vst vr22, sp, 13*SZREG + 22*SZVREG
> + vst vr23, sp, 13*SZREG + 23*SZVREG
> + vst vr24, sp, 13*SZREG + 24*SZVREG
> + vst vr25, sp, 13*SZREG + 25*SZVREG
> + vst vr26, sp, 13*SZREG + 26*SZVREG
> + vst vr27, sp, 13*SZREG + 27*SZVREG
> + vst vr28, sp, 13*SZREG + 28*SZVREG
> + vst vr29, sp, 13*SZREG + 29*SZVREG
> + vst vr30, sp, 13*SZREG + 30*SZVREG
> + vst vr31, sp, 13*SZREG + 31*SZVREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX */
> +
> + bl __tls_get_addr
> + ADDI a0, a0, -TLS_DTV_OFFSET
> +
> + REG_L ra, sp, 0
> + REG_L a1, sp, 1 * 8
> + REG_L a2, sp, 2 * 8
> + REG_L a3, sp, 3 * 8
> + REG_L a4, sp, 4 * 8
> + REG_L a5, sp, 5 * 8
> + REG_L a6, sp, 6 * 8
> + REG_L a7, sp, 7 * 8
> + REG_L t4, sp, 8 * 8
> + REG_L t5, sp, 9 * 8
> + REG_L t6, sp, 10 * 8
> + REG_L t7, sp, 11 * 8
> + REG_L t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> + xvld xr0, sp, 13*SZREG + 0*SZXREG
> + xvld xr1, sp, 13*SZREG + 1*SZXREG
> + xvld xr2, sp, 13*SZREG + 2*SZXREG
> + xvld xr3, sp, 13*SZREG + 3*SZXREG
> + xvld xr4, sp, 13*SZREG + 4*SZXREG
> + xvld xr5, sp, 13*SZREG + 5*SZXREG
> + xvld xr6, sp, 13*SZREG + 6*SZXREG
> + xvld xr7, sp, 13*SZREG + 7*SZXREG
> + xvld xr8, sp, 13*SZREG + 8*SZXREG
> + xvld xr9, sp, 13*SZREG + 9*SZXREG
> + xvld xr10, sp, 13*SZREG + 10*SZXREG
> + xvld xr11, sp, 13*SZREG + 11*SZXREG
> + xvld xr12, sp, 13*SZREG + 12*SZXREG
> + xvld xr13, sp, 13*SZREG + 13*SZXREG
> + xvld xr14, sp, 13*SZREG + 14*SZXREG
> + xvld xr15, sp, 13*SZREG + 15*SZXREG
> + xvld xr16, sp, 13*SZREG + 16*SZXREG
> + xvld xr17, sp, 13*SZREG + 17*SZXREG
> + xvld xr18, sp, 13*SZREG + 18*SZXREG
> + xvld xr19, sp, 13*SZREG + 19*SZXREG
> + xvld xr20, sp, 13*SZREG + 20*SZXREG
> + xvld xr21, sp, 13*SZREG + 21*SZXREG
> + xvld xr22, sp, 13*SZREG + 22*SZXREG
> + xvld xr23, sp, 13*SZREG + 23*SZXREG
> + xvld xr24, sp, 13*SZREG + 24*SZXREG
> + xvld xr25, sp, 13*SZREG + 25*SZXREG
> + xvld xr26, sp, 13*SZREG + 26*SZXREG
> + xvld xr27, sp, 13*SZREG + 27*SZXREG
> + xvld xr28, sp, 13*SZREG + 28*SZXREG
> + xvld xr29, sp, 13*SZREG + 29*SZXREG
> + xvld xr30, sp, 13*SZREG + 30*SZXREG
> + xvld xr31, sp, 13*SZREG + 31*SZXREG
> + REG_L t0, sp, 32*SZXREG
> + movgr2fcsr fcsr0, t0
> +#elif defined USE_LSX
> + vld vr0, sp, 13*SZREG + 0*SZVREG
> + vld vr1, sp, 13*SZREG + 1*SZVREG
> + vld vr2, sp, 13*SZREG + 2*SZVREG
> + vld vr3, sp, 13*SZREG + 3*SZVREG
> + vld vr4, sp, 13*SZREG + 4*SZVREG
> + vld vr5, sp, 13*SZREG + 5*SZVREG
> + vld vr6, sp, 13*SZREG + 6*SZVREG
> + vld vr7, sp, 13*SZREG + 7*SZVREG
> + vld vr8, sp, 13*SZREG + 8*SZVREG
> + vld vr9, sp, 13*SZREG + 9*SZVREG
> + vld vr10, sp, 13*SZREG + 10*SZVREG
> + vld vr11, sp, 13*SZREG + 11*SZVREG
> + vld vr12, sp, 13*SZREG + 12*SZVREG
> + vld vr13, sp, 13*SZREG + 13*SZVREG
> + vld vr14, sp, 13*SZREG + 14*SZVREG
> + vld vr15, sp, 13*SZREG + 15*SZVREG
> + vld vr16, sp, 13*SZREG + 16*SZVREG
> + vld vr17, sp, 13*SZREG + 17*SZVREG
> + vld vr18, sp, 13*SZREG + 18*SZVREG
> + vld vr19, sp, 13*SZREG + 19*SZVREG
> + vld vr20, sp, 13*SZREG + 20*SZVREG
> + vld vr21, sp, 13*SZREG + 21*SZVREG
> + vld vr22, sp, 13*SZREG + 22*SZVREG
> + vld vr23, sp, 13*SZREG + 23*SZVREG
> + vld vr24, sp, 13*SZREG + 24*SZVREG
> + vld vr25, sp, 13*SZREG + 25*SZVREG
> + vld vr26, sp, 13*SZREG + 26*SZVREG
> + vld vr27, sp, 13*SZREG + 27*SZVREG
> + vld vr28, sp, 13*SZREG + 28*SZVREG
> + vld vr29, sp, 13*SZREG + 29*SZVREG
> + vld vr30, sp, 13*SZREG + 30*SZVREG
> + vld vr31, sp, 13*SZREG + 31*SZVREG
> + REG_L t0, sp, 32*SZVREG
> + movgr2fcsr fcsr0, t0
> +#elif !defined __loongarch_soft_float
> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
> + REG_L t0, sp, 24*SZFREG
> + movgr2fcsr fcsr0, t0
> +#endif /* #ifdef USE_LASX */
> +
> + ADDI sp, sp, FRAME_SIZE
> + b Lret
> + cfi_endproc
> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> + .text
> +
> + /* Compute the thread pointer offset for symbols in the static
> + TLS block. The offset is the same for all threads.
> + Prototype:
> + _dl_tlsdesc_return (tlsdesc *); */
> + .hidden _dl_tlsdesc_return
> + .global _dl_tlsdesc_return
> + .type _dl_tlsdesc_return,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_return:
> + REG_L a0, a0, 8
> + RET
> + cfi_endproc
> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> + /* Handler for undefined weak TLS symbols.
> + Prototype:
> + _dl_tlsdesc_undefweak (tlsdesc *);
> +
> + The second word of the descriptor contains the addend.
> + Return the addend minus the thread pointer. This ensures
> + that when the caller adds on the thread pointer it gets back
> + the addend. */
> + .hidden _dl_tlsdesc_undefweak
> + .global _dl_tlsdesc_undefweak
> + .type _dl_tlsdesc_undefweak,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_undefweak:
> + REG_L a0, a0, 8
> + sub.d a0, a0, tp
> + RET
> + cfi_endproc
> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT. */
> +struct tlsdesc
> +{
> + ptrdiff_t (*entry) (struct tlsdesc *);
> + void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> + needs dynamic TLS offsets. */
> +struct tlsdesc_dynamic_arg
> +{
> + tls_index tlsinfo;
> + size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
> struct link_map_machine
> {
> ElfW (Addr) plt; /* Address of .plt. */
> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
> };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
> #define SZREG 8
> #define SZFREG 8
> +#define SZFCSREG 4
> #define SZVREG 16
> #define SZXREG 32
> #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
> #define fcc5 $fcc5
> #define fcc6 $fcc6
> #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>
> #define vr0 $vr0
> #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors. LoongArch version.
> +
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <ldsodefs.h>
> +#include <tls.h>
> +#include <dl-tlsdesc.h>
> +#include <dl-unmap-segments.h>
> +#include <tlsdeschtab.h>
> +
> +/* Unmap the dynamic object, but also release its TLS descriptor table
> + if there is one. */
> +
> +void
> +_dl_unmap (struct link_map *map)
> +{
> + _dl_unmap_segments (map);
> +
> +#ifdef SHARED
> + if (map->l_mach.tlsdesc_table)
> + htab_delete (map->l_mach.tlsdesc_table);
> +#endif
> +}
> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
> new file mode 100644
> index 0000000000..bcab218631
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.sym
> @@ -0,0 +1,19 @@
> +#include <stddef.h>
> +#include <sysdep.h>
> +#include <tls.h>
> +#include <link.h>
> +#include <dl-tlsdesc.h>
> +
> +--
> +
> +-- Abuse tls.h macros to derive offsets relative to the thread register.
> +
> +TLSDESC_ARG offsetof(struct tlsdesc, arg)
> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
> +TCBHEAD_DTV offsetof(tcbhead_t, dtv)
> +DTV_COUNTER offsetof(dtv_t, counter)
> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
> +TLS_DTV_OFFSET TLS_DTV_OFFSET
> +SIZE_OF_DTV sizeof(tcbhead_t)
> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> index 547b1c1b7f..ec32e6d13f 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> @@ -5,3 +5,5 @@ libc.so: calloc
> libc.so: free
> libc.so: malloc
> libc.so: realloc
> +# The dynamic loader needs __tls_get_addr for TLS.
> +ld.so: __tls_get_addr
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-02-29 1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
2024-02-29 2:56 ` caiyinyu
@ 2024-03-04 15:42 ` H.J. Lu
2024-03-08 7:45 ` mengqinggang
2024-03-05 19:29 ` Adhemerval Zanella Netto
2 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2024-03-04 15:42 UTC (permalink / raw)
To: mengqinggang
Cc: libc-alpha, adhemerval.zanella, xuchenghua, caiyinyu, chenglulu,
cailulu, xry111, i.swmail, maskray, luweining, wanglei,
hejinyang
On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote:
>
> This is mostly based on AArch64 and RISC-V implementation.
>
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>
> elf/elf.h | 2 +
> sysdeps/loongarch/Makefile | 6 +
> sysdeps/loongarch/dl-link.sym | 1 +
> sysdeps/loongarch/dl-machine.h | 60 ++-
> sysdeps/loongarch/dl-tls.h | 9 +-
> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
> sysdeps/loongarch/linkmap.h | 1 +
> sysdeps/loongarch/sys/asm.h | 1 +
> sysdeps/loongarch/sys/regdef.h | 1 +
> sysdeps/loongarch/tlsdesc.c | 39 ++
> sysdeps/loongarch/tlsdesc.sym | 19 +
> .../unix/sysv/linux/loongarch/localplt.data | 2 +
> 14 files changed, 625 insertions(+), 3 deletions(-)
> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
> create mode 100644 sysdeps/loongarch/tlsdesc.c
> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
> #define R_LARCH_TLS_TPREL32 10
> #define R_LARCH_TLS_TPREL64 11
> #define R_LARCH_IRELATIVE 12
> +#define R_LARCH_TLS_DESC32 13
> +#define R_LARCH_TLS_DESC64 14
>
> /* Reserved for future relocs that the dynamic linker must understand. */
>
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
> endif
>
> ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
> gen-as-const-headers += dl-link.sym
> endif
>
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
> # LoongArch's assembler also needs to know about PIC as it changes the
> # definition of some assembler macros.
> ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
> #include <stddef.h>
> #include <sysdep.h>
> #include <link.h>
> +#include <dl-tlsdesc.h>
>
> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
> #include <entry.h>
> #include <elf/elf.h>
> #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
> #include <dl-static-tls.h>
> #include <dl-machine-rel.h>
>
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
> break;
>
> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> + {
> + struct tlsdesc volatile *td =
> + (struct tlsdesc volatile *)addr_field;
> + if (! sym)
> + {
> + td->arg = (void*)reloc->r_addend;
> + td->entry = _dl_tlsdesc_undefweak;
> + }
> + else
> + {
> +# ifndef SHARED
> + CHECK_STATIC_TLS (map, sym_map);
> +# else
> + if (!TRY_STATIC_TLS (map, sym_map))
> + {
> + td->arg = _dl_make_tlsdesc_dynamic
> + (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> + if (SUPPORT_LASX)
> + td->entry = _dl_tlsdesc_dynamic_lasx;
> + else
> + if (SUPPORT_LSX)
> + td->entry = _dl_tlsdesc_dynamic_lsx;
> + else
> +# endif
> + td->entry = _dl_tlsdesc_dynamic;
> + }
> + else
> +# endif
> + {
> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> + + reloc->r_addend);
> + td->entry = _dl_tlsdesc_return;
> + }
> + }
> + break;
> + }
> +
> case R_LARCH_COPY:
> {
> if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> else
> *reloc_addr = map->l_mach.plt;
> }
> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
> + {
> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> + const ElfW (Sym) *sym = &symtab[symndx];
> + const struct r_found_version *version = NULL;
> +
> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> + {
> + const ElfW (Half) *vernum =
> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> + version = &map->l_versions[vernum[symndx] & 0x7fff];
> + }
> +
> + /* Always initialize TLS descriptors completely, because lazy
> + initialization requires synchronization at every TLS access. */
> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> + skip_ifunc);
> + }
> else
> _dl_reloc_bad_type (map, r_type, 1);
> }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
> License along with the GNU C Library. If not, see
> <https://www.gnu.org/licenses/>. */
>
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
> /* Type used for the representation of TLS information in the GOT. */
> typedef struct
> {
> @@ -23,6 +26,8 @@ typedef struct
> unsigned long int ti_offset;
> } tls_index;
>
> +extern void *__tls_get_addr (tls_index *ti);
> +
> /* The thread pointer points to the first static TLS block. */
> #define TLS_TP_OFFSET 0
>
> @@ -37,10 +42,10 @@ typedef struct
> /* Compute the value for a DTPREL reloc. */
> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>
> -extern void *__tls_get_addr (tls_index *ti);
> -
> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>
> /* Value used for dtv entries for which the allocation is delayed. */
> #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif
> +
> +#ifdef SHARED
> + /* Handler for dynamic TLS symbols.
> + Prototype:
> + _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> + The second word of the descriptor points to a
> + tlsdesc_dynamic_arg structure.
> +
> + Returns the offset between the thread pointer and the
> + object referenced by the argument.
> +
> + ptrdiff_t
> + __attribute__ ((__regparm__ (1)))
> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> + {
> + struct tlsdesc_dynamic_arg *td = tdp->arg;
> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> + if (__builtin_expect (td->gen_count <= dtv[0].counter
> + && (dtv[td->tlsinfo.ti_module].pointer.val
> + != TLS_DTV_UNALLOCATED),
> + 1))
> + return dtv[td->tlsinfo.ti_module].pointer.val
> + + td->tlsinfo.ti_offset
> + - __thread_pointer;
> +
> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> + }
> + */
> + .hidden _dl_tlsdesc_dynamic
> + .global _dl_tlsdesc_dynamic
> + .type _dl_tlsdesc_dynamic,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_dynamic:
> + /* Save just enough registers to support the fast path; if we fall
> + into the slow path we will save additional registers. */
> + ADDI sp, sp,-24
> + REG_S t0, sp, 0
> + REG_S t1, sp, 8
> + REG_S t2, sp, 16
> +
> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
> + bltu t2, t1, Lslow
> +
> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> + li.d t2, TLS_DTV_UNALLOCATED
> + beq t1, t2, Lslow
> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> + add.d a0, t1, t2
> +Lret:
> + sub.d a0, a0, tp
> + REG_L t0, sp, 0
> + REG_L t1, sp, 8
> + REG_L t2, sp, 16
> + ADDI sp, sp, 24
> + RET
> +
> +Lslow:
> + /* This is the slow path. We need to call __tls_get_addr() which
> + means we need to save and restore all the registers that the
> + callee will trash. */
> +
> + /* Save the remaining registers that we must treat as caller save. */
> + ADDI sp, sp, -FRAME_SIZE
> + REG_S ra, sp, 0 * SZREG
> + REG_S a1, sp, 1 * SZREG
> + REG_S a2, sp, 2 * SZREG
> + REG_S a3, sp, 3 * SZREG
> + REG_S a4, sp, 4 * SZREG
> + REG_S a5, sp, 5 * SZREG
> + REG_S a6, sp, 6 * SZREG
> + REG_S a7, sp, 7 * SZREG
> + REG_S t4, sp, 8 * SZREG
> + REG_S t5, sp, 9 * SZREG
> + REG_S t6, sp, 10 * SZREG
> + REG_S t7, sp, 11 * SZREG
> + REG_S t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> + xvst xr0, sp, 13*SZREG + 0*SZXREG
> + xvst xr1, sp, 13*SZREG + 1*SZXREG
> + xvst xr2, sp, 13*SZREG + 2*SZXREG
> + xvst xr3, sp, 13*SZREG + 3*SZXREG
> + xvst xr4, sp, 13*SZREG + 4*SZXREG
> + xvst xr5, sp, 13*SZREG + 5*SZXREG
> + xvst xr6, sp, 13*SZREG + 6*SZXREG
> + xvst xr7, sp, 13*SZREG + 7*SZXREG
> + xvst xr8, sp, 13*SZREG + 8*SZXREG
> + xvst xr9, sp, 13*SZREG + 9*SZXREG
> + xvst xr10, sp, 13*SZREG + 10*SZXREG
> + xvst xr11, sp, 13*SZREG + 11*SZXREG
> + xvst xr12, sp, 13*SZREG + 12*SZXREG
> + xvst xr13, sp, 13*SZREG + 13*SZXREG
> + xvst xr14, sp, 13*SZREG + 14*SZXREG
> + xvst xr15, sp, 13*SZREG + 15*SZXREG
> + xvst xr16, sp, 13*SZREG + 16*SZXREG
> + xvst xr17, sp, 13*SZREG + 17*SZXREG
> + xvst xr18, sp, 13*SZREG + 18*SZXREG
> + xvst xr19, sp, 13*SZREG + 19*SZXREG
> + xvst xr20, sp, 13*SZREG + 20*SZXREG
> + xvst xr21, sp, 13*SZREG + 21*SZXREG
> + xvst xr22, sp, 13*SZREG + 22*SZXREG
> + xvst xr23, sp, 13*SZREG + 23*SZXREG
> + xvst xr24, sp, 13*SZREG + 24*SZXREG
> + xvst xr25, sp, 13*SZREG + 25*SZXREG
> + xvst xr26, sp, 13*SZREG + 26*SZXREG
> + xvst xr27, sp, 13*SZREG + 27*SZXREG
> + xvst xr28, sp, 13*SZREG + 28*SZXREG
> + xvst xr29, sp, 13*SZREG + 29*SZXREG
> + xvst xr30, sp, 13*SZREG + 30*SZXREG
> + xvst xr31, sp, 13*SZREG + 31*SZXREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> + vst vr0, sp, 13*SZREG + 0*SZVREG
> + vst vr1, sp, 13*SZREG + 1*SZVREG
> + vst vr2, sp, 13*SZREG + 2*SZVREG
> + vst vr3, sp, 13*SZREG + 3*SZVREG
> + vst vr4, sp, 13*SZREG + 4*SZVREG
> + vst vr5, sp, 13*SZREG + 5*SZVREG
> + vst vr6, sp, 13*SZREG + 6*SZVREG
> + vst vr7, sp, 13*SZREG + 7*SZVREG
> + vst vr8, sp, 13*SZREG + 8*SZVREG
> + vst vr9, sp, 13*SZREG + 9*SZVREG
> + vst vr10, sp, 13*SZREG + 10*SZVREG
> + vst vr11, sp, 13*SZREG + 11*SZVREG
> + vst vr12, sp, 13*SZREG + 12*SZVREG
> + vst vr13, sp, 13*SZREG + 13*SZVREG
> + vst vr14, sp, 13*SZREG + 14*SZVREG
> + vst vr15, sp, 13*SZREG + 15*SZVREG
> + vst vr16, sp, 13*SZREG + 16*SZVREG
> + vst vr17, sp, 13*SZREG + 17*SZVREG
> + vst vr18, sp, 13*SZREG + 18*SZVREG
> + vst vr19, sp, 13*SZREG + 19*SZVREG
> + vst vr20, sp, 13*SZREG + 20*SZVREG
> + vst vr21, sp, 13*SZREG + 21*SZVREG
> + vst vr22, sp, 13*SZREG + 22*SZVREG
> + vst vr23, sp, 13*SZREG + 23*SZVREG
> + vst vr24, sp, 13*SZREG + 24*SZVREG
> + vst vr25, sp, 13*SZREG + 25*SZVREG
> + vst vr26, sp, 13*SZREG + 26*SZVREG
> + vst vr27, sp, 13*SZREG + 27*SZVREG
> + vst vr28, sp, 13*SZREG + 28*SZVREG
> + vst vr29, sp, 13*SZREG + 29*SZVREG
> + vst vr30, sp, 13*SZREG + 30*SZVREG
> + vst vr31, sp, 13*SZREG + 31*SZVREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX */
> +
> + bl __tls_get_addr
> + ADDI a0, a0, -TLS_DTV_OFFSET
> +
> + REG_L ra, sp, 0
> + REG_L a1, sp, 1 * 8
> + REG_L a2, sp, 2 * 8
> + REG_L a3, sp, 3 * 8
> + REG_L a4, sp, 4 * 8
> + REG_L a5, sp, 5 * 8
> + REG_L a6, sp, 6 * 8
> + REG_L a7, sp, 7 * 8
> + REG_L t4, sp, 8 * 8
> + REG_L t5, sp, 9 * 8
> + REG_L t6, sp, 10 * 8
> + REG_L t7, sp, 11 * 8
> + REG_L t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> + xvld xr0, sp, 13*SZREG + 0*SZXREG
> + xvld xr1, sp, 13*SZREG + 1*SZXREG
> + xvld xr2, sp, 13*SZREG + 2*SZXREG
> + xvld xr3, sp, 13*SZREG + 3*SZXREG
> + xvld xr4, sp, 13*SZREG + 4*SZXREG
> + xvld xr5, sp, 13*SZREG + 5*SZXREG
> + xvld xr6, sp, 13*SZREG + 6*SZXREG
> + xvld xr7, sp, 13*SZREG + 7*SZXREG
> + xvld xr8, sp, 13*SZREG + 8*SZXREG
> + xvld xr9, sp, 13*SZREG + 9*SZXREG
> + xvld xr10, sp, 13*SZREG + 10*SZXREG
> + xvld xr11, sp, 13*SZREG + 11*SZXREG
> + xvld xr12, sp, 13*SZREG + 12*SZXREG
> + xvld xr13, sp, 13*SZREG + 13*SZXREG
> + xvld xr14, sp, 13*SZREG + 14*SZXREG
> + xvld xr15, sp, 13*SZREG + 15*SZXREG
> + xvld xr16, sp, 13*SZREG + 16*SZXREG
> + xvld xr17, sp, 13*SZREG + 17*SZXREG
> + xvld xr18, sp, 13*SZREG + 18*SZXREG
> + xvld xr19, sp, 13*SZREG + 19*SZXREG
> + xvld xr20, sp, 13*SZREG + 20*SZXREG
> + xvld xr21, sp, 13*SZREG + 21*SZXREG
> + xvld xr22, sp, 13*SZREG + 22*SZXREG
> + xvld xr23, sp, 13*SZREG + 23*SZXREG
> + xvld xr24, sp, 13*SZREG + 24*SZXREG
> + xvld xr25, sp, 13*SZREG + 25*SZXREG
> + xvld xr26, sp, 13*SZREG + 26*SZXREG
> + xvld xr27, sp, 13*SZREG + 27*SZXREG
> + xvld xr28, sp, 13*SZREG + 28*SZXREG
> + xvld xr29, sp, 13*SZREG + 29*SZXREG
> + xvld xr30, sp, 13*SZREG + 30*SZXREG
> + xvld xr31, sp, 13*SZREG + 31*SZXREG
> + REG_L t0, sp, 32*SZXREG
> + movgr2fcsr fcsr0, t0
> +#elif defined USE_LSX
> + vld vr0, sp, 13*SZREG + 0*SZVREG
> + vld vr1, sp, 13*SZREG + 1*SZVREG
> + vld vr2, sp, 13*SZREG + 2*SZVREG
> + vld vr3, sp, 13*SZREG + 3*SZVREG
> + vld vr4, sp, 13*SZREG + 4*SZVREG
> + vld vr5, sp, 13*SZREG + 5*SZVREG
> + vld vr6, sp, 13*SZREG + 6*SZVREG
> + vld vr7, sp, 13*SZREG + 7*SZVREG
> + vld vr8, sp, 13*SZREG + 8*SZVREG
> + vld vr9, sp, 13*SZREG + 9*SZVREG
> + vld vr10, sp, 13*SZREG + 10*SZVREG
> + vld vr11, sp, 13*SZREG + 11*SZVREG
> + vld vr12, sp, 13*SZREG + 12*SZVREG
> + vld vr13, sp, 13*SZREG + 13*SZVREG
> + vld vr14, sp, 13*SZREG + 14*SZVREG
> + vld vr15, sp, 13*SZREG + 15*SZVREG
> + vld vr16, sp, 13*SZREG + 16*SZVREG
> + vld vr17, sp, 13*SZREG + 17*SZVREG
> + vld vr18, sp, 13*SZREG + 18*SZVREG
> + vld vr19, sp, 13*SZREG + 19*SZVREG
> + vld vr20, sp, 13*SZREG + 20*SZVREG
> + vld vr21, sp, 13*SZREG + 21*SZVREG
> + vld vr22, sp, 13*SZREG + 22*SZVREG
> + vld vr23, sp, 13*SZREG + 23*SZVREG
> + vld vr24, sp, 13*SZREG + 24*SZVREG
> + vld vr25, sp, 13*SZREG + 25*SZVREG
> + vld vr26, sp, 13*SZREG + 26*SZVREG
> + vld vr27, sp, 13*SZREG + 27*SZVREG
> + vld vr28, sp, 13*SZREG + 28*SZVREG
> + vld vr29, sp, 13*SZREG + 29*SZVREG
> + vld vr30, sp, 13*SZREG + 30*SZVREG
> + vld vr31, sp, 13*SZREG + 31*SZVREG
> + REG_L t0, sp, 32*SZVREG
> + movgr2fcsr fcsr0, t0
> +#elif !defined __loongarch_soft_float
> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
> + REG_L t0, sp, 24*SZFREG
> + movgr2fcsr fcsr0, t0
> +#endif /* #ifdef USE_LASX */
> +
> + ADDI sp, sp, FRAME_SIZE
> + b Lret
> + cfi_endproc
> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> + .text
> +
> + /* Compute the thread pointer offset for symbols in the static
> + TLS block. The offset is the same for all threads.
> + Prototype:
> + _dl_tlsdesc_return (tlsdesc *); */
> + .hidden _dl_tlsdesc_return
> + .global _dl_tlsdesc_return
> + .type _dl_tlsdesc_return,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_return:
> + REG_L a0, a0, 8
> + RET
> + cfi_endproc
> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> + /* Handler for undefined weak TLS symbols.
> + Prototype:
> + _dl_tlsdesc_undefweak (tlsdesc *);
> +
> + The second word of the descriptor contains the addend.
> + Return the addend minus the thread pointer. This ensures
> + that when the caller adds on the thread pointer it gets back
> + the addend. */
> + .hidden _dl_tlsdesc_undefweak
> + .global _dl_tlsdesc_undefweak
> + .type _dl_tlsdesc_undefweak,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_undefweak:
> + REG_L a0, a0, 8
> + sub.d a0, a0, tp
> + RET
> + cfi_endproc
> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT. */
> +struct tlsdesc
> +{
> + ptrdiff_t (*entry) (struct tlsdesc *);
> + void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> + needs dynamic TLS offsets. */
> +struct tlsdesc_dynamic_arg
> +{
> + tls_index tlsinfo;
> + size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
> struct link_map_machine
> {
> ElfW (Addr) plt; /* Address of .plt. */
> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
> };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
> #define SZREG 8
> #define SZFREG 8
> +#define SZFCSREG 4
> #define SZVREG 16
> #define SZXREG 32
> #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
> #define fcc5 $fcc5
> #define fcc6 $fcc6
> #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>
> #define vr0 $vr0
> #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors. AArch64 version.
Change it.
> +
>
--
H.J.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-02-29 1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
2024-02-29 2:56 ` caiyinyu
2024-03-04 15:42 ` H.J. Lu
@ 2024-03-05 19:29 ` Adhemerval Zanella Netto
2024-03-08 7:53 ` mengqinggang
2 siblings, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2024-03-05 19:29 UTC (permalink / raw)
To: mengqinggang, libc-alpha
Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
maskray, luweining, wanglei, hejinyang
On 28/02/24 22:43, mengqinggang wrote:
> This is mostly based on AArch64 and RISC-V implementation.
>
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
Patch looks ok, some comments below.
>
> elf/elf.h | 2 +
> sysdeps/loongarch/Makefile | 6 +
> sysdeps/loongarch/dl-link.sym | 1 +
> sysdeps/loongarch/dl-machine.h | 60 ++-
> sysdeps/loongarch/dl-tls.h | 9 +-
> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
> sysdeps/loongarch/linkmap.h | 1 +
> sysdeps/loongarch/sys/asm.h | 1 +
> sysdeps/loongarch/sys/regdef.h | 1 +
> sysdeps/loongarch/tlsdesc.c | 39 ++
> sysdeps/loongarch/tlsdesc.sym | 19 +
> .../unix/sysv/linux/loongarch/localplt.data | 2 +
> 14 files changed, 625 insertions(+), 3 deletions(-)
> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
> create mode 100644 sysdeps/loongarch/tlsdesc.c
> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
> #define R_LARCH_TLS_TPREL32 10
> #define R_LARCH_TLS_TPREL64 11
> #define R_LARCH_IRELATIVE 12
> +#define R_LARCH_TLS_DESC32 13
> +#define R_LARCH_TLS_DESC64 14
>
> /* Reserved for future relocs that the dynamic linker must understand. */
>
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
> endif
>
> ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
> gen-as-const-headers += dl-link.sym
> endif
>
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
> # LoongArch's assembler also needs to know about PIC as it changes the
> # definition of some assembler macros.
> ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
> #include <stddef.h>
> #include <sysdep.h>
> #include <link.h>
> +#include <dl-tlsdesc.h>
>
> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
> #include <entry.h>
> #include <elf/elf.h>
> #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
> #include <dl-static-tls.h>
> #include <dl-machine-rel.h>
>
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
> break;
>
> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> + {
> + struct tlsdesc volatile *td =
> + (struct tlsdesc volatile *)addr_field;
> + if (! sym)
> + {
> + td->arg = (void*)reloc->r_addend;
> + td->entry = _dl_tlsdesc_undefweak;
> + }
> + else
> + {
> +# ifndef SHARED
> + CHECK_STATIC_TLS (map, sym_map);
> +# else
> + if (!TRY_STATIC_TLS (map, sym_map))
> + {
> + td->arg = _dl_make_tlsdesc_dynamic
> + (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> + if (SUPPORT_LASX)
> + td->entry = _dl_tlsdesc_dynamic_lasx;
> + else
> + if (SUPPORT_LSX)
> + td->entry = _dl_tlsdesc_dynamic_lsx;
> + else
> +# endif
> + td->entry = _dl_tlsdesc_dynamic;
> + }
> + else
> +# endif
> + {
> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> + + reloc->r_addend);
> + td->entry = _dl_tlsdesc_return;
> + }
> + }
> + break;
> + }
> +
> case R_LARCH_COPY:
> {
> if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> else
> *reloc_addr = map->l_mach.plt;
> }
> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
Use __glibc_likely here.
> + {
> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> + const ElfW (Sym) *sym = &symtab[symndx];
> + const struct r_found_version *version = NULL;
> +
> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> + {
> + const ElfW (Half) *vernum =
> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> + version = &map->l_versions[vernum[symndx] & 0x7fff];
> + }
> +
> + /* Always initialize TLS descriptors completely, because lazy
> + initialization requires synchronization at every TLS access. */
> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> + skip_ifunc);
> + }
> else
> _dl_reloc_bad_type (map, r_type, 1);
> }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
> License along with the GNU C Library. If not, see
> <https://www.gnu.org/licenses/>. */
>
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
> /* Type used for the representation of TLS information in the GOT. */
> typedef struct
> {
> @@ -23,6 +26,8 @@ typedef struct
> unsigned long int ti_offset;
> } tls_index;
>
> +extern void *__tls_get_addr (tls_index *ti);
> +
> /* The thread pointer points to the first static TLS block. */
> #define TLS_TP_OFFSET 0
>
> @@ -37,10 +42,10 @@ typedef struct
> /* Compute the value for a DTPREL reloc. */
> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>
> -extern void *__tls_get_addr (tls_index *ti);
> -
Why move the function prototype?
> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>
> /* Value used for dtv entries for which the allocation is delayed. */
> #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
Update Copyright years to 2024.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif
I don't have a strong opinion, but another option that might be simpler is
to provide only one _dl_tlsdesc_dynamic implementation and check the
required save/restore of vector registers based on the hwcap value.
> +
> +#ifdef SHARED
> + /* Handler for dynamic TLS symbols.
> + Prototype:
> + _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> + The second word of the descriptor points to a
> + tlsdesc_dynamic_arg structure.
> +
> + Returns the offset between the thread pointer and the
> + object referenced by the argument.
> +
> + ptrdiff_t
> + __attribute__ ((__regparm__ (1)))
Does this attribute really make sense for loongarch?
> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> + {
> + struct tlsdesc_dynamic_arg *td = tdp->arg;
> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> + if (__builtin_expect (td->gen_count <= dtv[0].counter
Use __glibc_unlikely or just remove the __builtin_expect for clarity.
> + && (dtv[td->tlsinfo.ti_module].pointer.val
> + != TLS_DTV_UNALLOCATED),
> + 1))
> + return dtv[td->tlsinfo.ti_module].pointer.val
> + + td->tlsinfo.ti_offset
> + - __thread_pointer;
> +
> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> + }
> + */
> + .hidden _dl_tlsdesc_dynamic
> + .global _dl_tlsdesc_dynamic
> + .type _dl_tlsdesc_dynamic,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_dynamic:
> + /* Save just enough registers to support fast path, if we fall
> + into slow path we will save additional registers. */
> + ADDI sp, sp,-24
> + REG_S t0, sp, 0
> + REG_S t1, sp, 8
> + REG_S t2, sp, 16
> +
> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
> + bltu t2, t1, Lslow
> +
> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> + li.d t2, TLS_DTV_UNALLOCATED
> + beq t1, t2, Lslow
> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> + add.d a0, t1, t2
> +Lret:
> + sub.d a0, a0, tp
> + REG_L t0, sp, 0
> + REG_L t1, sp, 8
> + REG_L t2, sp, 16
> + ADDI sp, sp, 24
> + RET
> +
> +Lslow:
> + /* This is the slow path. We need to call __tls_get_addr() which
> + means we need to save and restore all the register that the
> + callee will trash. */
> +
> + /* Save the remaining registers that we must treat as caller save. */
> + ADDI sp, sp, -FRAME_SIZE
> + REG_S ra, sp, 0 * SZREG
> + REG_S a1, sp, 1 * SZREG
> + REG_S a2, sp, 2 * SZREG
> + REG_S a3, sp, 3 * SZREG
> + REG_S a4, sp, 4 * SZREG
> + REG_S a5, sp, 5 * SZREG
> + REG_S a6, sp, 6 * SZREG
> + REG_S a7, sp, 7 * SZREG
> + REG_S t4, sp, 8 * SZREG
> + REG_S t5, sp, 9 * SZREG
> + REG_S t6, sp, 10 * SZREG
> + REG_S t7, sp, 11 * SZREG
> + REG_S t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> + xvst xr0, sp, 13*SZREG + 0*SZXREG
> + xvst xr1, sp, 13*SZREG + 1*SZXREG
> + xvst xr2, sp, 13*SZREG + 2*SZXREG
> + xvst xr3, sp, 13*SZREG + 3*SZXREG
> + xvst xr4, sp, 13*SZREG + 4*SZXREG
> + xvst xr5, sp, 13*SZREG + 5*SZXREG
> + xvst xr6, sp, 13*SZREG + 6*SZXREG
> + xvst xr7, sp, 13*SZREG + 7*SZXREG
> + xvst xr8, sp, 13*SZREG + 8*SZXREG
> + xvst xr9, sp, 13*SZREG + 9*SZXREG
> + xvst xr10, sp, 13*SZREG + 10*SZXREG
> + xvst xr11, sp, 13*SZREG + 11*SZXREG
> + xvst xr12, sp, 13*SZREG + 12*SZXREG
> + xvst xr13, sp, 13*SZREG + 13*SZXREG
> + xvst xr14, sp, 13*SZREG + 14*SZXREG
> + xvst xr15, sp, 13*SZREG + 15*SZXREG
> + xvst xr16, sp, 13*SZREG + 16*SZXREG
> + xvst xr17, sp, 13*SZREG + 17*SZXREG
> + xvst xr18, sp, 13*SZREG + 18*SZXREG
> + xvst xr19, sp, 13*SZREG + 19*SZXREG
> + xvst xr20, sp, 13*SZREG + 20*SZXREG
> + xvst xr21, sp, 13*SZREG + 21*SZXREG
> + xvst xr22, sp, 13*SZREG + 22*SZXREG
> + xvst xr23, sp, 13*SZREG + 23*SZXREG
> + xvst xr24, sp, 13*SZREG + 24*SZXREG
> + xvst xr25, sp, 13*SZREG + 25*SZXREG
> + xvst xr26, sp, 13*SZREG + 26*SZXREG
> + xvst xr27, sp, 13*SZREG + 27*SZXREG
> + xvst xr28, sp, 13*SZREG + 28*SZXREG
> + xvst xr29, sp, 13*SZREG + 29*SZXREG
> + xvst xr30, sp, 13*SZREG + 30*SZXREG
> + xvst xr31, sp, 13*SZREG + 31*SZXREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> + vst vr0, sp, 13*SZREG + 0*SZVREG
> + vst vr1, sp, 13*SZREG + 1*SZVREG
> + vst vr2, sp, 13*SZREG + 2*SZVREG
> + vst vr3, sp, 13*SZREG + 3*SZVREG
> + vst vr4, sp, 13*SZREG + 4*SZVREG
> + vst vr5, sp, 13*SZREG + 5*SZVREG
> + vst vr6, sp, 13*SZREG + 6*SZVREG
> + vst vr7, sp, 13*SZREG + 7*SZVREG
> + vst vr8, sp, 13*SZREG + 8*SZVREG
> + vst vr9, sp, 13*SZREG + 9*SZVREG
> + vst vr10, sp, 13*SZREG + 10*SZVREG
> + vst vr11, sp, 13*SZREG + 11*SZVREG
> + vst vr12, sp, 13*SZREG + 12*SZVREG
> + vst vr13, sp, 13*SZREG + 13*SZVREG
> + vst vr14, sp, 13*SZREG + 14*SZVREG
> + vst vr15, sp, 13*SZREG + 15*SZVREG
> + vst vr16, sp, 13*SZREG + 16*SZVREG
> + vst vr17, sp, 13*SZREG + 17*SZVREG
> + vst vr18, sp, 13*SZREG + 18*SZVREG
> + vst vr19, sp, 13*SZREG + 19*SZVREG
> + vst vr20, sp, 13*SZREG + 20*SZVREG
> + vst vr21, sp, 13*SZREG + 21*SZVREG
> + vst vr22, sp, 13*SZREG + 22*SZVREG
> + vst vr23, sp, 13*SZREG + 23*SZVREG
> + vst vr24, sp, 13*SZREG + 24*SZVREG
> + vst vr25, sp, 13*SZREG + 25*SZVREG
> + vst vr26, sp, 13*SZREG + 26*SZVREG
> + vst vr27, sp, 13*SZREG + 27*SZVREG
> + vst vr28, sp, 13*SZREG + 28*SZVREG
> + vst vr29, sp, 13*SZREG + 29*SZVREG
> + vst vr30, sp, 13*SZREG + 30*SZVREG
> + vst vr31, sp, 13*SZREG + 31*SZVREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> + # some fields in fcsr0
> + movfcsr2gr t0, fcsr0
> + REG_S t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX */
> +
> + bl __tls_get_addr
> + ADDI a0, a0, -TLS_DTV_OFFSET
> +
> + REG_L ra, sp, 0
> + REG_L a1, sp, 1 * 8
> + REG_L a2, sp, 2 * 8
> + REG_L a3, sp, 3 * 8
> + REG_L a4, sp, 4 * 8
> + REG_L a5, sp, 5 * 8
> + REG_L a6, sp, 6 * 8
> + REG_L a7, sp, 7 * 8
> + REG_L t4, sp, 8 * 8
> + REG_L t5, sp, 9 * 8
> + REG_L t6, sp, 10 * 8
> + REG_L t7, sp, 11 * 8
> + REG_L t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> + xvld xr0, sp, 13*SZREG + 0*SZXREG
> + xvld xr1, sp, 13*SZREG + 1*SZXREG
> + xvld xr2, sp, 13*SZREG + 2*SZXREG
> + xvld xr3, sp, 13*SZREG + 3*SZXREG
> + xvld xr4, sp, 13*SZREG + 4*SZXREG
> + xvld xr5, sp, 13*SZREG + 5*SZXREG
> + xvld xr6, sp, 13*SZREG + 6*SZXREG
> + xvld xr7, sp, 13*SZREG + 7*SZXREG
> + xvld xr8, sp, 13*SZREG + 8*SZXREG
> + xvld xr9, sp, 13*SZREG + 9*SZXREG
> + xvld xr10, sp, 13*SZREG + 10*SZXREG
> + xvld xr11, sp, 13*SZREG + 11*SZXREG
> + xvld xr12, sp, 13*SZREG + 12*SZXREG
> + xvld xr13, sp, 13*SZREG + 13*SZXREG
> + xvld xr14, sp, 13*SZREG + 14*SZXREG
> + xvld xr15, sp, 13*SZREG + 15*SZXREG
> + xvld xr16, sp, 13*SZREG + 16*SZXREG
> + xvld xr17, sp, 13*SZREG + 17*SZXREG
> + xvld xr18, sp, 13*SZREG + 18*SZXREG
> + xvld xr19, sp, 13*SZREG + 19*SZXREG
> + xvld xr20, sp, 13*SZREG + 20*SZXREG
> + xvld xr21, sp, 13*SZREG + 21*SZXREG
> + xvld xr22, sp, 13*SZREG + 22*SZXREG
> + xvld xr23, sp, 13*SZREG + 23*SZXREG
> + xvld xr24, sp, 13*SZREG + 24*SZXREG
> + xvld xr25, sp, 13*SZREG + 25*SZXREG
> + xvld xr26, sp, 13*SZREG + 26*SZXREG
> + xvld xr27, sp, 13*SZREG + 27*SZXREG
> + xvld xr28, sp, 13*SZREG + 28*SZXREG
> + xvld xr29, sp, 13*SZREG + 29*SZXREG
> + xvld xr30, sp, 13*SZREG + 30*SZXREG
> + xvld xr31, sp, 13*SZREG + 31*SZXREG
> + REG_L t0, sp, 32*SZXREG
> + movgr2fcsr fcsr0, t0
> +#elif defined USE_LSX
> + vld vr0, sp, 13*SZREG + 0*SZVREG
> + vld vr1, sp, 13*SZREG + 1*SZVREG
> + vld vr2, sp, 13*SZREG + 2*SZVREG
> + vld vr3, sp, 13*SZREG + 3*SZVREG
> + vld vr4, sp, 13*SZREG + 4*SZVREG
> + vld vr5, sp, 13*SZREG + 5*SZVREG
> + vld vr6, sp, 13*SZREG + 6*SZVREG
> + vld vr7, sp, 13*SZREG + 7*SZVREG
> + vld vr8, sp, 13*SZREG + 8*SZVREG
> + vld vr9, sp, 13*SZREG + 9*SZVREG
> + vld vr10, sp, 13*SZREG + 10*SZVREG
> + vld vr11, sp, 13*SZREG + 11*SZVREG
> + vld vr12, sp, 13*SZREG + 12*SZVREG
> + vld vr13, sp, 13*SZREG + 13*SZVREG
> + vld vr14, sp, 13*SZREG + 14*SZVREG
> + vld vr15, sp, 13*SZREG + 15*SZVREG
> + vld vr16, sp, 13*SZREG + 16*SZVREG
> + vld vr17, sp, 13*SZREG + 17*SZVREG
> + vld vr18, sp, 13*SZREG + 18*SZVREG
> + vld vr19, sp, 13*SZREG + 19*SZVREG
> + vld vr20, sp, 13*SZREG + 20*SZVREG
> + vld vr21, sp, 13*SZREG + 21*SZVREG
> + vld vr22, sp, 13*SZREG + 22*SZVREG
> + vld vr23, sp, 13*SZREG + 23*SZVREG
> + vld vr24, sp, 13*SZREG + 24*SZVREG
> + vld vr25, sp, 13*SZREG + 25*SZVREG
> + vld vr26, sp, 13*SZREG + 26*SZVREG
> + vld vr27, sp, 13*SZREG + 27*SZVREG
> + vld vr28, sp, 13*SZREG + 28*SZVREG
> + vld vr29, sp, 13*SZREG + 29*SZVREG
> + vld vr30, sp, 13*SZREG + 30*SZVREG
> + vld vr31, sp, 13*SZREG + 31*SZVREG
> + REG_L t0, sp, 32*SZVREG
> + movgr2fcsr fcsr0, t0
> +#elif !defined __loongarch_soft_float
> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
> + REG_L t0, sp, 24*SZFREG
> + movgr2fcsr fcsr0, t0
> +#endif /* #ifdef USE_LASX */
> +
> + ADDI sp, sp, FRAME_SIZE
> + b Lret
> + cfi_endproc
> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
Update Copyright years to 2024.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> + .text
> +
> + /* Compute the thread pointer offset for symbols in the static
> + TLS block. The offset is the same for all threads.
> + Prototype:
> + _dl_tlsdesc_return (tlsdesc *); */
> + .hidden _dl_tlsdesc_return
> + .global _dl_tlsdesc_return
> + .type _dl_tlsdesc_return,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_return:
> + REG_L a0, a0, 8
> + RET
> + cfi_endproc
> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> + /* Handler for undefined weak TLS symbols.
> + Prototype:
> + _dl_tlsdesc_undefweak (tlsdesc *);
> +
> + The second word of the descriptor contains the addend.
> + Return the addend minus the thread pointer. This ensures
> + that when the caller adds on the thread pointer it gets back
> + the addend. */
> + .hidden _dl_tlsdesc_undefweak
> + .global _dl_tlsdesc_undefweak
> + .type _dl_tlsdesc_undefweak,%function
> + cfi_startproc
> + .align 2
> +_dl_tlsdesc_undefweak:
> + REG_L a0, a0, 8
> + sub.d a0, a0, tp
> + RET
> + cfi_endproc
> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> + LoongArch version.
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT. */
> +struct tlsdesc
> +{
> + ptrdiff_t (*entry) (struct tlsdesc *);
> + void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> + needs dynamic TLS offsets. */
> +struct tlsdesc_dynamic_arg
> +{
> + tls_index tlsinfo;
> + size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float
Minor style: for a single test we usually use '#ifndef', and we add
attribute_hidden at the end of the prototype.
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
> struct link_map_machine
> {
> ElfW (Addr) plt; /* Address of .plt. */
> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
> };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
> #define SZREG 8
> #define SZFREG 8
> +#define SZFCSREG 4
> #define SZVREG 16
> #define SZXREG 32
> #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
> #define fcc5 $fcc5
> #define fcc6 $fcc6
> #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>
> #define vr0 $vr0
> #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors. AArch64 version.
> +
> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
Update Copyright years to 2024 and remove the 'AArch64'.
> +
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <ldsodefs.h>
> +#include <tls.h>
> +#include <dl-tlsdesc.h>
> +#include <dl-unmap-segments.h>
> +#include <tlsdeschtab.h>
> +
> +/* Unmap the dynamic object, but also release its TLS descriptor table
> + if there is one. */
> +
> +void
> +_dl_unmap (struct link_map *map)
> +{
> + _dl_unmap_segments (map);
> +
> +#ifdef SHARED
> + if (map->l_mach.tlsdesc_table)
> + htab_delete (map->l_mach.tlsdesc_table);
> +#endif
> +}
> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
> new file mode 100644
> index 0000000000..bcab218631
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.sym
> @@ -0,0 +1,19 @@
> +#include <stddef.h>
> +#include <sysdep.h>
> +#include <tls.h>
> +#include <link.h>
> +#include <dl-tlsdesc.h>
> +
> +--
> +
> +-- Abuse tls.h macros to derive offsets relative to the thread register.
> +
> +TLSDESC_ARG offsetof(struct tlsdesc, arg)
> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
> +TCBHEAD_DTV offsetof(tcbhead_t, dtv)
> +DTV_COUNTER offsetof(dtv_t, counter)
> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
> +TLS_DTV_OFFSET TLS_DTV_OFFSET
> +SIZE_OF_DTV sizeof(tcbhead_t)
> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> index 547b1c1b7f..ec32e6d13f 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> @@ -5,3 +5,5 @@ libc.so: calloc
> libc.so: free
> libc.so: malloc
> libc.so: realloc
> +# The dynamic loader needs __tls_get_addr for TLS.
> +ld.so: __tls_get_addr
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-03-04 15:42 ` H.J. Lu
@ 2024-03-08 7:45 ` mengqinggang
0 siblings, 0 replies; 8+ messages in thread
From: mengqinggang @ 2024-03-08 7:45 UTC (permalink / raw)
To: H.J. Lu
Cc: libc-alpha, adhemerval.zanella, xuchenghua, caiyinyu, chenglulu,
cailulu, xry111, i.swmail, maskray, luweining, wanglei,
hejinyang
[-- Attachment #1: Type: text/plain, Size: 30097 bytes --]
Thanks a lot for the review! A v3 patch has been sent.
https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
在 2024/3/4 下午11:42, H.J. Lu 写道:
> On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote:
>> This is mostly based on AArch64 and RISC-V implementation.
>>
>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>
>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>> all vector registers.
>> ---
>> Changes v1 -> v2:
>> - Fix vr24-vr31, xr24-xr31 typo.
>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>
>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>>
>> elf/elf.h | 2 +
>> sysdeps/loongarch/Makefile | 6 +
>> sysdeps/loongarch/dl-link.sym | 1 +
>> sysdeps/loongarch/dl-machine.h | 60 ++-
>> sysdeps/loongarch/dl-tls.h | 9 +-
>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
>> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
>> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
>> sysdeps/loongarch/linkmap.h | 1 +
>> sysdeps/loongarch/sys/asm.h | 1 +
>> sysdeps/loongarch/sys/regdef.h | 1 +
>> sysdeps/loongarch/tlsdesc.c | 39 ++
>> sysdeps/loongarch/tlsdesc.sym | 19 +
>> .../unix/sysv/linux/loongarch/localplt.data | 2 +
>> 14 files changed, 625 insertions(+), 3 deletions(-)
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>> create mode 100644 sysdeps/loongarch/tlsdesc.c
>> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>
>> diff --git a/elf/elf.h b/elf/elf.h
>> index f2206e5c06..eec24ea049 100644
>> --- a/elf/elf.h
>> +++ b/elf/elf.h
>> @@ -4237,6 +4237,8 @@ enum
>> #define R_LARCH_TLS_TPREL32 10
>> #define R_LARCH_TLS_TPREL64 11
>> #define R_LARCH_IRELATIVE 12
>> +#define R_LARCH_TLS_DESC32 13
>> +#define R_LARCH_TLS_DESC64 14
>>
>> /* Reserved for future relocs that the dynamic linker must understand. */
>>
>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>> index 43d2f583cd..181389e787 100644
>> --- a/sysdeps/loongarch/Makefile
>> +++ b/sysdeps/loongarch/Makefile
>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>> endif
>>
>> ifeq ($(subdir),elf)
>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>> gen-as-const-headers += dl-link.sym
>> endif
>>
>> +ifeq ($(subdir),csu)
>> +gen-as-const-headers += tlsdesc.sym
>> +endif
>> +
>> +
>> # LoongArch's assembler also needs to know about PIC as it changes the
>> # definition of some assembler macros.
>> ASFLAGS-.os += $(pic-ccflag)
>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>> index b534968e30..fd81ef37d5 100644
>> --- a/sysdeps/loongarch/dl-link.sym
>> +++ b/sysdeps/loongarch/dl-link.sym
>> @@ -1,6 +1,7 @@
>> #include <stddef.h>
>> #include <sysdep.h>
>> #include <link.h>
>> +#include <dl-tlsdesc.h>
>>
>> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
>> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>> index ab81b82d95..8ca6c224f6 100644
>> --- a/sysdeps/loongarch/dl-machine.h
>> +++ b/sysdeps/loongarch/dl-machine.h
>> @@ -25,7 +25,7 @@
>> #include <entry.h>
>> #include <elf/elf.h>
>> #include <sys/asm.h>
>> -#include <dl-tls.h>
>> +#include <dl-tlsdesc.h>
>> #include <dl-static-tls.h>
>> #include <dl-machine-rel.h>
>>
>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>> break;
>>
>> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>> + {
>> + struct tlsdesc volatile *td =
>> + (struct tlsdesc volatile *)addr_field;
>> + if (! sym)
>> + {
>> + td->arg = (void*)reloc->r_addend;
>> + td->entry = _dl_tlsdesc_undefweak;
>> + }
>> + else
>> + {
>> +# ifndef SHARED
>> + CHECK_STATIC_TLS (map, sym_map);
>> +# else
>> + if (!TRY_STATIC_TLS (map, sym_map))
>> + {
>> + td->arg = _dl_make_tlsdesc_dynamic
>> + (sym_map, sym->st_value + reloc->r_addend);
>> +# if !defined __loongarch_soft_float
>> + if (SUPPORT_LASX)
>> + td->entry = _dl_tlsdesc_dynamic_lasx;
>> + else
>> + if (SUPPORT_LSX)
>> + td->entry = _dl_tlsdesc_dynamic_lsx;
>> + else
>> +# endif
>> + td->entry = _dl_tlsdesc_dynamic;
>> + }
>> + else
>> +# endif
>> + {
>> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>> + + reloc->r_addend);
>> + td->entry = _dl_tlsdesc_return;
>> + }
>> + }
>> + break;
>> + }
>> +
>> case R_LARCH_COPY:
>> {
>> if (sym == NULL)
>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>> else
>> *reloc_addr = map->l_mach.plt;
>> }
>> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>> + {
>> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>> + const ElfW (Sym) *sym = &symtab[symndx];
>> + const struct r_found_version *version = NULL;
>> +
>> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>> + {
>> + const ElfW (Half) *vernum =
>> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>> + version = &map->l_versions[vernum[symndx] & 0x7fff];
>> + }
>> +
>> + /* Always initialize TLS descriptors completely, because lazy
>> + initialization requires synchronization at every TLS access. */
>> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>> + skip_ifunc);
>> + }
>> else
>> _dl_reloc_bad_type (map, r_type, 1);
>> }
>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>> index 29924b866d..de593c002d 100644
>> --- a/sysdeps/loongarch/dl-tls.h
>> +++ b/sysdeps/loongarch/dl-tls.h
>> @@ -16,6 +16,9 @@
>> License along with the GNU C Library. If not, see
>> <https://www.gnu.org/licenses/>. */
>>
>> +#ifndef _DL_TLS_H
>> +#define _DL_TLS_H
>> +
>> /* Type used for the representation of TLS information in the GOT. */
>> typedef struct
>> {
>> @@ -23,6 +26,8 @@ typedef struct
>> unsigned long int ti_offset;
>> } tls_index;
>>
>> +extern void *__tls_get_addr (tls_index *ti);
>> +
>> /* The thread pointer points to the first static TLS block. */
>> #define TLS_TP_OFFSET 0
>>
>> @@ -37,10 +42,10 @@ typedef struct
>> /* Compute the value for a DTPREL reloc. */
>> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>
>> -extern void *__tls_get_addr (tls_index *ti);
>> -
>> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>
>> /* Value used for dtv entries for which the allocation is delayed. */
>> #define TLS_DTV_UNALLOCATED ((void *) -1l)
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> new file mode 100644
>> index 0000000000..0d8c9bb991
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> @@ -0,0 +1,341 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#ifdef USE_LASX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>> +#elif defined USE_LSX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>> +#elif !defined __loongarch_soft_float
>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>> +#else
>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>> +#endif
>> +
>> +#ifdef SHARED
>> + /* Handler for dynamic TLS symbols.
>> + Prototype:
>> + _dl_tlsdesc_dynamic (tlsdesc *) ;
>> +
>> + The second word of the descriptor points to a
>> + tlsdesc_dynamic_arg structure.
>> +
>> + Returns the offset between the thread pointer and the
>> + object referenced by the argument.
>> +
>> + ptrdiff_t
>> + __attribute__ ((__regparm__ (1)))
>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> + {
>> + struct tlsdesc_dynamic_arg *td = tdp->arg;
>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>> + if (__builtin_expect (td->gen_count <= dtv[0].counter
>> + && (dtv[td->tlsinfo.ti_module].pointer.val
>> + != TLS_DTV_UNALLOCATED),
>> + 1))
>> + return dtv[td->tlsinfo.ti_module].pointer.val
>> + + td->tlsinfo.ti_offset
>> + - __thread_pointer;
>> +
>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> + }
>> + */
>> + .hidden _dl_tlsdesc_dynamic
>> + .global _dl_tlsdesc_dynamic
>> + .type _dl_tlsdesc_dynamic,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_dynamic:
>> + /* Save just enough registers to support fast path, if we fall
>> + into slow path we will save additional registers. */
>> + ADDI sp, sp,-24
>> + REG_S t0, sp, 0
>> + REG_S t1, sp, 8
>> + REG_S t2, sp, 16
>> +
>> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
>> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
>> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
>> + bltu t2, t1, Lslow
>> +
>> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
>> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
>> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>> + li.d t2, TLS_DTV_UNALLOCATED
>> + beq t1, t2, Lslow
>> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>> + add.d a0, t1, t2
>> +Lret:
>> + sub.d a0, a0, tp
>> + REG_L t0, sp, 0
>> + REG_L t1, sp, 8
>> + REG_L t2, sp, 16
>> + ADDI sp, sp, 24
>> + RET
>> +
>> +Lslow:
>> + /* This is the slow path. We need to call __tls_get_addr() which
>> + means we need to save and restore all the registers that the
>> + callee will trash. */
>> +
>> + /* Save the remaining registers that we must treat as caller save. */
>> + ADDI sp, sp, -FRAME_SIZE
>> + REG_S ra, sp, 0 * SZREG
>> + REG_S a1, sp, 1 * SZREG
>> + REG_S a2, sp, 2 * SZREG
>> + REG_S a3, sp, 3 * SZREG
>> + REG_S a4, sp, 4 * SZREG
>> + REG_S a5, sp, 5 * SZREG
>> + REG_S a6, sp, 6 * SZREG
>> + REG_S a7, sp, 7 * SZREG
>> + REG_S t4, sp, 8 * SZREG
>> + REG_S t5, sp, 9 * SZREG
>> + REG_S t6, sp, 10 * SZREG
>> + REG_S t7, sp, 11 * SZREG
>> + REG_S t8, sp, 12 * SZREG
>> +
>> +#ifdef USE_LASX
>> + xvst xr0, sp, 13*SZREG + 0*SZXREG
>> + xvst xr1, sp, 13*SZREG + 1*SZXREG
>> + xvst xr2, sp, 13*SZREG + 2*SZXREG
>> + xvst xr3, sp, 13*SZREG + 3*SZXREG
>> + xvst xr4, sp, 13*SZREG + 4*SZXREG
>> + xvst xr5, sp, 13*SZREG + 5*SZXREG
>> + xvst xr6, sp, 13*SZREG + 6*SZXREG
>> + xvst xr7, sp, 13*SZREG + 7*SZXREG
>> + xvst xr8, sp, 13*SZREG + 8*SZXREG
>> + xvst xr9, sp, 13*SZREG + 9*SZXREG
>> + xvst xr10, sp, 13*SZREG + 10*SZXREG
>> + xvst xr11, sp, 13*SZREG + 11*SZXREG
>> + xvst xr12, sp, 13*SZREG + 12*SZXREG
>> + xvst xr13, sp, 13*SZREG + 13*SZXREG
>> + xvst xr14, sp, 13*SZREG + 14*SZXREG
>> + xvst xr15, sp, 13*SZREG + 15*SZXREG
>> + xvst xr16, sp, 13*SZREG + 16*SZXREG
>> + xvst xr17, sp, 13*SZREG + 17*SZXREG
>> + xvst xr18, sp, 13*SZREG + 18*SZXREG
>> + xvst xr19, sp, 13*SZREG + 19*SZXREG
>> + xvst xr20, sp, 13*SZREG + 20*SZXREG
>> + xvst xr21, sp, 13*SZREG + 21*SZXREG
>> + xvst xr22, sp, 13*SZREG + 22*SZXREG
>> + xvst xr23, sp, 13*SZREG + 23*SZXREG
>> + xvst xr24, sp, 13*SZREG + 24*SZXREG
>> + xvst xr25, sp, 13*SZREG + 25*SZXREG
>> + xvst xr26, sp, 13*SZREG + 26*SZXREG
>> + xvst xr27, sp, 13*SZREG + 27*SZXREG
>> + xvst xr28, sp, 13*SZREG + 28*SZXREG
>> + xvst xr29, sp, 13*SZREG + 29*SZXREG
>> + xvst xr30, sp, 13*SZREG + 30*SZXREG
>> + xvst xr31, sp, 13*SZREG + 31*SZXREG
>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> + # some fields in fcsr0
>> + movfcsr2gr t0, fcsr0
>> + REG_S t0, sp, 32*SZXREG
>> +#elif defined USE_LSX
>> + vst vr0, sp, 13*SZREG + 0*SZVREG
>> + vst vr1, sp, 13*SZREG + 1*SZVREG
>> + vst vr2, sp, 13*SZREG + 2*SZVREG
>> + vst vr3, sp, 13*SZREG + 3*SZVREG
>> + vst vr4, sp, 13*SZREG + 4*SZVREG
>> + vst vr5, sp, 13*SZREG + 5*SZVREG
>> + vst vr6, sp, 13*SZREG + 6*SZVREG
>> + vst vr7, sp, 13*SZREG + 7*SZVREG
>> + vst vr8, sp, 13*SZREG + 8*SZVREG
>> + vst vr9, sp, 13*SZREG + 9*SZVREG
>> + vst vr10, sp, 13*SZREG + 10*SZVREG
>> + vst vr11, sp, 13*SZREG + 11*SZVREG
>> + vst vr12, sp, 13*SZREG + 12*SZVREG
>> + vst vr13, sp, 13*SZREG + 13*SZVREG
>> + vst vr14, sp, 13*SZREG + 14*SZVREG
>> + vst vr15, sp, 13*SZREG + 15*SZVREG
>> + vst vr16, sp, 13*SZREG + 16*SZVREG
>> + vst vr17, sp, 13*SZREG + 17*SZVREG
>> + vst vr18, sp, 13*SZREG + 18*SZVREG
>> + vst vr19, sp, 13*SZREG + 19*SZVREG
>> + vst vr20, sp, 13*SZREG + 20*SZVREG
>> + vst vr21, sp, 13*SZREG + 21*SZVREG
>> + vst vr22, sp, 13*SZREG + 22*SZVREG
>> + vst vr23, sp, 13*SZREG + 23*SZVREG
>> + vst vr24, sp, 13*SZREG + 24*SZVREG
>> + vst vr25, sp, 13*SZREG + 25*SZVREG
>> + vst vr26, sp, 13*SZREG + 26*SZVREG
>> + vst vr27, sp, 13*SZREG + 27*SZVREG
>> + vst vr28, sp, 13*SZREG + 28*SZVREG
>> + vst vr29, sp, 13*SZREG + 29*SZVREG
>> + vst vr30, sp, 13*SZREG + 30*SZVREG
>> + vst vr31, sp, 13*SZREG + 31*SZVREG
>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> + # some fields in fcsr0
>> + movfcsr2gr t0, fcsr0
>> + REG_S t0, sp, 32*SZVREG
>> +#elif !defined __loongarch_soft_float
>> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
>> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
>> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
>> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
>> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
>> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
>> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
>> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
>> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
>> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
>> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
>> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
>> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
>> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
>> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
>> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
>> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
>> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
>> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
>> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
>> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
>> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
>> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
>> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> + # some fields in fcsr0
>> + movfcsr2gr t0, fcsr0
>> + REG_S t0, sp, 24*SZFREG
>> +#endif /* #ifdef USE_LASX */
>> +
>> + bl __tls_get_addr
>> + ADDI a0, a0, -TLS_DTV_OFFSET
>> +
>> + REG_L ra, sp, 0
>> + REG_L a1, sp, 1 * 8
>> + REG_L a2, sp, 2 * 8
>> + REG_L a3, sp, 3 * 8
>> + REG_L a4, sp, 4 * 8
>> + REG_L a5, sp, 5 * 8
>> + REG_L a6, sp, 6 * 8
>> + REG_L a7, sp, 7 * 8
>> + REG_L t4, sp, 8 * 8
>> + REG_L t5, sp, 9 * 8
>> + REG_L t6, sp, 10 * 8
>> + REG_L t7, sp, 11 * 8
>> + REG_L t8, sp, 12 * 8
>> +
>> +#ifdef USE_LASX
>> + xvld xr0, sp, 13*SZREG + 0*SZXREG
>> + xvld xr1, sp, 13*SZREG + 1*SZXREG
>> + xvld xr2, sp, 13*SZREG + 2*SZXREG
>> + xvld xr3, sp, 13*SZREG + 3*SZXREG
>> + xvld xr4, sp, 13*SZREG + 4*SZXREG
>> + xvld xr5, sp, 13*SZREG + 5*SZXREG
>> + xvld xr6, sp, 13*SZREG + 6*SZXREG
>> + xvld xr7, sp, 13*SZREG + 7*SZXREG
>> + xvld xr8, sp, 13*SZREG + 8*SZXREG
>> + xvld xr9, sp, 13*SZREG + 9*SZXREG
>> + xvld xr10, sp, 13*SZREG + 10*SZXREG
>> + xvld xr11, sp, 13*SZREG + 11*SZXREG
>> + xvld xr12, sp, 13*SZREG + 12*SZXREG
>> + xvld xr13, sp, 13*SZREG + 13*SZXREG
>> + xvld xr14, sp, 13*SZREG + 14*SZXREG
>> + xvld xr15, sp, 13*SZREG + 15*SZXREG
>> + xvld xr16, sp, 13*SZREG + 16*SZXREG
>> + xvld xr17, sp, 13*SZREG + 17*SZXREG
>> + xvld xr18, sp, 13*SZREG + 18*SZXREG
>> + xvld xr19, sp, 13*SZREG + 19*SZXREG
>> + xvld xr20, sp, 13*SZREG + 20*SZXREG
>> + xvld xr21, sp, 13*SZREG + 21*SZXREG
>> + xvld xr22, sp, 13*SZREG + 22*SZXREG
>> + xvld xr23, sp, 13*SZREG + 23*SZXREG
>> + xvld xr24, sp, 13*SZREG + 24*SZXREG
>> + xvld xr25, sp, 13*SZREG + 25*SZXREG
>> + xvld xr26, sp, 13*SZREG + 26*SZXREG
>> + xvld xr27, sp, 13*SZREG + 27*SZXREG
>> + xvld xr28, sp, 13*SZREG + 28*SZXREG
>> + xvld xr29, sp, 13*SZREG + 29*SZXREG
>> + xvld xr30, sp, 13*SZREG + 30*SZXREG
>> + xvld xr31, sp, 13*SZREG + 31*SZXREG
>> + REG_L t0, sp, 32*SZXREG
>> + movgr2fcsr fcsr0, t0
>> +#elif defined USE_LSX
>> + vld vr0, sp, 13*SZREG + 0*SZVREG
>> + vld vr1, sp, 13*SZREG + 1*SZVREG
>> + vld vr2, sp, 13*SZREG + 2*SZVREG
>> + vld vr3, sp, 13*SZREG + 3*SZVREG
>> + vld vr4, sp, 13*SZREG + 4*SZVREG
>> + vld vr5, sp, 13*SZREG + 5*SZVREG
>> + vld vr6, sp, 13*SZREG + 6*SZVREG
>> + vld vr7, sp, 13*SZREG + 7*SZVREG
>> + vld vr8, sp, 13*SZREG + 8*SZVREG
>> + vld vr9, sp, 13*SZREG + 9*SZVREG
>> + vld vr10, sp, 13*SZREG + 10*SZVREG
>> + vld vr11, sp, 13*SZREG + 11*SZVREG
>> + vld vr12, sp, 13*SZREG + 12*SZVREG
>> + vld vr13, sp, 13*SZREG + 13*SZVREG
>> + vld vr14, sp, 13*SZREG + 14*SZVREG
>> + vld vr15, sp, 13*SZREG + 15*SZVREG
>> + vld vr16, sp, 13*SZREG + 16*SZVREG
>> + vld vr17, sp, 13*SZREG + 17*SZVREG
>> + vld vr18, sp, 13*SZREG + 18*SZVREG
>> + vld vr19, sp, 13*SZREG + 19*SZVREG
>> + vld vr20, sp, 13*SZREG + 20*SZVREG
>> + vld vr21, sp, 13*SZREG + 21*SZVREG
>> + vld vr22, sp, 13*SZREG + 22*SZVREG
>> + vld vr23, sp, 13*SZREG + 23*SZVREG
>> + vld vr24, sp, 13*SZREG + 24*SZVREG
>> + vld vr25, sp, 13*SZREG + 25*SZVREG
>> + vld vr26, sp, 13*SZREG + 26*SZVREG
>> + vld vr27, sp, 13*SZREG + 27*SZVREG
>> + vld vr28, sp, 13*SZREG + 28*SZVREG
>> + vld vr29, sp, 13*SZREG + 29*SZVREG
>> + vld vr30, sp, 13*SZREG + 30*SZVREG
>> + vld vr31, sp, 13*SZREG + 31*SZVREG
>> + REG_L t0, sp, 32*SZVREG
>> + movgr2fcsr fcsr0, t0
>> +#elif !defined __loongarch_soft_float
>> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
>> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
>> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
>> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
>> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
>> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
>> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
>> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
>> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
>> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
>> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
>> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
>> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
>> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
>> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
>> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
>> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
>> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
>> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
>> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
>> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
>> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
>> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
>> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
>> + REG_L t0, sp, 24*SZFREG
>> + movgr2fcsr fcsr0, t0
>> +#endif /* #ifdef USE_LASX */
>> +
>> + ADDI sp, sp, FRAME_SIZE
>> + b Lret
>> + cfi_endproc
>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> +#endif /* #ifdef SHARED */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>> new file mode 100644
>> index 0000000000..4a17079169
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>> @@ -0,0 +1,93 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include "tlsdesc.h"
>> +
>> + .text
>> +
>> + /* Compute the thread pointer offset for symbols in the static
>> + TLS block. The offset is the same for all threads.
>> + Prototype:
>> + _dl_tlsdesc_return (tlsdesc *); */
>> + .hidden _dl_tlsdesc_return
>> + .global _dl_tlsdesc_return
>> + .type _dl_tlsdesc_return,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_return:
>> + REG_L a0, a0, 8
>> + RET
>> + cfi_endproc
>> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
>> +
>> + /* Handler for undefined weak TLS symbols.
>> + Prototype:
>> + _dl_tlsdesc_undefweak (tlsdesc *);
>> +
>> + The second word of the descriptor contains the addend.
>> + Return the addend minus the thread pointer. This ensures
>> + that when the caller adds on the thread pointer it gets back
>> + the addend. */
>> + .hidden _dl_tlsdesc_undefweak
>> + .global _dl_tlsdesc_undefweak
>> + .type _dl_tlsdesc_undefweak,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_undefweak:
>> + REG_L a0, a0, 8
>> + sub.d a0, a0, tp
>> + RET
>> + cfi_endproc
>> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>> +
>> +
>> +#ifdef SHARED
>> +
>> +#if !defined __loongarch_soft_float
>> +
>> +#define USE_LASX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>> +#define Lret Lret_lasx
>> +#define Lslow Lslow_lasx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LASX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#define USE_LSX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>> +#define Lret Lret_lsx
>> +#define Lslow Lslow_lsx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LSX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#endif
>> +
>> +#include "dl-tlsdesc-dynamic.h"
>> +
>> +#endif /* #ifdef SHARED */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>> new file mode 100644
>> index 0000000000..988037a714
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>> @@ -0,0 +1,53 @@
>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#ifndef _DL_TLSDESC_H
>> +#define _DL_TLSDESC_H
>> +
>> +#include <dl-tls.h>
>> +
>> +/* Type used to represent a TLS descriptor in the GOT. */
>> +struct tlsdesc
>> +{
>> + ptrdiff_t (*entry) (struct tlsdesc *);
>> + void *arg;
>> +};
>> +
>> +/* Type used as the argument in a TLS descriptor for a symbol that
>> + needs dynamic TLS offsets. */
>> +struct tlsdesc_dynamic_arg
>> +{
>> + tls_index tlsinfo;
>> + size_t gen_count;
>> +};
>> +
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>> +
>> +# ifdef SHARED
>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>> +#if !defined __loongarch_soft_float
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>> +#endif
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>> +#endif
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>> index 4d8737ee7f..9b1773634c 100644
>> --- a/sysdeps/loongarch/linkmap.h
>> +++ b/sysdeps/loongarch/linkmap.h
>> @@ -19,4 +19,5 @@
>> struct link_map_machine
>> {
>> ElfW (Addr) plt; /* Address of .plt. */
>> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
>> };
>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>> index 51521a7eb4..23c1d12914 100644
>> --- a/sysdeps/loongarch/sys/asm.h
>> +++ b/sysdeps/loongarch/sys/asm.h
>> @@ -25,6 +25,7 @@
>> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
>> #define SZREG 8
>> #define SZFREG 8
>> +#define SZFCSREG 4
>> #define SZVREG 16
>> #define SZXREG 32
>> #define REG_L ld.d
>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>> index f61ee25b25..80ce3e9c00 100644
>> --- a/sysdeps/loongarch/sys/regdef.h
>> +++ b/sysdeps/loongarch/sys/regdef.h
>> @@ -97,6 +97,7 @@
>> #define fcc5 $fcc5
>> #define fcc6 $fcc6
>> #define fcc7 $fcc7
>> +#define fcsr0 $fcsr0
>>
>> #define vr0 $vr0
>> #define vr1 $vr1
>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>> new file mode 100644
>> index 0000000000..a357e7619f
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.c
>> @@ -0,0 +1,39 @@
>> +/* Manage TLS descriptors. AArch64 version.
> Change it: this is the LoongArch version, not AArch64.
>> +
>>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-03-05 19:29 ` Adhemerval Zanella Netto
@ 2024-03-08 7:53 ` mengqinggang
2024-03-08 14:10 ` Adhemerval Zanella Netto
0 siblings, 1 reply; 8+ messages in thread
From: mengqinggang @ 2024-03-08 7:53 UTC (permalink / raw)
To: Adhemerval Zanella Netto, libc-alpha
Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
maskray, luweining, wanglei, hejinyang
Thanks a lot for the review! A v3 patch has been sent:
https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
Some replies are inline below.
在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>
> On 28/02/24 22:43, mengqinggang wrote:
>> This is mostly based on AArch64 and RISC-V implementation.
>>
>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>
>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>> all vector registers.
>> ---
>> Changes v1 -> v2:
>> - Fix vr24-vr31, xr24-xr31 typo.
>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>
>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
> Patch looks ok, some comments below.
>
>> elf/elf.h | 2 +
>> sysdeps/loongarch/Makefile | 6 +
>> sysdeps/loongarch/dl-link.sym | 1 +
>> sysdeps/loongarch/dl-machine.h | 60 ++-
>> sysdeps/loongarch/dl-tls.h | 9 +-
>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
>> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
>> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
>> sysdeps/loongarch/linkmap.h | 1 +
>> sysdeps/loongarch/sys/asm.h | 1 +
>> sysdeps/loongarch/sys/regdef.h | 1 +
>> sysdeps/loongarch/tlsdesc.c | 39 ++
>> sysdeps/loongarch/tlsdesc.sym | 19 +
>> .../unix/sysv/linux/loongarch/localplt.data | 2 +
>> 14 files changed, 625 insertions(+), 3 deletions(-)
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>> create mode 100644 sysdeps/loongarch/tlsdesc.c
>> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>
>> diff --git a/elf/elf.h b/elf/elf.h
>> index f2206e5c06..eec24ea049 100644
>> --- a/elf/elf.h
>> +++ b/elf/elf.h
>> @@ -4237,6 +4237,8 @@ enum
>> #define R_LARCH_TLS_TPREL32 10
>> #define R_LARCH_TLS_TPREL64 11
>> #define R_LARCH_IRELATIVE 12
>> +#define R_LARCH_TLS_DESC32 13
>> +#define R_LARCH_TLS_DESC64 14
>>
>> /* Reserved for future relocs that the dynamic linker must understand. */
>>
>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>> index 43d2f583cd..181389e787 100644
>> --- a/sysdeps/loongarch/Makefile
>> +++ b/sysdeps/loongarch/Makefile
>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>> endif
>>
>> ifeq ($(subdir),elf)
>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>> gen-as-const-headers += dl-link.sym
>> endif
>>
>> +ifeq ($(subdir),csu)
>> +gen-as-const-headers += tlsdesc.sym
>> +endif
>> +
>> +
>> # LoongArch's assembler also needs to know about PIC as it changes the
>> # definition of some assembler macros.
>> ASFLAGS-.os += $(pic-ccflag)
>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>> index b534968e30..fd81ef37d5 100644
>> --- a/sysdeps/loongarch/dl-link.sym
>> +++ b/sysdeps/loongarch/dl-link.sym
>> @@ -1,6 +1,7 @@
>> #include <stddef.h>
>> #include <sysdep.h>
>> #include <link.h>
>> +#include <dl-tlsdesc.h>
>>
>> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
>> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>> index ab81b82d95..8ca6c224f6 100644
>> --- a/sysdeps/loongarch/dl-machine.h
>> +++ b/sysdeps/loongarch/dl-machine.h
>> @@ -25,7 +25,7 @@
>> #include <entry.h>
>> #include <elf/elf.h>
>> #include <sys/asm.h>
>> -#include <dl-tls.h>
>> +#include <dl-tlsdesc.h>
>> #include <dl-static-tls.h>
>> #include <dl-machine-rel.h>
>>
>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>> break;
>>
>> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>> + {
>> + struct tlsdesc volatile *td =
>> + (struct tlsdesc volatile *)addr_field;
>> + if (! sym)
>> + {
>> + td->arg = (void*)reloc->r_addend;
>> + td->entry = _dl_tlsdesc_undefweak;
>> + }
>> + else
>> + {
>> +# ifndef SHARED
>> + CHECK_STATIC_TLS (map, sym_map);
>> +# else
>> + if (!TRY_STATIC_TLS (map, sym_map))
>> + {
>> + td->arg = _dl_make_tlsdesc_dynamic
>> + (sym_map, sym->st_value + reloc->r_addend);
>> +# if !defined __loongarch_soft_float
>> + if (SUPPORT_LASX)
>> + td->entry = _dl_tlsdesc_dynamic_lasx;
>> + else
>> + if (SUPPORT_LSX)
>> + td->entry = _dl_tlsdesc_dynamic_lsx;
>> + else
>> +# endif
>> + td->entry = _dl_tlsdesc_dynamic;
>> + }
>> + else
>> +# endif
>> + {
>> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>> + + reloc->r_addend);
>> + td->entry = _dl_tlsdesc_return;
>> + }
>> + }
>> + break;
>> + }
>> +
>> case R_LARCH_COPY:
>> {
>> if (sym == NULL)
>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>> else
>> *reloc_addr = map->l_mach.plt;
>> }
>> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
> Use __glibc_likely here.
>
>> + {
>> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>> + const ElfW (Sym) *sym = &symtab[symndx];
>> + const struct r_found_version *version = NULL;
>> +
>> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>> + {
>> + const ElfW (Half) *vernum =
>> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>> + version = &map->l_versions[vernum[symndx] & 0x7fff];
>> + }
>> +
>> + /* Always initialize TLS descriptors completely, because lazy
>> + initialization requires synchronization at every TLS access. */
>> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>> + skip_ifunc);
>> + }
>> else
>> _dl_reloc_bad_type (map, r_type, 1);
>> }
>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>> index 29924b866d..de593c002d 100644
>> --- a/sysdeps/loongarch/dl-tls.h
>> +++ b/sysdeps/loongarch/dl-tls.h
>> @@ -16,6 +16,9 @@
>> License along with the GNU C Library. If not, see
>> <https://www.gnu.org/licenses/>. */
>>
>> +#ifndef _DL_TLS_H
>> +#define _DL_TLS_H
>> +
>> /* Type used for the representation of TLS information in the GOT. */
>> typedef struct
>> {
>> @@ -23,6 +26,8 @@ typedef struct
>> unsigned long int ti_offset;
>> } tls_index;
>>
>> +extern void *__tls_get_addr (tls_index *ti);
>> +
>> /* The thread pointer points to the first static TLS block. */
>> #define TLS_TP_OFFSET 0
>>
>> @@ -37,10 +42,10 @@ typedef struct
>> /* Compute the value for a DTPREL reloc. */
>> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>
>> -extern void *__tls_get_addr (tls_index *ti);
>> -
> Why move the function prototype?
It was probably moved just to get it out of the middle of a bunch of macros.
>
>> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>
>> /* Value used for dtv entries for which the allocation is delayed. */
>> #define TLS_DTV_UNALLOCATED ((void *) -1l)
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> new file mode 100644
>> index 0000000000..0d8c9bb991
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> @@ -0,0 +1,341 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> Update Copyright years to 2024.
>
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#ifdef USE_LASX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>> +#elif defined USE_LSX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>> +#elif !defined __loongarch_soft_float
>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>> +#else
>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>> +#endif
> I don't have a strong opinion, but another option that might be simpler is
> to provide only one _dl_tlsdesc_dynamic implementation and check the
> required save/restore of vector registers based on the hwcap value.
The v3 patch provides only one _dl_tlsdesc_dynamic implementation.
>> +
>> +#ifdef SHARED
>> + /* Handler for dynamic TLS symbols.
>> + Prototype:
>> + _dl_tlsdesc_dynamic (tlsdesc *) ;
>> +
>> + The second word of the descriptor points to a
>> + tlsdesc_dynamic_arg structure.
>> +
>> + Returns the offset between the thread pointer and the
>> + object referenced by the argument.
>> +
>> + ptrdiff_t
>> + __attribute__ ((__regparm__ (1)))
> Does this attribute really make sense for loongarch?
This line has been deleted.
>
>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> + {
>> + struct tlsdesc_dynamic_arg *td = tdp->arg;
>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>> + if (__builtin_expect (td->gen_count <= dtv[0].counter
> Use __glibc_likely (the expected value is 1) or just remove the __builtin_expect for clarity.
>
>> + && (dtv[td->tlsinfo.ti_module].pointer.val
>> + != TLS_DTV_UNALLOCATED),
>> + 1))
>> + return dtv[td->tlsinfo.ti_module].pointer.val
>> + + td->tlsinfo.ti_offset
>> + - __thread_pointer;
>> +
>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> + }
>> + */
>> + .hidden _dl_tlsdesc_dynamic
>> + .global _dl_tlsdesc_dynamic
>> + .type _dl_tlsdesc_dynamic,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_dynamic:
>> + /* Save just enough registers to support fast path, if we fall
>> + into slow path we will save additional registers. */
>> + ADDI sp, sp,-24
>> + REG_S t0, sp, 0
>> + REG_S t1, sp, 8
>> + REG_S t2, sp, 16
>> +
>> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
>> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
>> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
>> + bltu t2, t1, Lslow
>> +
>> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
>> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
>> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>> + li.d t2, TLS_DTV_UNALLOCATED
>> + beq t1, t2, Lslow
>> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>> + add.d a0, t1, t2
>> +Lret:
>> + sub.d a0, a0, tp
>> + REG_L t0, sp, 0
>> + REG_L t1, sp, 8
>> + REG_L t2, sp, 16
>> + ADDI sp, sp, 24
>> + RET
>> +
>> +Lslow:
>> + /* This is the slow path. We need to call __tls_get_addr() which
>> + means we need to save and restore all the registers that the
>> + callee will trash. */
>> +
>> + /* Save the remaining registers that we must treat as caller save. */
>> + ADDI sp, sp, -FRAME_SIZE
>> + REG_S ra, sp, 0 * SZREG
>> + REG_S a1, sp, 1 * SZREG
>> + REG_S a2, sp, 2 * SZREG
>> + REG_S a3, sp, 3 * SZREG
>> + REG_S a4, sp, 4 * SZREG
>> + REG_S a5, sp, 5 * SZREG
>> + REG_S a6, sp, 6 * SZREG
>> + REG_S a7, sp, 7 * SZREG
>> + REG_S t4, sp, 8 * SZREG
>> + REG_S t5, sp, 9 * SZREG
>> + REG_S t6, sp, 10 * SZREG
>> + REG_S t7, sp, 11 * SZREG
>> + REG_S t8, sp, 12 * SZREG
>> +
>> +#ifdef USE_LASX
>> + xvst xr0, sp, 13*SZREG + 0*SZXREG
>> + xvst xr1, sp, 13*SZREG + 1*SZXREG
>> + xvst xr2, sp, 13*SZREG + 2*SZXREG
>> + xvst xr3, sp, 13*SZREG + 3*SZXREG
>> + xvst xr4, sp, 13*SZREG + 4*SZXREG
>> + xvst xr5, sp, 13*SZREG + 5*SZXREG
>> + xvst xr6, sp, 13*SZREG + 6*SZXREG
>> + xvst xr7, sp, 13*SZREG + 7*SZXREG
>> + xvst xr8, sp, 13*SZREG + 8*SZXREG
>> + xvst xr9, sp, 13*SZREG + 9*SZXREG
>> + xvst xr10, sp, 13*SZREG + 10*SZXREG
>> + xvst xr11, sp, 13*SZREG + 11*SZXREG
>> + xvst xr12, sp, 13*SZREG + 12*SZXREG
>> + xvst xr13, sp, 13*SZREG + 13*SZXREG
>> + xvst xr14, sp, 13*SZREG + 14*SZXREG
>> + xvst xr15, sp, 13*SZREG + 15*SZXREG
>> + xvst xr16, sp, 13*SZREG + 16*SZXREG
>> + xvst xr17, sp, 13*SZREG + 17*SZXREG
>> + xvst xr18, sp, 13*SZREG + 18*SZXREG
>> + xvst xr19, sp, 13*SZREG + 19*SZXREG
>> + xvst xr20, sp, 13*SZREG + 20*SZXREG
>> + xvst xr21, sp, 13*SZREG + 21*SZXREG
>> + xvst xr22, sp, 13*SZREG + 22*SZXREG
>> + xvst xr23, sp, 13*SZREG + 23*SZXREG
>> + xvst xr24, sp, 13*SZREG + 24*SZXREG
>> + xvst xr25, sp, 13*SZREG + 25*SZXREG
>> + xvst xr26, sp, 13*SZREG + 26*SZXREG
>> + xvst xr27, sp, 13*SZREG + 27*SZXREG
>> + xvst xr28, sp, 13*SZREG + 28*SZXREG
>> + xvst xr29, sp, 13*SZREG + 29*SZXREG
>> + xvst xr30, sp, 13*SZREG + 30*SZXREG
>> + xvst xr31, sp, 13*SZREG + 31*SZXREG
>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> + # some fields in fcsr0
>> + movfcsr2gr t0, fcsr0
>> + REG_S t0, sp, 32*SZXREG
>> +#elif defined USE_LSX
>> + vst vr0, sp, 13*SZREG + 0*SZVREG
>> + vst vr1, sp, 13*SZREG + 1*SZVREG
>> + vst vr2, sp, 13*SZREG + 2*SZVREG
>> + vst vr3, sp, 13*SZREG + 3*SZVREG
>> + vst vr4, sp, 13*SZREG + 4*SZVREG
>> + vst vr5, sp, 13*SZREG + 5*SZVREG
>> + vst vr6, sp, 13*SZREG + 6*SZVREG
>> + vst vr7, sp, 13*SZREG + 7*SZVREG
>> + vst vr8, sp, 13*SZREG + 8*SZVREG
>> + vst vr9, sp, 13*SZREG + 9*SZVREG
>> + vst vr10, sp, 13*SZREG + 10*SZVREG
>> + vst vr11, sp, 13*SZREG + 11*SZVREG
>> + vst vr12, sp, 13*SZREG + 12*SZVREG
>> + vst vr13, sp, 13*SZREG + 13*SZVREG
>> + vst vr14, sp, 13*SZREG + 14*SZVREG
>> + vst vr15, sp, 13*SZREG + 15*SZVREG
>> + vst vr16, sp, 13*SZREG + 16*SZVREG
>> + vst vr17, sp, 13*SZREG + 17*SZVREG
>> + vst vr18, sp, 13*SZREG + 18*SZVREG
>> + vst vr19, sp, 13*SZREG + 19*SZVREG
>> + vst vr20, sp, 13*SZREG + 20*SZVREG
>> + vst vr21, sp, 13*SZREG + 21*SZVREG
>> + vst vr22, sp, 13*SZREG + 22*SZVREG
>> + vst vr23, sp, 13*SZREG + 23*SZVREG
>> + vst vr24, sp, 13*SZREG + 24*SZVREG
>> + vst vr25, sp, 13*SZREG + 25*SZVREG
>> + vst vr26, sp, 13*SZREG + 26*SZVREG
>> + vst vr27, sp, 13*SZREG + 27*SZVREG
>> + vst vr28, sp, 13*SZREG + 28*SZVREG
>> + vst vr29, sp, 13*SZREG + 29*SZVREG
>> + vst vr30, sp, 13*SZREG + 30*SZVREG
>> + vst vr31, sp, 13*SZREG + 31*SZVREG
>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> + # some fields in fcsr0
>> + movfcsr2gr t0, fcsr0
>> + REG_S t0, sp, 32*SZVREG
>> +#elif !defined __loongarch_soft_float
>> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
>> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
>> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
>> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
>> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
>> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
>> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
>> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
>> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
>> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
>> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
>> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
>> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
>> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
>> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
>> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
>> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
>> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
>> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
>> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
>> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
>> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
>> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
>> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> + # some fields in fcsr0
>> + movfcsr2gr t0, fcsr0
>> + REG_S t0, sp, 24*SZFREG
>> +#endif /* #ifdef USE_LASX */
>> +
>> + bl __tls_get_addr
>> + ADDI a0, a0, -TLS_DTV_OFFSET
>> +
>> + REG_L ra, sp, 0
>> + REG_L a1, sp, 1 * 8
>> + REG_L a2, sp, 2 * 8
>> + REG_L a3, sp, 3 * 8
>> + REG_L a4, sp, 4 * 8
>> + REG_L a5, sp, 5 * 8
>> + REG_L a6, sp, 6 * 8
>> + REG_L a7, sp, 7 * 8
>> + REG_L t4, sp, 8 * 8
>> + REG_L t5, sp, 9 * 8
>> + REG_L t6, sp, 10 * 8
>> + REG_L t7, sp, 11 * 8
>> + REG_L t8, sp, 12 * 8
>> +
>> +#ifdef USE_LASX
>> + xvld xr0, sp, 13*SZREG + 0*SZXREG
>> + xvld xr1, sp, 13*SZREG + 1*SZXREG
>> + xvld xr2, sp, 13*SZREG + 2*SZXREG
>> + xvld xr3, sp, 13*SZREG + 3*SZXREG
>> + xvld xr4, sp, 13*SZREG + 4*SZXREG
>> + xvld xr5, sp, 13*SZREG + 5*SZXREG
>> + xvld xr6, sp, 13*SZREG + 6*SZXREG
>> + xvld xr7, sp, 13*SZREG + 7*SZXREG
>> + xvld xr8, sp, 13*SZREG + 8*SZXREG
>> + xvld xr9, sp, 13*SZREG + 9*SZXREG
>> + xvld xr10, sp, 13*SZREG + 10*SZXREG
>> + xvld xr11, sp, 13*SZREG + 11*SZXREG
>> + xvld xr12, sp, 13*SZREG + 12*SZXREG
>> + xvld xr13, sp, 13*SZREG + 13*SZXREG
>> + xvld xr14, sp, 13*SZREG + 14*SZXREG
>> + xvld xr15, sp, 13*SZREG + 15*SZXREG
>> + xvld xr16, sp, 13*SZREG + 16*SZXREG
>> + xvld xr17, sp, 13*SZREG + 17*SZXREG
>> + xvld xr18, sp, 13*SZREG + 18*SZXREG
>> + xvld xr19, sp, 13*SZREG + 19*SZXREG
>> + xvld xr20, sp, 13*SZREG + 20*SZXREG
>> + xvld xr21, sp, 13*SZREG + 21*SZXREG
>> + xvld xr22, sp, 13*SZREG + 22*SZXREG
>> + xvld xr23, sp, 13*SZREG + 23*SZXREG
>> + xvld xr24, sp, 13*SZREG + 24*SZXREG
>> + xvld xr25, sp, 13*SZREG + 25*SZXREG
>> + xvld xr26, sp, 13*SZREG + 26*SZXREG
>> + xvld xr27, sp, 13*SZREG + 27*SZXREG
>> + xvld xr28, sp, 13*SZREG + 28*SZXREG
>> + xvld xr29, sp, 13*SZREG + 29*SZXREG
>> + xvld xr30, sp, 13*SZREG + 30*SZXREG
>> + xvld xr31, sp, 13*SZREG + 31*SZXREG
>> + REG_L t0, sp, 32*SZXREG
>> + movgr2fcsr fcsr0, t0
>> +#elif defined USE_LSX
>> + vld vr0, sp, 13*SZREG + 0*SZVREG
>> + vld vr1, sp, 13*SZREG + 1*SZVREG
>> + vld vr2, sp, 13*SZREG + 2*SZVREG
>> + vld vr3, sp, 13*SZREG + 3*SZVREG
>> + vld vr4, sp, 13*SZREG + 4*SZVREG
>> + vld vr5, sp, 13*SZREG + 5*SZVREG
>> + vld vr6, sp, 13*SZREG + 6*SZVREG
>> + vld vr7, sp, 13*SZREG + 7*SZVREG
>> + vld vr8, sp, 13*SZREG + 8*SZVREG
>> + vld vr9, sp, 13*SZREG + 9*SZVREG
>> + vld vr10, sp, 13*SZREG + 10*SZVREG
>> + vld vr11, sp, 13*SZREG + 11*SZVREG
>> + vld vr12, sp, 13*SZREG + 12*SZVREG
>> + vld vr13, sp, 13*SZREG + 13*SZVREG
>> + vld vr14, sp, 13*SZREG + 14*SZVREG
>> + vld vr15, sp, 13*SZREG + 15*SZVREG
>> + vld vr16, sp, 13*SZREG + 16*SZVREG
>> + vld vr17, sp, 13*SZREG + 17*SZVREG
>> + vld vr18, sp, 13*SZREG + 18*SZVREG
>> + vld vr19, sp, 13*SZREG + 19*SZVREG
>> + vld vr20, sp, 13*SZREG + 20*SZVREG
>> + vld vr21, sp, 13*SZREG + 21*SZVREG
>> + vld vr22, sp, 13*SZREG + 22*SZVREG
>> + vld vr23, sp, 13*SZREG + 23*SZVREG
>> + vld vr24, sp, 13*SZREG + 24*SZVREG
>> + vld vr25, sp, 13*SZREG + 25*SZVREG
>> + vld vr26, sp, 13*SZREG + 26*SZVREG
>> + vld vr27, sp, 13*SZREG + 27*SZVREG
>> + vld vr28, sp, 13*SZREG + 28*SZVREG
>> + vld vr29, sp, 13*SZREG + 29*SZVREG
>> + vld vr30, sp, 13*SZREG + 30*SZVREG
>> + vld vr31, sp, 13*SZREG + 31*SZVREG
>> + REG_L t0, sp, 32*SZVREG
>> + movgr2fcsr fcsr0, t0
>> +#elif !defined __loongarch_soft_float
>> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
>> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
>> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
>> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
>> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
>> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
>> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
>> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
>> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
>> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
>> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
>> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
>> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
>> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
>> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
>> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
>> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
>> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
>> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
>> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
>> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
>> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
>> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
>> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
>> + REG_L t0, sp, 24*SZFREG
>> + movgr2fcsr fcsr0, t0
>> +#endif /* #ifdef USE_LASX */
>> +
>> + ADDI sp, sp, FRAME_SIZE
>> + b Lret
>> + cfi_endproc
>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> +#endif /* #ifdef SHARED */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>> new file mode 100644
>> index 0000000000..4a17079169
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>> @@ -0,0 +1,93 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> Update Copyright years to 2024.
>
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include "tlsdesc.h"
>> +
>> + .text
>> +
>> + /* Compute the thread pointer offset for symbols in the static
>> + TLS block. The offset is the same for all threads.
>> + Prototype:
>> + _dl_tlsdesc_return (tlsdesc *); */
>> + .hidden _dl_tlsdesc_return
>> + .global _dl_tlsdesc_return
>> + .type _dl_tlsdesc_return,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_return:
>> + REG_L a0, a0, 8
>> + RET
>> + cfi_endproc
>> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
>> +
>> + /* Handler for undefined weak TLS symbols.
>> + Prototype:
>> + _dl_tlsdesc_undefweak (tlsdesc *);
>> +
>> + The second word of the descriptor contains the addend.
>> + Return the addend minus the thread pointer. This ensures
>> + that when the caller adds on the thread pointer it gets back
>> + the addend. */
>> + .hidden _dl_tlsdesc_undefweak
>> + .global _dl_tlsdesc_undefweak
>> + .type _dl_tlsdesc_undefweak,%function
>> + cfi_startproc
>> + .align 2
>> +_dl_tlsdesc_undefweak:
>> + REG_L a0, a0, 8
>> + sub.d a0, a0, tp
>> + RET
>> + cfi_endproc
>> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>> +
>> +
>> +#ifdef SHARED
>> +
>> +#if !defined __loongarch_soft_float
>> +
>> +#define USE_LASX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>> +#define Lret Lret_lasx
>> +#define Lslow Lslow_lasx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LASX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#define USE_LSX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>> +#define Lret Lret_lsx
>> +#define Lslow Lslow_lsx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LSX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#endif
>> +
>> +#include "dl-tlsdesc-dynamic.h"
>> +
>> +#endif /* #ifdef SHARED */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>> new file mode 100644
>> index 0000000000..988037a714
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>> @@ -0,0 +1,53 @@
>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>> + LoongArch version.
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#ifndef _DL_TLSDESC_H
>> +#define _DL_TLSDESC_H
>> +
>> +#include <dl-tls.h>
>> +
>> +/* Type used to represent a TLS descriptor in the GOT. */
>> +struct tlsdesc
>> +{
>> + ptrdiff_t (*entry) (struct tlsdesc *);
>> + void *arg;
>> +};
>> +
>> +/* Type used as the argument in a TLS descriptor for a symbol that
>> + needs dynamic TLS offsets. */
>> +struct tlsdesc_dynamic_arg
>> +{
>> + tls_index tlsinfo;
>> + size_t gen_count;
>> +};
>> +
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>> +
>> +# ifdef SHARED
>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>> +#if !defined __loongarch_soft_float
> Minor style: for single tests we usually use '#ifndef', and
> attribute_hidden goes at the end of the prototype.
>
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>> +#endif
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>> +#endif
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>> index 4d8737ee7f..9b1773634c 100644
>> --- a/sysdeps/loongarch/linkmap.h
>> +++ b/sysdeps/loongarch/linkmap.h
>> @@ -19,4 +19,5 @@
>> struct link_map_machine
>> {
>> ElfW (Addr) plt; /* Address of .plt. */
>> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
>> };
>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>> index 51521a7eb4..23c1d12914 100644
>> --- a/sysdeps/loongarch/sys/asm.h
>> +++ b/sysdeps/loongarch/sys/asm.h
>> @@ -25,6 +25,7 @@
>> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
>> #define SZREG 8
>> #define SZFREG 8
>> +#define SZFCSREG 4
>> #define SZVREG 16
>> #define SZXREG 32
>> #define REG_L ld.d
>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>> index f61ee25b25..80ce3e9c00 100644
>> --- a/sysdeps/loongarch/sys/regdef.h
>> +++ b/sysdeps/loongarch/sys/regdef.h
>> @@ -97,6 +97,7 @@
>> #define fcc5 $fcc5
>> #define fcc6 $fcc6
>> #define fcc7 $fcc7
>> +#define fcsr0 $fcsr0
>>
>> #define vr0 $vr0
>> #define vr1 $vr1
>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>> new file mode 100644
>> index 0000000000..a357e7619f
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.c
>> @@ -0,0 +1,39 @@
>> +/* Manage TLS descriptors. AArch64 version.
>> +
>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
> Update Copyright years to 2024 and remove the 'AArch64'.
>
>
>> +
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#include <ldsodefs.h>
>> +#include <tls.h>
>> +#include <dl-tlsdesc.h>
>> +#include <dl-unmap-segments.h>
>> +#include <tlsdeschtab.h>
>> +
>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>> + if there is one. */
>> +
>> +void
>> +_dl_unmap (struct link_map *map)
>> +{
>> + _dl_unmap_segments (map);
>> +
>> +#ifdef SHARED
>> + if (map->l_mach.tlsdesc_table)
>> + htab_delete (map->l_mach.tlsdesc_table);
>> +#endif
>> +}
>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>> new file mode 100644
>> index 0000000000..bcab218631
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.sym
>> @@ -0,0 +1,19 @@
>> +#include <stddef.h>
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include <link.h>
>> +#include <dl-tlsdesc.h>
>> +
>> +--
>> +
>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>> +
>> +TLSDESC_ARG offsetof(struct tlsdesc, arg)
>> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
>> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>> +TCBHEAD_DTV offsetof(tcbhead_t, dtv)
>> +DTV_COUNTER offsetof(dtv_t, counter)
>> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
>> +TLS_DTV_OFFSET TLS_DTV_OFFSET
>> +SIZE_OF_DTV sizeof(tcbhead_t)
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> index 547b1c1b7f..ec32e6d13f 100644
>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> @@ -5,3 +5,5 @@ libc.so: calloc
>> libc.so: free
>> libc.so: malloc
>> libc.so: realloc
>> +# The dynamic loader needs __tls_get_addr for TLS.
>> +ld.so: __tls_get_addr
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-03-08 7:53 ` mengqinggang
@ 2024-03-08 14:10 ` Adhemerval Zanella Netto
2024-03-11 8:45 ` mengqinggang
0 siblings, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2024-03-08 14:10 UTC (permalink / raw)
To: mengqinggang, libc-alpha
Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
maskray, luweining, wanglei, hejinyang
On 08/03/24 04:53, mengqinggang wrote:
> Thanks a lot for the review! A new v3 version patch has been sent.
> https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
>
>
> Some replies are below.
>
>
From the gcc enablement patch it seems that you are using the aarch64
ABI naming, -mtls-dialect={desc,trad}. So I would suggest checking that
there is no regression with my patch to enable TLS descriptors for
-mtls-dialect=desc [1].
You might also extend the testing to check for incorrect save/restore
of vector registers, as I did for the arm32 ones [2] (see
sysdeps/arm/tst-gnu2-tls2.h).
Also, I think this patch should be pushed only after gcc enablement
is installed.
[1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/
[2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/
> 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>>
>> On 28/02/24 22:43, mengqinggang wrote:
>>> This is mostly based on AArch64 and RISC-V implementation.
>>>
>>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>>
>>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>>> all vector registers.
>>> ---
>>> Changes v1 -> v2:
>>> - Fix vr24-vr31, xr24-xr31 typo.
>>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
>>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>>
>>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>> Patch looks ok, some comments below.
>>
>>> elf/elf.h | 2 +
>>> sysdeps/loongarch/Makefile | 6 +
>>> sysdeps/loongarch/dl-link.sym | 1 +
>>> sysdeps/loongarch/dl-machine.h | 60 ++-
>>> sysdeps/loongarch/dl-tls.h | 9 +-
>>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
>>> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
>>> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
>>> sysdeps/loongarch/linkmap.h | 1 +
>>> sysdeps/loongarch/sys/asm.h | 1 +
>>> sysdeps/loongarch/sys/regdef.h | 1 +
>>> sysdeps/loongarch/tlsdesc.c | 39 ++
>>> sysdeps/loongarch/tlsdesc.sym | 19 +
>>> .../unix/sysv/linux/loongarch/localplt.data | 2 +
>>> 14 files changed, 625 insertions(+), 3 deletions(-)
>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>> create mode 100644 sysdeps/loongarch/tlsdesc.c
>>> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>>
>>> diff --git a/elf/elf.h b/elf/elf.h
>>> index f2206e5c06..eec24ea049 100644
>>> --- a/elf/elf.h
>>> +++ b/elf/elf.h
>>> @@ -4237,6 +4237,8 @@ enum
>>> #define R_LARCH_TLS_TPREL32 10
>>> #define R_LARCH_TLS_TPREL64 11
>>> #define R_LARCH_IRELATIVE 12
>>> +#define R_LARCH_TLS_DESC32 13
>>> +#define R_LARCH_TLS_DESC64 14
>>> /* Reserved for future relocs that the dynamic linker must understand. */
>>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>>> index 43d2f583cd..181389e787 100644
>>> --- a/sysdeps/loongarch/Makefile
>>> +++ b/sysdeps/loongarch/Makefile
>>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>> endif
>>> ifeq ($(subdir),elf)
>>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>> gen-as-const-headers += dl-link.sym
>>> endif
>>> +ifeq ($(subdir),csu)
>>> +gen-as-const-headers += tlsdesc.sym
>>> +endif
>>> +
>>> +
>>> # LoongArch's assembler also needs to know about PIC as it changes the
>>> # definition of some assembler macros.
>>> ASFLAGS-.os += $(pic-ccflag)
>>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>>> index b534968e30..fd81ef37d5 100644
>>> --- a/sysdeps/loongarch/dl-link.sym
>>> +++ b/sysdeps/loongarch/dl-link.sym
>>> @@ -1,6 +1,7 @@
>>> #include <stddef.h>
>>> #include <sysdep.h>
>>> #include <link.h>
>>> +#include <dl-tlsdesc.h>
>>> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
>>> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
>>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>>> index ab81b82d95..8ca6c224f6 100644
>>> --- a/sysdeps/loongarch/dl-machine.h
>>> +++ b/sysdeps/loongarch/dl-machine.h
>>> @@ -25,7 +25,7 @@
>>> #include <entry.h>
>>> #include <elf/elf.h>
>>> #include <sys/asm.h>
>>> -#include <dl-tls.h>
>>> +#include <dl-tlsdesc.h>
>>> #include <dl-static-tls.h>
>>> #include <dl-machine-rel.h>
>>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>> break;
>>> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>>> + {
>>> + struct tlsdesc volatile *td =
>>> + (struct tlsdesc volatile *)addr_field;
>>> + if (! sym)
>>> + {
>>> + td->arg = (void*)reloc->r_addend;
>>> + td->entry = _dl_tlsdesc_undefweak;
>>> + }
>>> + else
>>> + {
>>> +# ifndef SHARED
>>> + CHECK_STATIC_TLS (map, sym_map);
>>> +# else
>>> + if (!TRY_STATIC_TLS (map, sym_map))
>>> + {
>>> + td->arg = _dl_make_tlsdesc_dynamic
>>> + (sym_map, sym->st_value + reloc->r_addend);
>>> +# if !defined __loongarch_soft_float
>>> + if (SUPPORT_LASX)
>>> + td->entry = _dl_tlsdesc_dynamic_lasx;
>>> + else
>>> + if (SUPPORT_LSX)
>>> + td->entry = _dl_tlsdesc_dynamic_lsx;
>>> + else
>>> +# endif
>>> + td->entry = _dl_tlsdesc_dynamic;
>>> + }
>>> + else
>>> +# endif
>>> + {
>>> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>>> + + reloc->r_addend);
>>> + td->entry = _dl_tlsdesc_return;
>>> + }
>>> + }
>>> + break;
>>> + }
>>> +
>>> case R_LARCH_COPY:
>>> {
>>> if (sym == NULL)
>>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>> else
>>> *reloc_addr = map->l_mach.plt;
>>> }
>>> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>> Use __glibc_likely here.
>>
>>> + {
>>> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>>> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>>> + const ElfW (Sym) *sym = &symtab[symndx];
>>> + const struct r_found_version *version = NULL;
>>> +
>>> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>>> + {
>>> + const ElfW (Half) *vernum =
>>> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>>> + version = &map->l_versions[vernum[symndx] & 0x7fff];
>>> + }
>>> +
>>> + /* Always initialize TLS descriptors completely, because lazy
>>> + initialization requires synchronization at every TLS access. */
>>> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>>> + skip_ifunc);
>>> + }
>>> else
>>> _dl_reloc_bad_type (map, r_type, 1);
>>> }
>>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>>> index 29924b866d..de593c002d 100644
>>> --- a/sysdeps/loongarch/dl-tls.h
>>> +++ b/sysdeps/loongarch/dl-tls.h
>>> @@ -16,6 +16,9 @@
>>> License along with the GNU C Library. If not, see
>>> <https://www.gnu.org/licenses/>. */
>>> +#ifndef _DL_TLS_H
>>> +#define _DL_TLS_H
>>> +
>>> /* Type used for the representation of TLS information in the GOT. */
>>> typedef struct
>>> {
>>> @@ -23,6 +26,8 @@ typedef struct
>>> unsigned long int ti_offset;
>>> } tls_index;
>>> +extern void *__tls_get_addr (tls_index *ti);
>>> +
>>> /* The thread pointer points to the first static TLS block. */
>>> #define TLS_TP_OFFSET 0
>>> @@ -37,10 +42,10 @@ typedef struct
>>> /* Compute the value for a DTPREL reloc. */
>>> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>> -extern void *__tls_get_addr (tls_index *ti);
>>> -
>> Why move the function prototype?
>
>
> Mainly to move it out of the middle of a bunch of macros.
>
>
>>
>>> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>> /* Value used for dtv entries for which the allocation is delayed. */
>>> #define TLS_DTV_UNALLOCATED ((void *) -1l)
>>> +
>>> +#endif
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> new file mode 100644
>>> index 0000000000..0d8c9bb991
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> @@ -0,0 +1,341 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> + LoongArch version.
>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024.
>>
>>> +
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +#ifdef USE_LASX
>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>>> +#elif defined USE_LSX
>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>>> +#elif !defined __loongarch_soft_float
>>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>>> +#else
>>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>>> +#endif
>> I don't have a strong opinion, but another option that might be simpler is
>> to provide only one _dl_tlsdesc_dynamic implementation and check the
>> required save/restore of vector registers based on the hwcap value.
>
>
> The v3 patch provides only one _dl_tlsdesc_dynamic implementation.
>
>
>>> +
>>> +#ifdef SHARED
>>> + /* Handler for dynamic TLS symbols.
>>> + Prototype:
>>> + _dl_tlsdesc_dynamic (tlsdesc *) ;
>>> +
>>> + The second word of the descriptor points to a
>>> + tlsdesc_dynamic_arg structure.
>>> +
>>> + Returns the offset between the thread pointer and the
>>> + object referenced by the argument.
>>> +
>>> + ptrdiff_t
>>> + __attribute__ ((__regparm__ (1)))
>> Does this attribute really make sense for loongarch?
>
>
> This line has been deleted.
>
>
>>
>>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>> + {
>>> + struct tlsdesc_dynamic_arg *td = tdp->arg;
>>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>>> + if (__builtin_expect (td->gen_count <= dtv[0].counter
>> Use __glibc_unlikely or just remove the __builtin_expect for clarity.
>>
>>> + && (dtv[td->tlsinfo.ti_module].pointer.val
>>> + != TLS_DTV_UNALLOCATED),
>>> + 1))
>>> + return dtv[td->tlsinfo.ti_module].pointer.val
>>> + + td->tlsinfo.ti_offset
>>> + - __thread_pointer;
>>> +
>>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>> + }
>>> + */
>>> + .hidden _dl_tlsdesc_dynamic
>>> + .global _dl_tlsdesc_dynamic
>>> + .type _dl_tlsdesc_dynamic,%function
>>> + cfi_startproc
>>> + .align 2
>>> +_dl_tlsdesc_dynamic:
>>> + /* Save just enough registers to support fast path, if we fall
>>> + into slow path we will save additional registers. */
>>> + ADDI sp, sp,-24
>>> + REG_S t0, sp, 0
>>> + REG_S t1, sp, 8
>>> + REG_S t2, sp, 16
>>> +
>>> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
>>> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
>>> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>>> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
>>> + bltu t2, t1, Lslow
>>> +
>>> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
>>> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>>> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
>>> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>>> + li.d t2, TLS_DTV_UNALLOCATED
>>> + beq t1, t2, Lslow
>>> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>>> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>>> + add.d a0, t1, t2
>>> +Lret:
>>> + sub.d a0, a0, tp
>>> + REG_L t0, sp, 0
>>> + REG_L t1, sp, 8
>>> + REG_L t2, sp, 16
>>> + ADDI sp, sp, 24
>>> + RET
>>> +
>>> +Lslow:
>>> + /* This is the slow path. We need to call __tls_get_addr() which
>>> + means we need to save and restore all the register that the
>>> + callee will trash. */
>>> +
>>> + /* Save the remaining registers that we must treat as caller save. */
>>> + ADDI sp, sp, -FRAME_SIZE
>>> + REG_S ra, sp, 0 * SZREG
>>> + REG_S a1, sp, 1 * SZREG
>>> + REG_S a2, sp, 2 * SZREG
>>> + REG_S a3, sp, 3 * SZREG
>>> + REG_S a4, sp, 4 * SZREG
>>> + REG_S a5, sp, 5 * SZREG
>>> + REG_S a6, sp, 6 * SZREG
>>> + REG_S a7, sp, 7 * SZREG
>>> + REG_S t4, sp, 8 * SZREG
>>> + REG_S t5, sp, 9 * SZREG
>>> + REG_S t6, sp, 10 * SZREG
>>> + REG_S t7, sp, 11 * SZREG
>>> + REG_S t8, sp, 12 * SZREG
>>> +
>>> +#ifdef USE_LASX
>>> + xvst xr0, sp, 13*SZREG + 0*SZXREG
>>> + xvst xr1, sp, 13*SZREG + 1*SZXREG
>>> + xvst xr2, sp, 13*SZREG + 2*SZXREG
>>> + xvst xr3, sp, 13*SZREG + 3*SZXREG
>>> + xvst xr4, sp, 13*SZREG + 4*SZXREG
>>> + xvst xr5, sp, 13*SZREG + 5*SZXREG
>>> + xvst xr6, sp, 13*SZREG + 6*SZXREG
>>> + xvst xr7, sp, 13*SZREG + 7*SZXREG
>>> + xvst xr8, sp, 13*SZREG + 8*SZXREG
>>> + xvst xr9, sp, 13*SZREG + 9*SZXREG
>>> + xvst xr10, sp, 13*SZREG + 10*SZXREG
>>> + xvst xr11, sp, 13*SZREG + 11*SZXREG
>>> + xvst xr12, sp, 13*SZREG + 12*SZXREG
>>> + xvst xr13, sp, 13*SZREG + 13*SZXREG
>>> + xvst xr14, sp, 13*SZREG + 14*SZXREG
>>> + xvst xr15, sp, 13*SZREG + 15*SZXREG
>>> + xvst xr16, sp, 13*SZREG + 16*SZXREG
>>> + xvst xr17, sp, 13*SZREG + 17*SZXREG
>>> + xvst xr18, sp, 13*SZREG + 18*SZXREG
>>> + xvst xr19, sp, 13*SZREG + 19*SZXREG
>>> + xvst xr20, sp, 13*SZREG + 20*SZXREG
>>> + xvst xr21, sp, 13*SZREG + 21*SZXREG
>>> + xvst xr22, sp, 13*SZREG + 22*SZXREG
>>> + xvst xr23, sp, 13*SZREG + 23*SZXREG
>>> + xvst xr24, sp, 13*SZREG + 24*SZXREG
>>> + xvst xr25, sp, 13*SZREG + 25*SZXREG
>>> + xvst xr26, sp, 13*SZREG + 26*SZXREG
>>> + xvst xr27, sp, 13*SZREG + 27*SZXREG
>>> + xvst xr28, sp, 13*SZREG + 28*SZXREG
>>> + xvst xr29, sp, 13*SZREG + 29*SZXREG
>>> + xvst xr30, sp, 13*SZREG + 30*SZXREG
>>> + xvst xr31, sp, 13*SZREG + 31*SZXREG
>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> + # some fields in fcsr0
>>> + movfcsr2gr t0, fcsr0
>>> + REG_S t0, sp, 32*SZXREG
>>> +#elif defined USE_LSX
>>> + vst vr0, sp, 13*SZREG + 0*SZVREG
>>> + vst vr1, sp, 13*SZREG + 1*SZVREG
>>> + vst vr2, sp, 13*SZREG + 2*SZVREG
>>> + vst vr3, sp, 13*SZREG + 3*SZVREG
>>> + vst vr4, sp, 13*SZREG + 4*SZVREG
>>> + vst vr5, sp, 13*SZREG + 5*SZVREG
>>> + vst vr6, sp, 13*SZREG + 6*SZVREG
>>> + vst vr7, sp, 13*SZREG + 7*SZVREG
>>> + vst vr8, sp, 13*SZREG + 8*SZVREG
>>> + vst vr9, sp, 13*SZREG + 9*SZVREG
>>> + vst vr10, sp, 13*SZREG + 10*SZVREG
>>> + vst vr11, sp, 13*SZREG + 11*SZVREG
>>> + vst vr12, sp, 13*SZREG + 12*SZVREG
>>> + vst vr13, sp, 13*SZREG + 13*SZVREG
>>> + vst vr14, sp, 13*SZREG + 14*SZVREG
>>> + vst vr15, sp, 13*SZREG + 15*SZVREG
>>> + vst vr16, sp, 13*SZREG + 16*SZVREG
>>> + vst vr17, sp, 13*SZREG + 17*SZVREG
>>> + vst vr18, sp, 13*SZREG + 18*SZVREG
>>> + vst vr19, sp, 13*SZREG + 19*SZVREG
>>> + vst vr20, sp, 13*SZREG + 20*SZVREG
>>> + vst vr21, sp, 13*SZREG + 21*SZVREG
>>> + vst vr22, sp, 13*SZREG + 22*SZVREG
>>> + vst vr23, sp, 13*SZREG + 23*SZVREG
>>> + vst vr24, sp, 13*SZREG + 24*SZVREG
>>> + vst vr25, sp, 13*SZREG + 25*SZVREG
>>> + vst vr26, sp, 13*SZREG + 26*SZVREG
>>> + vst vr27, sp, 13*SZREG + 27*SZVREG
>>> + vst vr28, sp, 13*SZREG + 28*SZVREG
>>> + vst vr29, sp, 13*SZREG + 29*SZVREG
>>> + vst vr30, sp, 13*SZREG + 30*SZVREG
>>> + vst vr31, sp, 13*SZREG + 31*SZVREG
>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> + # some fields in fcsr0
>>> + movfcsr2gr t0, fcsr0
>>> + REG_S t0, sp, 32*SZVREG
>>> +#elif !defined __loongarch_soft_float
>>> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
>>> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
>>> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
>>> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
>>> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
>>> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
>>> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
>>> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
>>> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
>>> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
>>> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
>>> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
>>> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
>>> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
>>> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
>>> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
>>> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
>>> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
>>> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
>>> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
>>> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
>>> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
>>> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
>>> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> + # some fields in fcsr0
>>> + movfcsr2gr t0, fcsr0
>>> + REG_S t0, sp, 24*SZFREG
>>> +#endif /* #ifdef USE_LASX */
>>> +
>>> + bl __tls_get_addr
>>> + ADDI a0, a0, -TLS_DTV_OFFSET
>>> +
>>> + REG_L ra, sp, 0
>>> + REG_L a1, sp, 1 * 8
>>> + REG_L a2, sp, 2 * 8
>>> + REG_L a3, sp, 3 * 8
>>> + REG_L a4, sp, 4 * 8
>>> + REG_L a5, sp, 5 * 8
>>> + REG_L a6, sp, 6 * 8
>>> + REG_L a7, sp, 7 * 8
>>> + REG_L t4, sp, 8 * 8
>>> + REG_L t5, sp, 9 * 8
>>> + REG_L t6, sp, 10 * 8
>>> + REG_L t7, sp, 11 * 8
>>> + REG_L t8, sp, 12 * 8
>>> +
>>> +#ifdef USE_LASX
>>> + xvld xr0, sp, 13*SZREG + 0*SZXREG
>>> + xvld xr1, sp, 13*SZREG + 1*SZXREG
>>> + xvld xr2, sp, 13*SZREG + 2*SZXREG
>>> + xvld xr3, sp, 13*SZREG + 3*SZXREG
>>> + xvld xr4, sp, 13*SZREG + 4*SZXREG
>>> + xvld xr5, sp, 13*SZREG + 5*SZXREG
>>> + xvld xr6, sp, 13*SZREG + 6*SZXREG
>>> + xvld xr7, sp, 13*SZREG + 7*SZXREG
>>> + xvld xr8, sp, 13*SZREG + 8*SZXREG
>>> + xvld xr9, sp, 13*SZREG + 9*SZXREG
>>> + xvld xr10, sp, 13*SZREG + 10*SZXREG
>>> + xvld xr11, sp, 13*SZREG + 11*SZXREG
>>> + xvld xr12, sp, 13*SZREG + 12*SZXREG
>>> + xvld xr13, sp, 13*SZREG + 13*SZXREG
>>> + xvld xr14, sp, 13*SZREG + 14*SZXREG
>>> + xvld xr15, sp, 13*SZREG + 15*SZXREG
>>> + xvld xr16, sp, 13*SZREG + 16*SZXREG
>>> + xvld xr17, sp, 13*SZREG + 17*SZXREG
>>> + xvld xr18, sp, 13*SZREG + 18*SZXREG
>>> + xvld xr19, sp, 13*SZREG + 19*SZXREG
>>> + xvld xr20, sp, 13*SZREG + 20*SZXREG
>>> + xvld xr21, sp, 13*SZREG + 21*SZXREG
>>> + xvld xr22, sp, 13*SZREG + 22*SZXREG
>>> + xvld xr23, sp, 13*SZREG + 23*SZXREG
>>> + xvld xr24, sp, 13*SZREG + 24*SZXREG
>>> + xvld xr25, sp, 13*SZREG + 25*SZXREG
>>> + xvld xr26, sp, 13*SZREG + 26*SZXREG
>>> + xvld xr27, sp, 13*SZREG + 27*SZXREG
>>> + xvld xr28, sp, 13*SZREG + 28*SZXREG
>>> + xvld xr29, sp, 13*SZREG + 29*SZXREG
>>> + xvld xr30, sp, 13*SZREG + 30*SZXREG
>>> + xvld xr31, sp, 13*SZREG + 31*SZXREG
>>> + REG_L t0, sp, 32*SZXREG
>>> + movgr2fcsr fcsr0, t0
>>> +#elif defined USE_LSX
>>> + vld vr0, sp, 13*SZREG + 0*SZVREG
>>> + vld vr1, sp, 13*SZREG + 1*SZVREG
>>> + vld vr2, sp, 13*SZREG + 2*SZVREG
>>> + vld vr3, sp, 13*SZREG + 3*SZVREG
>>> + vld vr4, sp, 13*SZREG + 4*SZVREG
>>> + vld vr5, sp, 13*SZREG + 5*SZVREG
>>> + vld vr6, sp, 13*SZREG + 6*SZVREG
>>> + vld vr7, sp, 13*SZREG + 7*SZVREG
>>> + vld vr8, sp, 13*SZREG + 8*SZVREG
>>> + vld vr9, sp, 13*SZREG + 9*SZVREG
>>> + vld vr10, sp, 13*SZREG + 10*SZVREG
>>> + vld vr11, sp, 13*SZREG + 11*SZVREG
>>> + vld vr12, sp, 13*SZREG + 12*SZVREG
>>> + vld vr13, sp, 13*SZREG + 13*SZVREG
>>> + vld vr14, sp, 13*SZREG + 14*SZVREG
>>> + vld vr15, sp, 13*SZREG + 15*SZVREG
>>> + vld vr16, sp, 13*SZREG + 16*SZVREG
>>> + vld vr17, sp, 13*SZREG + 17*SZVREG
>>> + vld vr18, sp, 13*SZREG + 18*SZVREG
>>> + vld vr19, sp, 13*SZREG + 19*SZVREG
>>> + vld vr20, sp, 13*SZREG + 20*SZVREG
>>> + vld vr21, sp, 13*SZREG + 21*SZVREG
>>> + vld vr22, sp, 13*SZREG + 22*SZVREG
>>> + vld vr23, sp, 13*SZREG + 23*SZVREG
>>> + vld vr24, sp, 13*SZREG + 24*SZVREG
>>> + vld vr25, sp, 13*SZREG + 25*SZVREG
>>> + vld vr26, sp, 13*SZREG + 26*SZVREG
>>> + vld vr27, sp, 13*SZREG + 27*SZVREG
>>> + vld vr28, sp, 13*SZREG + 28*SZVREG
>>> + vld vr29, sp, 13*SZREG + 29*SZVREG
>>> + vld vr30, sp, 13*SZREG + 30*SZVREG
>>> + vld vr31, sp, 13*SZREG + 31*SZVREG
>>> + REG_L t0, sp, 32*SZVREG
>>> + movgr2fcsr fcsr0, t0
>>> +#elif !defined __loongarch_soft_float
>>> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
>>> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
>>> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
>>> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
>>> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
>>> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
>>> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
>>> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
>>> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
>>> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
>>> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
>>> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
>>> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
>>> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
>>> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
>>> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
>>> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
>>> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
>>> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
>>> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
>>> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
>>> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
>>> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
>>> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
>>> + REG_L t0, sp, 24*SZFREG
>>> + movgr2fcsr fcsr0, t0
>>> +#endif /* #ifdef USE_LASX */
>>> +
>>> + ADDI sp, sp, FRAME_SIZE
>>> + b Lret
>>> + cfi_endproc
>>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>> +#endif /* #ifdef SHARED */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>>> new file mode 100644
>>> index 0000000000..4a17079169
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>>> @@ -0,0 +1,93 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> + LoongArch version.
>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024.
>>
>>> +
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +#include <sysdep.h>
>>> +#include <tls.h>
>>> +#include "tlsdesc.h"
>>> +
>>> + .text
>>> +
>>> + /* Compute the thread pointer offset for symbols in the static
>>> + TLS block. The offset is the same for all threads.
>>> + Prototype:
>>> + _dl_tlsdesc_return (tlsdesc *); */
>>> + .hidden _dl_tlsdesc_return
>>> + .global _dl_tlsdesc_return
>>> + .type _dl_tlsdesc_return,%function
>>> + cfi_startproc
>>> + .align 2
>>> +_dl_tlsdesc_return:
>>> + REG_L a0, a0, 8
>>> + RET
>>> + cfi_endproc
>>> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
>>> +
>>> + /* Handler for undefined weak TLS symbols.
>>> + Prototype:
>>> + _dl_tlsdesc_undefweak (tlsdesc *);
>>> +
>>> + The second word of the descriptor contains the addend.
>>> + Return the addend minus the thread pointer. This ensures
>>> + that when the caller adds on the thread pointer it gets back
>>> + the addend. */
>>> + .hidden _dl_tlsdesc_undefweak
>>> + .global _dl_tlsdesc_undefweak
>>> + .type _dl_tlsdesc_undefweak,%function
>>> + cfi_startproc
>>> + .align 2
>>> +_dl_tlsdesc_undefweak:
>>> + REG_L a0, a0, 8
>>> + sub.d a0, a0, tp
>>> + RET
>>> + cfi_endproc
>>> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>>> +
>>> +
>>> +#ifdef SHARED
>>> +
>>> +#if !defined __loongarch_soft_float
>>> +
>>> +#define USE_LASX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>>> +#define Lret Lret_lasx
>>> +#define Lslow Lslow_lasx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LASX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#define USE_LSX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>>> +#define Lret Lret_lsx
>>> +#define Lslow Lslow_lsx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LSX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#endif
>>> +
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +
>>> +#endif /* #ifdef SHARED */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>>> new file mode 100644
>>> index 0000000000..988037a714
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>>> @@ -0,0 +1,53 @@
>>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>>> + LoongArch version.
>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> +
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +#ifndef _DL_TLSDESC_H
>>> +#define _DL_TLSDESC_H
>>> +
>>> +#include <dl-tls.h>
>>> +
>>> +/* Type used to represent a TLS descriptor in the GOT. */
>>> +struct tlsdesc
>>> +{
>>> + ptrdiff_t (*entry) (struct tlsdesc *);
>>> + void *arg;
>>> +};
>>> +
>>> +/* Type used as the argument in a TLS descriptor for a symbol that
>>> + needs dynamic TLS offsets. */
>>> +struct tlsdesc_dynamic_arg
>>> +{
>>> + tls_index tlsinfo;
>>> + size_t gen_count;
>>> +};
>>> +
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>>> +
>>> +# ifdef SHARED
>>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>>> +#if !defined __loongarch_soft_float
>> Minor style: for a single test we usually use '#ifndef', and we put
>> attribute_hidden at the end of the prototype.
>>
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>>> +#endif
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>>> +#endif
>>> +
>>> +#endif
>>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>>> index 4d8737ee7f..9b1773634c 100644
>>> --- a/sysdeps/loongarch/linkmap.h
>>> +++ b/sysdeps/loongarch/linkmap.h
>>> @@ -19,4 +19,5 @@
>>> struct link_map_machine
>>> {
>>> ElfW (Addr) plt; /* Address of .plt. */
>>> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
>>> };
>>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>>> index 51521a7eb4..23c1d12914 100644
>>> --- a/sysdeps/loongarch/sys/asm.h
>>> +++ b/sysdeps/loongarch/sys/asm.h
>>> @@ -25,6 +25,7 @@
>>> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
>>> #define SZREG 8
>>> #define SZFREG 8
>>> +#define SZFCSREG 4
>>> #define SZVREG 16
>>> #define SZXREG 32
>>> #define REG_L ld.d
>>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>>> index f61ee25b25..80ce3e9c00 100644
>>> --- a/sysdeps/loongarch/sys/regdef.h
>>> +++ b/sysdeps/loongarch/sys/regdef.h
>>> @@ -97,6 +97,7 @@
>>> #define fcc5 $fcc5
>>> #define fcc6 $fcc6
>>> #define fcc7 $fcc7
>>> +#define fcsr0 $fcsr0
>>> #define vr0 $vr0
>>> #define vr1 $vr1
>>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>>> new file mode 100644
>>> index 0000000000..a357e7619f
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/tlsdesc.c
>>> @@ -0,0 +1,39 @@
>>> +/* Manage TLS descriptors. AArch64 version.
>>> +
>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024 and remove the 'AArch64'.
>>
>>
>>> +
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +#include <ldsodefs.h>
>>> +#include <tls.h>
>>> +#include <dl-tlsdesc.h>
>>> +#include <dl-unmap-segments.h>
>>> +#include <tlsdeschtab.h>
>>> +
>>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>>> + if there is one. */
>>> +
>>> +void
>>> +_dl_unmap (struct link_map *map)
>>> +{
>>> + _dl_unmap_segments (map);
>>> +
>>> +#ifdef SHARED
>>> + if (map->l_mach.tlsdesc_table)
>>> + htab_delete (map->l_mach.tlsdesc_table);
>>> +#endif
>>> +}
>>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>>> new file mode 100644
>>> index 0000000000..bcab218631
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/tlsdesc.sym
>>> @@ -0,0 +1,19 @@
>>> +#include <stddef.h>
>>> +#include <sysdep.h>
>>> +#include <tls.h>
>>> +#include <link.h>
>>> +#include <dl-tlsdesc.h>
>>> +
>>> +--
>>> +
>>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>>> +
>>> +TLSDESC_ARG offsetof(struct tlsdesc, arg)
>>> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
>>> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>>> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>>> +TCBHEAD_DTV offsetof(tcbhead_t, dtv)
>>> +DTV_COUNTER offsetof(dtv_t, counter)
>>> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
>>> +TLS_DTV_OFFSET TLS_DTV_OFFSET
>>> +SIZE_OF_DTV sizeof(tcbhead_t)
>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> index 547b1c1b7f..ec32e6d13f 100644
>>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> @@ -5,3 +5,5 @@ libc.so: calloc
>>> libc.so: free
>>> libc.so: malloc
>>> libc.so: realloc
>>> +# The dynamic loader needs __tls_get_addr for TLS.
>>> +ld.so: __tls_get_addr
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
2024-03-08 14:10 ` Adhemerval Zanella Netto
@ 2024-03-11 8:45 ` mengqinggang
0 siblings, 0 replies; 8+ messages in thread
From: mengqinggang @ 2024-03-11 8:45 UTC (permalink / raw)
To: Adhemerval Zanella Netto, libc-alpha
Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
maskray, luweining, wanglei, hejinyang
Thanks, I will first complete the gcc patch as soon as possible.
在 2024/3/8 下午10:10, Adhemerval Zanella Netto 写道:
>
> On 08/03/24 04:53, mengqinggang wrote:
>> Thanks a lot for the review! A new v3 version patch has been sent.
>> https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
>>
>>
>> And some reply below.
>>
>>
> From the gcc enablement patch it seems that you are using the aarch64
> ABI naming, -mtls-dialect={desc,trad}. So I would suggest checking that
> there is no regression with my patch to enable TLS descriptors for
> -mtls-dialect=desc [1].
>
> You might also extend the testing to check for incorrect save/restore
> of vector registers, as I did for the arm32 ones [2] (see
> sysdeps/arm/tst-gnu2-tls2.h).
>
> Also, I think this patch should be pushed only after gcc enablement
> is installed.
>
>
> [1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/
> [2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/
>
>> 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>>> On 28/02/24 22:43, mengqinggang wrote:
>>>> This is mostly based on AArch64 and RISC-V implementation.
>>>>
>>>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>>>
>>>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>>>> all vector registers.
>>>> ---
>>>> Changes v1 -> v2:
>>>> - Fix vr24-vr31, xr24-xr31 typo.
>>>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
>>>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>>>
>>>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>>> Patch looks ok, some comments below.
>>>
>>>> elf/elf.h | 2 +
>>>> sysdeps/loongarch/Makefile | 6 +
>>>> sysdeps/loongarch/dl-link.sym | 1 +
>>>> sysdeps/loongarch/dl-machine.h | 60 ++-
>>>> sysdeps/loongarch/dl-tls.h | 9 +-
>>>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++
>>>> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++
>>>> sysdeps/loongarch/dl-tlsdesc.h | 53 +++
>>>> sysdeps/loongarch/linkmap.h | 1 +
>>>> sysdeps/loongarch/sys/asm.h | 1 +
>>>> sysdeps/loongarch/sys/regdef.h | 1 +
>>>> sysdeps/loongarch/tlsdesc.c | 39 ++
>>>> sysdeps/loongarch/tlsdesc.sym | 19 +
>>>> .../unix/sysv/linux/loongarch/localplt.data | 2 +
>>>> 14 files changed, 625 insertions(+), 3 deletions(-)
>>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>>> create mode 100644 sysdeps/loongarch/tlsdesc.c
>>>> create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>>>
>>>> diff --git a/elf/elf.h b/elf/elf.h
>>>> index f2206e5c06..eec24ea049 100644
>>>> --- a/elf/elf.h
>>>> +++ b/elf/elf.h
>>>> @@ -4237,6 +4237,8 @@ enum
>>>> #define R_LARCH_TLS_TPREL32 10
>>>> #define R_LARCH_TLS_TPREL64 11
>>>> #define R_LARCH_IRELATIVE 12
>>>> +#define R_LARCH_TLS_DESC32 13
>>>> +#define R_LARCH_TLS_DESC64 14
>>>> /* Reserved for future relocs that the dynamic linker must understand. */
>>>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>>>> index 43d2f583cd..181389e787 100644
>>>> --- a/sysdeps/loongarch/Makefile
>>>> +++ b/sysdeps/loongarch/Makefile
>>>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>>> endif
>>>> ifeq ($(subdir),elf)
>>>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>>> gen-as-const-headers += dl-link.sym
>>>> endif
>>>> +ifeq ($(subdir),csu)
>>>> +gen-as-const-headers += tlsdesc.sym
>>>> +endif
>>>> +
>>>> +
>>>> # LoongArch's assembler also needs to know about PIC as it changes the
>>>> # definition of some assembler macros.
>>>> ASFLAGS-.os += $(pic-ccflag)
>>>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>>>> index b534968e30..fd81ef37d5 100644
>>>> --- a/sysdeps/loongarch/dl-link.sym
>>>> +++ b/sysdeps/loongarch/dl-link.sym
>>>> @@ -1,6 +1,7 @@
>>>> #include <stddef.h>
>>>> #include <sysdep.h>
>>>> #include <link.h>
>>>> +#include <dl-tlsdesc.h>
>>>> DL_SIZEOF_RG sizeof(struct La_loongarch_regs)
>>>> DL_SIZEOF_RV sizeof(struct La_loongarch_retval)
>>>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>>>> index ab81b82d95..8ca6c224f6 100644
>>>> --- a/sysdeps/loongarch/dl-machine.h
>>>> +++ b/sysdeps/loongarch/dl-machine.h
>>>> @@ -25,7 +25,7 @@
>>>> #include <entry.h>
>>>> #include <elf/elf.h>
>>>> #include <sys/asm.h>
>>>> -#include <dl-tls.h>
>>>> +#include <dl-tlsdesc.h>
>>>> #include <dl-static-tls.h>
>>>> #include <dl-machine-rel.h>
>>>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>>> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>>> break;
>>>> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>>>> + {
>>>> + struct tlsdesc volatile *td =
>>>> + (struct tlsdesc volatile *)addr_field;
>>>> + if (! sym)
>>>> + {
>>>> + td->arg = (void*)reloc->r_addend;
>>>> + td->entry = _dl_tlsdesc_undefweak;
>>>> + }
>>>> + else
>>>> + {
>>>> +# ifndef SHARED
>>>> + CHECK_STATIC_TLS (map, sym_map);
>>>> +# else
>>>> + if (!TRY_STATIC_TLS (map, sym_map))
>>>> + {
>>>> + td->arg = _dl_make_tlsdesc_dynamic
>>>> + (sym_map, sym->st_value + reloc->r_addend);
>>>> +# if !defined __loongarch_soft_float
>>>> + if (SUPPORT_LASX)
>>>> + td->entry = _dl_tlsdesc_dynamic_lasx;
>>>> + else
>>>> + if (SUPPORT_LSX)
>>>> + td->entry = _dl_tlsdesc_dynamic_lsx;
>>>> + else
>>>> +# endif
>>>> + td->entry = _dl_tlsdesc_dynamic;
>>>> + }
>>>> + else
>>>> +# endif
>>>> + {
>>>> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>>>> + + reloc->r_addend);
>>>> + td->entry = _dl_tlsdesc_return;
>>>> + }
>>>> + }
>>>> + break;
>>>> + }
>>>> +
>>>> case R_LARCH_COPY:
>>>> {
>>>> if (sym == NULL)
>>>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>>> else
>>>> *reloc_addr = map->l_mach.plt;
>>>> }
>>>> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>>> Use __glibc_likely here.
>>>
>>>> + {
>>>> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>>>> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>>>> + const ElfW (Sym) *sym = &symtab[symndx];
>>>> + const struct r_found_version *version = NULL;
>>>> +
>>>> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>>>> + {
>>>> + const ElfW (Half) *vernum =
>>>> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>>>> + version = &map->l_versions[vernum[symndx] & 0x7fff];
>>>> + }
>>>> +
>>>> + /* Always initialize TLS descriptors completely, because lazy
>>>> + initialization requires synchronization at every TLS access. */
>>>> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>>>> + skip_ifunc);
>>>> + }
>>>> else
>>>> _dl_reloc_bad_type (map, r_type, 1);
>>>> }
>>>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>>>> index 29924b866d..de593c002d 100644
>>>> --- a/sysdeps/loongarch/dl-tls.h
>>>> +++ b/sysdeps/loongarch/dl-tls.h
>>>> @@ -16,6 +16,9 @@
>>>> License along with the GNU C Library. If not, see
>>>> <https://www.gnu.org/licenses/>. */
>>>> +#ifndef _DL_TLS_H
>>>> +#define _DL_TLS_H
>>>> +
>>>> /* Type used for the representation of TLS information in the GOT. */
>>>> typedef struct
>>>> {
>>>> @@ -23,6 +26,8 @@ typedef struct
>>>> unsigned long int ti_offset;
>>>> } tls_index;
>>>> +extern void *__tls_get_addr (tls_index *ti);
>>>> +
>>>> /* The thread pointer points to the first static TLS block. */
>>>> #define TLS_TP_OFFSET 0
>>>> @@ -37,10 +42,10 @@ typedef struct
>>>> /* Compute the value for a DTPREL reloc. */
>>>> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>>> -extern void *__tls_get_addr (tls_index *ti);
>>>> -
>>> Why move the function prototype?
>>
>> Mainly I just wanted to move it out of the middle of a group of macros.
>>
>>
>>>> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>>> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>>> /* Value used for dtv entries for which the allocation is delayed. */
>>>> #define TLS_DTV_UNALLOCATED ((void *) -1l)
>>>> +
>>>> +#endif
>>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>> new file mode 100644
>>>> index 0000000000..0d8c9bb991
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>> @@ -0,0 +1,341 @@
>>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>>> + LoongArch version.
>>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> Update Copyright years to 2024.
>>>
>>>> +
>>>> + This file is part of the GNU C Library.
>>>> +
>>>> + The GNU C Library is free software; you can redistribute it and/or
>>>> + modify it under the terms of the GNU Lesser General Public
>>>> + License as published by the Free Software Foundation; either
>>>> + version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> + The GNU C Library is distributed in the hope that it will be useful,
>>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>>> + Lesser General Public License for more details.
>>>> +
>>>> + You should have received a copy of the GNU Lesser General Public
>>>> + License along with the GNU C Library; if not, see
>>>> + <https://www.gnu.org/licenses/>. */
>>>> +
>>>> +#ifdef USE_LASX
>>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>>>> +#elif defined USE_LSX
>>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>>>> +#elif !defined __loongarch_soft_float
>>>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>>>> +#else
>>>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>>>> +#endif
>>> I don't have a strong opinion, but another option that might be simpler is
>>> to provide only one _dl_tlsdesc_dynamic implementation and choose the
>>> required save/restore of vector registers based on the hwcap value.
>>
>> The v3 patch provides only one _dl_tlsdesc_dynamic implementation.
>>
>>
>>>> +
>>>> +#ifdef SHARED
>>>> + /* Handler for dynamic TLS symbols.
>>>> + Prototype:
>>>> + _dl_tlsdesc_dynamic (tlsdesc *) ;
>>>> +
>>>> + The second word of the descriptor points to a
>>>> + tlsdesc_dynamic_arg structure.
>>>> +
>>>> + Returns the offset between the thread pointer and the
>>>> + object referenced by the argument.
>>>> +
>>>> + ptrdiff_t
>>>> + __attribute__ ((__regparm__ (1)))
>>> Does this attribute really make sense for loongarch?
>>
>> This line has been deleted.
>>
>>
>>>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>>> + {
>>>> + struct tlsdesc_dynamic_arg *td = tdp->arg;
>>>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>>>> + if (__builtin_expect (td->gen_count <= dtv[0].counter
>>> Use __glibc_unlikely or just remove the __builtin_expect for clarity.
>>>
>>>> + && (dtv[td->tlsinfo.ti_module].pointer.val
>>>> + != TLS_DTV_UNALLOCATED),
>>>> + 1))
>>>> + return dtv[td->tlsinfo.ti_module].pointer.val
>>>> + + td->tlsinfo.ti_offset
>>>> + - __thread_pointer;
>>>> +
>>>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>>> + }
>>>> + */
>>>> + .hidden _dl_tlsdesc_dynamic
>>>> + .global _dl_tlsdesc_dynamic
>>>> + .type _dl_tlsdesc_dynamic,%function
>>>> + cfi_startproc
>>>> + .align 2
>>>> +_dl_tlsdesc_dynamic:
>>>> + /* Save just enough registers to support fast path, if we fall
>>>> + into slow path we will save additional registers. */
>>>> + ADDI sp, sp,-24
>>>> + REG_S t0, sp, 0
>>>> + REG_S t1, sp, 8
>>>> + REG_S t2, sp, 16
>>>> +
>>>> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start
>>>> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg
>>>> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>>>> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter
>>>> + bltu t2, t1, Lslow
>>>> +
>>>> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module
>>>> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>>>> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t)
>>>> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>>>> + li.d t2, TLS_DTV_UNALLOCATED
>>>> + beq t1, t2, Lslow
>>>> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>>>> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>>>> + add.d a0, t1, t2
>>>> +Lret:
>>>> + sub.d a0, a0, tp
>>>> + REG_L t0, sp, 0
>>>> + REG_L t1, sp, 8
>>>> + REG_L t2, sp, 16
>>>> + ADDI sp, sp, 24
>>>> + RET
>>>> +
>>>> +Lslow:
>>>> + /* This is the slow path. We need to call __tls_get_addr() which
>>>> + means we need to save and restore all the register that the
>>>> + callee will trash. */
>>>> +
>>>> + /* Save the remaining registers that we must treat as caller save. */
>>>> + ADDI sp, sp, -FRAME_SIZE
>>>> + REG_S ra, sp, 0 * SZREG
>>>> + REG_S a1, sp, 1 * SZREG
>>>> + REG_S a2, sp, 2 * SZREG
>>>> + REG_S a3, sp, 3 * SZREG
>>>> + REG_S a4, sp, 4 * SZREG
>>>> + REG_S a5, sp, 5 * SZREG
>>>> + REG_S a6, sp, 6 * SZREG
>>>> + REG_S a7, sp, 7 * SZREG
>>>> + REG_S t4, sp, 8 * SZREG
>>>> + REG_S t5, sp, 9 * SZREG
>>>> + REG_S t6, sp, 10 * SZREG
>>>> + REG_S t7, sp, 11 * SZREG
>>>> + REG_S t8, sp, 12 * SZREG
>>>> +
>>>> +#ifdef USE_LASX
>>>> + xvst xr0, sp, 13*SZREG + 0*SZXREG
>>>> + xvst xr1, sp, 13*SZREG + 1*SZXREG
>>>> + xvst xr2, sp, 13*SZREG + 2*SZXREG
>>>> + xvst xr3, sp, 13*SZREG + 3*SZXREG
>>>> + xvst xr4, sp, 13*SZREG + 4*SZXREG
>>>> + xvst xr5, sp, 13*SZREG + 5*SZXREG
>>>> + xvst xr6, sp, 13*SZREG + 6*SZXREG
>>>> + xvst xr7, sp, 13*SZREG + 7*SZXREG
>>>> + xvst xr8, sp, 13*SZREG + 8*SZXREG
>>>> + xvst xr9, sp, 13*SZREG + 9*SZXREG
>>>> + xvst xr10, sp, 13*SZREG + 10*SZXREG
>>>> + xvst xr11, sp, 13*SZREG + 11*SZXREG
>>>> + xvst xr12, sp, 13*SZREG + 12*SZXREG
>>>> + xvst xr13, sp, 13*SZREG + 13*SZXREG
>>>> + xvst xr14, sp, 13*SZREG + 14*SZXREG
>>>> + xvst xr15, sp, 13*SZREG + 15*SZXREG
>>>> + xvst xr16, sp, 13*SZREG + 16*SZXREG
>>>> + xvst xr17, sp, 13*SZREG + 17*SZXREG
>>>> + xvst xr18, sp, 13*SZREG + 18*SZXREG
>>>> + xvst xr19, sp, 13*SZREG + 19*SZXREG
>>>> + xvst xr20, sp, 13*SZREG + 20*SZXREG
>>>> + xvst xr21, sp, 13*SZREG + 21*SZXREG
>>>> + xvst xr22, sp, 13*SZREG + 22*SZXREG
>>>> + xvst xr23, sp, 13*SZREG + 23*SZXREG
>>>> + xvst xr24, sp, 13*SZREG + 24*SZXREG
>>>> + xvst xr25, sp, 13*SZREG + 25*SZXREG
>>>> + xvst xr26, sp, 13*SZREG + 26*SZXREG
>>>> + xvst xr27, sp, 13*SZREG + 27*SZXREG
>>>> + xvst xr28, sp, 13*SZREG + 28*SZXREG
>>>> + xvst xr29, sp, 13*SZREG + 29*SZXREG
>>>> + xvst xr30, sp, 13*SZREG + 30*SZXREG
>>>> + xvst xr31, sp, 13*SZREG + 31*SZXREG
>>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>>> + # some fields in fcsr0
>>>> + movfcsr2gr t0, fcsr0
>>>> + REG_S t0, sp, 32*SZXREG
>>>> +#elif defined USE_LSX
>>>> + vst vr0, sp, 13*SZREG + 0*SZVREG
>>>> + vst vr1, sp, 13*SZREG + 1*SZVREG
>>>> + vst vr2, sp, 13*SZREG + 2*SZVREG
>>>> + vst vr3, sp, 13*SZREG + 3*SZVREG
>>>> + vst vr4, sp, 13*SZREG + 4*SZVREG
>>>> + vst vr5, sp, 13*SZREG + 5*SZVREG
>>>> + vst vr6, sp, 13*SZREG + 6*SZVREG
>>>> + vst vr7, sp, 13*SZREG + 7*SZVREG
>>>> + vst vr8, sp, 13*SZREG + 8*SZVREG
>>>> + vst vr9, sp, 13*SZREG + 9*SZVREG
>>>> + vst vr10, sp, 13*SZREG + 10*SZVREG
>>>> + vst vr11, sp, 13*SZREG + 11*SZVREG
>>>> + vst vr12, sp, 13*SZREG + 12*SZVREG
>>>> + vst vr13, sp, 13*SZREG + 13*SZVREG
>>>> + vst vr14, sp, 13*SZREG + 14*SZVREG
>>>> + vst vr15, sp, 13*SZREG + 15*SZVREG
>>>> + vst vr16, sp, 13*SZREG + 16*SZVREG
>>>> + vst vr17, sp, 13*SZREG + 17*SZVREG
>>>> + vst vr18, sp, 13*SZREG + 18*SZVREG
>>>> + vst vr19, sp, 13*SZREG + 19*SZVREG
>>>> + vst vr20, sp, 13*SZREG + 20*SZVREG
>>>> + vst vr21, sp, 13*SZREG + 21*SZVREG
>>>> + vst vr22, sp, 13*SZREG + 22*SZVREG
>>>> + vst vr23, sp, 13*SZREG + 23*SZVREG
>>>> + vst vr24, sp, 13*SZREG + 24*SZVREG
>>>> + vst vr25, sp, 13*SZREG + 25*SZVREG
>>>> + vst vr26, sp, 13*SZREG + 26*SZVREG
>>>> + vst vr27, sp, 13*SZREG + 27*SZVREG
>>>> + vst vr28, sp, 13*SZREG + 28*SZVREG
>>>> + vst vr29, sp, 13*SZREG + 29*SZVREG
>>>> + vst vr30, sp, 13*SZREG + 30*SZVREG
>>>> + vst vr31, sp, 13*SZREG + 31*SZVREG
>>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>>> + # some fields in fcsr0
>>>> + movfcsr2gr t0, fcsr0
>>>> + REG_S t0, sp, 32*SZVREG
>>>> +#elif !defined __loongarch_soft_float
>>>> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG
>>>> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG
>>>> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG
>>>> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG
>>>> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG
>>>> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG
>>>> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG
>>>> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG
>>>> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG
>>>> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG
>>>> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG
>>>> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG
>>>> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG
>>>> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG
>>>> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG
>>>> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG
>>>> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG
>>>> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG
>>>> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG
>>>> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG
>>>> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG
>>>> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG
>>>> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG
>>>> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG
>>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>>> + # some fields in fcsr0
>>>> + movfcsr2gr t0, fcsr0
>>>> + REG_S t0, sp, 24*SZFREG
>>>> +#endif /* #ifdef USE_LASX */
>>>> +
>>>> + bl __tls_get_addr
>>>> + ADDI a0, a0, -TLS_DTV_OFFSET
>>>> +
>>>> + REG_L ra, sp, 0
>>>> + REG_L a1, sp, 1 * 8
>>>> + REG_L a2, sp, 2 * 8
>>>> + REG_L a3, sp, 3 * 8
>>>> + REG_L a4, sp, 4 * 8
>>>> + REG_L a5, sp, 5 * 8
>>>> + REG_L a6, sp, 6 * 8
>>>> + REG_L a7, sp, 7 * 8
>>>> + REG_L t4, sp, 8 * 8
>>>> + REG_L t5, sp, 9 * 8
>>>> + REG_L t6, sp, 10 * 8
>>>> + REG_L t7, sp, 11 * 8
>>>> + REG_L t8, sp, 12 * 8
>>>> +
>>>> +#ifdef USE_LASX
>>>> + xvld xr0, sp, 13*SZREG + 0*SZXREG
>>>> + xvld xr1, sp, 13*SZREG + 1*SZXREG
>>>> + xvld xr2, sp, 13*SZREG + 2*SZXREG
>>>> + xvld xr3, sp, 13*SZREG + 3*SZXREG
>>>> + xvld xr4, sp, 13*SZREG + 4*SZXREG
>>>> + xvld xr5, sp, 13*SZREG + 5*SZXREG
>>>> + xvld xr6, sp, 13*SZREG + 6*SZXREG
>>>> + xvld xr7, sp, 13*SZREG + 7*SZXREG
>>>> + xvld xr8, sp, 13*SZREG + 8*SZXREG
>>>> + xvld xr9, sp, 13*SZREG + 9*SZXREG
>>>> + xvld xr10, sp, 13*SZREG + 10*SZXREG
>>>> + xvld xr11, sp, 13*SZREG + 11*SZXREG
>>>> + xvld xr12, sp, 13*SZREG + 12*SZXREG
>>>> + xvld xr13, sp, 13*SZREG + 13*SZXREG
>>>> + xvld xr14, sp, 13*SZREG + 14*SZXREG
>>>> + xvld xr15, sp, 13*SZREG + 15*SZXREG
>>>> + xvld xr16, sp, 13*SZREG + 16*SZXREG
>>>> + xvld xr17, sp, 13*SZREG + 17*SZXREG
>>>> + xvld xr18, sp, 13*SZREG + 18*SZXREG
>>>> + xvld xr19, sp, 13*SZREG + 19*SZXREG
>>>> + xvld xr20, sp, 13*SZREG + 20*SZXREG
>>>> + xvld xr21, sp, 13*SZREG + 21*SZXREG
>>>> + xvld xr22, sp, 13*SZREG + 22*SZXREG
>>>> + xvld xr23, sp, 13*SZREG + 23*SZXREG
>>>> + xvld xr24, sp, 13*SZREG + 24*SZXREG
>>>> + xvld xr25, sp, 13*SZREG + 25*SZXREG
>>>> + xvld xr26, sp, 13*SZREG + 26*SZXREG
>>>> + xvld xr27, sp, 13*SZREG + 27*SZXREG
>>>> + xvld xr28, sp, 13*SZREG + 28*SZXREG
>>>> + xvld xr29, sp, 13*SZREG + 29*SZXREG
>>>> + xvld xr30, sp, 13*SZREG + 30*SZXREG
>>>> + xvld xr31, sp, 13*SZREG + 31*SZXREG
>>>> + REG_L t0, sp, 32*SZXREG
>>>> + movgr2fcsr fcsr0, t0
>>>> +#elif defined USE_LSX
>>>> + vld vr0, sp, 13*SZREG + 0*SZVREG
>>>> + vld vr1, sp, 13*SZREG + 1*SZVREG
>>>> + vld vr2, sp, 13*SZREG + 2*SZVREG
>>>> + vld vr3, sp, 13*SZREG + 3*SZVREG
>>>> + vld vr4, sp, 13*SZREG + 4*SZVREG
>>>> + vld vr5, sp, 13*SZREG + 5*SZVREG
>>>> + vld vr6, sp, 13*SZREG + 6*SZVREG
>>>> + vld vr7, sp, 13*SZREG + 7*SZVREG
>>>> + vld vr8, sp, 13*SZREG + 8*SZVREG
>>>> + vld vr9, sp, 13*SZREG + 9*SZVREG
>>>> + vld vr10, sp, 13*SZREG + 10*SZVREG
>>>> + vld vr11, sp, 13*SZREG + 11*SZVREG
>>>> + vld vr12, sp, 13*SZREG + 12*SZVREG
>>>> + vld vr13, sp, 13*SZREG + 13*SZVREG
>>>> + vld vr14, sp, 13*SZREG + 14*SZVREG
>>>> + vld vr15, sp, 13*SZREG + 15*SZVREG
>>>> + vld vr16, sp, 13*SZREG + 16*SZVREG
>>>> + vld vr17, sp, 13*SZREG + 17*SZVREG
>>>> + vld vr18, sp, 13*SZREG + 18*SZVREG
>>>> + vld vr19, sp, 13*SZREG + 19*SZVREG
>>>> + vld vr20, sp, 13*SZREG + 20*SZVREG
>>>> + vld vr21, sp, 13*SZREG + 21*SZVREG
>>>> + vld vr22, sp, 13*SZREG + 22*SZVREG
>>>> + vld vr23, sp, 13*SZREG + 23*SZVREG
>>>> + vld vr24, sp, 13*SZREG + 24*SZVREG
>>>> + vld vr25, sp, 13*SZREG + 25*SZVREG
>>>> + vld vr26, sp, 13*SZREG + 26*SZVREG
>>>> + vld vr27, sp, 13*SZREG + 27*SZVREG
>>>> + vld vr28, sp, 13*SZREG + 28*SZVREG
>>>> + vld vr29, sp, 13*SZREG + 29*SZVREG
>>>> + vld vr30, sp, 13*SZREG + 30*SZVREG
>>>> + vld vr31, sp, 13*SZREG + 31*SZVREG
>>>> + REG_L t0, sp, 32*SZVREG
>>>> + movgr2fcsr fcsr0, t0
>>>> +#elif !defined __loongarch_soft_float
>>>> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG
>>>> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG
>>>> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG
>>>> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG
>>>> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG
>>>> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG
>>>> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG
>>>> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG
>>>> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG
>>>> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG
>>>> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG
>>>> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG
>>>> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG
>>>> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG
>>>> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG
>>>> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG
>>>> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG
>>>> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG
>>>> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG
>>>> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG
>>>> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG
>>>> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG
>>>> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG
>>>> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG
>>>> + REG_L t0, sp, 24*SZFREG
>>>> + movgr2fcsr fcsr0, t0
>>>> +#endif /* #ifdef USE_LASX */
>>>> +
>>>> + ADDI sp, sp, FRAME_SIZE
>>>> + b Lret
>>>> + cfi_endproc
>>>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>>> +#endif /* #ifdef SHARED */
>>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>>>> new file mode 100644
>>>> index 0000000000..4a17079169
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>>>> @@ -0,0 +1,93 @@
>>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>>> + LoongArch version.
>>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> Update Copyright years to 2024.
>>>
>>>> +
>>>> + This file is part of the GNU C Library.
>>>> +
>>>> + The GNU C Library is free software; you can redistribute it and/or
>>>> + modify it under the terms of the GNU Lesser General Public
>>>> + License as published by the Free Software Foundation; either
>>>> + version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> + The GNU C Library is distributed in the hope that it will be useful,
>>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>>> + Lesser General Public License for more details.
>>>> +
>>>> + You should have received a copy of the GNU Lesser General Public
>>>> + License along with the GNU C Library; if not, see
>>>> + <https://www.gnu.org/licenses/>. */
>>>> +
>>>> +#include <sysdep.h>
>>>> +#include <tls.h>
>>>> +#include "tlsdesc.h"
>>>> +
>>>> + .text
>>>> +
>>>> + /* Compute the thread pointer offset for symbols in the static
>>>> + TLS block. The offset is the same for all threads.
>>>> + Prototype:
>>>> + _dl_tlsdesc_return (tlsdesc *); */
>>>> + .hidden _dl_tlsdesc_return
>>>> + .global _dl_tlsdesc_return
>>>> + .type _dl_tlsdesc_return,%function
>>>> + cfi_startproc
>>>> + .align 2
>>>> +_dl_tlsdesc_return:
>>>> + REG_L a0, a0, 8
>>>> + RET
>>>> + cfi_endproc
>>>> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
>>>> +
>>>> + /* Handler for undefined weak TLS symbols.
>>>> + Prototype:
>>>> + _dl_tlsdesc_undefweak (tlsdesc *);
>>>> +
>>>> + The second word of the descriptor contains the addend.
>>>> + Return the addend minus the thread pointer. This ensures
>>>> + that when the caller adds on the thread pointer it gets back
>>>> + the addend. */
>>>> + .hidden _dl_tlsdesc_undefweak
>>>> + .global _dl_tlsdesc_undefweak
>>>> + .type _dl_tlsdesc_undefweak,%function
>>>> + cfi_startproc
>>>> + .align 2
>>>> +_dl_tlsdesc_undefweak:
>>>> + REG_L a0, a0, 8
>>>> + sub.d a0, a0, tp
>>>> + RET
>>>> + cfi_endproc
>>>> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>>>> +
>>>> +
>>>> +#ifdef SHARED
>>>> +
>>>> +#if !defined __loongarch_soft_float
>>>> +
>>>> +#define USE_LASX
>>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>>>> +#define Lret Lret_lasx
>>>> +#define Lslow Lslow_lasx
>>>> +#include "dl-tlsdesc-dynamic.h"
>>>> +#undef FRAME_SIZE
>>>> +#undef USE_LASX
>>>> +#undef _dl_tlsdesc_dynamic
>>>> +#undef Lret
>>>> +#undef Lslow
>>>> +
>>>> +#define USE_LSX
>>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>>>> +#define Lret Lret_lsx
>>>> +#define Lslow Lslow_lsx
>>>> +#include "dl-tlsdesc-dynamic.h"
>>>> +#undef FRAME_SIZE
>>>> +#undef USE_LSX
>>>> +#undef _dl_tlsdesc_dynamic
>>>> +#undef Lret
>>>> +#undef Lslow
>>>> +
>>>> +#endif
>>>> +
>>>> +#include "dl-tlsdesc-dynamic.h"
>>>> +
>>>> +#endif /* #ifdef SHARED */
>>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>>>> new file mode 100644
>>>> index 0000000000..988037a714
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>>>> @@ -0,0 +1,53 @@
>>>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>>>> + LoongArch version.
>>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>>> +
>>>> + This file is part of the GNU C Library.
>>>> +
>>>> + The GNU C Library is free software; you can redistribute it and/or
>>>> + modify it under the terms of the GNU Lesser General Public
>>>> + License as published by the Free Software Foundation; either
>>>> + version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> + The GNU C Library is distributed in the hope that it will be useful,
>>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>>> + Lesser General Public License for more details.
>>>> +
>>>> + You should have received a copy of the GNU Lesser General Public
>>>> + License along with the GNU C Library; if not, see
>>>> + <https://www.gnu.org/licenses/>. */
>>>> +
>>>> +#ifndef _DL_TLSDESC_H
>>>> +#define _DL_TLSDESC_H
>>>> +
>>>> +#include <dl-tls.h>
>>>> +
>>>> +/* Type used to represent a TLS descriptor in the GOT. */
>>>> +struct tlsdesc
>>>> +{
>>>> + ptrdiff_t (*entry) (struct tlsdesc *);
>>>> + void *arg;
>>>> +};
>>>> +
>>>> +/* Type used as the argument in a TLS descriptor for a symbol that
>>>> + needs dynamic TLS offsets. */
>>>> +struct tlsdesc_dynamic_arg
>>>> +{
>>>> + tls_index tlsinfo;
>>>> + size_t gen_count;
>>>> +};
>>>> +
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>>>> +
>>>> +# ifdef SHARED
>>>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>>>> +#if !defined __loongarch_soft_float
>>> Minor style: for a single test we usually use '#ifndef', and
>>> attribute_hidden goes at the end of the prototype.
>>>
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>>>> +#endif
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>>>> +#endif
>>>> +
>>>> +#endif
>>>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>>>> index 4d8737ee7f..9b1773634c 100644
>>>> --- a/sysdeps/loongarch/linkmap.h
>>>> +++ b/sysdeps/loongarch/linkmap.h
>>>> @@ -19,4 +19,5 @@
>>>> struct link_map_machine
>>>> {
>>>> ElfW (Addr) plt; /* Address of .plt. */
>>>> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */
>>>> };
>>>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>>>> index 51521a7eb4..23c1d12914 100644
>>>> --- a/sysdeps/loongarch/sys/asm.h
>>>> +++ b/sysdeps/loongarch/sys/asm.h
>>>> @@ -25,6 +25,7 @@
>>>> /* Macros to handle different pointer/register sizes for 32/64-bit code. */
>>>> #define SZREG 8
>>>> #define SZFREG 8
>>>> +#define SZFCSREG 4
>>>> #define SZVREG 16
>>>> #define SZXREG 32
>>>> #define REG_L ld.d
>>>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>>>> index f61ee25b25..80ce3e9c00 100644
>>>> --- a/sysdeps/loongarch/sys/regdef.h
>>>> +++ b/sysdeps/loongarch/sys/regdef.h
>>>> @@ -97,6 +97,7 @@
>>>> #define fcc5 $fcc5
>>>> #define fcc6 $fcc6
>>>> #define fcc7 $fcc7
>>>> +#define fcsr0 $fcsr0
>>>> #define vr0 $vr0
>>>> #define vr1 $vr1
>>>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>>>> new file mode 100644
>>>> index 0000000000..a357e7619f
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/tlsdesc.c
>>>> @@ -0,0 +1,39 @@
>>>> +/* Manage TLS descriptors. AArch64 version.
>>>> +
>>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> Update the copyright years to 2024 and remove the 'AArch64' reference.
>>>
>>>
>>>> +
>>>> + This file is part of the GNU C Library.
>>>> +
>>>> + The GNU C Library is free software; you can redistribute it and/or
>>>> + modify it under the terms of the GNU Lesser General Public
>>>> + License as published by the Free Software Foundation; either
>>>> + version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> + The GNU C Library is distributed in the hope that it will be useful,
>>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>>> + Lesser General Public License for more details.
>>>> +
>>>> + You should have received a copy of the GNU Lesser General Public
>>>> + License along with the GNU C Library; if not, see
>>>> + <https://www.gnu.org/licenses/>. */
>>>> +
>>>> +#include <ldsodefs.h>
>>>> +#include <tls.h>
>>>> +#include <dl-tlsdesc.h>
>>>> +#include <dl-unmap-segments.h>
>>>> +#include <tlsdeschtab.h>
>>>> +
>>>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>>>> + if there is one. */
>>>> +
>>>> +void
>>>> +_dl_unmap (struct link_map *map)
>>>> +{
>>>> + _dl_unmap_segments (map);
>>>> +
>>>> +#ifdef SHARED
>>>> + if (map->l_mach.tlsdesc_table)
>>>> + htab_delete (map->l_mach.tlsdesc_table);
>>>> +#endif
>>>> +}
>>>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>>>> new file mode 100644
>>>> index 0000000000..bcab218631
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/tlsdesc.sym
>>>> @@ -0,0 +1,19 @@
>>>> +#include <stddef.h>
>>>> +#include <sysdep.h>
>>>> +#include <tls.h>
>>>> +#include <link.h>
>>>> +#include <dl-tlsdesc.h>
>>>> +
>>>> +--
>>>> +
>>>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>>>> +
>>>> +TLSDESC_ARG offsetof(struct tlsdesc, arg)
>>>> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
>>>> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>>>> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>>>> +TCBHEAD_DTV offsetof(tcbhead_t, dtv)
>>>> +DTV_COUNTER offsetof(dtv_t, counter)
>>>> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED
>>>> +TLS_DTV_OFFSET TLS_DTV_OFFSET
>>>> +SIZE_OF_DTV sizeof(tcbhead_t)
>>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>>> index 547b1c1b7f..ec32e6d13f 100644
>>>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>>> @@ -5,3 +5,5 @@ libc.so: calloc
>>>> libc.so: free
>>>> libc.so: malloc
>>>> libc.so: realloc
>>>> +# The dynamic loader needs __tls_get_addr for TLS.
>>>> +ld.so: __tls_get_addr
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-03-11 8:45 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-29 1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
2024-02-29 2:56 ` caiyinyu
2024-03-04 15:42 ` H.J. Lu
2024-03-08 7:45 ` mengqinggang
2024-03-05 19:29 ` Adhemerval Zanella Netto
2024-03-08 7:53 ` mengqinggang
2024-03-08 14:10 ` Adhemerval Zanella Netto
2024-03-11 8:45 ` mengqinggang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).