public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] LoongArch: Add support for TLS Descriptors
@ 2024-02-29  1:43 mengqinggang
  2024-02-29  2:56 ` caiyinyu
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: mengqinggang @ 2024-02-29  1:43 UTC (permalink / raw)
  To: libc-alpha
  Cc: adhemerval.zanella, xuchenghua, caiyinyu, chenglulu, cailulu,
	xry111, i.swmail, maskray, luweining, wanglei, hejinyang,
	mengqinggang

This is mostly based on AArch64 and RISC-V implementation.

Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.

For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
all vector registers.
---
Changes v1 -> v2:
- Fix vr24-vr31, xr24-xr31 typo.
- Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
- Save and restore fcsr0 in _dl_tlsdesc_dynamic. 

v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html

 elf/elf.h                                     |   2 +
 sysdeps/loongarch/Makefile                    |   6 +
 sysdeps/loongarch/dl-link.sym                 |   1 +
 sysdeps/loongarch/dl-machine.h                |  60 ++-
 sysdeps/loongarch/dl-tls.h                    |   9 +-
 sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
 sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
 sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
 sysdeps/loongarch/linkmap.h                   |   1 +
 sysdeps/loongarch/sys/asm.h                   |   1 +
 sysdeps/loongarch/sys/regdef.h                |   1 +
 sysdeps/loongarch/tlsdesc.c                   |  39 ++
 sysdeps/loongarch/tlsdesc.sym                 |  19 +
 .../unix/sysv/linux/loongarch/localplt.data   |   2 +
 14 files changed, 625 insertions(+), 3 deletions(-)
 create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
 create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
 create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
 create mode 100644 sysdeps/loongarch/tlsdesc.c
 create mode 100644 sysdeps/loongarch/tlsdesc.sym

diff --git a/elf/elf.h b/elf/elf.h
index f2206e5c06..eec24ea049 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -4237,6 +4237,8 @@ enum
 #define R_LARCH_TLS_TPREL32	10
 #define R_LARCH_TLS_TPREL64	11
 #define R_LARCH_IRELATIVE	12
+#define R_LARCH_TLS_DESC32	13
+#define R_LARCH_TLS_DESC64	14
 
 /* Reserved for future relocs that the dynamic linker must understand.  */
 
diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
index 43d2f583cd..181389e787 100644
--- a/sysdeps/loongarch/Makefile
+++ b/sysdeps/loongarch/Makefile
@@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
 endif
 
 ifeq ($(subdir),elf)
+sysdep-dl-routines += tlsdesc dl-tlsdesc
 gen-as-const-headers += dl-link.sym
 endif
 
+ifeq ($(subdir),csu)
+gen-as-const-headers += tlsdesc.sym
+endif
+
+
 # LoongArch's assembler also needs to know about PIC as it changes the
 # definition of some assembler macros.
 ASFLAGS-.os += $(pic-ccflag)
diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
index b534968e30..fd81ef37d5 100644
--- a/sysdeps/loongarch/dl-link.sym
+++ b/sysdeps/loongarch/dl-link.sym
@@ -1,6 +1,7 @@
 #include <stddef.h>
 #include <sysdep.h>
 #include <link.h>
+#include <dl-tlsdesc.h>
 
 DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
 DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
index ab81b82d95..8ca6c224f6 100644
--- a/sysdeps/loongarch/dl-machine.h
+++ b/sysdeps/loongarch/dl-machine.h
@@ -25,7 +25,7 @@
 #include <entry.h>
 #include <elf/elf.h>
 #include <sys/asm.h>
-#include <dl-tls.h>
+#include <dl-tlsdesc.h>
 #include <dl-static-tls.h>
 #include <dl-machine-rel.h>
 
@@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
       *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
       break;
 
+    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
+      {
+	struct tlsdesc volatile *td =
+	    (struct tlsdesc volatile *)addr_field;
+	if (! sym)
+	  {
+	    td->arg = (void*)reloc->r_addend;
+	    td->entry = _dl_tlsdesc_undefweak;
+	  }
+	else
+	  {
+# ifndef SHARED
+	    CHECK_STATIC_TLS (map, sym_map);
+# else
+	    if (!TRY_STATIC_TLS (map, sym_map))
+	      {
+		td->arg = _dl_make_tlsdesc_dynamic
+		  (sym_map, sym->st_value + reloc->r_addend);
+# if !defined __loongarch_soft_float
+		if (SUPPORT_LASX)
+		  td->entry = _dl_tlsdesc_dynamic_lasx;
+		else
+		if (SUPPORT_LSX)
+		  td->entry = _dl_tlsdesc_dynamic_lsx;
+		else
+# endif
+		  td->entry = _dl_tlsdesc_dynamic;
+	      }
+	    else
+# endif
+	      {
+		td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
+			    + reloc->r_addend);
+		td->entry = _dl_tlsdesc_return;
+	      }
+	  }
+	break;
+      }
+
     case R_LARCH_COPY:
       {
 	  if (sym == NULL)
@@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
       else
 	*reloc_addr = map->l_mach.plt;
     }
+  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
+    {
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
+
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+			skip_ifunc);
+    }
   else
     _dl_reloc_bad_type (map, r_type, 1);
 }
diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
index 29924b866d..de593c002d 100644
--- a/sysdeps/loongarch/dl-tls.h
+++ b/sysdeps/loongarch/dl-tls.h
@@ -16,6 +16,9 @@
    License along with the GNU C Library.  If not, see
    <https://www.gnu.org/licenses/>.  */
 
+#ifndef _DL_TLS_H
+#define _DL_TLS_H
+
 /* Type used for the representation of TLS information in the GOT.  */
 typedef struct
 {
@@ -23,6 +26,8 @@ typedef struct
   unsigned long int ti_offset;
 } tls_index;
 
+extern void *__tls_get_addr (tls_index *ti);
+
 /* The thread pointer points to the first static TLS block.  */
 #define TLS_TP_OFFSET 0
 
@@ -37,10 +42,10 @@ typedef struct
 /* Compute the value for a DTPREL reloc.  */
 #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
 
-extern void *__tls_get_addr (tls_index *ti);
-
 #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
 #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
 
 /* Value used for dtv entries for which the allocation is delayed.  */
 #define TLS_DTV_UNALLOCATED ((void *) -1l)
+
+#endif
diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
new file mode 100644
index 0000000000..0d8c9bb991
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
@@ -0,0 +1,341 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   LoongArch version.
+   Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifdef USE_LASX
+# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
+#elif defined USE_LSX
+# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
+#elif !defined __loongarch_soft_float
+# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
+#else
+# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
+#endif
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the offset between the thread pointer and the
+	   object referenced by the argument.
+
+	   ptrdiff_t
+	   __attribute__ ((__regparm__ (1)))
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset
+		- __thread_pointer;
+
+	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+	   }
+	 */
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+	ADDI	sp, sp,-24
+	REG_S	t0, sp, 0
+	REG_S	t1, sp, 8
+	REG_S	t2, sp, 16
+
+	REG_L	t0, tp, -SIZE_OF_DTV	  # dtv(t0) = tp + TCBHEAD_DTV dtv start
+	REG_L	a0, a0, TLSDESC_ARG	  # td(a0) = tdp->arg
+	REG_L	t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
+	REG_L	t2, t0, DTV_COUNTER	  # t2 = dtv[0].counter
+	bltu	t2, t1, Lslow
+
+	REG_L	t1, a0, TLSDESC_MODID	  # t1 = td->tlsinfo.ti_module
+	slli.d	t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
+	add.d	t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
+	REG_L	t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
+	li.d	t2, TLS_DTV_UNALLOCATED
+	beq	t1, t2, Lslow
+	REG_L	t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
+	# dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+	add.d	a0, t1, t2
+Lret:
+	sub.d	a0, a0, tp
+	REG_L	t0, sp, 0
+	REG_L	t1, sp, 8
+	REG_L	t2, sp, 16
+	ADDI	sp, sp, 24
+	RET
+
+Lslow:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the register that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+	ADDI	sp, sp, -FRAME_SIZE
+	REG_S	ra, sp, 0 * SZREG
+	REG_S	a1, sp, 1 * SZREG
+	REG_S	a2, sp, 2 * SZREG
+	REG_S	a3, sp, 3 * SZREG
+	REG_S	a4, sp, 4 * SZREG
+	REG_S	a5, sp, 5 * SZREG
+	REG_S	a6, sp, 6 * SZREG
+	REG_S	a7, sp, 7 * SZREG
+	REG_S	t4, sp, 8 * SZREG
+	REG_S	t5, sp, 9 * SZREG
+	REG_S	t6, sp, 10 * SZREG
+	REG_S	t7, sp, 11 * SZREG
+	REG_S	t8, sp, 12 * SZREG
+
+#ifdef USE_LASX
+	xvst	xr0, sp, 13*SZREG + 0*SZXREG
+	xvst	xr1, sp, 13*SZREG + 1*SZXREG
+	xvst	xr2, sp, 13*SZREG + 2*SZXREG
+	xvst	xr3, sp, 13*SZREG + 3*SZXREG
+	xvst	xr4, sp, 13*SZREG + 4*SZXREG
+	xvst	xr5, sp, 13*SZREG + 5*SZXREG
+	xvst	xr6, sp, 13*SZREG + 6*SZXREG
+	xvst	xr7, sp, 13*SZREG + 7*SZXREG
+	xvst	xr8, sp, 13*SZREG + 8*SZXREG
+	xvst	xr9, sp, 13*SZREG + 9*SZXREG
+	xvst	xr10, sp, 13*SZREG + 10*SZXREG
+	xvst	xr11, sp, 13*SZREG + 11*SZXREG
+	xvst	xr12, sp, 13*SZREG + 12*SZXREG
+	xvst	xr13, sp, 13*SZREG + 13*SZXREG
+	xvst	xr14, sp, 13*SZREG + 14*SZXREG
+	xvst	xr15, sp, 13*SZREG + 15*SZXREG
+	xvst	xr16, sp, 13*SZREG + 16*SZXREG
+	xvst	xr17, sp, 13*SZREG + 17*SZXREG
+	xvst	xr18, sp, 13*SZREG + 18*SZXREG
+	xvst	xr19, sp, 13*SZREG + 19*SZXREG
+	xvst	xr20, sp, 13*SZREG + 20*SZXREG
+	xvst	xr21, sp, 13*SZREG + 21*SZXREG
+	xvst	xr22, sp, 13*SZREG + 22*SZXREG
+	xvst	xr23, sp, 13*SZREG + 23*SZXREG
+	xvst	xr24, sp, 13*SZREG + 24*SZXREG
+	xvst	xr25, sp, 13*SZREG + 25*SZXREG
+	xvst	xr26, sp, 13*SZREG + 26*SZXREG
+	xvst	xr27, sp, 13*SZREG + 27*SZXREG
+	xvst	xr28, sp, 13*SZREG + 28*SZXREG
+	xvst	xr29, sp, 13*SZREG + 29*SZXREG
+	xvst	xr30, sp, 13*SZREG + 30*SZXREG
+	xvst	xr31, sp, 13*SZREG + 31*SZXREG
+	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
+	# some fields in fcsr0
+	movfcsr2gr  t0, fcsr0
+	REG_S	    t0, sp, 32*SZXREG
+#elif defined USE_LSX
+	vst	vr0, sp, 13*SZREG + 0*SZVREG
+	vst	vr1, sp, 13*SZREG + 1*SZVREG
+	vst	vr2, sp, 13*SZREG + 2*SZVREG
+	vst	vr3, sp, 13*SZREG + 3*SZVREG
+	vst	vr4, sp, 13*SZREG + 4*SZVREG
+	vst	vr5, sp, 13*SZREG + 5*SZVREG
+	vst	vr6, sp, 13*SZREG + 6*SZVREG
+	vst	vr7, sp, 13*SZREG + 7*SZVREG
+	vst	vr8, sp, 13*SZREG + 8*SZVREG
+	vst	vr9, sp, 13*SZREG + 9*SZVREG
+	vst	vr10, sp, 13*SZREG + 10*SZVREG
+	vst	vr11, sp, 13*SZREG + 11*SZVREG
+	vst	vr12, sp, 13*SZREG + 12*SZVREG
+	vst	vr13, sp, 13*SZREG + 13*SZVREG
+	vst	vr14, sp, 13*SZREG + 14*SZVREG
+	vst	vr15, sp, 13*SZREG + 15*SZVREG
+	vst	vr16, sp, 13*SZREG + 16*SZVREG
+	vst	vr17, sp, 13*SZREG + 17*SZVREG
+	vst	vr18, sp, 13*SZREG + 18*SZVREG
+	vst	vr19, sp, 13*SZREG + 19*SZVREG
+	vst	vr20, sp, 13*SZREG + 20*SZVREG
+	vst	vr21, sp, 13*SZREG + 21*SZVREG
+	vst	vr22, sp, 13*SZREG + 22*SZVREG
+	vst	vr23, sp, 13*SZREG + 23*SZVREG
+	vst	vr24, sp, 13*SZREG + 24*SZVREG
+	vst	vr25, sp, 13*SZREG + 25*SZVREG
+	vst	vr26, sp, 13*SZREG + 26*SZVREG
+	vst	vr27, sp, 13*SZREG + 27*SZVREG
+	vst	vr28, sp, 13*SZREG + 28*SZVREG
+	vst	vr29, sp, 13*SZREG + 29*SZVREG
+	vst	vr30, sp, 13*SZREG + 30*SZVREG
+	vst	vr31, sp, 13*SZREG + 31*SZVREG
+	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
+	# some fields in fcsr0
+	movfcsr2gr  t0, fcsr0
+	REG_S	    t0, sp, 32*SZVREG
+#elif !defined __loongarch_soft_float
+	FREG_S	fa0, sp, 13*SZREG + 0*SZFREG
+	FREG_S	fa1, sp, 13*SZREG + 1*SZFREG
+	FREG_S	fa2, sp, 13*SZREG + 2*SZFREG
+	FREG_S	fa3, sp, 13*SZREG + 3*SZFREG
+	FREG_S	fa4, sp, 13*SZREG + 4*SZFREG
+	FREG_S	fa5, sp, 13*SZREG + 5*SZFREG
+	FREG_S	fa6, sp, 13*SZREG + 6*SZFREG
+	FREG_S	fa7, sp, 13*SZREG + 7*SZFREG
+	FREG_S	ft0, sp, 13*SZREG + 8*SZFREG
+	FREG_S	ft1, sp, 13*SZREG + 9*SZFREG
+	FREG_S	ft2, sp, 13*SZREG + 10*SZFREG
+	FREG_S	ft3, sp, 13*SZREG + 11*SZFREG
+	FREG_S	ft4, sp, 13*SZREG + 12*SZFREG
+	FREG_S	ft5, sp, 13*SZREG + 13*SZFREG
+	FREG_S	ft6, sp, 13*SZREG + 14*SZFREG
+	FREG_S	ft7, sp, 13*SZREG + 15*SZFREG
+	FREG_S	ft8, sp, 13*SZREG + 16*SZFREG
+	FREG_S	ft9, sp, 13*SZREG + 17*SZFREG
+	FREG_S	ft10, sp, 13*SZREG + 18*SZFREG
+	FREG_S	ft11, sp, 13*SZREG + 19*SZFREG
+	FREG_S	ft12, sp, 13*SZREG + 20*SZFREG
+	FREG_S	ft13, sp, 13*SZREG + 21*SZFREG
+	FREG_S	ft14, sp, 13*SZREG + 22*SZFREG
+	FREG_S	ft15, sp, 13*SZREG + 23*SZFREG
+	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
+	# some fields in fcsr0
+	movfcsr2gr  t0, fcsr0
+	REG_S	    t0, sp, 24*SZFREG
+#endif /* #ifdef USE_LASX  */
+
+	bl	__tls_get_addr
+	ADDI	a0, a0, -TLS_DTV_OFFSET
+
+	REG_L	ra, sp, 0
+	REG_L	a1, sp, 1 * 8
+	REG_L	a2, sp, 2 * 8
+	REG_L	a3, sp, 3 * 8
+	REG_L	a4, sp, 4 * 8
+	REG_L	a5, sp, 5 * 8
+	REG_L	a6, sp, 6 * 8
+	REG_L	a7, sp, 7 * 8
+	REG_L	t4, sp, 8 * 8
+	REG_L	t5, sp, 9 * 8
+	REG_L	t6, sp, 10 * 8
+	REG_L	t7, sp, 11 * 8
+	REG_L	t8, sp, 12 * 8
+
+#ifdef USE_LASX
+	xvld	xr0, sp, 13*SZREG + 0*SZXREG
+	xvld	xr1, sp, 13*SZREG + 1*SZXREG
+	xvld	xr2, sp, 13*SZREG + 2*SZXREG
+	xvld	xr3, sp, 13*SZREG + 3*SZXREG
+	xvld	xr4, sp, 13*SZREG + 4*SZXREG
+	xvld	xr5, sp, 13*SZREG + 5*SZXREG
+	xvld	xr6, sp, 13*SZREG + 6*SZXREG
+	xvld	xr7, sp, 13*SZREG + 7*SZXREG
+	xvld	xr8, sp, 13*SZREG + 8*SZXREG
+	xvld	xr9, sp, 13*SZREG + 9*SZXREG
+	xvld	xr10, sp, 13*SZREG + 10*SZXREG
+	xvld	xr11, sp, 13*SZREG + 11*SZXREG
+	xvld	xr12, sp, 13*SZREG + 12*SZXREG
+	xvld	xr13, sp, 13*SZREG + 13*SZXREG
+	xvld	xr14, sp, 13*SZREG + 14*SZXREG
+	xvld	xr15, sp, 13*SZREG + 15*SZXREG
+	xvld	xr16, sp, 13*SZREG + 16*SZXREG
+	xvld	xr17, sp, 13*SZREG + 17*SZXREG
+	xvld	xr18, sp, 13*SZREG + 18*SZXREG
+	xvld	xr19, sp, 13*SZREG + 19*SZXREG
+	xvld	xr20, sp, 13*SZREG + 20*SZXREG
+	xvld	xr21, sp, 13*SZREG + 21*SZXREG
+	xvld	xr22, sp, 13*SZREG + 22*SZXREG
+	xvld	xr23, sp, 13*SZREG + 23*SZXREG
+	xvld	xr24, sp, 13*SZREG + 24*SZXREG
+	xvld	xr25, sp, 13*SZREG + 25*SZXREG
+	xvld	xr26, sp, 13*SZREG + 26*SZXREG
+	xvld	xr27, sp, 13*SZREG + 27*SZXREG
+	xvld	xr28, sp, 13*SZREG + 28*SZXREG
+	xvld	xr29, sp, 13*SZREG + 29*SZXREG
+	xvld	xr30, sp, 13*SZREG + 30*SZXREG
+	xvld	xr31, sp, 13*SZREG + 31*SZXREG
+	REG_L	t0, sp, 32*SZXREG
+	movgr2fcsr  fcsr0, t0
+#elif defined USE_LSX
+	vld	vr0, sp, 13*SZREG + 0*SZVREG
+	vld	vr1, sp, 13*SZREG + 1*SZVREG
+	vld	vr2, sp, 13*SZREG + 2*SZVREG
+	vld	vr3, sp, 13*SZREG + 3*SZVREG
+	vld	vr4, sp, 13*SZREG + 4*SZVREG
+	vld	vr5, sp, 13*SZREG + 5*SZVREG
+	vld	vr6, sp, 13*SZREG + 6*SZVREG
+	vld	vr7, sp, 13*SZREG + 7*SZVREG
+	vld	vr8, sp, 13*SZREG + 8*SZVREG
+	vld	vr9, sp, 13*SZREG + 9*SZVREG
+	vld	vr10, sp, 13*SZREG + 10*SZVREG
+	vld	vr11, sp, 13*SZREG + 11*SZVREG
+	vld	vr12, sp, 13*SZREG + 12*SZVREG
+	vld	vr13, sp, 13*SZREG + 13*SZVREG
+	vld	vr14, sp, 13*SZREG + 14*SZVREG
+	vld	vr15, sp, 13*SZREG + 15*SZVREG
+	vld	vr16, sp, 13*SZREG + 16*SZVREG
+	vld	vr17, sp, 13*SZREG + 17*SZVREG
+	vld	vr18, sp, 13*SZREG + 18*SZVREG
+	vld	vr19, sp, 13*SZREG + 19*SZVREG
+	vld	vr20, sp, 13*SZREG + 20*SZVREG
+	vld	vr21, sp, 13*SZREG + 21*SZVREG
+	vld	vr22, sp, 13*SZREG + 22*SZVREG
+	vld	vr23, sp, 13*SZREG + 23*SZVREG
+	vld	vr24, sp, 13*SZREG + 24*SZVREG
+	vld	vr25, sp, 13*SZREG + 25*SZVREG
+	vld	vr26, sp, 13*SZREG + 26*SZVREG
+	vld	vr27, sp, 13*SZREG + 27*SZVREG
+	vld	vr28, sp, 13*SZREG + 28*SZVREG
+	vld	vr29, sp, 13*SZREG + 29*SZVREG
+	vld	vr30, sp, 13*SZREG + 30*SZVREG
+	vld	vr31, sp, 13*SZREG + 31*SZVREG
+	REG_L	t0, sp, 32*SZVREG
+	movgr2fcsr  fcsr0, t0
+#elif !defined __loongarch_soft_float
+	FREG_L	fa0, sp, 13*SZREG + 0*SZFREG
+	FREG_L	fa1, sp, 13*SZREG + 1*SZFREG
+	FREG_L	fa2, sp, 13*SZREG + 2*SZFREG
+	FREG_L	fa3, sp, 13*SZREG + 3*SZFREG
+	FREG_L	fa4, sp, 13*SZREG + 4*SZFREG
+	FREG_L	fa5, sp, 13*SZREG + 5*SZFREG
+	FREG_L	fa6, sp, 13*SZREG + 6*SZFREG
+	FREG_L	fa7, sp, 13*SZREG + 7*SZFREG
+	FREG_L	ft0, sp, 13*SZREG + 8*SZFREG
+	FREG_L	ft1, sp, 13*SZREG + 9*SZFREG
+	FREG_L	ft2, sp, 13*SZREG + 10*SZFREG
+	FREG_L	ft3, sp, 13*SZREG + 11*SZFREG
+	FREG_L	ft4, sp, 13*SZREG + 12*SZFREG
+	FREG_L	ft5, sp, 13*SZREG + 13*SZFREG
+	FREG_L	ft6, sp, 13*SZREG + 14*SZFREG
+	FREG_L	ft7, sp, 13*SZREG + 15*SZFREG
+	FREG_L	ft8, sp, 13*SZREG + 16*SZFREG
+	FREG_L	ft9, sp, 13*SZREG + 17*SZFREG
+	FREG_L	ft10, sp, 13*SZREG + 18*SZFREG
+	FREG_L	ft11, sp, 13*SZREG + 19*SZFREG
+	FREG_L	ft12, sp, 13*SZREG + 20*SZFREG
+	FREG_L	ft13, sp, 13*SZREG + 21*SZFREG
+	FREG_L	ft14, sp, 13*SZREG + 22*SZFREG
+	FREG_L	ft15, sp, 13*SZREG + 23*SZFREG
+	REG_L	t0, sp, 24*SZFREG
+	movgr2fcsr  fcsr0, t0
+#endif /* #ifdef USE_LASX  */
+
+	ADDI	sp, sp, FRAME_SIZE
+	b	Lret
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+#endif /* #ifdef SHARED  */
diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
new file mode 100644
index 0000000000..4a17079169
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -0,0 +1,93 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   LoongArch version.
+   Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+	.text
+
+	/* Compute the thread pointer offset for symbols in the static
+	   TLS block. The offset is the same for all threads.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *);  */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	REG_L  a0, a0, 8
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Handler for undefined weak TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *);
+
+	   The second word of the descriptor contains the addend.
+	   Return the addend minus the thread pointer. This ensures
+	   that when the caller adds on the thread pointer it gets back
+	   the addend.  */
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	REG_L	a0, a0, 8
+	sub.d	a0, a0, tp
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+
+#ifdef SHARED
+
+#if !defined __loongarch_soft_float
+
+#define USE_LASX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
+#define Lret Lret_lasx
+#define Lslow Lslow_lasx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef USE_LASX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+
+#define USE_LSX
+#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
+#define Lret Lret_lsx
+#define Lslow Lslow_lsx
+#include "dl-tlsdesc-dynamic.h"
+#undef FRAME_SIZE
+#undef USE_LSX
+#undef _dl_tlsdesc_dynamic
+#undef Lret
+#undef Lslow
+
+#endif
+
+#include "dl-tlsdesc-dynamic.h"
+
+#endif /* #ifdef SHARED  */
diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
new file mode 100644
index 0000000000..988037a714
--- /dev/null
+++ b/sysdeps/loongarch/dl-tlsdesc.h
@@ -0,0 +1,53 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+   LoongArch version.
+   Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_TLSDESC_H
+#define _DL_TLSDESC_H
+
+#include <dl-tls.h>
+
+/* Type used to represent a TLS descriptor in the GOT.  */
+struct tlsdesc
+{
+  ptrdiff_t (*entry) (struct tlsdesc *);
+  void *arg;
+};
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+   needs dynamic TLS offsets.  */
+struct tlsdesc_dynamic_arg
+{
+  tls_index tlsinfo;
+  size_t gen_count;
+};
+
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
+
+# ifdef SHARED
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+#if !defined __loongarch_soft_float
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
+#endif
+extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
+#endif
+
+#endif
diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
index 4d8737ee7f..9b1773634c 100644
--- a/sysdeps/loongarch/linkmap.h
+++ b/sysdeps/loongarch/linkmap.h
@@ -19,4 +19,5 @@
 struct link_map_machine
 {
   ElfW (Addr) plt; /* Address of .plt.  */
+  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
 };
diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
index 51521a7eb4..23c1d12914 100644
--- a/sysdeps/loongarch/sys/asm.h
+++ b/sysdeps/loongarch/sys/asm.h
@@ -25,6 +25,7 @@
 /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
 #define SZREG 8
 #define SZFREG 8
+#define SZFCSREG 4
 #define SZVREG 16
 #define SZXREG 32
 #define REG_L ld.d
diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
index f61ee25b25..80ce3e9c00 100644
--- a/sysdeps/loongarch/sys/regdef.h
+++ b/sysdeps/loongarch/sys/regdef.h
@@ -97,6 +97,7 @@
 #define fcc5 $fcc5
 #define fcc6 $fcc6
 #define fcc7 $fcc7
+#define fcsr0 $fcsr0
 
 #define vr0 $vr0
 #define vr1 $vr1
diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
new file mode 100644
index 0000000000..a357e7619f
--- /dev/null
+++ b/sysdeps/loongarch/tlsdesc.c
@@ -0,0 +1,39 @@
+/* Manage TLS descriptors.  AArch64 version.
+
+   Copyright (C) 2011-2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <ldsodefs.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-unmap-segments.h>
+#include <tlsdeschtab.h>
+
+/* Unmap the dynamic object, but also release its TLS descriptor table
+   if there is one.  */
+
+void
+_dl_unmap (struct link_map *map)
+{
+  _dl_unmap_segments (map);
+
+#ifdef SHARED
+  if (map->l_mach.tlsdesc_table)
+    htab_delete (map->l_mach.tlsdesc_table);
+#endif
+}
diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
new file mode 100644
index 0000000000..bcab218631
--- /dev/null
+++ b/sysdeps/loongarch/tlsdesc.sym
@@ -0,0 +1,19 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+TLSDESC_ARG		offsetof(struct tlsdesc, arg)
+TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
+DTV_COUNTER		offsetof(dtv_t, counter)
+TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
+TLS_DTV_OFFSET		TLS_DTV_OFFSET
+SIZE_OF_DTV		sizeof(tcbhead_t)
diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
index 547b1c1b7f..ec32e6d13f 100644
--- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
+++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
@@ -5,3 +5,5 @@ libc.so: calloc
 libc.so: free
 libc.so: malloc
 libc.so: realloc
+# The dynamic loader needs __tls_get_addr for TLS.
+ld.so: __tls_get_addr
-- 
2.36.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-02-29  1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
@ 2024-02-29  2:56 ` caiyinyu
  2024-03-04 15:42 ` H.J. Lu
  2024-03-05 19:29 ` Adhemerval Zanella Netto
  2 siblings, 0 replies; 8+ messages in thread
From: caiyinyu @ 2024-02-29  2:56 UTC (permalink / raw)
  To: mengqinggang, libc-alpha
  Cc: adhemerval.zanella, xuchenghua, chenglulu, cailulu, xry111,
	i.swmail, maskray, luweining, wanglei, hejinyang


在 2024/2/29 上午9:43, mengqinggang 写道:
> This is mostly based on AArch64 and RISC-V implementation.
>
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>
>   elf/elf.h                                     |   2 +
>   sysdeps/loongarch/Makefile                    |   6 +
>   sysdeps/loongarch/dl-link.sym                 |   1 +
>   sysdeps/loongarch/dl-machine.h                |  60 ++-
>   sysdeps/loongarch/dl-tls.h                    |   9 +-
>   sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>   sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>   sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>   sysdeps/loongarch/linkmap.h                   |   1 +
>   sysdeps/loongarch/sys/asm.h                   |   1 +
>   sysdeps/loongarch/sys/regdef.h                |   1 +
>   sysdeps/loongarch/tlsdesc.c                   |  39 ++
>   sysdeps/loongarch/tlsdesc.sym                 |  19 +
>   .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>   14 files changed, 625 insertions(+), 3 deletions(-)
>   create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>   create mode 100644 sysdeps/loongarch/tlsdesc.c
>   create mode 100644 sysdeps/loongarch/tlsdesc.sym
>
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
>   #define R_LARCH_TLS_TPREL32	10
>   #define R_LARCH_TLS_TPREL64	11
>   #define R_LARCH_IRELATIVE	12
> +#define R_LARCH_TLS_DESC32	13
> +#define R_LARCH_TLS_DESC64	14
>   
>   /* Reserved for future relocs that the dynamic linker must understand.  */
>   
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>   endif
>   
>   ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>   gen-as-const-headers += dl-link.sym
>   endif
>   
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
>   # LoongArch's assembler also needs to know about PIC as it changes the
>   # definition of some assembler macros.
>   ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
>   #include <stddef.h>
>   #include <sysdep.h>
>   #include <link.h>
> +#include <dl-tlsdesc.h>
>   
>   DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>   DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
>   #include <entry.h>
>   #include <elf/elf.h>
>   #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
>   #include <dl-static-tls.h>
>   #include <dl-machine-rel.h>
>   
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>         *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>         break;
>   
> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> +      {
> +	struct tlsdesc volatile *td =
> +	    (struct tlsdesc volatile *)addr_field;
> +	if (! sym)
Use sym != NULL instead of  ! sym, the same applies to other similar cases.
> +	  {
> +	    td->arg = (void*)reloc->r_addend;
> +	    td->entry = _dl_tlsdesc_undefweak;
> +	  }
> +	else
> +	  {
> +# ifndef SHARED
> +	    CHECK_STATIC_TLS (map, sym_map);
> +# else
> +	    if (!TRY_STATIC_TLS (map, sym_map))
> +	      {
> +		td->arg = _dl_make_tlsdesc_dynamic
> +		  (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> +		if (SUPPORT_LASX)
> +		  td->entry = _dl_tlsdesc_dynamic_lasx;
> +		else
> +		if (SUPPORT_LSX)
> +		  td->entry = _dl_tlsdesc_dynamic_lsx;
> +		else
> +# endif
> +		  td->entry = _dl_tlsdesc_dynamic;
> +	      }
> +	    else
> +# endif
> +	      {
> +		td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> +			    + reloc->r_addend);
> +		td->entry = _dl_tlsdesc_return;
> +	      }
> +	  }
> +	break;
> +      }
> +
>       case R_LARCH_COPY:
>         {
>   	  if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>         else
>   	*reloc_addr = map->l_mach.plt;
>       }
> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
Use __glibc_unlikely/likely instead of __builtin_expect, the same 
applies blow.
> +    {
> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> +      const ElfW (Sym) *sym = &symtab[symndx];
> +      const struct r_found_version *version = NULL;
> +
> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> +	{
> +	  const ElfW (Half) *vernum =
> +	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> +	  version = &map->l_versions[vernum[symndx] & 0x7fff];
> +	}
> +
> +      /* Always initialize TLS descriptors completely, because lazy
> +	 initialization requires synchronization at every TLS access.  */
> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> +			skip_ifunc);
> +    }
>     else
>       _dl_reloc_bad_type (map, r_type, 1);
>   }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
>      License along with the GNU C Library.  If not, see
>      <https://www.gnu.org/licenses/>.  */
>   
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
>   /* Type used for the representation of TLS information in the GOT.  */
>   typedef struct
>   {
> @@ -23,6 +26,8 @@ typedef struct
>     unsigned long int ti_offset;
>   } tls_index;
>   
> +extern void *__tls_get_addr (tls_index *ti);
> +
>   /* The thread pointer points to the first static TLS block.  */
>   #define TLS_TP_OFFSET 0
>   
> @@ -37,10 +42,10 @@ typedef struct
>   /* Compute the value for a DTPREL reloc.  */
>   #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>   
> -extern void *__tls_get_addr (tls_index *ti);
> -
>   #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>   #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>   
>   /* Value used for dtv entries for which the allocation is delayed.  */
>   #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif
> +
> +#ifdef SHARED
> +	/* Handler for dynamic TLS symbols.
> +	   Prototype:
> +	   _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> +	   The second word of the descriptor points to a
> +	   tlsdesc_dynamic_arg structure.
> +
> +	   Returns the offset between the thread pointer and the
> +	   object referenced by the argument.
> +
> +	   ptrdiff_t
> +	   __attribute__ ((__regparm__ (1)))
> +	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> +	   {
> +	     struct tlsdesc_dynamic_arg *td = tdp->arg;
> +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> +	     if (__builtin_expect (td->gen_count <= dtv[0].counter
> +		&& (dtv[td->tlsinfo.ti_module].pointer.val
> +		    != TLS_DTV_UNALLOCATED),
> +		1))
> +	       return dtv[td->tlsinfo.ti_module].pointer.val
> +		+ td->tlsinfo.ti_offset
> +		- __thread_pointer;
> +
> +	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> +	   }
> +	 */
> +	.hidden _dl_tlsdesc_dynamic
> +	.global	_dl_tlsdesc_dynamic
> +	.type	_dl_tlsdesc_dynamic,%function
> +	cfi_startproc
> +	.align 2
> +_dl_tlsdesc_dynamic:
> +	/* Save just enough registers to support fast path, if we fall
> +	   into slow path we will save additional registers.  */
> +	ADDI	sp, sp,-24
> +	REG_S	t0, sp, 0
> +	REG_S	t1, sp, 8
> +	REG_S	t2, sp, 16
> +
> +	REG_L	t0, tp, -SIZE_OF_DTV	  # dtv(t0) = tp + TCBHEAD_DTV dtv start
> +	REG_L	a0, a0, TLSDESC_ARG	  # td(a0) = tdp->arg
> +	REG_L	t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> +	REG_L	t2, t0, DTV_COUNTER	  # t2 = dtv[0].counter
> +	bltu	t2, t1, Lslow
> +
> +	REG_L	t1, a0, TLSDESC_MODID	  # t1 = td->tlsinfo.ti_module
> +	slli.d	t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> +	add.d	t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
> +	REG_L	t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> +	li.d	t2, TLS_DTV_UNALLOCATED
> +	beq	t1, t2, Lslow
> +	REG_L	t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> +	# dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> +	add.d	a0, t1, t2
> +Lret:
> +	sub.d	a0, a0, tp
> +	REG_L	t0, sp, 0
> +	REG_L	t1, sp, 8
> +	REG_L	t2, sp, 16
> +	ADDI	sp, sp, 24
> +	RET
> +
> +Lslow:
> +	/* This is the slow path. We need to call __tls_get_addr() which
> +	   means we need to save and restore all the register that the
> +	   callee will trash.  */
> +
> +	/* Save the remaining registers that we must treat as caller save.  */
> +	ADDI	sp, sp, -FRAME_SIZE
> +	REG_S	ra, sp, 0 * SZREG
> +	REG_S	a1, sp, 1 * SZREG
> +	REG_S	a2, sp, 2 * SZREG
> +	REG_S	a3, sp, 3 * SZREG
> +	REG_S	a4, sp, 4 * SZREG
> +	REG_S	a5, sp, 5 * SZREG
> +	REG_S	a6, sp, 6 * SZREG
> +	REG_S	a7, sp, 7 * SZREG
> +	REG_S	t4, sp, 8 * SZREG
> +	REG_S	t5, sp, 9 * SZREG
> +	REG_S	t6, sp, 10 * SZREG
> +	REG_S	t7, sp, 11 * SZREG
> +	REG_S	t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> +	xvst	xr0, sp, 13*SZREG + 0*SZXREG
> +	xvst	xr1, sp, 13*SZREG + 1*SZXREG
> +	xvst	xr2, sp, 13*SZREG + 2*SZXREG
> +	xvst	xr3, sp, 13*SZREG + 3*SZXREG
> +	xvst	xr4, sp, 13*SZREG + 4*SZXREG
> +	xvst	xr5, sp, 13*SZREG + 5*SZXREG
> +	xvst	xr6, sp, 13*SZREG + 6*SZXREG
> +	xvst	xr7, sp, 13*SZREG + 7*SZXREG
> +	xvst	xr8, sp, 13*SZREG + 8*SZXREG
> +	xvst	xr9, sp, 13*SZREG + 9*SZXREG
> +	xvst	xr10, sp, 13*SZREG + 10*SZXREG
> +	xvst	xr11, sp, 13*SZREG + 11*SZXREG
> +	xvst	xr12, sp, 13*SZREG + 12*SZXREG
> +	xvst	xr13, sp, 13*SZREG + 13*SZXREG
> +	xvst	xr14, sp, 13*SZREG + 14*SZXREG
> +	xvst	xr15, sp, 13*SZREG + 15*SZXREG
> +	xvst	xr16, sp, 13*SZREG + 16*SZXREG
> +	xvst	xr17, sp, 13*SZREG + 17*SZXREG
> +	xvst	xr18, sp, 13*SZREG + 18*SZXREG
> +	xvst	xr19, sp, 13*SZREG + 19*SZXREG
> +	xvst	xr20, sp, 13*SZREG + 20*SZXREG
> +	xvst	xr21, sp, 13*SZREG + 21*SZXREG
> +	xvst	xr22, sp, 13*SZREG + 22*SZXREG
> +	xvst	xr23, sp, 13*SZREG + 23*SZXREG
> +	xvst	xr24, sp, 13*SZREG + 24*SZXREG
> +	xvst	xr25, sp, 13*SZREG + 25*SZXREG
> +	xvst	xr26, sp, 13*SZREG + 26*SZXREG
> +	xvst	xr27, sp, 13*SZREG + 27*SZXREG
> +	xvst	xr28, sp, 13*SZREG + 28*SZXREG
> +	xvst	xr29, sp, 13*SZREG + 29*SZXREG
> +	xvst	xr30, sp, 13*SZREG + 30*SZXREG
> +	xvst	xr31, sp, 13*SZREG + 31*SZXREG
> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +	# some fields in fcsr0
> +	movfcsr2gr  t0, fcsr0
> +	REG_S	    t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> +	vst	vr0, sp, 13*SZREG + 0*SZVREG
> +	vst	vr1, sp, 13*SZREG + 1*SZVREG
> +	vst	vr2, sp, 13*SZREG + 2*SZVREG
> +	vst	vr3, sp, 13*SZREG + 3*SZVREG
> +	vst	vr4, sp, 13*SZREG + 4*SZVREG
> +	vst	vr5, sp, 13*SZREG + 5*SZVREG
> +	vst	vr6, sp, 13*SZREG + 6*SZVREG
> +	vst	vr7, sp, 13*SZREG + 7*SZVREG
> +	vst	vr8, sp, 13*SZREG + 8*SZVREG
> +	vst	vr9, sp, 13*SZREG + 9*SZVREG
> +	vst	vr10, sp, 13*SZREG + 10*SZVREG
> +	vst	vr11, sp, 13*SZREG + 11*SZVREG
> +	vst	vr12, sp, 13*SZREG + 12*SZVREG
> +	vst	vr13, sp, 13*SZREG + 13*SZVREG
> +	vst	vr14, sp, 13*SZREG + 14*SZVREG
> +	vst	vr15, sp, 13*SZREG + 15*SZVREG
> +	vst	vr16, sp, 13*SZREG + 16*SZVREG
> +	vst	vr17, sp, 13*SZREG + 17*SZVREG
> +	vst	vr18, sp, 13*SZREG + 18*SZVREG
> +	vst	vr19, sp, 13*SZREG + 19*SZVREG
> +	vst	vr20, sp, 13*SZREG + 20*SZVREG
> +	vst	vr21, sp, 13*SZREG + 21*SZVREG
> +	vst	vr22, sp, 13*SZREG + 22*SZVREG
> +	vst	vr23, sp, 13*SZREG + 23*SZVREG
> +	vst	vr24, sp, 13*SZREG + 24*SZVREG
> +	vst	vr25, sp, 13*SZREG + 25*SZVREG
> +	vst	vr26, sp, 13*SZREG + 26*SZVREG
> +	vst	vr27, sp, 13*SZREG + 27*SZVREG
> +	vst	vr28, sp, 13*SZREG + 28*SZVREG
> +	vst	vr29, sp, 13*SZREG + 29*SZVREG
> +	vst	vr30, sp, 13*SZREG + 30*SZVREG
> +	vst	vr31, sp, 13*SZREG + 31*SZVREG
> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +	# some fields in fcsr0
> +	movfcsr2gr  t0, fcsr0
> +	REG_S	    t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> +	FREG_S	fa0, sp, 13*SZREG + 0*SZFREG
> +	FREG_S	fa1, sp, 13*SZREG + 1*SZFREG
> +	FREG_S	fa2, sp, 13*SZREG + 2*SZFREG
> +	FREG_S	fa3, sp, 13*SZREG + 3*SZFREG
> +	FREG_S	fa4, sp, 13*SZREG + 4*SZFREG
> +	FREG_S	fa5, sp, 13*SZREG + 5*SZFREG
> +	FREG_S	fa6, sp, 13*SZREG + 6*SZFREG
> +	FREG_S	fa7, sp, 13*SZREG + 7*SZFREG
> +	FREG_S	ft0, sp, 13*SZREG + 8*SZFREG
> +	FREG_S	ft1, sp, 13*SZREG + 9*SZFREG
> +	FREG_S	ft2, sp, 13*SZREG + 10*SZFREG
> +	FREG_S	ft3, sp, 13*SZREG + 11*SZFREG
> +	FREG_S	ft4, sp, 13*SZREG + 12*SZFREG
> +	FREG_S	ft5, sp, 13*SZREG + 13*SZFREG
> +	FREG_S	ft6, sp, 13*SZREG + 14*SZFREG
> +	FREG_S	ft7, sp, 13*SZREG + 15*SZFREG
> +	FREG_S	ft8, sp, 13*SZREG + 16*SZFREG
> +	FREG_S	ft9, sp, 13*SZREG + 17*SZFREG
> +	FREG_S	ft10, sp, 13*SZREG + 18*SZFREG
> +	FREG_S	ft11, sp, 13*SZREG + 19*SZFREG
> +	FREG_S	ft12, sp, 13*SZREG + 20*SZFREG
> +	FREG_S	ft13, sp, 13*SZREG + 21*SZFREG
> +	FREG_S	ft14, sp, 13*SZREG + 22*SZFREG
> +	FREG_S	ft15, sp, 13*SZREG + 23*SZFREG
> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +	# some fields in fcsr0
> +	movfcsr2gr  t0, fcsr0
> +	REG_S	    t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX  */
> +
> +	bl	__tls_get_addr
> +	ADDI	a0, a0, -TLS_DTV_OFFSET
> +
> +	REG_L	ra, sp, 0
> +	REG_L	a1, sp, 1 * 8
> +	REG_L	a2, sp, 2 * 8
> +	REG_L	a3, sp, 3 * 8
> +	REG_L	a4, sp, 4 * 8
> +	REG_L	a5, sp, 5 * 8
> +	REG_L	a6, sp, 6 * 8
> +	REG_L	a7, sp, 7 * 8
> +	REG_L	t4, sp, 8 * 8
> +	REG_L	t5, sp, 9 * 8
> +	REG_L	t6, sp, 10 * 8
> +	REG_L	t7, sp, 11 * 8
> +	REG_L	t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> +	xvld	xr0, sp, 13*SZREG + 0*SZXREG
> +	xvld	xr1, sp, 13*SZREG + 1*SZXREG
> +	xvld	xr2, sp, 13*SZREG + 2*SZXREG
> +	xvld	xr3, sp, 13*SZREG + 3*SZXREG
> +	xvld	xr4, sp, 13*SZREG + 4*SZXREG
> +	xvld	xr5, sp, 13*SZREG + 5*SZXREG
> +	xvld	xr6, sp, 13*SZREG + 6*SZXREG
> +	xvld	xr7, sp, 13*SZREG + 7*SZXREG
> +	xvld	xr8, sp, 13*SZREG + 8*SZXREG
> +	xvld	xr9, sp, 13*SZREG + 9*SZXREG
> +	xvld	xr10, sp, 13*SZREG + 10*SZXREG
> +	xvld	xr11, sp, 13*SZREG + 11*SZXREG
> +	xvld	xr12, sp, 13*SZREG + 12*SZXREG
> +	xvld	xr13, sp, 13*SZREG + 13*SZXREG
> +	xvld	xr14, sp, 13*SZREG + 14*SZXREG
> +	xvld	xr15, sp, 13*SZREG + 15*SZXREG
> +	xvld	xr16, sp, 13*SZREG + 16*SZXREG
> +	xvld	xr17, sp, 13*SZREG + 17*SZXREG
> +	xvld	xr18, sp, 13*SZREG + 18*SZXREG
> +	xvld	xr19, sp, 13*SZREG + 19*SZXREG
> +	xvld	xr20, sp, 13*SZREG + 20*SZXREG
> +	xvld	xr21, sp, 13*SZREG + 21*SZXREG
> +	xvld	xr22, sp, 13*SZREG + 22*SZXREG
> +	xvld	xr23, sp, 13*SZREG + 23*SZXREG
> +	xvld	xr24, sp, 13*SZREG + 24*SZXREG
> +	xvld	xr25, sp, 13*SZREG + 25*SZXREG
> +	xvld	xr26, sp, 13*SZREG + 26*SZXREG
> +	xvld	xr27, sp, 13*SZREG + 27*SZXREG
> +	xvld	xr28, sp, 13*SZREG + 28*SZXREG
> +	xvld	xr29, sp, 13*SZREG + 29*SZXREG
> +	xvld	xr30, sp, 13*SZREG + 30*SZXREG
> +	xvld	xr31, sp, 13*SZREG + 31*SZXREG
> +	REG_L	t0, sp, 32*SZXREG
> +	movgr2fcsr  fcsr0, t0
> +#elif defined USE_LSX
> +	vld	vr0, sp, 13*SZREG + 0*SZVREG
> +	vld	vr1, sp, 13*SZREG + 1*SZVREG
> +	vld	vr2, sp, 13*SZREG + 2*SZVREG
> +	vld	vr3, sp, 13*SZREG + 3*SZVREG
> +	vld	vr4, sp, 13*SZREG + 4*SZVREG
> +	vld	vr5, sp, 13*SZREG + 5*SZVREG
> +	vld	vr6, sp, 13*SZREG + 6*SZVREG
> +	vld	vr7, sp, 13*SZREG + 7*SZVREG
> +	vld	vr8, sp, 13*SZREG + 8*SZVREG
> +	vld	vr9, sp, 13*SZREG + 9*SZVREG
> +	vld	vr10, sp, 13*SZREG + 10*SZVREG
> +	vld	vr11, sp, 13*SZREG + 11*SZVREG
> +	vld	vr12, sp, 13*SZREG + 12*SZVREG
> +	vld	vr13, sp, 13*SZREG + 13*SZVREG
> +	vld	vr14, sp, 13*SZREG + 14*SZVREG
> +	vld	vr15, sp, 13*SZREG + 15*SZVREG
> +	vld	vr16, sp, 13*SZREG + 16*SZVREG
> +	vld	vr17, sp, 13*SZREG + 17*SZVREG
> +	vld	vr18, sp, 13*SZREG + 18*SZVREG
> +	vld	vr19, sp, 13*SZREG + 19*SZVREG
> +	vld	vr20, sp, 13*SZREG + 20*SZVREG
> +	vld	vr21, sp, 13*SZREG + 21*SZVREG
> +	vld	vr22, sp, 13*SZREG + 22*SZVREG
> +	vld	vr23, sp, 13*SZREG + 23*SZVREG
> +	vld	vr24, sp, 13*SZREG + 24*SZVREG
> +	vld	vr25, sp, 13*SZREG + 25*SZVREG
> +	vld	vr26, sp, 13*SZREG + 26*SZVREG
> +	vld	vr27, sp, 13*SZREG + 27*SZVREG
> +	vld	vr28, sp, 13*SZREG + 28*SZVREG
> +	vld	vr29, sp, 13*SZREG + 29*SZVREG
> +	vld	vr30, sp, 13*SZREG + 30*SZVREG
> +	vld	vr31, sp, 13*SZREG + 31*SZVREG
> +	REG_L	t0, sp, 32*SZVREG
> +	movgr2fcsr  fcsr0, t0
> +#elif !defined __loongarch_soft_float
> +	FREG_L	fa0, sp, 13*SZREG + 0*SZFREG
> +	FREG_L	fa1, sp, 13*SZREG + 1*SZFREG
> +	FREG_L	fa2, sp, 13*SZREG + 2*SZFREG
> +	FREG_L	fa3, sp, 13*SZREG + 3*SZFREG
> +	FREG_L	fa4, sp, 13*SZREG + 4*SZFREG
> +	FREG_L	fa5, sp, 13*SZREG + 5*SZFREG
> +	FREG_L	fa6, sp, 13*SZREG + 6*SZFREG
> +	FREG_L	fa7, sp, 13*SZREG + 7*SZFREG
> +	FREG_L	ft0, sp, 13*SZREG + 8*SZFREG
> +	FREG_L	ft1, sp, 13*SZREG + 9*SZFREG
> +	FREG_L	ft2, sp, 13*SZREG + 10*SZFREG
> +	FREG_L	ft3, sp, 13*SZREG + 11*SZFREG
> +	FREG_L	ft4, sp, 13*SZREG + 12*SZFREG
> +	FREG_L	ft5, sp, 13*SZREG + 13*SZFREG
> +	FREG_L	ft6, sp, 13*SZREG + 14*SZFREG
> +	FREG_L	ft7, sp, 13*SZREG + 15*SZFREG
> +	FREG_L	ft8, sp, 13*SZREG + 16*SZFREG
> +	FREG_L	ft9, sp, 13*SZREG + 17*SZFREG
> +	FREG_L	ft10, sp, 13*SZREG + 18*SZFREG
> +	FREG_L	ft11, sp, 13*SZREG + 19*SZFREG
> +	FREG_L	ft12, sp, 13*SZREG + 20*SZFREG
> +	FREG_L	ft13, sp, 13*SZREG + 21*SZFREG
> +	FREG_L	ft14, sp, 13*SZREG + 22*SZFREG
> +	FREG_L	ft15, sp, 13*SZREG + 23*SZFREG
> +	REG_L	t0, sp, 24*SZFREG
> +	movgr2fcsr  fcsr0, t0
> +#endif /* #ifdef USE_LASX  */
> +
> +	ADDI	sp, sp, FRAME_SIZE
> +	b	Lret
> +	cfi_endproc
> +	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> +	.text
> +
> +	/* Compute the thread pointer offset for symbols in the static
> +	   TLS block. The offset is the same for all threads.
> +	   Prototype:
> +	   _dl_tlsdesc_return (tlsdesc *);  */
> +	.hidden _dl_tlsdesc_return
> +	.global	_dl_tlsdesc_return
> +	.type	_dl_tlsdesc_return,%function
> +	cfi_startproc
> +	.align 2
> +_dl_tlsdesc_return:
> +	REG_L  a0, a0, 8
> +	RET
> +	cfi_endproc
> +	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> +	/* Handler for undefined weak TLS symbols.
> +	   Prototype:
> +	   _dl_tlsdesc_undefweak (tlsdesc *);
> +
> +	   The second word of the descriptor contains the addend.
> +	   Return the addend minus the thread pointer. This ensures
> +	   that when the caller adds on the thread pointer it gets back
> +	   the addend.  */
> +	.hidden _dl_tlsdesc_undefweak
> +	.global	_dl_tlsdesc_undefweak
> +	.type	_dl_tlsdesc_undefweak,%function
> +	cfi_startproc
> +	.align  2
> +_dl_tlsdesc_undefweak:
> +	REG_L	a0, a0, 8
> +	sub.d	a0, a0, tp
> +	RET
> +	cfi_endproc
> +	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT.  */
> +struct tlsdesc
> +{
> +  ptrdiff_t (*entry) (struct tlsdesc *);
> +  void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> +   needs dynamic TLS offsets.  */
> +struct tlsdesc_dynamic_arg
> +{
> +  tls_index tlsinfo;
> +  size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
>   struct link_map_machine
>   {
>     ElfW (Addr) plt; /* Address of .plt.  */
> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>   };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
>   /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>   #define SZREG 8
>   #define SZFREG 8
> +#define SZFCSREG 4
>   #define SZVREG 16
>   #define SZXREG 32
>   #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
>   #define fcc5 $fcc5
>   #define fcc6 $fcc6
>   #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>   
>   #define vr0 $vr0
>   #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors.  AArch64 version.
> +
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <ldsodefs.h>
> +#include <tls.h>
> +#include <dl-tlsdesc.h>
> +#include <dl-unmap-segments.h>
> +#include <tlsdeschtab.h>
> +
> +/* Unmap the dynamic object, but also release its TLS descriptor table
> +   if there is one.  */
> +
> +void
> +_dl_unmap (struct link_map *map)
> +{
> +  _dl_unmap_segments (map);
> +
> +#ifdef SHARED
> +  if (map->l_mach.tlsdesc_table)
> +    htab_delete (map->l_mach.tlsdesc_table);
> +#endif
> +}
> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
> new file mode 100644
> index 0000000000..bcab218631
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.sym
> @@ -0,0 +1,19 @@
> +#include <stddef.h>
> +#include <sysdep.h>
> +#include <tls.h>
> +#include <link.h>
> +#include <dl-tlsdesc.h>
> +
> +--
> +
> +-- Abuse tls.h macros to derive offsets relative to the thread register.
> +
> +TLSDESC_ARG		offsetof(struct tlsdesc, arg)
> +TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
> +TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
> +TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
> +TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
> +DTV_COUNTER		offsetof(dtv_t, counter)
> +TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
> +TLS_DTV_OFFSET		TLS_DTV_OFFSET
> +SIZE_OF_DTV		sizeof(tcbhead_t)
> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> index 547b1c1b7f..ec32e6d13f 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> @@ -5,3 +5,5 @@ libc.so: calloc
>   libc.so: free
>   libc.so: malloc
>   libc.so: realloc
> +# The dynamic loader needs __tls_get_addr for TLS.
> +ld.so: __tls_get_addr


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-02-29  1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
  2024-02-29  2:56 ` caiyinyu
@ 2024-03-04 15:42 ` H.J. Lu
  2024-03-08  7:45   ` mengqinggang
  2024-03-05 19:29 ` Adhemerval Zanella Netto
  2 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2024-03-04 15:42 UTC (permalink / raw)
  To: mengqinggang
  Cc: libc-alpha, adhemerval.zanella, xuchenghua, caiyinyu, chenglulu,
	cailulu, xry111, i.swmail, maskray, luweining, wanglei,
	hejinyang

On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote:
>
> This is mostly based on AArch64 and RISC-V implementation.
>
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>
>  elf/elf.h                                     |   2 +
>  sysdeps/loongarch/Makefile                    |   6 +
>  sysdeps/loongarch/dl-link.sym                 |   1 +
>  sysdeps/loongarch/dl-machine.h                |  60 ++-
>  sysdeps/loongarch/dl-tls.h                    |   9 +-
>  sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>  sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>  sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>  sysdeps/loongarch/linkmap.h                   |   1 +
>  sysdeps/loongarch/sys/asm.h                   |   1 +
>  sysdeps/loongarch/sys/regdef.h                |   1 +
>  sysdeps/loongarch/tlsdesc.c                   |  39 ++
>  sysdeps/loongarch/tlsdesc.sym                 |  19 +
>  .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>  14 files changed, 625 insertions(+), 3 deletions(-)
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>  create mode 100644 sysdeps/loongarch/tlsdesc.c
>  create mode 100644 sysdeps/loongarch/tlsdesc.sym
>
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
>  #define R_LARCH_TLS_TPREL32    10
>  #define R_LARCH_TLS_TPREL64    11
>  #define R_LARCH_IRELATIVE      12
> +#define R_LARCH_TLS_DESC32     13
> +#define R_LARCH_TLS_DESC64     14
>
>  /* Reserved for future relocs that the dynamic linker must understand.  */
>
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>  endif
>
>  ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>  gen-as-const-headers += dl-link.sym
>  endif
>
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
>  # LoongArch's assembler also needs to know about PIC as it changes the
>  # definition of some assembler macros.
>  ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
>  #include <stddef.h>
>  #include <sysdep.h>
>  #include <link.h>
> +#include <dl-tlsdesc.h>
>
>  DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>  DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
>  #include <entry.h>
>  #include <elf/elf.h>
>  #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
>  #include <dl-static-tls.h>
>  #include <dl-machine-rel.h>
>
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>        *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>        break;
>
> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> +      {
> +       struct tlsdesc volatile *td =
> +           (struct tlsdesc volatile *)addr_field;
> +       if (! sym)
> +         {
> +           td->arg = (void*)reloc->r_addend;
> +           td->entry = _dl_tlsdesc_undefweak;
> +         }
> +       else
> +         {
> +# ifndef SHARED
> +           CHECK_STATIC_TLS (map, sym_map);
> +# else
> +           if (!TRY_STATIC_TLS (map, sym_map))
> +             {
> +               td->arg = _dl_make_tlsdesc_dynamic
> +                 (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> +               if (SUPPORT_LASX)
> +                 td->entry = _dl_tlsdesc_dynamic_lasx;
> +               else
> +               if (SUPPORT_LSX)
> +                 td->entry = _dl_tlsdesc_dynamic_lsx;
> +               else
> +# endif
> +                 td->entry = _dl_tlsdesc_dynamic;
> +             }
> +           else
> +# endif
> +             {
> +               td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> +                           + reloc->r_addend);
> +               td->entry = _dl_tlsdesc_return;
> +             }
> +         }
> +       break;
> +      }
> +
>      case R_LARCH_COPY:
>        {
>           if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>        else
>         *reloc_addr = map->l_mach.plt;
>      }
> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
> +    {
> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> +      const ElfW (Sym) *sym = &symtab[symndx];
> +      const struct r_found_version *version = NULL;
> +
> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> +       {
> +         const ElfW (Half) *vernum =
> +           (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> +         version = &map->l_versions[vernum[symndx] & 0x7fff];
> +       }
> +
> +      /* Always initialize TLS descriptors completely, because lazy
> +        initialization requires synchronization at every TLS access.  */
> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> +                       skip_ifunc);
> +    }
>    else
>      _dl_reloc_bad_type (map, r_type, 1);
>  }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
>     License along with the GNU C Library.  If not, see
>     <https://www.gnu.org/licenses/>.  */
>
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
>  /* Type used for the representation of TLS information in the GOT.  */
>  typedef struct
>  {
> @@ -23,6 +26,8 @@ typedef struct
>    unsigned long int ti_offset;
>  } tls_index;
>
> +extern void *__tls_get_addr (tls_index *ti);
> +
>  /* The thread pointer points to the first static TLS block.  */
>  #define TLS_TP_OFFSET 0
>
> @@ -37,10 +42,10 @@ typedef struct
>  /* Compute the value for a DTPREL reloc.  */
>  #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>
> -extern void *__tls_get_addr (tls_index *ti);
> -
>  #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>  #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>
>  /* Value used for dtv entries for which the allocation is delayed.  */
>  #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif
> +
> +#ifdef SHARED
> +       /* Handler for dynamic TLS symbols.
> +          Prototype:
> +          _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> +          The second word of the descriptor points to a
> +          tlsdesc_dynamic_arg structure.
> +
> +          Returns the offset between the thread pointer and the
> +          object referenced by the argument.
> +
> +          ptrdiff_t
> +          __attribute__ ((__regparm__ (1)))
> +          _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> +          {
> +            struct tlsdesc_dynamic_arg *td = tdp->arg;
> +            dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> +            if (__builtin_expect (td->gen_count <= dtv[0].counter
> +               && (dtv[td->tlsinfo.ti_module].pointer.val
> +                   != TLS_DTV_UNALLOCATED),
> +               1))
> +              return dtv[td->tlsinfo.ti_module].pointer.val
> +               + td->tlsinfo.ti_offset
> +               - __thread_pointer;
> +
> +            return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> +          }
> +        */
> +       .hidden _dl_tlsdesc_dynamic
> +       .global _dl_tlsdesc_dynamic
> +       .type   _dl_tlsdesc_dynamic,%function
> +       cfi_startproc
> +       .align 2
> +_dl_tlsdesc_dynamic:
> +       /* Save just enough registers to support fast path, if we fall
> +          into slow path we will save additional registers.  */
> +       ADDI    sp, sp,-24
> +       REG_S   t0, sp, 0
> +       REG_S   t1, sp, 8
> +       REG_S   t2, sp, 16
> +
> +       REG_L   t0, tp, -SIZE_OF_DTV      # dtv(t0) = tp + TCBHEAD_DTV dtv start
> +       REG_L   a0, a0, TLSDESC_ARG       # td(a0) = tdp->arg
> +       REG_L   t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> +       REG_L   t2, t0, DTV_COUNTER       # t2 = dtv[0].counter
> +       bltu    t2, t1, Lslow
> +
> +       REG_L   t1, a0, TLSDESC_MODID     # t1 = td->tlsinfo.ti_module
> +       slli.d  t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> +       add.d   t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
> +       REG_L   t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> +       li.d    t2, TLS_DTV_UNALLOCATED
> +       beq     t1, t2, Lslow
> +       REG_L   t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> +       # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> +       add.d   a0, t1, t2
> +Lret:
> +       sub.d   a0, a0, tp
> +       REG_L   t0, sp, 0
> +       REG_L   t1, sp, 8
> +       REG_L   t2, sp, 16
> +       ADDI    sp, sp, 24
> +       RET
> +
> +Lslow:
> +       /* This is the slow path. We need to call __tls_get_addr() which
> +          means we need to save and restore all the register that the
> +          callee will trash.  */
> +
> +       /* Save the remaining registers that we must treat as caller save.  */
> +       ADDI    sp, sp, -FRAME_SIZE
> +       REG_S   ra, sp, 0 * SZREG
> +       REG_S   a1, sp, 1 * SZREG
> +       REG_S   a2, sp, 2 * SZREG
> +       REG_S   a3, sp, 3 * SZREG
> +       REG_S   a4, sp, 4 * SZREG
> +       REG_S   a5, sp, 5 * SZREG
> +       REG_S   a6, sp, 6 * SZREG
> +       REG_S   a7, sp, 7 * SZREG
> +       REG_S   t4, sp, 8 * SZREG
> +       REG_S   t5, sp, 9 * SZREG
> +       REG_S   t6, sp, 10 * SZREG
> +       REG_S   t7, sp, 11 * SZREG
> +       REG_S   t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> +       xvst    xr0, sp, 13*SZREG + 0*SZXREG
> +       xvst    xr1, sp, 13*SZREG + 1*SZXREG
> +       xvst    xr2, sp, 13*SZREG + 2*SZXREG
> +       xvst    xr3, sp, 13*SZREG + 3*SZXREG
> +       xvst    xr4, sp, 13*SZREG + 4*SZXREG
> +       xvst    xr5, sp, 13*SZREG + 5*SZXREG
> +       xvst    xr6, sp, 13*SZREG + 6*SZXREG
> +       xvst    xr7, sp, 13*SZREG + 7*SZXREG
> +       xvst    xr8, sp, 13*SZREG + 8*SZXREG
> +       xvst    xr9, sp, 13*SZREG + 9*SZXREG
> +       xvst    xr10, sp, 13*SZREG + 10*SZXREG
> +       xvst    xr11, sp, 13*SZREG + 11*SZXREG
> +       xvst    xr12, sp, 13*SZREG + 12*SZXREG
> +       xvst    xr13, sp, 13*SZREG + 13*SZXREG
> +       xvst    xr14, sp, 13*SZREG + 14*SZXREG
> +       xvst    xr15, sp, 13*SZREG + 15*SZXREG
> +       xvst    xr16, sp, 13*SZREG + 16*SZXREG
> +       xvst    xr17, sp, 13*SZREG + 17*SZXREG
> +       xvst    xr18, sp, 13*SZREG + 18*SZXREG
> +       xvst    xr19, sp, 13*SZREG + 19*SZXREG
> +       xvst    xr20, sp, 13*SZREG + 20*SZXREG
> +       xvst    xr21, sp, 13*SZREG + 21*SZXREG
> +       xvst    xr22, sp, 13*SZREG + 22*SZXREG
> +       xvst    xr23, sp, 13*SZREG + 23*SZXREG
> +       xvst    xr24, sp, 13*SZREG + 24*SZXREG
> +       xvst    xr25, sp, 13*SZREG + 25*SZXREG
> +       xvst    xr26, sp, 13*SZREG + 26*SZXREG
> +       xvst    xr27, sp, 13*SZREG + 27*SZXREG
> +       xvst    xr28, sp, 13*SZREG + 28*SZXREG
> +       xvst    xr29, sp, 13*SZREG + 29*SZXREG
> +       xvst    xr30, sp, 13*SZREG + 30*SZXREG
> +       xvst    xr31, sp, 13*SZREG + 31*SZXREG
> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +       # some fields in fcsr0
> +       movfcsr2gr  t0, fcsr0
> +       REG_S       t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> +       vst     vr0, sp, 13*SZREG + 0*SZVREG
> +       vst     vr1, sp, 13*SZREG + 1*SZVREG
> +       vst     vr2, sp, 13*SZREG + 2*SZVREG
> +       vst     vr3, sp, 13*SZREG + 3*SZVREG
> +       vst     vr4, sp, 13*SZREG + 4*SZVREG
> +       vst     vr5, sp, 13*SZREG + 5*SZVREG
> +       vst     vr6, sp, 13*SZREG + 6*SZVREG
> +       vst     vr7, sp, 13*SZREG + 7*SZVREG
> +       vst     vr8, sp, 13*SZREG + 8*SZVREG
> +       vst     vr9, sp, 13*SZREG + 9*SZVREG
> +       vst     vr10, sp, 13*SZREG + 10*SZVREG
> +       vst     vr11, sp, 13*SZREG + 11*SZVREG
> +       vst     vr12, sp, 13*SZREG + 12*SZVREG
> +       vst     vr13, sp, 13*SZREG + 13*SZVREG
> +       vst     vr14, sp, 13*SZREG + 14*SZVREG
> +       vst     vr15, sp, 13*SZREG + 15*SZVREG
> +       vst     vr16, sp, 13*SZREG + 16*SZVREG
> +       vst     vr17, sp, 13*SZREG + 17*SZVREG
> +       vst     vr18, sp, 13*SZREG + 18*SZVREG
> +       vst     vr19, sp, 13*SZREG + 19*SZVREG
> +       vst     vr20, sp, 13*SZREG + 20*SZVREG
> +       vst     vr21, sp, 13*SZREG + 21*SZVREG
> +       vst     vr22, sp, 13*SZREG + 22*SZVREG
> +       vst     vr23, sp, 13*SZREG + 23*SZVREG
> +       vst     vr24, sp, 13*SZREG + 24*SZVREG
> +       vst     vr25, sp, 13*SZREG + 25*SZVREG
> +       vst     vr26, sp, 13*SZREG + 26*SZVREG
> +       vst     vr27, sp, 13*SZREG + 27*SZVREG
> +       vst     vr28, sp, 13*SZREG + 28*SZVREG
> +       vst     vr29, sp, 13*SZREG + 29*SZVREG
> +       vst     vr30, sp, 13*SZREG + 30*SZVREG
> +       vst     vr31, sp, 13*SZREG + 31*SZVREG
> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +       # some fields in fcsr0
> +       movfcsr2gr  t0, fcsr0
> +       REG_S       t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> +       FREG_S  fa0, sp, 13*SZREG + 0*SZFREG
> +       FREG_S  fa1, sp, 13*SZREG + 1*SZFREG
> +       FREG_S  fa2, sp, 13*SZREG + 2*SZFREG
> +       FREG_S  fa3, sp, 13*SZREG + 3*SZFREG
> +       FREG_S  fa4, sp, 13*SZREG + 4*SZFREG
> +       FREG_S  fa5, sp, 13*SZREG + 5*SZFREG
> +       FREG_S  fa6, sp, 13*SZREG + 6*SZFREG
> +       FREG_S  fa7, sp, 13*SZREG + 7*SZFREG
> +       FREG_S  ft0, sp, 13*SZREG + 8*SZFREG
> +       FREG_S  ft1, sp, 13*SZREG + 9*SZFREG
> +       FREG_S  ft2, sp, 13*SZREG + 10*SZFREG
> +       FREG_S  ft3, sp, 13*SZREG + 11*SZFREG
> +       FREG_S  ft4, sp, 13*SZREG + 12*SZFREG
> +       FREG_S  ft5, sp, 13*SZREG + 13*SZFREG
> +       FREG_S  ft6, sp, 13*SZREG + 14*SZFREG
> +       FREG_S  ft7, sp, 13*SZREG + 15*SZFREG
> +       FREG_S  ft8, sp, 13*SZREG + 16*SZFREG
> +       FREG_S  ft9, sp, 13*SZREG + 17*SZFREG
> +       FREG_S  ft10, sp, 13*SZREG + 18*SZFREG
> +       FREG_S  ft11, sp, 13*SZREG + 19*SZFREG
> +       FREG_S  ft12, sp, 13*SZREG + 20*SZFREG
> +       FREG_S  ft13, sp, 13*SZREG + 21*SZFREG
> +       FREG_S  ft14, sp, 13*SZREG + 22*SZFREG
> +       FREG_S  ft15, sp, 13*SZREG + 23*SZFREG
> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +       # some fields in fcsr0
> +       movfcsr2gr  t0, fcsr0
> +       REG_S       t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX  */
> +
> +       bl      __tls_get_addr
> +       ADDI    a0, a0, -TLS_DTV_OFFSET
> +
> +       REG_L   ra, sp, 0
> +       REG_L   a1, sp, 1 * 8
> +       REG_L   a2, sp, 2 * 8
> +       REG_L   a3, sp, 3 * 8
> +       REG_L   a4, sp, 4 * 8
> +       REG_L   a5, sp, 5 * 8
> +       REG_L   a6, sp, 6 * 8
> +       REG_L   a7, sp, 7 * 8
> +       REG_L   t4, sp, 8 * 8
> +       REG_L   t5, sp, 9 * 8
> +       REG_L   t6, sp, 10 * 8
> +       REG_L   t7, sp, 11 * 8
> +       REG_L   t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> +       xvld    xr0, sp, 13*SZREG + 0*SZXREG
> +       xvld    xr1, sp, 13*SZREG + 1*SZXREG
> +       xvld    xr2, sp, 13*SZREG + 2*SZXREG
> +       xvld    xr3, sp, 13*SZREG + 3*SZXREG
> +       xvld    xr4, sp, 13*SZREG + 4*SZXREG
> +       xvld    xr5, sp, 13*SZREG + 5*SZXREG
> +       xvld    xr6, sp, 13*SZREG + 6*SZXREG
> +       xvld    xr7, sp, 13*SZREG + 7*SZXREG
> +       xvld    xr8, sp, 13*SZREG + 8*SZXREG
> +       xvld    xr9, sp, 13*SZREG + 9*SZXREG
> +       xvld    xr10, sp, 13*SZREG + 10*SZXREG
> +       xvld    xr11, sp, 13*SZREG + 11*SZXREG
> +       xvld    xr12, sp, 13*SZREG + 12*SZXREG
> +       xvld    xr13, sp, 13*SZREG + 13*SZXREG
> +       xvld    xr14, sp, 13*SZREG + 14*SZXREG
> +       xvld    xr15, sp, 13*SZREG + 15*SZXREG
> +       xvld    xr16, sp, 13*SZREG + 16*SZXREG
> +       xvld    xr17, sp, 13*SZREG + 17*SZXREG
> +       xvld    xr18, sp, 13*SZREG + 18*SZXREG
> +       xvld    xr19, sp, 13*SZREG + 19*SZXREG
> +       xvld    xr20, sp, 13*SZREG + 20*SZXREG
> +       xvld    xr21, sp, 13*SZREG + 21*SZXREG
> +       xvld    xr22, sp, 13*SZREG + 22*SZXREG
> +       xvld    xr23, sp, 13*SZREG + 23*SZXREG
> +       xvld    xr24, sp, 13*SZREG + 24*SZXREG
> +       xvld    xr25, sp, 13*SZREG + 25*SZXREG
> +       xvld    xr26, sp, 13*SZREG + 26*SZXREG
> +       xvld    xr27, sp, 13*SZREG + 27*SZXREG
> +       xvld    xr28, sp, 13*SZREG + 28*SZXREG
> +       xvld    xr29, sp, 13*SZREG + 29*SZXREG
> +       xvld    xr30, sp, 13*SZREG + 30*SZXREG
> +       xvld    xr31, sp, 13*SZREG + 31*SZXREG
> +       REG_L   t0, sp, 32*SZXREG
> +       movgr2fcsr  fcsr0, t0
> +#elif defined USE_LSX
> +       vld     vr0, sp, 13*SZREG + 0*SZVREG
> +       vld     vr1, sp, 13*SZREG + 1*SZVREG
> +       vld     vr2, sp, 13*SZREG + 2*SZVREG
> +       vld     vr3, sp, 13*SZREG + 3*SZVREG
> +       vld     vr4, sp, 13*SZREG + 4*SZVREG
> +       vld     vr5, sp, 13*SZREG + 5*SZVREG
> +       vld     vr6, sp, 13*SZREG + 6*SZVREG
> +       vld     vr7, sp, 13*SZREG + 7*SZVREG
> +       vld     vr8, sp, 13*SZREG + 8*SZVREG
> +       vld     vr9, sp, 13*SZREG + 9*SZVREG
> +       vld     vr10, sp, 13*SZREG + 10*SZVREG
> +       vld     vr11, sp, 13*SZREG + 11*SZVREG
> +       vld     vr12, sp, 13*SZREG + 12*SZVREG
> +       vld     vr13, sp, 13*SZREG + 13*SZVREG
> +       vld     vr14, sp, 13*SZREG + 14*SZVREG
> +       vld     vr15, sp, 13*SZREG + 15*SZVREG
> +       vld     vr16, sp, 13*SZREG + 16*SZVREG
> +       vld     vr17, sp, 13*SZREG + 17*SZVREG
> +       vld     vr18, sp, 13*SZREG + 18*SZVREG
> +       vld     vr19, sp, 13*SZREG + 19*SZVREG
> +       vld     vr20, sp, 13*SZREG + 20*SZVREG
> +       vld     vr21, sp, 13*SZREG + 21*SZVREG
> +       vld     vr22, sp, 13*SZREG + 22*SZVREG
> +       vld     vr23, sp, 13*SZREG + 23*SZVREG
> +       vld     vr24, sp, 13*SZREG + 24*SZVREG
> +       vld     vr25, sp, 13*SZREG + 25*SZVREG
> +       vld     vr26, sp, 13*SZREG + 26*SZVREG
> +       vld     vr27, sp, 13*SZREG + 27*SZVREG
> +       vld     vr28, sp, 13*SZREG + 28*SZVREG
> +       vld     vr29, sp, 13*SZREG + 29*SZVREG
> +       vld     vr30, sp, 13*SZREG + 30*SZVREG
> +       vld     vr31, sp, 13*SZREG + 31*SZVREG
> +       REG_L   t0, sp, 32*SZVREG
> +       movgr2fcsr  fcsr0, t0
> +#elif !defined __loongarch_soft_float
> +       FREG_L  fa0, sp, 13*SZREG + 0*SZFREG
> +       FREG_L  fa1, sp, 13*SZREG + 1*SZFREG
> +       FREG_L  fa2, sp, 13*SZREG + 2*SZFREG
> +       FREG_L  fa3, sp, 13*SZREG + 3*SZFREG
> +       FREG_L  fa4, sp, 13*SZREG + 4*SZFREG
> +       FREG_L  fa5, sp, 13*SZREG + 5*SZFREG
> +       FREG_L  fa6, sp, 13*SZREG + 6*SZFREG
> +       FREG_L  fa7, sp, 13*SZREG + 7*SZFREG
> +       FREG_L  ft0, sp, 13*SZREG + 8*SZFREG
> +       FREG_L  ft1, sp, 13*SZREG + 9*SZFREG
> +       FREG_L  ft2, sp, 13*SZREG + 10*SZFREG
> +       FREG_L  ft3, sp, 13*SZREG + 11*SZFREG
> +       FREG_L  ft4, sp, 13*SZREG + 12*SZFREG
> +       FREG_L  ft5, sp, 13*SZREG + 13*SZFREG
> +       FREG_L  ft6, sp, 13*SZREG + 14*SZFREG
> +       FREG_L  ft7, sp, 13*SZREG + 15*SZFREG
> +       FREG_L  ft8, sp, 13*SZREG + 16*SZFREG
> +       FREG_L  ft9, sp, 13*SZREG + 17*SZFREG
> +       FREG_L  ft10, sp, 13*SZREG + 18*SZFREG
> +       FREG_L  ft11, sp, 13*SZREG + 19*SZFREG
> +       FREG_L  ft12, sp, 13*SZREG + 20*SZFREG
> +       FREG_L  ft13, sp, 13*SZREG + 21*SZFREG
> +       FREG_L  ft14, sp, 13*SZREG + 22*SZFREG
> +       FREG_L  ft15, sp, 13*SZREG + 23*SZFREG
> +       REG_L   t0, sp, 24*SZFREG
> +       movgr2fcsr  fcsr0, t0
> +#endif /* #ifdef USE_LASX  */
> +
> +       ADDI    sp, sp, FRAME_SIZE
> +       b       Lret
> +       cfi_endproc
> +       .size   _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> +       .text
> +
> +       /* Compute the thread pointer offset for symbols in the static
> +          TLS block. The offset is the same for all threads.
> +          Prototype:
> +          _dl_tlsdesc_return (tlsdesc *);  */
> +       .hidden _dl_tlsdesc_return
> +       .global _dl_tlsdesc_return
> +       .type   _dl_tlsdesc_return,%function
> +       cfi_startproc
> +       .align 2
> +_dl_tlsdesc_return:
> +       REG_L  a0, a0, 8
> +       RET
> +       cfi_endproc
> +       .size   _dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> +       /* Handler for undefined weak TLS symbols.
> +          Prototype:
> +          _dl_tlsdesc_undefweak (tlsdesc *);
> +
> +          The second word of the descriptor contains the addend.
> +          Return the addend minus the thread pointer. This ensures
> +          that when the caller adds on the thread pointer it gets back
> +          the addend.  */
> +       .hidden _dl_tlsdesc_undefweak
> +       .global _dl_tlsdesc_undefweak
> +       .type   _dl_tlsdesc_undefweak,%function
> +       cfi_startproc
> +       .align  2
> +_dl_tlsdesc_undefweak:
> +       REG_L   a0, a0, 8
> +       sub.d   a0, a0, tp
> +       RET
> +       cfi_endproc
> +       .size   _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT.  */
> +struct tlsdesc
> +{
> +  ptrdiff_t (*entry) (struct tlsdesc *);
> +  void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> +   needs dynamic TLS offsets.  */
> +struct tlsdesc_dynamic_arg
> +{
> +  tls_index tlsinfo;
> +  size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
>  struct link_map_machine
>  {
>    ElfW (Addr) plt; /* Address of .plt.  */
> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>  };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
>  /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>  #define SZREG 8
>  #define SZFREG 8
> +#define SZFCSREG 4
>  #define SZVREG 16
>  #define SZXREG 32
>  #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
>  #define fcc5 $fcc5
>  #define fcc6 $fcc6
>  #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>
>  #define vr0 $vr0
>  #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors.  AArch64 version.
                                                  Change it.
> +
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-02-29  1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
  2024-02-29  2:56 ` caiyinyu
  2024-03-04 15:42 ` H.J. Lu
@ 2024-03-05 19:29 ` Adhemerval Zanella Netto
  2024-03-08  7:53   ` mengqinggang
  2 siblings, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2024-03-05 19:29 UTC (permalink / raw)
  To: mengqinggang, libc-alpha
  Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
	maskray, luweining, wanglei, hejinyang



On 28/02/24 22:43, mengqinggang wrote:
> This is mostly based on AArch64 and RISC-V implementation.
> 
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
> 
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic. 
> 
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html

Patch looks ok, some comments below.

> 
>  elf/elf.h                                     |   2 +
>  sysdeps/loongarch/Makefile                    |   6 +
>  sysdeps/loongarch/dl-link.sym                 |   1 +
>  sysdeps/loongarch/dl-machine.h                |  60 ++-
>  sysdeps/loongarch/dl-tls.h                    |   9 +-
>  sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>  sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>  sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>  sysdeps/loongarch/linkmap.h                   |   1 +
>  sysdeps/loongarch/sys/asm.h                   |   1 +
>  sysdeps/loongarch/sys/regdef.h                |   1 +
>  sysdeps/loongarch/tlsdesc.c                   |  39 ++
>  sysdeps/loongarch/tlsdesc.sym                 |  19 +
>  .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>  14 files changed, 625 insertions(+), 3 deletions(-)
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>  create mode 100644 sysdeps/loongarch/tlsdesc.c
>  create mode 100644 sysdeps/loongarch/tlsdesc.sym
> 
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
>  #define R_LARCH_TLS_TPREL32	10
>  #define R_LARCH_TLS_TPREL64	11
>  #define R_LARCH_IRELATIVE	12
> +#define R_LARCH_TLS_DESC32	13
> +#define R_LARCH_TLS_DESC64	14
>  
>  /* Reserved for future relocs that the dynamic linker must understand.  */
>  
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>  endif
>  
>  ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>  gen-as-const-headers += dl-link.sym
>  endif
>  
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
>  # LoongArch's assembler also needs to know about PIC as it changes the
>  # definition of some assembler macros.
>  ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
>  #include <stddef.h>
>  #include <sysdep.h>
>  #include <link.h>
> +#include <dl-tlsdesc.h>
>  
>  DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>  DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
>  #include <entry.h>
>  #include <elf/elf.h>
>  #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
>  #include <dl-static-tls.h>
>  #include <dl-machine-rel.h>
>  
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>        *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>        break;
>  
> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> +      {
> +	struct tlsdesc volatile *td =
> +	    (struct tlsdesc volatile *)addr_field;
> +	if (! sym)
> +	  {
> +	    td->arg = (void*)reloc->r_addend;
> +	    td->entry = _dl_tlsdesc_undefweak;
> +	  }
> +	else
> +	  {
> +# ifndef SHARED
> +	    CHECK_STATIC_TLS (map, sym_map);
> +# else
> +	    if (!TRY_STATIC_TLS (map, sym_map))
> +	      {
> +		td->arg = _dl_make_tlsdesc_dynamic
> +		  (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> +		if (SUPPORT_LASX)
> +		  td->entry = _dl_tlsdesc_dynamic_lasx;
> +		else
> +		if (SUPPORT_LSX)
> +		  td->entry = _dl_tlsdesc_dynamic_lsx;
> +		else
> +# endif
> +		  td->entry = _dl_tlsdesc_dynamic;
> +	      }
> +	    else
> +# endif
> +	      {
> +		td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> +			    + reloc->r_addend);
> +		td->entry = _dl_tlsdesc_return;
> +	      }
> +	  }
> +	break;
> +      }
> +
>      case R_LARCH_COPY:
>        {
>  	  if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>        else
>  	*reloc_addr = map->l_mach.plt;
>      }
> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))

Use __glibc_likely here.

> +    {
> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> +      const ElfW (Sym) *sym = &symtab[symndx];
> +      const struct r_found_version *version = NULL;
> +
> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> +	{
> +	  const ElfW (Half) *vernum =
> +	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> +	  version = &map->l_versions[vernum[symndx] & 0x7fff];
> +	}
> +
> +      /* Always initialize TLS descriptors completely, because lazy
> +	 initialization requires synchronization at every TLS access.  */
> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> +			skip_ifunc);
> +    }
>    else
>      _dl_reloc_bad_type (map, r_type, 1);
>  }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
>     License along with the GNU C Library.  If not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
>  /* Type used for the representation of TLS information in the GOT.  */
>  typedef struct
>  {
> @@ -23,6 +26,8 @@ typedef struct
>    unsigned long int ti_offset;
>  } tls_index;
>  
> +extern void *__tls_get_addr (tls_index *ti);
> +
>  /* The thread pointer points to the first static TLS block.  */
>  #define TLS_TP_OFFSET 0
>  
> @@ -37,10 +42,10 @@ typedef struct
>  /* Compute the value for a DTPREL reloc.  */
>  #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>  
> -extern void *__tls_get_addr (tls_index *ti);
> -

Why move the function prototype?

>  #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>  #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>  
>  /* Value used for dtv entries for which the allocation is delayed.  */
>  #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.

Update Copyright years to 2024.

> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif

I don't have a strong opinion, but another option that might be simpler it
to provide only only one _dl_tlsdesc_dynamic implementation and check the 
required save/restore of vector register based on hwcap value.

> +
> +#ifdef SHARED
> +	/* Handler for dynamic TLS symbols.
> +	   Prototype:
> +	   _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> +	   The second word of the descriptor points to a
> +	   tlsdesc_dynamic_arg structure.
> +
> +	   Returns the offset between the thread pointer and the
> +	   object referenced by the argument.
> +
> +	   ptrdiff_t
> +	   __attribute__ ((__regparm__ (1)))

Does this attribute really make sense for loongarch?

> +	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> +	   {
> +	     struct tlsdesc_dynamic_arg *td = tdp->arg;
> +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> +	     if (__builtin_expect (td->gen_count <= dtv[0].counter

Use __glibc_unlikely or just remove the __builtin_expect for clarity.

> +		&& (dtv[td->tlsinfo.ti_module].pointer.val
> +		    != TLS_DTV_UNALLOCATED),
> +		1))
> +	       return dtv[td->tlsinfo.ti_module].pointer.val
> +		+ td->tlsinfo.ti_offset
> +		- __thread_pointer;
> +
> +	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> +	   }
> +	 */
> +	.hidden _dl_tlsdesc_dynamic
> +	.global	_dl_tlsdesc_dynamic
> +	.type	_dl_tlsdesc_dynamic,%function
> +	cfi_startproc
> +	.align 2
> +_dl_tlsdesc_dynamic:
> +	/* Save just enough registers to support fast path, if we fall
> +	   into slow path we will save additional registers.  */
> +	ADDI	sp, sp,-24
> +	REG_S	t0, sp, 0
> +	REG_S	t1, sp, 8
> +	REG_S	t2, sp, 16
> +
> +	REG_L	t0, tp, -SIZE_OF_DTV	  # dtv(t0) = tp + TCBHEAD_DTV dtv start
> +	REG_L	a0, a0, TLSDESC_ARG	  # td(a0) = tdp->arg
> +	REG_L	t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> +	REG_L	t2, t0, DTV_COUNTER	  # t2 = dtv[0].counter
> +	bltu	t2, t1, Lslow
> +
> +	REG_L	t1, a0, TLSDESC_MODID	  # t1 = td->tlsinfo.ti_module
> +	slli.d	t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> +	add.d	t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
> +	REG_L	t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> +	li.d	t2, TLS_DTV_UNALLOCATED
> +	beq	t1, t2, Lslow
> +	REG_L	t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> +	# dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> +	add.d	a0, t1, t2
> +Lret:
> +	sub.d	a0, a0, tp
> +	REG_L	t0, sp, 0
> +	REG_L	t1, sp, 8
> +	REG_L	t2, sp, 16
> +	ADDI	sp, sp, 24
> +	RET
> +
> +Lslow:
> +	/* This is the slow path. We need to call __tls_get_addr() which
> +	   means we need to save and restore all the register that the
> +	   callee will trash.  */
> +
> +	/* Save the remaining registers that we must treat as caller save.  */
> +	ADDI	sp, sp, -FRAME_SIZE
> +	REG_S	ra, sp, 0 * SZREG
> +	REG_S	a1, sp, 1 * SZREG
> +	REG_S	a2, sp, 2 * SZREG
> +	REG_S	a3, sp, 3 * SZREG
> +	REG_S	a4, sp, 4 * SZREG
> +	REG_S	a5, sp, 5 * SZREG
> +	REG_S	a6, sp, 6 * SZREG
> +	REG_S	a7, sp, 7 * SZREG
> +	REG_S	t4, sp, 8 * SZREG
> +	REG_S	t5, sp, 9 * SZREG
> +	REG_S	t6, sp, 10 * SZREG
> +	REG_S	t7, sp, 11 * SZREG
> +	REG_S	t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> +	xvst	xr0, sp, 13*SZREG + 0*SZXREG
> +	xvst	xr1, sp, 13*SZREG + 1*SZXREG
> +	xvst	xr2, sp, 13*SZREG + 2*SZXREG
> +	xvst	xr3, sp, 13*SZREG + 3*SZXREG
> +	xvst	xr4, sp, 13*SZREG + 4*SZXREG
> +	xvst	xr5, sp, 13*SZREG + 5*SZXREG
> +	xvst	xr6, sp, 13*SZREG + 6*SZXREG
> +	xvst	xr7, sp, 13*SZREG + 7*SZXREG
> +	xvst	xr8, sp, 13*SZREG + 8*SZXREG
> +	xvst	xr9, sp, 13*SZREG + 9*SZXREG
> +	xvst	xr10, sp, 13*SZREG + 10*SZXREG
> +	xvst	xr11, sp, 13*SZREG + 11*SZXREG
> +	xvst	xr12, sp, 13*SZREG + 12*SZXREG
> +	xvst	xr13, sp, 13*SZREG + 13*SZXREG
> +	xvst	xr14, sp, 13*SZREG + 14*SZXREG
> +	xvst	xr15, sp, 13*SZREG + 15*SZXREG
> +	xvst	xr16, sp, 13*SZREG + 16*SZXREG
> +	xvst	xr17, sp, 13*SZREG + 17*SZXREG
> +	xvst	xr18, sp, 13*SZREG + 18*SZXREG
> +	xvst	xr19, sp, 13*SZREG + 19*SZXREG
> +	xvst	xr20, sp, 13*SZREG + 20*SZXREG
> +	xvst	xr21, sp, 13*SZREG + 21*SZXREG
> +	xvst	xr22, sp, 13*SZREG + 22*SZXREG
> +	xvst	xr23, sp, 13*SZREG + 23*SZXREG
> +	xvst	xr24, sp, 13*SZREG + 24*SZXREG
> +	xvst	xr25, sp, 13*SZREG + 25*SZXREG
> +	xvst	xr26, sp, 13*SZREG + 26*SZXREG
> +	xvst	xr27, sp, 13*SZREG + 27*SZXREG
> +	xvst	xr28, sp, 13*SZREG + 28*SZXREG
> +	xvst	xr29, sp, 13*SZREG + 29*SZXREG
> +	xvst	xr30, sp, 13*SZREG + 30*SZXREG
> +	xvst	xr31, sp, 13*SZREG + 31*SZXREG
> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +	# some fields in fcsr0
> +	movfcsr2gr  t0, fcsr0
> +	REG_S	    t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> +	vst	vr0, sp, 13*SZREG + 0*SZVREG
> +	vst	vr1, sp, 13*SZREG + 1*SZVREG
> +	vst	vr2, sp, 13*SZREG + 2*SZVREG
> +	vst	vr3, sp, 13*SZREG + 3*SZVREG
> +	vst	vr4, sp, 13*SZREG + 4*SZVREG
> +	vst	vr5, sp, 13*SZREG + 5*SZVREG
> +	vst	vr6, sp, 13*SZREG + 6*SZVREG
> +	vst	vr7, sp, 13*SZREG + 7*SZVREG
> +	vst	vr8, sp, 13*SZREG + 8*SZVREG
> +	vst	vr9, sp, 13*SZREG + 9*SZVREG
> +	vst	vr10, sp, 13*SZREG + 10*SZVREG
> +	vst	vr11, sp, 13*SZREG + 11*SZVREG
> +	vst	vr12, sp, 13*SZREG + 12*SZVREG
> +	vst	vr13, sp, 13*SZREG + 13*SZVREG
> +	vst	vr14, sp, 13*SZREG + 14*SZVREG
> +	vst	vr15, sp, 13*SZREG + 15*SZVREG
> +	vst	vr16, sp, 13*SZREG + 16*SZVREG
> +	vst	vr17, sp, 13*SZREG + 17*SZVREG
> +	vst	vr18, sp, 13*SZREG + 18*SZVREG
> +	vst	vr19, sp, 13*SZREG + 19*SZVREG
> +	vst	vr20, sp, 13*SZREG + 20*SZVREG
> +	vst	vr21, sp, 13*SZREG + 21*SZVREG
> +	vst	vr22, sp, 13*SZREG + 22*SZVREG
> +	vst	vr23, sp, 13*SZREG + 23*SZVREG
> +	vst	vr24, sp, 13*SZREG + 24*SZVREG
> +	vst	vr25, sp, 13*SZREG + 25*SZVREG
> +	vst	vr26, sp, 13*SZREG + 26*SZVREG
> +	vst	vr27, sp, 13*SZREG + 27*SZVREG
> +	vst	vr28, sp, 13*SZREG + 28*SZVREG
> +	vst	vr29, sp, 13*SZREG + 29*SZVREG
> +	vst	vr30, sp, 13*SZREG + 30*SZVREG
> +	vst	vr31, sp, 13*SZREG + 31*SZVREG
> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +	# some fields in fcsr0
> +	movfcsr2gr  t0, fcsr0
> +	REG_S	    t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> +	FREG_S	fa0, sp, 13*SZREG + 0*SZFREG
> +	FREG_S	fa1, sp, 13*SZREG + 1*SZFREG
> +	FREG_S	fa2, sp, 13*SZREG + 2*SZFREG
> +	FREG_S	fa3, sp, 13*SZREG + 3*SZFREG
> +	FREG_S	fa4, sp, 13*SZREG + 4*SZFREG
> +	FREG_S	fa5, sp, 13*SZREG + 5*SZFREG
> +	FREG_S	fa6, sp, 13*SZREG + 6*SZFREG
> +	FREG_S	fa7, sp, 13*SZREG + 7*SZFREG
> +	FREG_S	ft0, sp, 13*SZREG + 8*SZFREG
> +	FREG_S	ft1, sp, 13*SZREG + 9*SZFREG
> +	FREG_S	ft2, sp, 13*SZREG + 10*SZFREG
> +	FREG_S	ft3, sp, 13*SZREG + 11*SZFREG
> +	FREG_S	ft4, sp, 13*SZREG + 12*SZFREG
> +	FREG_S	ft5, sp, 13*SZREG + 13*SZFREG
> +	FREG_S	ft6, sp, 13*SZREG + 14*SZFREG
> +	FREG_S	ft7, sp, 13*SZREG + 15*SZFREG
> +	FREG_S	ft8, sp, 13*SZREG + 16*SZFREG
> +	FREG_S	ft9, sp, 13*SZREG + 17*SZFREG
> +	FREG_S	ft10, sp, 13*SZREG + 18*SZFREG
> +	FREG_S	ft11, sp, 13*SZREG + 19*SZFREG
> +	FREG_S	ft12, sp, 13*SZREG + 20*SZFREG
> +	FREG_S	ft13, sp, 13*SZREG + 21*SZFREG
> +	FREG_S	ft14, sp, 13*SZREG + 22*SZFREG
> +	FREG_S	ft15, sp, 13*SZREG + 23*SZFREG
> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +	# some fields in fcsr0
> +	movfcsr2gr  t0, fcsr0
> +	REG_S	    t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX  */
> +
> +	bl	__tls_get_addr
> +	ADDI	a0, a0, -TLS_DTV_OFFSET
> +
> +	REG_L	ra, sp, 0
> +	REG_L	a1, sp, 1 * 8
> +	REG_L	a2, sp, 2 * 8
> +	REG_L	a3, sp, 3 * 8
> +	REG_L	a4, sp, 4 * 8
> +	REG_L	a5, sp, 5 * 8
> +	REG_L	a6, sp, 6 * 8
> +	REG_L	a7, sp, 7 * 8
> +	REG_L	t4, sp, 8 * 8
> +	REG_L	t5, sp, 9 * 8
> +	REG_L	t6, sp, 10 * 8
> +	REG_L	t7, sp, 11 * 8
> +	REG_L	t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> +	xvld	xr0, sp, 13*SZREG + 0*SZXREG
> +	xvld	xr1, sp, 13*SZREG + 1*SZXREG
> +	xvld	xr2, sp, 13*SZREG + 2*SZXREG
> +	xvld	xr3, sp, 13*SZREG + 3*SZXREG
> +	xvld	xr4, sp, 13*SZREG + 4*SZXREG
> +	xvld	xr5, sp, 13*SZREG + 5*SZXREG
> +	xvld	xr6, sp, 13*SZREG + 6*SZXREG
> +	xvld	xr7, sp, 13*SZREG + 7*SZXREG
> +	xvld	xr8, sp, 13*SZREG + 8*SZXREG
> +	xvld	xr9, sp, 13*SZREG + 9*SZXREG
> +	xvld	xr10, sp, 13*SZREG + 10*SZXREG
> +	xvld	xr11, sp, 13*SZREG + 11*SZXREG
> +	xvld	xr12, sp, 13*SZREG + 12*SZXREG
> +	xvld	xr13, sp, 13*SZREG + 13*SZXREG
> +	xvld	xr14, sp, 13*SZREG + 14*SZXREG
> +	xvld	xr15, sp, 13*SZREG + 15*SZXREG
> +	xvld	xr16, sp, 13*SZREG + 16*SZXREG
> +	xvld	xr17, sp, 13*SZREG + 17*SZXREG
> +	xvld	xr18, sp, 13*SZREG + 18*SZXREG
> +	xvld	xr19, sp, 13*SZREG + 19*SZXREG
> +	xvld	xr20, sp, 13*SZREG + 20*SZXREG
> +	xvld	xr21, sp, 13*SZREG + 21*SZXREG
> +	xvld	xr22, sp, 13*SZREG + 22*SZXREG
> +	xvld	xr23, sp, 13*SZREG + 23*SZXREG
> +	xvld	xr24, sp, 13*SZREG + 24*SZXREG
> +	xvld	xr25, sp, 13*SZREG + 25*SZXREG
> +	xvld	xr26, sp, 13*SZREG + 26*SZXREG
> +	xvld	xr27, sp, 13*SZREG + 27*SZXREG
> +	xvld	xr28, sp, 13*SZREG + 28*SZXREG
> +	xvld	xr29, sp, 13*SZREG + 29*SZXREG
> +	xvld	xr30, sp, 13*SZREG + 30*SZXREG
> +	xvld	xr31, sp, 13*SZREG + 31*SZXREG
> +	REG_L	t0, sp, 32*SZXREG
> +	movgr2fcsr  fcsr0, t0
> +#elif defined USE_LSX
> +	vld	vr0, sp, 13*SZREG + 0*SZVREG
> +	vld	vr1, sp, 13*SZREG + 1*SZVREG
> +	vld	vr2, sp, 13*SZREG + 2*SZVREG
> +	vld	vr3, sp, 13*SZREG + 3*SZVREG
> +	vld	vr4, sp, 13*SZREG + 4*SZVREG
> +	vld	vr5, sp, 13*SZREG + 5*SZVREG
> +	vld	vr6, sp, 13*SZREG + 6*SZVREG
> +	vld	vr7, sp, 13*SZREG + 7*SZVREG
> +	vld	vr8, sp, 13*SZREG + 8*SZVREG
> +	vld	vr9, sp, 13*SZREG + 9*SZVREG
> +	vld	vr10, sp, 13*SZREG + 10*SZVREG
> +	vld	vr11, sp, 13*SZREG + 11*SZVREG
> +	vld	vr12, sp, 13*SZREG + 12*SZVREG
> +	vld	vr13, sp, 13*SZREG + 13*SZVREG
> +	vld	vr14, sp, 13*SZREG + 14*SZVREG
> +	vld	vr15, sp, 13*SZREG + 15*SZVREG
> +	vld	vr16, sp, 13*SZREG + 16*SZVREG
> +	vld	vr17, sp, 13*SZREG + 17*SZVREG
> +	vld	vr18, sp, 13*SZREG + 18*SZVREG
> +	vld	vr19, sp, 13*SZREG + 19*SZVREG
> +	vld	vr20, sp, 13*SZREG + 20*SZVREG
> +	vld	vr21, sp, 13*SZREG + 21*SZVREG
> +	vld	vr22, sp, 13*SZREG + 22*SZVREG
> +	vld	vr23, sp, 13*SZREG + 23*SZVREG
> +	vld	vr24, sp, 13*SZREG + 24*SZVREG
> +	vld	vr25, sp, 13*SZREG + 25*SZVREG
> +	vld	vr26, sp, 13*SZREG + 26*SZVREG
> +	vld	vr27, sp, 13*SZREG + 27*SZVREG
> +	vld	vr28, sp, 13*SZREG + 28*SZVREG
> +	vld	vr29, sp, 13*SZREG + 29*SZVREG
> +	vld	vr30, sp, 13*SZREG + 30*SZVREG
> +	vld	vr31, sp, 13*SZREG + 31*SZVREG
> +	REG_L	t0, sp, 32*SZVREG
> +	movgr2fcsr  fcsr0, t0
> +#elif !defined __loongarch_soft_float
> +	FREG_L	fa0, sp, 13*SZREG + 0*SZFREG
> +	FREG_L	fa1, sp, 13*SZREG + 1*SZFREG
> +	FREG_L	fa2, sp, 13*SZREG + 2*SZFREG
> +	FREG_L	fa3, sp, 13*SZREG + 3*SZFREG
> +	FREG_L	fa4, sp, 13*SZREG + 4*SZFREG
> +	FREG_L	fa5, sp, 13*SZREG + 5*SZFREG
> +	FREG_L	fa6, sp, 13*SZREG + 6*SZFREG
> +	FREG_L	fa7, sp, 13*SZREG + 7*SZFREG
> +	FREG_L	ft0, sp, 13*SZREG + 8*SZFREG
> +	FREG_L	ft1, sp, 13*SZREG + 9*SZFREG
> +	FREG_L	ft2, sp, 13*SZREG + 10*SZFREG
> +	FREG_L	ft3, sp, 13*SZREG + 11*SZFREG
> +	FREG_L	ft4, sp, 13*SZREG + 12*SZFREG
> +	FREG_L	ft5, sp, 13*SZREG + 13*SZFREG
> +	FREG_L	ft6, sp, 13*SZREG + 14*SZFREG
> +	FREG_L	ft7, sp, 13*SZREG + 15*SZFREG
> +	FREG_L	ft8, sp, 13*SZREG + 16*SZFREG
> +	FREG_L	ft9, sp, 13*SZREG + 17*SZFREG
> +	FREG_L	ft10, sp, 13*SZREG + 18*SZFREG
> +	FREG_L	ft11, sp, 13*SZREG + 19*SZFREG
> +	FREG_L	ft12, sp, 13*SZREG + 20*SZFREG
> +	FREG_L	ft13, sp, 13*SZREG + 21*SZFREG
> +	FREG_L	ft14, sp, 13*SZREG + 22*SZFREG
> +	FREG_L	ft15, sp, 13*SZREG + 23*SZFREG
> +	REG_L	t0, sp, 24*SZFREG
> +	movgr2fcsr  fcsr0, t0
> +#endif /* #ifdef USE_LASX  */
> +
> +	ADDI	sp, sp, FRAME_SIZE
> +	b	Lret
> +	cfi_endproc
> +	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.

Update Copyright years to 2024.

> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> +	.text
> +
> +	/* Compute the thread pointer offset for symbols in the static
> +	   TLS block. The offset is the same for all threads.
> +	   Prototype:
> +	   _dl_tlsdesc_return (tlsdesc *);  */
> +	.hidden _dl_tlsdesc_return
> +	.global	_dl_tlsdesc_return
> +	.type	_dl_tlsdesc_return,%function
> +	cfi_startproc
> +	.align 2
> +_dl_tlsdesc_return:
> +	REG_L  a0, a0, 8
> +	RET
> +	cfi_endproc
> +	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> +	/* Handler for undefined weak TLS symbols.
> +	   Prototype:
> +	   _dl_tlsdesc_undefweak (tlsdesc *);
> +
> +	   The second word of the descriptor contains the addend.
> +	   Return the addend minus the thread pointer. This ensures
> +	   that when the caller adds on the thread pointer it gets back
> +	   the addend.  */
> +	.hidden _dl_tlsdesc_undefweak
> +	.global	_dl_tlsdesc_undefweak
> +	.type	_dl_tlsdesc_undefweak,%function
> +	cfi_startproc
> +	.align  2
> +_dl_tlsdesc_undefweak:
> +	REG_L	a0, a0, 8
> +	sub.d	a0, a0, tp
> +	RET
> +	cfi_endproc
> +	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT.  */
> +struct tlsdesc
> +{
> +  ptrdiff_t (*entry) (struct tlsdesc *);
> +  void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> +   needs dynamic TLS offsets.  */
> +struct tlsdesc_dynamic_arg
> +{
> +  tls_index tlsinfo;
> +  size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float

Minor style, usually for single tests we use '#ifndef' and add
attribute_hidden at the end of prototype.

> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
>  struct link_map_machine
>  {
>    ElfW (Addr) plt; /* Address of .plt.  */
> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>  };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
>  /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>  #define SZREG 8
>  #define SZFREG 8
> +#define SZFCSREG 4
>  #define SZVREG 16
>  #define SZXREG 32
>  #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
>  #define fcc5 $fcc5
>  #define fcc6 $fcc6
>  #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>  
>  #define vr0 $vr0
>  #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors.  AArch64 version.
> +
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.

Update Copyright years to 2024 and remove the 'AArch64'.


> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <ldsodefs.h>
> +#include <tls.h>
> +#include <dl-tlsdesc.h>
> +#include <dl-unmap-segments.h>
> +#include <tlsdeschtab.h>
> +
> +/* Unmap the dynamic object, but also release its TLS descriptor table
> +   if there is one.  */
> +
> +void
> +_dl_unmap (struct link_map *map)
> +{
> +  _dl_unmap_segments (map);
> +
> +#ifdef SHARED
> +  if (map->l_mach.tlsdesc_table)
> +    htab_delete (map->l_mach.tlsdesc_table);
> +#endif
> +}
> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
> new file mode 100644
> index 0000000000..bcab218631
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.sym
> @@ -0,0 +1,19 @@
> +#include <stddef.h>
> +#include <sysdep.h>
> +#include <tls.h>
> +#include <link.h>
> +#include <dl-tlsdesc.h>
> +
> +--
> +
> +-- Abuse tls.h macros to derive offsets relative to the thread register.
> +
> +TLSDESC_ARG		offsetof(struct tlsdesc, arg)
> +TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
> +TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
> +TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
> +TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
> +DTV_COUNTER		offsetof(dtv_t, counter)
> +TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
> +TLS_DTV_OFFSET		TLS_DTV_OFFSET
> +SIZE_OF_DTV		sizeof(tcbhead_t)
> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> index 547b1c1b7f..ec32e6d13f 100644
> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
> @@ -5,3 +5,5 @@ libc.so: calloc
>  libc.so: free
>  libc.so: malloc
>  libc.so: realloc
> +# The dynamic loader needs __tls_get_addr for TLS.
> +ld.so: __tls_get_addr

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-03-04 15:42 ` H.J. Lu
@ 2024-03-08  7:45   ` mengqinggang
  0 siblings, 0 replies; 8+ messages in thread
From: mengqinggang @ 2024-03-08  7:45 UTC (permalink / raw)
  To: H.J. Lu
  Cc: libc-alpha, adhemerval.zanella, xuchenghua, caiyinyu, chenglulu,
	cailulu, xry111, i.swmail, maskray, luweining, wanglei,
	hejinyang

[-- Attachment #1: Type: text/plain, Size: 30097 bytes --]

Thanks a lot for the review! A new v3 version patch has been sent.

https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html

在 2024/3/4 下午11:42, H.J. Lu 写道:
> On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote:
>> This is mostly based on AArch64 and RISC-V implementation.
>>
>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>
>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>> all vector registers.
>> ---
>> Changes v1 -> v2:
>> - Fix vr24-vr31, xr24-xr31 typo.
>> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>
>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>>
>>   elf/elf.h                                     |   2 +
>>   sysdeps/loongarch/Makefile                    |   6 +
>>   sysdeps/loongarch/dl-link.sym                 |   1 +
>>   sysdeps/loongarch/dl-machine.h                |  60 ++-
>>   sysdeps/loongarch/dl-tls.h                    |   9 +-
>>   sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>>   sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>>   sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>>   sysdeps/loongarch/linkmap.h                   |   1 +
>>   sysdeps/loongarch/sys/asm.h                   |   1 +
>>   sysdeps/loongarch/sys/regdef.h                |   1 +
>>   sysdeps/loongarch/tlsdesc.c                   |  39 ++
>>   sysdeps/loongarch/tlsdesc.sym                 |  19 +
>>   .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>>   14 files changed, 625 insertions(+), 3 deletions(-)
>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>   create mode 100644 sysdeps/loongarch/tlsdesc.c
>>   create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>
>> diff --git a/elf/elf.h b/elf/elf.h
>> index f2206e5c06..eec24ea049 100644
>> --- a/elf/elf.h
>> +++ b/elf/elf.h
>> @@ -4237,6 +4237,8 @@ enum
>>   #define R_LARCH_TLS_TPREL32    10
>>   #define R_LARCH_TLS_TPREL64    11
>>   #define R_LARCH_IRELATIVE      12
>> +#define R_LARCH_TLS_DESC32     13
>> +#define R_LARCH_TLS_DESC64     14
>>
>>   /* Reserved for future relocs that the dynamic linker must understand.  */
>>
>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>> index 43d2f583cd..181389e787 100644
>> --- a/sysdeps/loongarch/Makefile
>> +++ b/sysdeps/loongarch/Makefile
>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>   endif
>>
>>   ifeq ($(subdir),elf)
>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>   gen-as-const-headers += dl-link.sym
>>   endif
>>
>> +ifeq ($(subdir),csu)
>> +gen-as-const-headers += tlsdesc.sym
>> +endif
>> +
>> +
>>   # LoongArch's assembler also needs to know about PIC as it changes the
>>   # definition of some assembler macros.
>>   ASFLAGS-.os += $(pic-ccflag)
>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>> index b534968e30..fd81ef37d5 100644
>> --- a/sysdeps/loongarch/dl-link.sym
>> +++ b/sysdeps/loongarch/dl-link.sym
>> @@ -1,6 +1,7 @@
>>   #include <stddef.h>
>>   #include <sysdep.h>
>>   #include <link.h>
>> +#include <dl-tlsdesc.h>
>>
>>   DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>>   DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>> index ab81b82d95..8ca6c224f6 100644
>> --- a/sysdeps/loongarch/dl-machine.h
>> +++ b/sysdeps/loongarch/dl-machine.h
>> @@ -25,7 +25,7 @@
>>   #include <entry.h>
>>   #include <elf/elf.h>
>>   #include <sys/asm.h>
>> -#include <dl-tls.h>
>> +#include <dl-tlsdesc.h>
>>   #include <dl-static-tls.h>
>>   #include <dl-machine-rel.h>
>>
>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>         *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>         break;
>>
>> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>> +      {
>> +       struct tlsdesc volatile *td =
>> +           (struct tlsdesc volatile *)addr_field;
>> +       if (! sym)
>> +         {
>> +           td->arg = (void*)reloc->r_addend;
>> +           td->entry = _dl_tlsdesc_undefweak;
>> +         }
>> +       else
>> +         {
>> +# ifndef SHARED
>> +           CHECK_STATIC_TLS (map, sym_map);
>> +# else
>> +           if (!TRY_STATIC_TLS (map, sym_map))
>> +             {
>> +               td->arg = _dl_make_tlsdesc_dynamic
>> +                 (sym_map, sym->st_value + reloc->r_addend);
>> +# if !defined __loongarch_soft_float
>> +               if (SUPPORT_LASX)
>> +                 td->entry = _dl_tlsdesc_dynamic_lasx;
>> +               else
>> +               if (SUPPORT_LSX)
>> +                 td->entry = _dl_tlsdesc_dynamic_lsx;
>> +               else
>> +# endif
>> +                 td->entry = _dl_tlsdesc_dynamic;
>> +             }
>> +           else
>> +# endif
>> +             {
>> +               td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>> +                           + reloc->r_addend);
>> +               td->entry = _dl_tlsdesc_return;
>> +             }
>> +         }
>> +       break;
>> +      }
>> +
>>       case R_LARCH_COPY:
>>         {
>>            if (sym == NULL)
>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>         else
>>          *reloc_addr = map->l_mach.plt;
>>       }
>> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>> +    {
>> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>> +      const ElfW (Sym) *sym = &symtab[symndx];
>> +      const struct r_found_version *version = NULL;
>> +
>> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>> +       {
>> +         const ElfW (Half) *vernum =
>> +           (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>> +         version = &map->l_versions[vernum[symndx] & 0x7fff];
>> +       }
>> +
>> +      /* Always initialize TLS descriptors completely, because lazy
>> +        initialization requires synchronization at every TLS access.  */
>> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>> +                       skip_ifunc);
>> +    }
>>     else
>>       _dl_reloc_bad_type (map, r_type, 1);
>>   }
>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>> index 29924b866d..de593c002d 100644
>> --- a/sysdeps/loongarch/dl-tls.h
>> +++ b/sysdeps/loongarch/dl-tls.h
>> @@ -16,6 +16,9 @@
>>      License along with the GNU C Library.  If not, see
>>      <https://www.gnu.org/licenses/>.  */
>>
>> +#ifndef _DL_TLS_H
>> +#define _DL_TLS_H
>> +
>>   /* Type used for the representation of TLS information in the GOT.  */
>>   typedef struct
>>   {
>> @@ -23,6 +26,8 @@ typedef struct
>>     unsigned long int ti_offset;
>>   } tls_index;
>>
>> +extern void *__tls_get_addr (tls_index *ti);
>> +
>>   /* The thread pointer points to the first static TLS block.  */
>>   #define TLS_TP_OFFSET 0
>>
>> @@ -37,10 +42,10 @@ typedef struct
>>   /* Compute the value for a DTPREL reloc.  */
>>   #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>
>> -extern void *__tls_get_addr (tls_index *ti);
>> -
>>   #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>   #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>
>>   /* Value used for dtv entries for which the allocation is delayed.  */
>>   #define TLS_DTV_UNALLOCATED ((void *) -1l)
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> new file mode 100644
>> index 0000000000..0d8c9bb991
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> @@ -0,0 +1,341 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#ifdef USE_LASX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>> +#elif defined USE_LSX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>> +#elif !defined __loongarch_soft_float
>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>> +#else
>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>> +#endif
>> +
>> +#ifdef SHARED
>> +       /* Handler for dynamic TLS symbols.
>> +          Prototype:
>> +          _dl_tlsdesc_dynamic (tlsdesc *) ;
>> +
>> +          The second word of the descriptor points to a
>> +          tlsdesc_dynamic_arg structure.
>> +
>> +          Returns the offset between the thread pointer and the
>> +          object referenced by the argument.
>> +
>> +          ptrdiff_t
>> +          __attribute__ ((__regparm__ (1)))
>> +          _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> +          {
>> +            struct tlsdesc_dynamic_arg *td = tdp->arg;
>> +            dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>> +            if (__builtin_expect (td->gen_count <= dtv[0].counter
>> +               && (dtv[td->tlsinfo.ti_module].pointer.val
>> +                   != TLS_DTV_UNALLOCATED),
>> +               1))
>> +              return dtv[td->tlsinfo.ti_module].pointer.val
>> +               + td->tlsinfo.ti_offset
>> +               - __thread_pointer;
>> +
>> +            return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> +          }
>> +        */
>> +       .hidden _dl_tlsdesc_dynamic
>> +       .global _dl_tlsdesc_dynamic
>> +       .type   _dl_tlsdesc_dynamic,%function
>> +       cfi_startproc
>> +       .align 2
>> +_dl_tlsdesc_dynamic:
>> +       /* Save just enough registers to support fast path, if we fall
>> +          into slow path we will save additional registers.  */
>> +       ADDI    sp, sp,-24
>> +       REG_S   t0, sp, 0
>> +       REG_S   t1, sp, 8
>> +       REG_S   t2, sp, 16
>> +
>> +       REG_L   t0, tp, -SIZE_OF_DTV      # dtv(t0) = tp + TCBHEAD_DTV dtv start
>> +       REG_L   a0, a0, TLSDESC_ARG       # td(a0) = tdp->arg
>> +       REG_L   t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>> +       REG_L   t2, t0, DTV_COUNTER       # t2 = dtv[0].counter
>> +       bltu    t2, t1, Lslow
>> +
>> +       REG_L   t1, a0, TLSDESC_MODID     # t1 = td->tlsinfo.ti_module
>> +       slli.d  t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>> +       add.d   t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
>> +       REG_L   t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>> +       li.d    t2, TLS_DTV_UNALLOCATED
>> +       beq     t1, t2, Lslow
>> +       REG_L   t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>> +       # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>> +       add.d   a0, t1, t2
>> +Lret:
>> +       sub.d   a0, a0, tp
>> +       REG_L   t0, sp, 0
>> +       REG_L   t1, sp, 8
>> +       REG_L   t2, sp, 16
>> +       ADDI    sp, sp, 24
>> +       RET
>> +
>> +Lslow:
>> +       /* This is the slow path. We need to call __tls_get_addr() which
>> +          means we need to save and restore all the register that the
>> +          callee will trash.  */
>> +
>> +       /* Save the remaining registers that we must treat as caller save.  */
>> +       ADDI    sp, sp, -FRAME_SIZE
>> +       REG_S   ra, sp, 0 * SZREG
>> +       REG_S   a1, sp, 1 * SZREG
>> +       REG_S   a2, sp, 2 * SZREG
>> +       REG_S   a3, sp, 3 * SZREG
>> +       REG_S   a4, sp, 4 * SZREG
>> +       REG_S   a5, sp, 5 * SZREG
>> +       REG_S   a6, sp, 6 * SZREG
>> +       REG_S   a7, sp, 7 * SZREG
>> +       REG_S   t4, sp, 8 * SZREG
>> +       REG_S   t5, sp, 9 * SZREG
>> +       REG_S   t6, sp, 10 * SZREG
>> +       REG_S   t7, sp, 11 * SZREG
>> +       REG_S   t8, sp, 12 * SZREG
>> +
>> +#ifdef USE_LASX
>> +       xvst    xr0, sp, 13*SZREG + 0*SZXREG
>> +       xvst    xr1, sp, 13*SZREG + 1*SZXREG
>> +       xvst    xr2, sp, 13*SZREG + 2*SZXREG
>> +       xvst    xr3, sp, 13*SZREG + 3*SZXREG
>> +       xvst    xr4, sp, 13*SZREG + 4*SZXREG
>> +       xvst    xr5, sp, 13*SZREG + 5*SZXREG
>> +       xvst    xr6, sp, 13*SZREG + 6*SZXREG
>> +       xvst    xr7, sp, 13*SZREG + 7*SZXREG
>> +       xvst    xr8, sp, 13*SZREG + 8*SZXREG
>> +       xvst    xr9, sp, 13*SZREG + 9*SZXREG
>> +       xvst    xr10, sp, 13*SZREG + 10*SZXREG
>> +       xvst    xr11, sp, 13*SZREG + 11*SZXREG
>> +       xvst    xr12, sp, 13*SZREG + 12*SZXREG
>> +       xvst    xr13, sp, 13*SZREG + 13*SZXREG
>> +       xvst    xr14, sp, 13*SZREG + 14*SZXREG
>> +       xvst    xr15, sp, 13*SZREG + 15*SZXREG
>> +       xvst    xr16, sp, 13*SZREG + 16*SZXREG
>> +       xvst    xr17, sp, 13*SZREG + 17*SZXREG
>> +       xvst    xr18, sp, 13*SZREG + 18*SZXREG
>> +       xvst    xr19, sp, 13*SZREG + 19*SZXREG
>> +       xvst    xr20, sp, 13*SZREG + 20*SZXREG
>> +       xvst    xr21, sp, 13*SZREG + 21*SZXREG
>> +       xvst    xr22, sp, 13*SZREG + 22*SZXREG
>> +       xvst    xr23, sp, 13*SZREG + 23*SZXREG
>> +       xvst    xr24, sp, 13*SZREG + 24*SZXREG
>> +       xvst    xr25, sp, 13*SZREG + 25*SZXREG
>> +       xvst    xr26, sp, 13*SZREG + 26*SZXREG
>> +       xvst    xr27, sp, 13*SZREG + 27*SZXREG
>> +       xvst    xr28, sp, 13*SZREG + 28*SZXREG
>> +       xvst    xr29, sp, 13*SZREG + 29*SZXREG
>> +       xvst    xr30, sp, 13*SZREG + 30*SZXREG
>> +       xvst    xr31, sp, 13*SZREG + 31*SZXREG
>> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> +       # some fields in fcsr0
>> +       movfcsr2gr  t0, fcsr0
>> +       REG_S       t0, sp, 32*SZXREG
>> +#elif defined USE_LSX
>> +       vst     vr0, sp, 13*SZREG + 0*SZVREG
>> +       vst     vr1, sp, 13*SZREG + 1*SZVREG
>> +       vst     vr2, sp, 13*SZREG + 2*SZVREG
>> +       vst     vr3, sp, 13*SZREG + 3*SZVREG
>> +       vst     vr4, sp, 13*SZREG + 4*SZVREG
>> +       vst     vr5, sp, 13*SZREG + 5*SZVREG
>> +       vst     vr6, sp, 13*SZREG + 6*SZVREG
>> +       vst     vr7, sp, 13*SZREG + 7*SZVREG
>> +       vst     vr8, sp, 13*SZREG + 8*SZVREG
>> +       vst     vr9, sp, 13*SZREG + 9*SZVREG
>> +       vst     vr10, sp, 13*SZREG + 10*SZVREG
>> +       vst     vr11, sp, 13*SZREG + 11*SZVREG
>> +       vst     vr12, sp, 13*SZREG + 12*SZVREG
>> +       vst     vr13, sp, 13*SZREG + 13*SZVREG
>> +       vst     vr14, sp, 13*SZREG + 14*SZVREG
>> +       vst     vr15, sp, 13*SZREG + 15*SZVREG
>> +       vst     vr16, sp, 13*SZREG + 16*SZVREG
>> +       vst     vr17, sp, 13*SZREG + 17*SZVREG
>> +       vst     vr18, sp, 13*SZREG + 18*SZVREG
>> +       vst     vr19, sp, 13*SZREG + 19*SZVREG
>> +       vst     vr20, sp, 13*SZREG + 20*SZVREG
>> +       vst     vr21, sp, 13*SZREG + 21*SZVREG
>> +       vst     vr22, sp, 13*SZREG + 22*SZVREG
>> +       vst     vr23, sp, 13*SZREG + 23*SZVREG
>> +       vst     vr24, sp, 13*SZREG + 24*SZVREG
>> +       vst     vr25, sp, 13*SZREG + 25*SZVREG
>> +       vst     vr26, sp, 13*SZREG + 26*SZVREG
>> +       vst     vr27, sp, 13*SZREG + 27*SZVREG
>> +       vst     vr28, sp, 13*SZREG + 28*SZVREG
>> +       vst     vr29, sp, 13*SZREG + 29*SZVREG
>> +       vst     vr30, sp, 13*SZREG + 30*SZVREG
>> +       vst     vr31, sp, 13*SZREG + 31*SZVREG
>> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> +       # some fields in fcsr0
>> +       movfcsr2gr  t0, fcsr0
>> +       REG_S       t0, sp, 32*SZVREG
>> +#elif !defined __loongarch_soft_float
>> +       FREG_S  fa0, sp, 13*SZREG + 0*SZFREG
>> +       FREG_S  fa1, sp, 13*SZREG + 1*SZFREG
>> +       FREG_S  fa2, sp, 13*SZREG + 2*SZFREG
>> +       FREG_S  fa3, sp, 13*SZREG + 3*SZFREG
>> +       FREG_S  fa4, sp, 13*SZREG + 4*SZFREG
>> +       FREG_S  fa5, sp, 13*SZREG + 5*SZFREG
>> +       FREG_S  fa6, sp, 13*SZREG + 6*SZFREG
>> +       FREG_S  fa7, sp, 13*SZREG + 7*SZFREG
>> +       FREG_S  ft0, sp, 13*SZREG + 8*SZFREG
>> +       FREG_S  ft1, sp, 13*SZREG + 9*SZFREG
>> +       FREG_S  ft2, sp, 13*SZREG + 10*SZFREG
>> +       FREG_S  ft3, sp, 13*SZREG + 11*SZFREG
>> +       FREG_S  ft4, sp, 13*SZREG + 12*SZFREG
>> +       FREG_S  ft5, sp, 13*SZREG + 13*SZFREG
>> +       FREG_S  ft6, sp, 13*SZREG + 14*SZFREG
>> +       FREG_S  ft7, sp, 13*SZREG + 15*SZFREG
>> +       FREG_S  ft8, sp, 13*SZREG + 16*SZFREG
>> +       FREG_S  ft9, sp, 13*SZREG + 17*SZFREG
>> +       FREG_S  ft10, sp, 13*SZREG + 18*SZFREG
>> +       FREG_S  ft11, sp, 13*SZREG + 19*SZFREG
>> +       FREG_S  ft12, sp, 13*SZREG + 20*SZFREG
>> +       FREG_S  ft13, sp, 13*SZREG + 21*SZFREG
>> +       FREG_S  ft14, sp, 13*SZREG + 22*SZFREG
>> +       FREG_S  ft15, sp, 13*SZREG + 23*SZFREG
>> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> +       # some fields in fcsr0
>> +       movfcsr2gr  t0, fcsr0
>> +       REG_S       t0, sp, 24*SZFREG
>> +#endif /* #ifdef USE_LASX  */
>> +
>> +       bl      __tls_get_addr
>> +       ADDI    a0, a0, -TLS_DTV_OFFSET
>> +
>> +       REG_L   ra, sp, 0
>> +       REG_L   a1, sp, 1 * 8
>> +       REG_L   a2, sp, 2 * 8
>> +       REG_L   a3, sp, 3 * 8
>> +       REG_L   a4, sp, 4 * 8
>> +       REG_L   a5, sp, 5 * 8
>> +       REG_L   a6, sp, 6 * 8
>> +       REG_L   a7, sp, 7 * 8
>> +       REG_L   t4, sp, 8 * 8
>> +       REG_L   t5, sp, 9 * 8
>> +       REG_L   t6, sp, 10 * 8
>> +       REG_L   t7, sp, 11 * 8
>> +       REG_L   t8, sp, 12 * 8
>> +
>> +#ifdef USE_LASX
>> +       xvld    xr0, sp, 13*SZREG + 0*SZXREG
>> +       xvld    xr1, sp, 13*SZREG + 1*SZXREG
>> +       xvld    xr2, sp, 13*SZREG + 2*SZXREG
>> +       xvld    xr3, sp, 13*SZREG + 3*SZXREG
>> +       xvld    xr4, sp, 13*SZREG + 4*SZXREG
>> +       xvld    xr5, sp, 13*SZREG + 5*SZXREG
>> +       xvld    xr6, sp, 13*SZREG + 6*SZXREG
>> +       xvld    xr7, sp, 13*SZREG + 7*SZXREG
>> +       xvld    xr8, sp, 13*SZREG + 8*SZXREG
>> +       xvld    xr9, sp, 13*SZREG + 9*SZXREG
>> +       xvld    xr10, sp, 13*SZREG + 10*SZXREG
>> +       xvld    xr11, sp, 13*SZREG + 11*SZXREG
>> +       xvld    xr12, sp, 13*SZREG + 12*SZXREG
>> +       xvld    xr13, sp, 13*SZREG + 13*SZXREG
>> +       xvld    xr14, sp, 13*SZREG + 14*SZXREG
>> +       xvld    xr15, sp, 13*SZREG + 15*SZXREG
>> +       xvld    xr16, sp, 13*SZREG + 16*SZXREG
>> +       xvld    xr17, sp, 13*SZREG + 17*SZXREG
>> +       xvld    xr18, sp, 13*SZREG + 18*SZXREG
>> +       xvld    xr19, sp, 13*SZREG + 19*SZXREG
>> +       xvld    xr20, sp, 13*SZREG + 20*SZXREG
>> +       xvld    xr21, sp, 13*SZREG + 21*SZXREG
>> +       xvld    xr22, sp, 13*SZREG + 22*SZXREG
>> +       xvld    xr23, sp, 13*SZREG + 23*SZXREG
>> +       xvld    xr24, sp, 13*SZREG + 24*SZXREG
>> +       xvld    xr25, sp, 13*SZREG + 25*SZXREG
>> +       xvld    xr26, sp, 13*SZREG + 26*SZXREG
>> +       xvld    xr27, sp, 13*SZREG + 27*SZXREG
>> +       xvld    xr28, sp, 13*SZREG + 28*SZXREG
>> +       xvld    xr29, sp, 13*SZREG + 29*SZXREG
>> +       xvld    xr30, sp, 13*SZREG + 30*SZXREG
>> +       xvld    xr31, sp, 13*SZREG + 31*SZXREG
>> +       REG_L   t0, sp, 32*SZXREG
>> +       movgr2fcsr  fcsr0, t0
>> +#elif defined USE_LSX
>> +       vld     vr0, sp, 13*SZREG + 0*SZVREG
>> +       vld     vr1, sp, 13*SZREG + 1*SZVREG
>> +       vld     vr2, sp, 13*SZREG + 2*SZVREG
>> +       vld     vr3, sp, 13*SZREG + 3*SZVREG
>> +       vld     vr4, sp, 13*SZREG + 4*SZVREG
>> +       vld     vr5, sp, 13*SZREG + 5*SZVREG
>> +       vld     vr6, sp, 13*SZREG + 6*SZVREG
>> +       vld     vr7, sp, 13*SZREG + 7*SZVREG
>> +       vld     vr8, sp, 13*SZREG + 8*SZVREG
>> +       vld     vr9, sp, 13*SZREG + 9*SZVREG
>> +       vld     vr10, sp, 13*SZREG + 10*SZVREG
>> +       vld     vr11, sp, 13*SZREG + 11*SZVREG
>> +       vld     vr12, sp, 13*SZREG + 12*SZVREG
>> +       vld     vr13, sp, 13*SZREG + 13*SZVREG
>> +       vld     vr14, sp, 13*SZREG + 14*SZVREG
>> +       vld     vr15, sp, 13*SZREG + 15*SZVREG
>> +       vld     vr16, sp, 13*SZREG + 16*SZVREG
>> +       vld     vr17, sp, 13*SZREG + 17*SZVREG
>> +       vld     vr18, sp, 13*SZREG + 18*SZVREG
>> +       vld     vr19, sp, 13*SZREG + 19*SZVREG
>> +       vld     vr20, sp, 13*SZREG + 20*SZVREG
>> +       vld     vr21, sp, 13*SZREG + 21*SZVREG
>> +       vld     vr22, sp, 13*SZREG + 22*SZVREG
>> +       vld     vr23, sp, 13*SZREG + 23*SZVREG
>> +       vld     vr24, sp, 13*SZREG + 24*SZVREG
>> +       vld     vr25, sp, 13*SZREG + 25*SZVREG
>> +       vld     vr26, sp, 13*SZREG + 26*SZVREG
>> +       vld     vr27, sp, 13*SZREG + 27*SZVREG
>> +       vld     vr28, sp, 13*SZREG + 28*SZVREG
>> +       vld     vr29, sp, 13*SZREG + 29*SZVREG
>> +       vld     vr30, sp, 13*SZREG + 30*SZVREG
>> +       vld     vr31, sp, 13*SZREG + 31*SZVREG
>> +       REG_L   t0, sp, 32*SZVREG
>> +       movgr2fcsr  fcsr0, t0
>> +#elif !defined __loongarch_soft_float
>> +       FREG_L  fa0, sp, 13*SZREG + 0*SZFREG
>> +       FREG_L  fa1, sp, 13*SZREG + 1*SZFREG
>> +       FREG_L  fa2, sp, 13*SZREG + 2*SZFREG
>> +       FREG_L  fa3, sp, 13*SZREG + 3*SZFREG
>> +       FREG_L  fa4, sp, 13*SZREG + 4*SZFREG
>> +       FREG_L  fa5, sp, 13*SZREG + 5*SZFREG
>> +       FREG_L  fa6, sp, 13*SZREG + 6*SZFREG
>> +       FREG_L  fa7, sp, 13*SZREG + 7*SZFREG
>> +       FREG_L  ft0, sp, 13*SZREG + 8*SZFREG
>> +       FREG_L  ft1, sp, 13*SZREG + 9*SZFREG
>> +       FREG_L  ft2, sp, 13*SZREG + 10*SZFREG
>> +       FREG_L  ft3, sp, 13*SZREG + 11*SZFREG
>> +       FREG_L  ft4, sp, 13*SZREG + 12*SZFREG
>> +       FREG_L  ft5, sp, 13*SZREG + 13*SZFREG
>> +       FREG_L  ft6, sp, 13*SZREG + 14*SZFREG
>> +       FREG_L  ft7, sp, 13*SZREG + 15*SZFREG
>> +       FREG_L  ft8, sp, 13*SZREG + 16*SZFREG
>> +       FREG_L  ft9, sp, 13*SZREG + 17*SZFREG
>> +       FREG_L  ft10, sp, 13*SZREG + 18*SZFREG
>> +       FREG_L  ft11, sp, 13*SZREG + 19*SZFREG
>> +       FREG_L  ft12, sp, 13*SZREG + 20*SZFREG
>> +       FREG_L  ft13, sp, 13*SZREG + 21*SZFREG
>> +       FREG_L  ft14, sp, 13*SZREG + 22*SZFREG
>> +       FREG_L  ft15, sp, 13*SZREG + 23*SZFREG
>> +       REG_L   t0, sp, 24*SZFREG
>> +       movgr2fcsr  fcsr0, t0
>> +#endif /* #ifdef USE_LASX  */
>> +
>> +       ADDI    sp, sp, FRAME_SIZE
>> +       b       Lret
>> +       cfi_endproc
>> +       .size   _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> +#endif /* #ifdef SHARED  */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>> new file mode 100644
>> index 0000000000..4a17079169
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>> @@ -0,0 +1,93 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include "tlsdesc.h"
>> +
>> +       .text
>> +
>> +       /* Compute the thread pointer offset for symbols in the static
>> +          TLS block. The offset is the same for all threads.
>> +          Prototype:
>> +          _dl_tlsdesc_return (tlsdesc *);  */
>> +       .hidden _dl_tlsdesc_return
>> +       .global _dl_tlsdesc_return
>> +       .type   _dl_tlsdesc_return,%function
>> +       cfi_startproc
>> +       .align 2
>> +_dl_tlsdesc_return:
>> +       REG_L  a0, a0, 8
>> +       RET
>> +       cfi_endproc
>> +       .size   _dl_tlsdesc_return, .-_dl_tlsdesc_return
>> +
>> +       /* Handler for undefined weak TLS symbols.
>> +          Prototype:
>> +          _dl_tlsdesc_undefweak (tlsdesc *);
>> +
>> +          The second word of the descriptor contains the addend.
>> +          Return the addend minus the thread pointer. This ensures
>> +          that when the caller adds on the thread pointer it gets back
>> +          the addend.  */
>> +       .hidden _dl_tlsdesc_undefweak
>> +       .global _dl_tlsdesc_undefweak
>> +       .type   _dl_tlsdesc_undefweak,%function
>> +       cfi_startproc
>> +       .align  2
>> +_dl_tlsdesc_undefweak:
>> +       REG_L   a0, a0, 8
>> +       sub.d   a0, a0, tp
>> +       RET
>> +       cfi_endproc
>> +       .size   _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>> +
>> +
>> +#ifdef SHARED
>> +
>> +#if !defined __loongarch_soft_float
>> +
>> +#define USE_LASX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>> +#define Lret Lret_lasx
>> +#define Lslow Lslow_lasx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LASX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#define USE_LSX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>> +#define Lret Lret_lsx
>> +#define Lslow Lslow_lsx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LSX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#endif
>> +
>> +#include "dl-tlsdesc-dynamic.h"
>> +
>> +#endif /* #ifdef SHARED  */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>> new file mode 100644
>> index 0000000000..988037a714
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>> @@ -0,0 +1,53 @@
>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#ifndef _DL_TLSDESC_H
>> +#define _DL_TLSDESC_H
>> +
>> +#include <dl-tls.h>
>> +
>> +/* Type used to represent a TLS descriptor in the GOT.  */
>> +struct tlsdesc
>> +{
>> +  ptrdiff_t (*entry) (struct tlsdesc *);
>> +  void *arg;
>> +};
>> +
>> +/* Type used as the argument in a TLS descriptor for a symbol that
>> +   needs dynamic TLS offsets.  */
>> +struct tlsdesc_dynamic_arg
>> +{
>> +  tls_index tlsinfo;
>> +  size_t gen_count;
>> +};
>> +
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>> +
>> +# ifdef SHARED
>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>> +#if !defined __loongarch_soft_float
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>> +#endif
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>> +#endif
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>> index 4d8737ee7f..9b1773634c 100644
>> --- a/sysdeps/loongarch/linkmap.h
>> +++ b/sysdeps/loongarch/linkmap.h
>> @@ -19,4 +19,5 @@
>>   struct link_map_machine
>>   {
>>     ElfW (Addr) plt; /* Address of .plt.  */
>> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>>   };
>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>> index 51521a7eb4..23c1d12914 100644
>> --- a/sysdeps/loongarch/sys/asm.h
>> +++ b/sysdeps/loongarch/sys/asm.h
>> @@ -25,6 +25,7 @@
>>   /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>>   #define SZREG 8
>>   #define SZFREG 8
>> +#define SZFCSREG 4
>>   #define SZVREG 16
>>   #define SZXREG 32
>>   #define REG_L ld.d
>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>> index f61ee25b25..80ce3e9c00 100644
>> --- a/sysdeps/loongarch/sys/regdef.h
>> +++ b/sysdeps/loongarch/sys/regdef.h
>> @@ -97,6 +97,7 @@
>>   #define fcc5 $fcc5
>>   #define fcc6 $fcc6
>>   #define fcc7 $fcc7
>> +#define fcsr0 $fcsr0
>>
>>   #define vr0 $vr0
>>   #define vr1 $vr1
>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>> new file mode 100644
>> index 0000000000..a357e7619f
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.c
>> @@ -0,0 +1,39 @@
>> +/* Manage TLS descriptors.  AArch64 version.
>                                                    Change it.
>> +
>>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-03-05 19:29 ` Adhemerval Zanella Netto
@ 2024-03-08  7:53   ` mengqinggang
  2024-03-08 14:10     ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 8+ messages in thread
From: mengqinggang @ 2024-03-08  7:53 UTC (permalink / raw)
  To: Adhemerval Zanella Netto, libc-alpha
  Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
	maskray, luweining, wanglei, hejinyang

Thanks a lot for the review! A new v3 version patch has been sent.
https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html


And some reply below.


在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>
> On 28/02/24 22:43, mengqinggang wrote:
>> This is mostly based on AArch64 and RISC-V implementation.
>>
>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>
>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>> all vector registers.
>> ---
>> Changes v1 -> v2:
>> - Fix vr24-vr31, xr24-xr31 typo.
>> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>
>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
> Patch looks ok, some comments below.
>
>>   elf/elf.h                                     |   2 +
>>   sysdeps/loongarch/Makefile                    |   6 +
>>   sysdeps/loongarch/dl-link.sym                 |   1 +
>>   sysdeps/loongarch/dl-machine.h                |  60 ++-
>>   sysdeps/loongarch/dl-tls.h                    |   9 +-
>>   sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>>   sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>>   sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>>   sysdeps/loongarch/linkmap.h                   |   1 +
>>   sysdeps/loongarch/sys/asm.h                   |   1 +
>>   sysdeps/loongarch/sys/regdef.h                |   1 +
>>   sysdeps/loongarch/tlsdesc.c                   |  39 ++
>>   sysdeps/loongarch/tlsdesc.sym                 |  19 +
>>   .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>>   14 files changed, 625 insertions(+), 3 deletions(-)
>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>   create mode 100644 sysdeps/loongarch/tlsdesc.c
>>   create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>
>> diff --git a/elf/elf.h b/elf/elf.h
>> index f2206e5c06..eec24ea049 100644
>> --- a/elf/elf.h
>> +++ b/elf/elf.h
>> @@ -4237,6 +4237,8 @@ enum
>>   #define R_LARCH_TLS_TPREL32	10
>>   #define R_LARCH_TLS_TPREL64	11
>>   #define R_LARCH_IRELATIVE	12
>> +#define R_LARCH_TLS_DESC32	13
>> +#define R_LARCH_TLS_DESC64	14
>>   
>>   /* Reserved for future relocs that the dynamic linker must understand.  */
>>   
>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>> index 43d2f583cd..181389e787 100644
>> --- a/sysdeps/loongarch/Makefile
>> +++ b/sysdeps/loongarch/Makefile
>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>   endif
>>   
>>   ifeq ($(subdir),elf)
>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>   gen-as-const-headers += dl-link.sym
>>   endif
>>   
>> +ifeq ($(subdir),csu)
>> +gen-as-const-headers += tlsdesc.sym
>> +endif
>> +
>> +
>>   # LoongArch's assembler also needs to know about PIC as it changes the
>>   # definition of some assembler macros.
>>   ASFLAGS-.os += $(pic-ccflag)
>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>> index b534968e30..fd81ef37d5 100644
>> --- a/sysdeps/loongarch/dl-link.sym
>> +++ b/sysdeps/loongarch/dl-link.sym
>> @@ -1,6 +1,7 @@
>>   #include <stddef.h>
>>   #include <sysdep.h>
>>   #include <link.h>
>> +#include <dl-tlsdesc.h>
>>   
>>   DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>>   DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>> index ab81b82d95..8ca6c224f6 100644
>> --- a/sysdeps/loongarch/dl-machine.h
>> +++ b/sysdeps/loongarch/dl-machine.h
>> @@ -25,7 +25,7 @@
>>   #include <entry.h>
>>   #include <elf/elf.h>
>>   #include <sys/asm.h>
>> -#include <dl-tls.h>
>> +#include <dl-tlsdesc.h>
>>   #include <dl-static-tls.h>
>>   #include <dl-machine-rel.h>
>>   
>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>         *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>         break;
>>   
>> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>> +      {
>> +	struct tlsdesc volatile *td =
>> +	    (struct tlsdesc volatile *)addr_field;
>> +	if (! sym)
>> +	  {
>> +	    td->arg = (void*)reloc->r_addend;
>> +	    td->entry = _dl_tlsdesc_undefweak;
>> +	  }
>> +	else
>> +	  {
>> +# ifndef SHARED
>> +	    CHECK_STATIC_TLS (map, sym_map);
>> +# else
>> +	    if (!TRY_STATIC_TLS (map, sym_map))
>> +	      {
>> +		td->arg = _dl_make_tlsdesc_dynamic
>> +		  (sym_map, sym->st_value + reloc->r_addend);
>> +# if !defined __loongarch_soft_float
>> +		if (SUPPORT_LASX)
>> +		  td->entry = _dl_tlsdesc_dynamic_lasx;
>> +		else
>> +		if (SUPPORT_LSX)
>> +		  td->entry = _dl_tlsdesc_dynamic_lsx;
>> +		else
>> +# endif
>> +		  td->entry = _dl_tlsdesc_dynamic;
>> +	      }
>> +	    else
>> +# endif
>> +	      {
>> +		td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>> +			    + reloc->r_addend);
>> +		td->entry = _dl_tlsdesc_return;
>> +	      }
>> +	  }
>> +	break;
>> +      }
>> +
>>       case R_LARCH_COPY:
>>         {
>>   	  if (sym == NULL)
>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>         else
>>   	*reloc_addr = map->l_mach.plt;
>>       }
>> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
> Use __glibc_likely here.
>
>> +    {
>> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>> +      const ElfW (Sym) *sym = &symtab[symndx];
>> +      const struct r_found_version *version = NULL;
>> +
>> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>> +	{
>> +	  const ElfW (Half) *vernum =
>> +	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>> +	  version = &map->l_versions[vernum[symndx] & 0x7fff];
>> +	}
>> +
>> +      /* Always initialize TLS descriptors completely, because lazy
>> +	 initialization requires synchronization at every TLS access.  */
>> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>> +			skip_ifunc);
>> +    }
>>     else
>>       _dl_reloc_bad_type (map, r_type, 1);
>>   }
>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>> index 29924b866d..de593c002d 100644
>> --- a/sysdeps/loongarch/dl-tls.h
>> +++ b/sysdeps/loongarch/dl-tls.h
>> @@ -16,6 +16,9 @@
>>      License along with the GNU C Library.  If not, see
>>      <https://www.gnu.org/licenses/>.  */
>>   
>> +#ifndef _DL_TLS_H
>> +#define _DL_TLS_H
>> +
>>   /* Type used for the representation of TLS information in the GOT.  */
>>   typedef struct
>>   {
>> @@ -23,6 +26,8 @@ typedef struct
>>     unsigned long int ti_offset;
>>   } tls_index;
>>   
>> +extern void *__tls_get_addr (tls_index *ti);
>> +
>>   /* The thread pointer points to the first static TLS block.  */
>>   #define TLS_TP_OFFSET 0
>>   
>> @@ -37,10 +42,10 @@ typedef struct
>>   /* Compute the value for a DTPREL reloc.  */
>>   #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>   
>> -extern void *__tls_get_addr (tls_index *ti);
>> -
> Why move the function prototype?


Maybe just want to take it out of a bunch of macros.


>
>>   #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>   #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>   
>>   /* Value used for dtv entries for which the allocation is delayed.  */
>>   #define TLS_DTV_UNALLOCATED ((void *) -1l)
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> new file mode 100644
>> index 0000000000..0d8c9bb991
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>> @@ -0,0 +1,341 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> Update Copyright years to 2024.
>
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#ifdef USE_LASX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>> +#elif defined USE_LSX
>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>> +#elif !defined __loongarch_soft_float
>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>> +#else
>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>> +#endif
> I don't have a strong opinion, but another option that might be simpler it
> to provide only only one _dl_tlsdesc_dynamic implementation and check the
> required save/restore of vector register based on hwcap value.


The v3 patch provides only one  _dl_tlsdesc_dynamic implementation.


>> +
>> +#ifdef SHARED
>> +	/* Handler for dynamic TLS symbols.
>> +	   Prototype:
>> +	   _dl_tlsdesc_dynamic (tlsdesc *) ;
>> +
>> +	   The second word of the descriptor points to a
>> +	   tlsdesc_dynamic_arg structure.
>> +
>> +	   Returns the offset between the thread pointer and the
>> +	   object referenced by the argument.
>> +
>> +	   ptrdiff_t
>> +	   __attribute__ ((__regparm__ (1)))
> Does this attribute really make sense for loongarch?


This line has been deleted.


>
>> +	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>> +	   {
>> +	     struct tlsdesc_dynamic_arg *td = tdp->arg;
>> +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>> +	     if (__builtin_expect (td->gen_count <= dtv[0].counter
> Use __glibc_unlikely or just remove the __builtin_expect for clarity.
>
>> +		&& (dtv[td->tlsinfo.ti_module].pointer.val
>> +		    != TLS_DTV_UNALLOCATED),
>> +		1))
>> +	       return dtv[td->tlsinfo.ti_module].pointer.val
>> +		+ td->tlsinfo.ti_offset
>> +		- __thread_pointer;
>> +
>> +	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>> +	   }
>> +	 */
>> +	.hidden _dl_tlsdesc_dynamic
>> +	.global	_dl_tlsdesc_dynamic
>> +	.type	_dl_tlsdesc_dynamic,%function
>> +	cfi_startproc
>> +	.align 2
>> +_dl_tlsdesc_dynamic:
>> +	/* Save just enough registers to support fast path, if we fall
>> +	   into slow path we will save additional registers.  */
>> +	ADDI	sp, sp,-24
>> +	REG_S	t0, sp, 0
>> +	REG_S	t1, sp, 8
>> +	REG_S	t2, sp, 16
>> +
>> +	REG_L	t0, tp, -SIZE_OF_DTV	  # dtv(t0) = tp + TCBHEAD_DTV dtv start
>> +	REG_L	a0, a0, TLSDESC_ARG	  # td(a0) = tdp->arg
>> +	REG_L	t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>> +	REG_L	t2, t0, DTV_COUNTER	  # t2 = dtv[0].counter
>> +	bltu	t2, t1, Lslow
>> +
>> +	REG_L	t1, a0, TLSDESC_MODID	  # t1 = td->tlsinfo.ti_module
>> +	slli.d	t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>> +	add.d	t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
>> +	REG_L	t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>> +	li.d	t2, TLS_DTV_UNALLOCATED
>> +	beq	t1, t2, Lslow
>> +	REG_L	t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>> +	# dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>> +	add.d	a0, t1, t2
>> +Lret:
>> +	sub.d	a0, a0, tp
>> +	REG_L	t0, sp, 0
>> +	REG_L	t1, sp, 8
>> +	REG_L	t2, sp, 16
>> +	ADDI	sp, sp, 24
>> +	RET
>> +
>> +Lslow:
>> +	/* This is the slow path. We need to call __tls_get_addr() which
>> +	   means we need to save and restore all the register that the
>> +	   callee will trash.  */
>> +
>> +	/* Save the remaining registers that we must treat as caller save.  */
>> +	ADDI	sp, sp, -FRAME_SIZE
>> +	REG_S	ra, sp, 0 * SZREG
>> +	REG_S	a1, sp, 1 * SZREG
>> +	REG_S	a2, sp, 2 * SZREG
>> +	REG_S	a3, sp, 3 * SZREG
>> +	REG_S	a4, sp, 4 * SZREG
>> +	REG_S	a5, sp, 5 * SZREG
>> +	REG_S	a6, sp, 6 * SZREG
>> +	REG_S	a7, sp, 7 * SZREG
>> +	REG_S	t4, sp, 8 * SZREG
>> +	REG_S	t5, sp, 9 * SZREG
>> +	REG_S	t6, sp, 10 * SZREG
>> +	REG_S	t7, sp, 11 * SZREG
>> +	REG_S	t8, sp, 12 * SZREG
>> +
>> +#ifdef USE_LASX
>> +	xvst	xr0, sp, 13*SZREG + 0*SZXREG
>> +	xvst	xr1, sp, 13*SZREG + 1*SZXREG
>> +	xvst	xr2, sp, 13*SZREG + 2*SZXREG
>> +	xvst	xr3, sp, 13*SZREG + 3*SZXREG
>> +	xvst	xr4, sp, 13*SZREG + 4*SZXREG
>> +	xvst	xr5, sp, 13*SZREG + 5*SZXREG
>> +	xvst	xr6, sp, 13*SZREG + 6*SZXREG
>> +	xvst	xr7, sp, 13*SZREG + 7*SZXREG
>> +	xvst	xr8, sp, 13*SZREG + 8*SZXREG
>> +	xvst	xr9, sp, 13*SZREG + 9*SZXREG
>> +	xvst	xr10, sp, 13*SZREG + 10*SZXREG
>> +	xvst	xr11, sp, 13*SZREG + 11*SZXREG
>> +	xvst	xr12, sp, 13*SZREG + 12*SZXREG
>> +	xvst	xr13, sp, 13*SZREG + 13*SZXREG
>> +	xvst	xr14, sp, 13*SZREG + 14*SZXREG
>> +	xvst	xr15, sp, 13*SZREG + 15*SZXREG
>> +	xvst	xr16, sp, 13*SZREG + 16*SZXREG
>> +	xvst	xr17, sp, 13*SZREG + 17*SZXREG
>> +	xvst	xr18, sp, 13*SZREG + 18*SZXREG
>> +	xvst	xr19, sp, 13*SZREG + 19*SZXREG
>> +	xvst	xr20, sp, 13*SZREG + 20*SZXREG
>> +	xvst	xr21, sp, 13*SZREG + 21*SZXREG
>> +	xvst	xr22, sp, 13*SZREG + 22*SZXREG
>> +	xvst	xr23, sp, 13*SZREG + 23*SZXREG
>> +	xvst	xr24, sp, 13*SZREG + 24*SZXREG
>> +	xvst	xr25, sp, 13*SZREG + 25*SZXREG
>> +	xvst	xr26, sp, 13*SZREG + 26*SZXREG
>> +	xvst	xr27, sp, 13*SZREG + 27*SZXREG
>> +	xvst	xr28, sp, 13*SZREG + 28*SZXREG
>> +	xvst	xr29, sp, 13*SZREG + 29*SZXREG
>> +	xvst	xr30, sp, 13*SZREG + 30*SZXREG
>> +	xvst	xr31, sp, 13*SZREG + 31*SZXREG
>> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> +	# some fields in fcsr0
>> +	movfcsr2gr  t0, fcsr0
>> +	REG_S	    t0, sp, 32*SZXREG
>> +#elif defined USE_LSX
>> +	vst	vr0, sp, 13*SZREG + 0*SZVREG
>> +	vst	vr1, sp, 13*SZREG + 1*SZVREG
>> +	vst	vr2, sp, 13*SZREG + 2*SZVREG
>> +	vst	vr3, sp, 13*SZREG + 3*SZVREG
>> +	vst	vr4, sp, 13*SZREG + 4*SZVREG
>> +	vst	vr5, sp, 13*SZREG + 5*SZVREG
>> +	vst	vr6, sp, 13*SZREG + 6*SZVREG
>> +	vst	vr7, sp, 13*SZREG + 7*SZVREG
>> +	vst	vr8, sp, 13*SZREG + 8*SZVREG
>> +	vst	vr9, sp, 13*SZREG + 9*SZVREG
>> +	vst	vr10, sp, 13*SZREG + 10*SZVREG
>> +	vst	vr11, sp, 13*SZREG + 11*SZVREG
>> +	vst	vr12, sp, 13*SZREG + 12*SZVREG
>> +	vst	vr13, sp, 13*SZREG + 13*SZVREG
>> +	vst	vr14, sp, 13*SZREG + 14*SZVREG
>> +	vst	vr15, sp, 13*SZREG + 15*SZVREG
>> +	vst	vr16, sp, 13*SZREG + 16*SZVREG
>> +	vst	vr17, sp, 13*SZREG + 17*SZVREG
>> +	vst	vr18, sp, 13*SZREG + 18*SZVREG
>> +	vst	vr19, sp, 13*SZREG + 19*SZVREG
>> +	vst	vr20, sp, 13*SZREG + 20*SZVREG
>> +	vst	vr21, sp, 13*SZREG + 21*SZVREG
>> +	vst	vr22, sp, 13*SZREG + 22*SZVREG
>> +	vst	vr23, sp, 13*SZREG + 23*SZVREG
>> +	vst	vr24, sp, 13*SZREG + 24*SZVREG
>> +	vst	vr25, sp, 13*SZREG + 25*SZVREG
>> +	vst	vr26, sp, 13*SZREG + 26*SZVREG
>> +	vst	vr27, sp, 13*SZREG + 27*SZVREG
>> +	vst	vr28, sp, 13*SZREG + 28*SZVREG
>> +	vst	vr29, sp, 13*SZREG + 29*SZVREG
>> +	vst	vr30, sp, 13*SZREG + 30*SZVREG
>> +	vst	vr31, sp, 13*SZREG + 31*SZVREG
>> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> +	# some fields in fcsr0
>> +	movfcsr2gr  t0, fcsr0
>> +	REG_S	    t0, sp, 32*SZVREG
>> +#elif !defined __loongarch_soft_float
>> +	FREG_S	fa0, sp, 13*SZREG + 0*SZFREG
>> +	FREG_S	fa1, sp, 13*SZREG + 1*SZFREG
>> +	FREG_S	fa2, sp, 13*SZREG + 2*SZFREG
>> +	FREG_S	fa3, sp, 13*SZREG + 3*SZFREG
>> +	FREG_S	fa4, sp, 13*SZREG + 4*SZFREG
>> +	FREG_S	fa5, sp, 13*SZREG + 5*SZFREG
>> +	FREG_S	fa6, sp, 13*SZREG + 6*SZFREG
>> +	FREG_S	fa7, sp, 13*SZREG + 7*SZFREG
>> +	FREG_S	ft0, sp, 13*SZREG + 8*SZFREG
>> +	FREG_S	ft1, sp, 13*SZREG + 9*SZFREG
>> +	FREG_S	ft2, sp, 13*SZREG + 10*SZFREG
>> +	FREG_S	ft3, sp, 13*SZREG + 11*SZFREG
>> +	FREG_S	ft4, sp, 13*SZREG + 12*SZFREG
>> +	FREG_S	ft5, sp, 13*SZREG + 13*SZFREG
>> +	FREG_S	ft6, sp, 13*SZREG + 14*SZFREG
>> +	FREG_S	ft7, sp, 13*SZREG + 15*SZFREG
>> +	FREG_S	ft8, sp, 13*SZREG + 16*SZFREG
>> +	FREG_S	ft9, sp, 13*SZREG + 17*SZFREG
>> +	FREG_S	ft10, sp, 13*SZREG + 18*SZFREG
>> +	FREG_S	ft11, sp, 13*SZREG + 19*SZFREG
>> +	FREG_S	ft12, sp, 13*SZREG + 20*SZFREG
>> +	FREG_S	ft13, sp, 13*SZREG + 21*SZFREG
>> +	FREG_S	ft14, sp, 13*SZREG + 22*SZFREG
>> +	FREG_S	ft15, sp, 13*SZREG + 23*SZFREG
>> +	# Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>> +	# some fields in fcsr0
>> +	movfcsr2gr  t0, fcsr0
>> +	REG_S	    t0, sp, 24*SZFREG
>> +#endif /* #ifdef USE_LASX  */
>> +
>> +	bl	__tls_get_addr
>> +	ADDI	a0, a0, -TLS_DTV_OFFSET
>> +
>> +	REG_L	ra, sp, 0
>> +	REG_L	a1, sp, 1 * 8
>> +	REG_L	a2, sp, 2 * 8
>> +	REG_L	a3, sp, 3 * 8
>> +	REG_L	a4, sp, 4 * 8
>> +	REG_L	a5, sp, 5 * 8
>> +	REG_L	a6, sp, 6 * 8
>> +	REG_L	a7, sp, 7 * 8
>> +	REG_L	t4, sp, 8 * 8
>> +	REG_L	t5, sp, 9 * 8
>> +	REG_L	t6, sp, 10 * 8
>> +	REG_L	t7, sp, 11 * 8
>> +	REG_L	t8, sp, 12 * 8
>> +
>> +#ifdef USE_LASX
>> +	xvld	xr0, sp, 13*SZREG + 0*SZXREG
>> +	xvld	xr1, sp, 13*SZREG + 1*SZXREG
>> +	xvld	xr2, sp, 13*SZREG + 2*SZXREG
>> +	xvld	xr3, sp, 13*SZREG + 3*SZXREG
>> +	xvld	xr4, sp, 13*SZREG + 4*SZXREG
>> +	xvld	xr5, sp, 13*SZREG + 5*SZXREG
>> +	xvld	xr6, sp, 13*SZREG + 6*SZXREG
>> +	xvld	xr7, sp, 13*SZREG + 7*SZXREG
>> +	xvld	xr8, sp, 13*SZREG + 8*SZXREG
>> +	xvld	xr9, sp, 13*SZREG + 9*SZXREG
>> +	xvld	xr10, sp, 13*SZREG + 10*SZXREG
>> +	xvld	xr11, sp, 13*SZREG + 11*SZXREG
>> +	xvld	xr12, sp, 13*SZREG + 12*SZXREG
>> +	xvld	xr13, sp, 13*SZREG + 13*SZXREG
>> +	xvld	xr14, sp, 13*SZREG + 14*SZXREG
>> +	xvld	xr15, sp, 13*SZREG + 15*SZXREG
>> +	xvld	xr16, sp, 13*SZREG + 16*SZXREG
>> +	xvld	xr17, sp, 13*SZREG + 17*SZXREG
>> +	xvld	xr18, sp, 13*SZREG + 18*SZXREG
>> +	xvld	xr19, sp, 13*SZREG + 19*SZXREG
>> +	xvld	xr20, sp, 13*SZREG + 20*SZXREG
>> +	xvld	xr21, sp, 13*SZREG + 21*SZXREG
>> +	xvld	xr22, sp, 13*SZREG + 22*SZXREG
>> +	xvld	xr23, sp, 13*SZREG + 23*SZXREG
>> +	xvld	xr24, sp, 13*SZREG + 24*SZXREG
>> +	xvld	xr25, sp, 13*SZREG + 25*SZXREG
>> +	xvld	xr26, sp, 13*SZREG + 26*SZXREG
>> +	xvld	xr27, sp, 13*SZREG + 27*SZXREG
>> +	xvld	xr28, sp, 13*SZREG + 28*SZXREG
>> +	xvld	xr29, sp, 13*SZREG + 29*SZXREG
>> +	xvld	xr30, sp, 13*SZREG + 30*SZXREG
>> +	xvld	xr31, sp, 13*SZREG + 31*SZXREG
>> +	REG_L	t0, sp, 32*SZXREG
>> +	movgr2fcsr  fcsr0, t0
>> +#elif defined USE_LSX
>> +	vld	vr0, sp, 13*SZREG + 0*SZVREG
>> +	vld	vr1, sp, 13*SZREG + 1*SZVREG
>> +	vld	vr2, sp, 13*SZREG + 2*SZVREG
>> +	vld	vr3, sp, 13*SZREG + 3*SZVREG
>> +	vld	vr4, sp, 13*SZREG + 4*SZVREG
>> +	vld	vr5, sp, 13*SZREG + 5*SZVREG
>> +	vld	vr6, sp, 13*SZREG + 6*SZVREG
>> +	vld	vr7, sp, 13*SZREG + 7*SZVREG
>> +	vld	vr8, sp, 13*SZREG + 8*SZVREG
>> +	vld	vr9, sp, 13*SZREG + 9*SZVREG
>> +	vld	vr10, sp, 13*SZREG + 10*SZVREG
>> +	vld	vr11, sp, 13*SZREG + 11*SZVREG
>> +	vld	vr12, sp, 13*SZREG + 12*SZVREG
>> +	vld	vr13, sp, 13*SZREG + 13*SZVREG
>> +	vld	vr14, sp, 13*SZREG + 14*SZVREG
>> +	vld	vr15, sp, 13*SZREG + 15*SZVREG
>> +	vld	vr16, sp, 13*SZREG + 16*SZVREG
>> +	vld	vr17, sp, 13*SZREG + 17*SZVREG
>> +	vld	vr18, sp, 13*SZREG + 18*SZVREG
>> +	vld	vr19, sp, 13*SZREG + 19*SZVREG
>> +	vld	vr20, sp, 13*SZREG + 20*SZVREG
>> +	vld	vr21, sp, 13*SZREG + 21*SZVREG
>> +	vld	vr22, sp, 13*SZREG + 22*SZVREG
>> +	vld	vr23, sp, 13*SZREG + 23*SZVREG
>> +	vld	vr24, sp, 13*SZREG + 24*SZVREG
>> +	vld	vr25, sp, 13*SZREG + 25*SZVREG
>> +	vld	vr26, sp, 13*SZREG + 26*SZVREG
>> +	vld	vr27, sp, 13*SZREG + 27*SZVREG
>> +	vld	vr28, sp, 13*SZREG + 28*SZVREG
>> +	vld	vr29, sp, 13*SZREG + 29*SZVREG
>> +	vld	vr30, sp, 13*SZREG + 30*SZVREG
>> +	vld	vr31, sp, 13*SZREG + 31*SZVREG
>> +	REG_L	t0, sp, 32*SZVREG
>> +	movgr2fcsr  fcsr0, t0
>> +#elif !defined __loongarch_soft_float
>> +	FREG_L	fa0, sp, 13*SZREG + 0*SZFREG
>> +	FREG_L	fa1, sp, 13*SZREG + 1*SZFREG
>> +	FREG_L	fa2, sp, 13*SZREG + 2*SZFREG
>> +	FREG_L	fa3, sp, 13*SZREG + 3*SZFREG
>> +	FREG_L	fa4, sp, 13*SZREG + 4*SZFREG
>> +	FREG_L	fa5, sp, 13*SZREG + 5*SZFREG
>> +	FREG_L	fa6, sp, 13*SZREG + 6*SZFREG
>> +	FREG_L	fa7, sp, 13*SZREG + 7*SZFREG
>> +	FREG_L	ft0, sp, 13*SZREG + 8*SZFREG
>> +	FREG_L	ft1, sp, 13*SZREG + 9*SZFREG
>> +	FREG_L	ft2, sp, 13*SZREG + 10*SZFREG
>> +	FREG_L	ft3, sp, 13*SZREG + 11*SZFREG
>> +	FREG_L	ft4, sp, 13*SZREG + 12*SZFREG
>> +	FREG_L	ft5, sp, 13*SZREG + 13*SZFREG
>> +	FREG_L	ft6, sp, 13*SZREG + 14*SZFREG
>> +	FREG_L	ft7, sp, 13*SZREG + 15*SZFREG
>> +	FREG_L	ft8, sp, 13*SZREG + 16*SZFREG
>> +	FREG_L	ft9, sp, 13*SZREG + 17*SZFREG
>> +	FREG_L	ft10, sp, 13*SZREG + 18*SZFREG
>> +	FREG_L	ft11, sp, 13*SZREG + 19*SZFREG
>> +	FREG_L	ft12, sp, 13*SZREG + 20*SZFREG
>> +	FREG_L	ft13, sp, 13*SZREG + 21*SZFREG
>> +	FREG_L	ft14, sp, 13*SZREG + 22*SZFREG
>> +	FREG_L	ft15, sp, 13*SZREG + 23*SZFREG
>> +	REG_L	t0, sp, 24*SZFREG
>> +	movgr2fcsr  fcsr0, t0
>> +#endif /* #ifdef USE_LASX  */
>> +
>> +	ADDI	sp, sp, FRAME_SIZE
>> +	b	Lret
>> +	cfi_endproc
>> +	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>> +#endif /* #ifdef SHARED  */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>> new file mode 100644
>> index 0000000000..4a17079169
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>> @@ -0,0 +1,93 @@
>> +/* Thread-local storage handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> Update Copyright years to 2024.
>
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include "tlsdesc.h"
>> +
>> +	.text
>> +
>> +	/* Compute the thread pointer offset for symbols in the static
>> +	   TLS block. The offset is the same for all threads.
>> +	   Prototype:
>> +	   _dl_tlsdesc_return (tlsdesc *);  */
>> +	.hidden _dl_tlsdesc_return
>> +	.global	_dl_tlsdesc_return
>> +	.type	_dl_tlsdesc_return,%function
>> +	cfi_startproc
>> +	.align 2
>> +_dl_tlsdesc_return:
>> +	REG_L  a0, a0, 8
>> +	RET
>> +	cfi_endproc
>> +	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
>> +
>> +	/* Handler for undefined weak TLS symbols.
>> +	   Prototype:
>> +	   _dl_tlsdesc_undefweak (tlsdesc *);
>> +
>> +	   The second word of the descriptor contains the addend.
>> +	   Return the addend minus the thread pointer. This ensures
>> +	   that when the caller adds on the thread pointer it gets back
>> +	   the addend.  */
>> +	.hidden _dl_tlsdesc_undefweak
>> +	.global	_dl_tlsdesc_undefweak
>> +	.type	_dl_tlsdesc_undefweak,%function
>> +	cfi_startproc
>> +	.align  2
>> +_dl_tlsdesc_undefweak:
>> +	REG_L	a0, a0, 8
>> +	sub.d	a0, a0, tp
>> +	RET
>> +	cfi_endproc
>> +	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>> +
>> +
>> +#ifdef SHARED
>> +
>> +#if !defined __loongarch_soft_float
>> +
>> +#define USE_LASX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>> +#define Lret Lret_lasx
>> +#define Lslow Lslow_lasx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LASX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#define USE_LSX
>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>> +#define Lret Lret_lsx
>> +#define Lslow Lslow_lsx
>> +#include "dl-tlsdesc-dynamic.h"
>> +#undef FRAME_SIZE
>> +#undef USE_LSX
>> +#undef _dl_tlsdesc_dynamic
>> +#undef Lret
>> +#undef Lslow
>> +
>> +#endif
>> +
>> +#include "dl-tlsdesc-dynamic.h"
>> +
>> +#endif /* #ifdef SHARED  */
>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>> new file mode 100644
>> index 0000000000..988037a714
>> --- /dev/null
>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>> @@ -0,0 +1,53 @@
>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>> +   LoongArch version.
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#ifndef _DL_TLSDESC_H
>> +#define _DL_TLSDESC_H
>> +
>> +#include <dl-tls.h>
>> +
>> +/* Type used to represent a TLS descriptor in the GOT.  */
>> +struct tlsdesc
>> +{
>> +  ptrdiff_t (*entry) (struct tlsdesc *);
>> +  void *arg;
>> +};
>> +
>> +/* Type used as the argument in a TLS descriptor for a symbol that
>> +   needs dynamic TLS offsets.  */
>> +struct tlsdesc_dynamic_arg
>> +{
>> +  tls_index tlsinfo;
>> +  size_t gen_count;
>> +};
>> +
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>> +
>> +# ifdef SHARED
>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>> +#if !defined __loongarch_soft_float
> Minor style, usually for single tests we use '#ifndef' and add
> attribute_hidden at the end of prototype.
>
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>> +#endif
>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>> +#endif
>> +
>> +#endif
>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>> index 4d8737ee7f..9b1773634c 100644
>> --- a/sysdeps/loongarch/linkmap.h
>> +++ b/sysdeps/loongarch/linkmap.h
>> @@ -19,4 +19,5 @@
>>   struct link_map_machine
>>   {
>>     ElfW (Addr) plt; /* Address of .plt.  */
>> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>>   };
>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>> index 51521a7eb4..23c1d12914 100644
>> --- a/sysdeps/loongarch/sys/asm.h
>> +++ b/sysdeps/loongarch/sys/asm.h
>> @@ -25,6 +25,7 @@
>>   /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>>   #define SZREG 8
>>   #define SZFREG 8
>> +#define SZFCSREG 4
>>   #define SZVREG 16
>>   #define SZXREG 32
>>   #define REG_L ld.d
>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>> index f61ee25b25..80ce3e9c00 100644
>> --- a/sysdeps/loongarch/sys/regdef.h
>> +++ b/sysdeps/loongarch/sys/regdef.h
>> @@ -97,6 +97,7 @@
>>   #define fcc5 $fcc5
>>   #define fcc6 $fcc6
>>   #define fcc7 $fcc7
>> +#define fcsr0 $fcsr0
>>   
>>   #define vr0 $vr0
>>   #define vr1 $vr1
>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>> new file mode 100644
>> index 0000000000..a357e7619f
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.c
>> @@ -0,0 +1,39 @@
>> +/* Manage TLS descriptors.  AArch64 version.
>> +
>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> Update Copyright years to 2024 and remove the 'AArch64'.
>
>
>> +
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <ldsodefs.h>
>> +#include <tls.h>
>> +#include <dl-tlsdesc.h>
>> +#include <dl-unmap-segments.h>
>> +#include <tlsdeschtab.h>
>> +
>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>> +   if there is one.  */
>> +
>> +void
>> +_dl_unmap (struct link_map *map)
>> +{
>> +  _dl_unmap_segments (map);
>> +
>> +#ifdef SHARED
>> +  if (map->l_mach.tlsdesc_table)
>> +    htab_delete (map->l_mach.tlsdesc_table);
>> +#endif
>> +}
>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>> new file mode 100644
>> index 0000000000..bcab218631
>> --- /dev/null
>> +++ b/sysdeps/loongarch/tlsdesc.sym
>> @@ -0,0 +1,19 @@
>> +#include <stddef.h>
>> +#include <sysdep.h>
>> +#include <tls.h>
>> +#include <link.h>
>> +#include <dl-tlsdesc.h>
>> +
>> +--
>> +
>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>> +
>> +TLSDESC_ARG		offsetof(struct tlsdesc, arg)
>> +TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
>> +TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>> +TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>> +TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
>> +DTV_COUNTER		offsetof(dtv_t, counter)
>> +TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
>> +TLS_DTV_OFFSET		TLS_DTV_OFFSET
>> +SIZE_OF_DTV		sizeof(tcbhead_t)
>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> index 547b1c1b7f..ec32e6d13f 100644
>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>> @@ -5,3 +5,5 @@ libc.so: calloc
>>   libc.so: free
>>   libc.so: malloc
>>   libc.so: realloc
>> +# The dynamic loader needs __tls_get_addr for TLS.
>> +ld.so: __tls_get_addr


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-03-08  7:53   ` mengqinggang
@ 2024-03-08 14:10     ` Adhemerval Zanella Netto
  2024-03-11  8:45       ` mengqinggang
  0 siblings, 1 reply; 8+ messages in thread
From: Adhemerval Zanella Netto @ 2024-03-08 14:10 UTC (permalink / raw)
  To: mengqinggang, libc-alpha
  Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
	maskray, luweining, wanglei, hejinyang



On 08/03/24 04:53, mengqinggang wrote:
> Thanks a lot for the review! A new v3 version patch has been sent.
> https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
> 
> 
> And some reply below.
> 
> 

From the gcc enablement patch it seems that you are using the aarch64
ABI naming, -mtls-dialect={desc,trad}.  So I would suggest to check if
there is no regression with my patch to enable TLS descriptor for 
-mtls=desc [1].

You might also extend the testing to check for possible vector register
wrong save/restore, as I did to check for arm32 ones [2] (check 
the sysdeps/arm/tst-gnu2-tls2.h).

Also, I think this patch should be pushed only after gcc enablement
is installed.


[1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/
[2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/

> 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>>
>> On 28/02/24 22:43, mengqinggang wrote:
>>> This is mostly based on AArch64 and RISC-V implementation.
>>>
>>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>>
>>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>>> all vector registers.
>>> ---
>>> Changes v1 -> v2:
>>> - Fix vr24-vr31, xr24-xr31 typo.
>>> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
>>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>>
>>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>> Patch looks ok, some comments below.
>>
>>>   elf/elf.h                                     |   2 +
>>>   sysdeps/loongarch/Makefile                    |   6 +
>>>   sysdeps/loongarch/dl-link.sym                 |   1 +
>>>   sysdeps/loongarch/dl-machine.h                |  60 ++-
>>>   sysdeps/loongarch/dl-tls.h                    |   9 +-
>>>   sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>>>   sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>>>   sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>>>   sysdeps/loongarch/linkmap.h                   |   1 +
>>>   sysdeps/loongarch/sys/asm.h                   |   1 +
>>>   sysdeps/loongarch/sys/regdef.h                |   1 +
>>>   sysdeps/loongarch/tlsdesc.c                   |  39 ++
>>>   sysdeps/loongarch/tlsdesc.sym                 |  19 +
>>>   .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>>>   14 files changed, 625 insertions(+), 3 deletions(-)
>>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>>   create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>>   create mode 100644 sysdeps/loongarch/tlsdesc.c
>>>   create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>>
>>> diff --git a/elf/elf.h b/elf/elf.h
>>> index f2206e5c06..eec24ea049 100644
>>> --- a/elf/elf.h
>>> +++ b/elf/elf.h
>>> @@ -4237,6 +4237,8 @@ enum
>>>   #define R_LARCH_TLS_TPREL32    10
>>>   #define R_LARCH_TLS_TPREL64    11
>>>   #define R_LARCH_IRELATIVE    12
>>> +#define R_LARCH_TLS_DESC32    13
>>> +#define R_LARCH_TLS_DESC64    14
>>>     /* Reserved for future relocs that the dynamic linker must understand.  */
>>>   diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>>> index 43d2f583cd..181389e787 100644
>>> --- a/sysdeps/loongarch/Makefile
>>> +++ b/sysdeps/loongarch/Makefile
>>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>>   endif
>>>     ifeq ($(subdir),elf)
>>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>>   gen-as-const-headers += dl-link.sym
>>>   endif
>>>   +ifeq ($(subdir),csu)
>>> +gen-as-const-headers += tlsdesc.sym
>>> +endif
>>> +
>>> +
>>>   # LoongArch's assembler also needs to know about PIC as it changes the
>>>   # definition of some assembler macros.
>>>   ASFLAGS-.os += $(pic-ccflag)
>>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>>> index b534968e30..fd81ef37d5 100644
>>> --- a/sysdeps/loongarch/dl-link.sym
>>> +++ b/sysdeps/loongarch/dl-link.sym
>>> @@ -1,6 +1,7 @@
>>>   #include <stddef.h>
>>>   #include <sysdep.h>
>>>   #include <link.h>
>>> +#include <dl-tlsdesc.h>
>>>     DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>>>   DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
>>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>>> index ab81b82d95..8ca6c224f6 100644
>>> --- a/sysdeps/loongarch/dl-machine.h
>>> +++ b/sysdeps/loongarch/dl-machine.h
>>> @@ -25,7 +25,7 @@
>>>   #include <entry.h>
>>>   #include <elf/elf.h>
>>>   #include <sys/asm.h>
>>> -#include <dl-tls.h>
>>> +#include <dl-tlsdesc.h>
>>>   #include <dl-static-tls.h>
>>>   #include <dl-machine-rel.h>
>>>   @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>>         *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>>         break;
>>>   +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>>> +      {
>>> +    struct tlsdesc volatile *td =
>>> +        (struct tlsdesc volatile *)addr_field;
>>> +    if (! sym)
>>> +      {
>>> +        td->arg = (void*)reloc->r_addend;
>>> +        td->entry = _dl_tlsdesc_undefweak;
>>> +      }
>>> +    else
>>> +      {
>>> +# ifndef SHARED
>>> +        CHECK_STATIC_TLS (map, sym_map);
>>> +# else
>>> +        if (!TRY_STATIC_TLS (map, sym_map))
>>> +          {
>>> +        td->arg = _dl_make_tlsdesc_dynamic
>>> +          (sym_map, sym->st_value + reloc->r_addend);
>>> +# if !defined __loongarch_soft_float
>>> +        if (SUPPORT_LASX)
>>> +          td->entry = _dl_tlsdesc_dynamic_lasx;
>>> +        else
>>> +        if (SUPPORT_LSX)
>>> +          td->entry = _dl_tlsdesc_dynamic_lsx;
>>> +        else
>>> +# endif
>>> +          td->entry = _dl_tlsdesc_dynamic;
>>> +          }
>>> +        else
>>> +# endif
>>> +          {
>>> +        td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>>> +                + reloc->r_addend);
>>> +        td->entry = _dl_tlsdesc_return;
>>> +          }
>>> +      }
>>> +    break;
>>> +      }
>>> +
>>>       case R_LARCH_COPY:
>>>         {
>>>         if (sym == NULL)
>>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>>         else
>>>       *reloc_addr = map->l_mach.plt;
>>>       }
>>> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>> Use __glibc_likely here.
>>
>>> +    {
>>> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>>> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>>> +      const ElfW (Sym) *sym = &symtab[symndx];
>>> +      const struct r_found_version *version = NULL;
>>> +
>>> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>>> +    {
>>> +      const ElfW (Half) *vernum =
>>> +        (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>>> +      version = &map->l_versions[vernum[symndx] & 0x7fff];
>>> +    }
>>> +
>>> +      /* Always initialize TLS descriptors completely, because lazy
>>> +     initialization requires synchronization at every TLS access.  */
>>> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>>> +            skip_ifunc);
>>> +    }
>>>     else
>>>       _dl_reloc_bad_type (map, r_type, 1);
>>>   }
>>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>>> index 29924b866d..de593c002d 100644
>>> --- a/sysdeps/loongarch/dl-tls.h
>>> +++ b/sysdeps/loongarch/dl-tls.h
>>> @@ -16,6 +16,9 @@
>>>      License along with the GNU C Library.  If not, see
>>>      <https://www.gnu.org/licenses/>.  */
>>>   +#ifndef _DL_TLS_H
>>> +#define _DL_TLS_H
>>> +
>>>   /* Type used for the representation of TLS information in the GOT.  */
>>>   typedef struct
>>>   {
>>> @@ -23,6 +26,8 @@ typedef struct
>>>     unsigned long int ti_offset;
>>>   } tls_index;
>>>   +extern void *__tls_get_addr (tls_index *ti);
>>> +
>>>   /* The thread pointer points to the first static TLS block.  */
>>>   #define TLS_TP_OFFSET 0
>>>   @@ -37,10 +42,10 @@ typedef struct
>>>   /* Compute the value for a DTPREL reloc.  */
>>>   #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>>   -extern void *__tls_get_addr (tls_index *ti);
>>> -
>> Why move the function prototype?
> 
> 
> Maybe just want to take it out of a bunch of macros.
> 
> 
>>
>>>   #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>>   #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>>     /* Value used for dtv entries for which the allocation is delayed.  */
>>>   #define TLS_DTV_UNALLOCATED ((void *) -1l)
>>> +
>>> +#endif
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> new file mode 100644
>>> index 0000000000..0d8c9bb991
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>> @@ -0,0 +1,341 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> +   LoongArch version.
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024.
>>
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#ifdef USE_LASX
>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>>> +#elif defined USE_LSX
>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>>> +#elif !defined __loongarch_soft_float
>>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>>> +#else
>>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>>> +#endif
>> I don't have a strong opinion, but another option that might be simpler it
>> to provide only only one _dl_tlsdesc_dynamic implementation and check the
>> required save/restore of vector register based on hwcap value.
> 
> 
> The v3 patch provides only one  _dl_tlsdesc_dynamic implementation.
> 
> 
>>> +
>>> +#ifdef SHARED
>>> +    /* Handler for dynamic TLS symbols.
>>> +       Prototype:
>>> +       _dl_tlsdesc_dynamic (tlsdesc *) ;
>>> +
>>> +       The second word of the descriptor points to a
>>> +       tlsdesc_dynamic_arg structure.
>>> +
>>> +       Returns the offset between the thread pointer and the
>>> +       object referenced by the argument.
>>> +
>>> +       ptrdiff_t
>>> +       __attribute__ ((__regparm__ (1)))
>> Does this attribute really make sense for loongarch?
> 
> 
> This line has been deleted.
> 
> 
>>
>>> +       _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>> +       {
>>> +         struct tlsdesc_dynamic_arg *td = tdp->arg;
>>> +         dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>>> +         if (__builtin_expect (td->gen_count <= dtv[0].counter
>> Use __glibc_unlikely or just remove the __builtin_expect for clarity.
>>
>>> +        && (dtv[td->tlsinfo.ti_module].pointer.val
>>> +            != TLS_DTV_UNALLOCATED),
>>> +        1))
>>> +           return dtv[td->tlsinfo.ti_module].pointer.val
>>> +        + td->tlsinfo.ti_offset
>>> +        - __thread_pointer;
>>> +
>>> +         return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>> +       }
>>> +     */
>>> +    .hidden _dl_tlsdesc_dynamic
>>> +    .global    _dl_tlsdesc_dynamic
>>> +    .type    _dl_tlsdesc_dynamic,%function
>>> +    cfi_startproc
>>> +    .align 2
>>> +_dl_tlsdesc_dynamic:
>>> +    /* Save just enough registers to support fast path, if we fall
>>> +       into slow path we will save additional registers.  */
>>> +    ADDI    sp, sp,-24
>>> +    REG_S    t0, sp, 0
>>> +    REG_S    t1, sp, 8
>>> +    REG_S    t2, sp, 16
>>> +
>>> +    REG_L    t0, tp, -SIZE_OF_DTV      # dtv(t0) = tp + TCBHEAD_DTV dtv start
>>> +    REG_L    a0, a0, TLSDESC_ARG      # td(a0) = tdp->arg
>>> +    REG_L    t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>>> +    REG_L    t2, t0, DTV_COUNTER      # t2 = dtv[0].counter
>>> +    bltu    t2, t1, Lslow
>>> +
>>> +    REG_L    t1, a0, TLSDESC_MODID      # t1 = td->tlsinfo.ti_module
>>> +    slli.d    t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>>> +    add.d    t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
>>> +    REG_L    t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>>> +    li.d    t2, TLS_DTV_UNALLOCATED
>>> +    beq    t1, t2, Lslow
>>> +    REG_L    t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>>> +    # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>>> +    add.d    a0, t1, t2
>>> +Lret:
>>> +    sub.d    a0, a0, tp
>>> +    REG_L    t0, sp, 0
>>> +    REG_L    t1, sp, 8
>>> +    REG_L    t2, sp, 16
>>> +    ADDI    sp, sp, 24
>>> +    RET
>>> +
>>> +Lslow:
>>> +    /* This is the slow path. We need to call __tls_get_addr() which
>>> +       means we need to save and restore all the register that the
>>> +       callee will trash.  */
>>> +
>>> +    /* Save the remaining registers that we must treat as caller save.  */
>>> +    ADDI    sp, sp, -FRAME_SIZE
>>> +    REG_S    ra, sp, 0 * SZREG
>>> +    REG_S    a1, sp, 1 * SZREG
>>> +    REG_S    a2, sp, 2 * SZREG
>>> +    REG_S    a3, sp, 3 * SZREG
>>> +    REG_S    a4, sp, 4 * SZREG
>>> +    REG_S    a5, sp, 5 * SZREG
>>> +    REG_S    a6, sp, 6 * SZREG
>>> +    REG_S    a7, sp, 7 * SZREG
>>> +    REG_S    t4, sp, 8 * SZREG
>>> +    REG_S    t5, sp, 9 * SZREG
>>> +    REG_S    t6, sp, 10 * SZREG
>>> +    REG_S    t7, sp, 11 * SZREG
>>> +    REG_S    t8, sp, 12 * SZREG
>>> +
>>> +#ifdef USE_LASX
>>> +    xvst    xr0, sp, 13*SZREG + 0*SZXREG
>>> +    xvst    xr1, sp, 13*SZREG + 1*SZXREG
>>> +    xvst    xr2, sp, 13*SZREG + 2*SZXREG
>>> +    xvst    xr3, sp, 13*SZREG + 3*SZXREG
>>> +    xvst    xr4, sp, 13*SZREG + 4*SZXREG
>>> +    xvst    xr5, sp, 13*SZREG + 5*SZXREG
>>> +    xvst    xr6, sp, 13*SZREG + 6*SZXREG
>>> +    xvst    xr7, sp, 13*SZREG + 7*SZXREG
>>> +    xvst    xr8, sp, 13*SZREG + 8*SZXREG
>>> +    xvst    xr9, sp, 13*SZREG + 9*SZXREG
>>> +    xvst    xr10, sp, 13*SZREG + 10*SZXREG
>>> +    xvst    xr11, sp, 13*SZREG + 11*SZXREG
>>> +    xvst    xr12, sp, 13*SZREG + 12*SZXREG
>>> +    xvst    xr13, sp, 13*SZREG + 13*SZXREG
>>> +    xvst    xr14, sp, 13*SZREG + 14*SZXREG
>>> +    xvst    xr15, sp, 13*SZREG + 15*SZXREG
>>> +    xvst    xr16, sp, 13*SZREG + 16*SZXREG
>>> +    xvst    xr17, sp, 13*SZREG + 17*SZXREG
>>> +    xvst    xr18, sp, 13*SZREG + 18*SZXREG
>>> +    xvst    xr19, sp, 13*SZREG + 19*SZXREG
>>> +    xvst    xr20, sp, 13*SZREG + 20*SZXREG
>>> +    xvst    xr21, sp, 13*SZREG + 21*SZXREG
>>> +    xvst    xr22, sp, 13*SZREG + 22*SZXREG
>>> +    xvst    xr23, sp, 13*SZREG + 23*SZXREG
>>> +    xvst    xr24, sp, 13*SZREG + 24*SZXREG
>>> +    xvst    xr25, sp, 13*SZREG + 25*SZXREG
>>> +    xvst    xr26, sp, 13*SZREG + 26*SZXREG
>>> +    xvst    xr27, sp, 13*SZREG + 27*SZXREG
>>> +    xvst    xr28, sp, 13*SZREG + 28*SZXREG
>>> +    xvst    xr29, sp, 13*SZREG + 29*SZXREG
>>> +    xvst    xr30, sp, 13*SZREG + 30*SZXREG
>>> +    xvst    xr31, sp, 13*SZREG + 31*SZXREG
>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> +    # some fields in fcsr0
>>> +    movfcsr2gr  t0, fcsr0
>>> +    REG_S        t0, sp, 32*SZXREG
>>> +#elif defined USE_LSX
>>> +    vst    vr0, sp, 13*SZREG + 0*SZVREG
>>> +    vst    vr1, sp, 13*SZREG + 1*SZVREG
>>> +    vst    vr2, sp, 13*SZREG + 2*SZVREG
>>> +    vst    vr3, sp, 13*SZREG + 3*SZVREG
>>> +    vst    vr4, sp, 13*SZREG + 4*SZVREG
>>> +    vst    vr5, sp, 13*SZREG + 5*SZVREG
>>> +    vst    vr6, sp, 13*SZREG + 6*SZVREG
>>> +    vst    vr7, sp, 13*SZREG + 7*SZVREG
>>> +    vst    vr8, sp, 13*SZREG + 8*SZVREG
>>> +    vst    vr9, sp, 13*SZREG + 9*SZVREG
>>> +    vst    vr10, sp, 13*SZREG + 10*SZVREG
>>> +    vst    vr11, sp, 13*SZREG + 11*SZVREG
>>> +    vst    vr12, sp, 13*SZREG + 12*SZVREG
>>> +    vst    vr13, sp, 13*SZREG + 13*SZVREG
>>> +    vst    vr14, sp, 13*SZREG + 14*SZVREG
>>> +    vst    vr15, sp, 13*SZREG + 15*SZVREG
>>> +    vst    vr16, sp, 13*SZREG + 16*SZVREG
>>> +    vst    vr17, sp, 13*SZREG + 17*SZVREG
>>> +    vst    vr18, sp, 13*SZREG + 18*SZVREG
>>> +    vst    vr19, sp, 13*SZREG + 19*SZVREG
>>> +    vst    vr20, sp, 13*SZREG + 20*SZVREG
>>> +    vst    vr21, sp, 13*SZREG + 21*SZVREG
>>> +    vst    vr22, sp, 13*SZREG + 22*SZVREG
>>> +    vst    vr23, sp, 13*SZREG + 23*SZVREG
>>> +    vst    vr24, sp, 13*SZREG + 24*SZVREG
>>> +    vst    vr25, sp, 13*SZREG + 25*SZVREG
>>> +    vst    vr26, sp, 13*SZREG + 26*SZVREG
>>> +    vst    vr27, sp, 13*SZREG + 27*SZVREG
>>> +    vst    vr28, sp, 13*SZREG + 28*SZVREG
>>> +    vst    vr29, sp, 13*SZREG + 29*SZVREG
>>> +    vst    vr30, sp, 13*SZREG + 30*SZVREG
>>> +    vst    vr31, sp, 13*SZREG + 31*SZVREG
>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> +    # some fields in fcsr0
>>> +    movfcsr2gr  t0, fcsr0
>>> +    REG_S        t0, sp, 32*SZVREG
>>> +#elif !defined __loongarch_soft_float
>>> +    FREG_S    fa0, sp, 13*SZREG + 0*SZFREG
>>> +    FREG_S    fa1, sp, 13*SZREG + 1*SZFREG
>>> +    FREG_S    fa2, sp, 13*SZREG + 2*SZFREG
>>> +    FREG_S    fa3, sp, 13*SZREG + 3*SZFREG
>>> +    FREG_S    fa4, sp, 13*SZREG + 4*SZFREG
>>> +    FREG_S    fa5, sp, 13*SZREG + 5*SZFREG
>>> +    FREG_S    fa6, sp, 13*SZREG + 6*SZFREG
>>> +    FREG_S    fa7, sp, 13*SZREG + 7*SZFREG
>>> +    FREG_S    ft0, sp, 13*SZREG + 8*SZFREG
>>> +    FREG_S    ft1, sp, 13*SZREG + 9*SZFREG
>>> +    FREG_S    ft2, sp, 13*SZREG + 10*SZFREG
>>> +    FREG_S    ft3, sp, 13*SZREG + 11*SZFREG
>>> +    FREG_S    ft4, sp, 13*SZREG + 12*SZFREG
>>> +    FREG_S    ft5, sp, 13*SZREG + 13*SZFREG
>>> +    FREG_S    ft6, sp, 13*SZREG + 14*SZFREG
>>> +    FREG_S    ft7, sp, 13*SZREG + 15*SZFREG
>>> +    FREG_S    ft8, sp, 13*SZREG + 16*SZFREG
>>> +    FREG_S    ft9, sp, 13*SZREG + 17*SZFREG
>>> +    FREG_S    ft10, sp, 13*SZREG + 18*SZFREG
>>> +    FREG_S    ft11, sp, 13*SZREG + 19*SZFREG
>>> +    FREG_S    ft12, sp, 13*SZREG + 20*SZFREG
>>> +    FREG_S    ft13, sp, 13*SZREG + 21*SZFREG
>>> +    FREG_S    ft14, sp, 13*SZREG + 22*SZFREG
>>> +    FREG_S    ft15, sp, 13*SZREG + 23*SZFREG
>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>> +    # some fields in fcsr0
>>> +    movfcsr2gr  t0, fcsr0
>>> +    REG_S        t0, sp, 24*SZFREG
>>> +#endif /* #ifdef USE_LASX  */
>>> +
>>> +    bl    __tls_get_addr
>>> +    ADDI    a0, a0, -TLS_DTV_OFFSET
>>> +
>>> +    REG_L    ra, sp, 0
>>> +    REG_L    a1, sp, 1 * 8
>>> +    REG_L    a2, sp, 2 * 8
>>> +    REG_L    a3, sp, 3 * 8
>>> +    REG_L    a4, sp, 4 * 8
>>> +    REG_L    a5, sp, 5 * 8
>>> +    REG_L    a6, sp, 6 * 8
>>> +    REG_L    a7, sp, 7 * 8
>>> +    REG_L    t4, sp, 8 * 8
>>> +    REG_L    t5, sp, 9 * 8
>>> +    REG_L    t6, sp, 10 * 8
>>> +    REG_L    t7, sp, 11 * 8
>>> +    REG_L    t8, sp, 12 * 8
>>> +
>>> +#ifdef USE_LASX
>>> +    xvld    xr0, sp, 13*SZREG + 0*SZXREG
>>> +    xvld    xr1, sp, 13*SZREG + 1*SZXREG
>>> +    xvld    xr2, sp, 13*SZREG + 2*SZXREG
>>> +    xvld    xr3, sp, 13*SZREG + 3*SZXREG
>>> +    xvld    xr4, sp, 13*SZREG + 4*SZXREG
>>> +    xvld    xr5, sp, 13*SZREG + 5*SZXREG
>>> +    xvld    xr6, sp, 13*SZREG + 6*SZXREG
>>> +    xvld    xr7, sp, 13*SZREG + 7*SZXREG
>>> +    xvld    xr8, sp, 13*SZREG + 8*SZXREG
>>> +    xvld    xr9, sp, 13*SZREG + 9*SZXREG
>>> +    xvld    xr10, sp, 13*SZREG + 10*SZXREG
>>> +    xvld    xr11, sp, 13*SZREG + 11*SZXREG
>>> +    xvld    xr12, sp, 13*SZREG + 12*SZXREG
>>> +    xvld    xr13, sp, 13*SZREG + 13*SZXREG
>>> +    xvld    xr14, sp, 13*SZREG + 14*SZXREG
>>> +    xvld    xr15, sp, 13*SZREG + 15*SZXREG
>>> +    xvld    xr16, sp, 13*SZREG + 16*SZXREG
>>> +    xvld    xr17, sp, 13*SZREG + 17*SZXREG
>>> +    xvld    xr18, sp, 13*SZREG + 18*SZXREG
>>> +    xvld    xr19, sp, 13*SZREG + 19*SZXREG
>>> +    xvld    xr20, sp, 13*SZREG + 20*SZXREG
>>> +    xvld    xr21, sp, 13*SZREG + 21*SZXREG
>>> +    xvld    xr22, sp, 13*SZREG + 22*SZXREG
>>> +    xvld    xr23, sp, 13*SZREG + 23*SZXREG
>>> +    xvld    xr24, sp, 13*SZREG + 24*SZXREG
>>> +    xvld    xr25, sp, 13*SZREG + 25*SZXREG
>>> +    xvld    xr26, sp, 13*SZREG + 26*SZXREG
>>> +    xvld    xr27, sp, 13*SZREG + 27*SZXREG
>>> +    xvld    xr28, sp, 13*SZREG + 28*SZXREG
>>> +    xvld    xr29, sp, 13*SZREG + 29*SZXREG
>>> +    xvld    xr30, sp, 13*SZREG + 30*SZXREG
>>> +    xvld    xr31, sp, 13*SZREG + 31*SZXREG
>>> +    REG_L    t0, sp, 32*SZXREG
>>> +    movgr2fcsr  fcsr0, t0
>>> +#elif defined USE_LSX
>>> +    vld    vr0, sp, 13*SZREG + 0*SZVREG
>>> +    vld    vr1, sp, 13*SZREG + 1*SZVREG
>>> +    vld    vr2, sp, 13*SZREG + 2*SZVREG
>>> +    vld    vr3, sp, 13*SZREG + 3*SZVREG
>>> +    vld    vr4, sp, 13*SZREG + 4*SZVREG
>>> +    vld    vr5, sp, 13*SZREG + 5*SZVREG
>>> +    vld    vr6, sp, 13*SZREG + 6*SZVREG
>>> +    vld    vr7, sp, 13*SZREG + 7*SZVREG
>>> +    vld    vr8, sp, 13*SZREG + 8*SZVREG
>>> +    vld    vr9, sp, 13*SZREG + 9*SZVREG
>>> +    vld    vr10, sp, 13*SZREG + 10*SZVREG
>>> +    vld    vr11, sp, 13*SZREG + 11*SZVREG
>>> +    vld    vr12, sp, 13*SZREG + 12*SZVREG
>>> +    vld    vr13, sp, 13*SZREG + 13*SZVREG
>>> +    vld    vr14, sp, 13*SZREG + 14*SZVREG
>>> +    vld    vr15, sp, 13*SZREG + 15*SZVREG
>>> +    vld    vr16, sp, 13*SZREG + 16*SZVREG
>>> +    vld    vr17, sp, 13*SZREG + 17*SZVREG
>>> +    vld    vr18, sp, 13*SZREG + 18*SZVREG
>>> +    vld    vr19, sp, 13*SZREG + 19*SZVREG
>>> +    vld    vr20, sp, 13*SZREG + 20*SZVREG
>>> +    vld    vr21, sp, 13*SZREG + 21*SZVREG
>>> +    vld    vr22, sp, 13*SZREG + 22*SZVREG
>>> +    vld    vr23, sp, 13*SZREG + 23*SZVREG
>>> +    vld    vr24, sp, 13*SZREG + 24*SZVREG
>>> +    vld    vr25, sp, 13*SZREG + 25*SZVREG
>>> +    vld    vr26, sp, 13*SZREG + 26*SZVREG
>>> +    vld    vr27, sp, 13*SZREG + 27*SZVREG
>>> +    vld    vr28, sp, 13*SZREG + 28*SZVREG
>>> +    vld    vr29, sp, 13*SZREG + 29*SZVREG
>>> +    vld    vr30, sp, 13*SZREG + 30*SZVREG
>>> +    vld    vr31, sp, 13*SZREG + 31*SZVREG
>>> +    REG_L    t0, sp, 32*SZVREG
>>> +    movgr2fcsr  fcsr0, t0
>>> +#elif !defined __loongarch_soft_float
>>> +    FREG_L    fa0, sp, 13*SZREG + 0*SZFREG
>>> +    FREG_L    fa1, sp, 13*SZREG + 1*SZFREG
>>> +    FREG_L    fa2, sp, 13*SZREG + 2*SZFREG
>>> +    FREG_L    fa3, sp, 13*SZREG + 3*SZFREG
>>> +    FREG_L    fa4, sp, 13*SZREG + 4*SZFREG
>>> +    FREG_L    fa5, sp, 13*SZREG + 5*SZFREG
>>> +    FREG_L    fa6, sp, 13*SZREG + 6*SZFREG
>>> +    FREG_L    fa7, sp, 13*SZREG + 7*SZFREG
>>> +    FREG_L    ft0, sp, 13*SZREG + 8*SZFREG
>>> +    FREG_L    ft1, sp, 13*SZREG + 9*SZFREG
>>> +    FREG_L    ft2, sp, 13*SZREG + 10*SZFREG
>>> +    FREG_L    ft3, sp, 13*SZREG + 11*SZFREG
>>> +    FREG_L    ft4, sp, 13*SZREG + 12*SZFREG
>>> +    FREG_L    ft5, sp, 13*SZREG + 13*SZFREG
>>> +    FREG_L    ft6, sp, 13*SZREG + 14*SZFREG
>>> +    FREG_L    ft7, sp, 13*SZREG + 15*SZFREG
>>> +    FREG_L    ft8, sp, 13*SZREG + 16*SZFREG
>>> +    FREG_L    ft9, sp, 13*SZREG + 17*SZFREG
>>> +    FREG_L    ft10, sp, 13*SZREG + 18*SZFREG
>>> +    FREG_L    ft11, sp, 13*SZREG + 19*SZFREG
>>> +    FREG_L    ft12, sp, 13*SZREG + 20*SZFREG
>>> +    FREG_L    ft13, sp, 13*SZREG + 21*SZFREG
>>> +    FREG_L    ft14, sp, 13*SZREG + 22*SZFREG
>>> +    FREG_L    ft15, sp, 13*SZREG + 23*SZFREG
>>> +    REG_L    t0, sp, 24*SZFREG
>>> +    movgr2fcsr  fcsr0, t0
>>> +#endif /* #ifdef USE_LASX  */
>>> +
>>> +    ADDI    sp, sp, FRAME_SIZE
>>> +    b    Lret
>>> +    cfi_endproc
>>> +    .size    _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>> +#endif /* #ifdef SHARED  */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>>> new file mode 100644
>>> index 0000000000..4a17079169
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>>> @@ -0,0 +1,93 @@
>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>> +   LoongArch version.
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024.
>>
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <sysdep.h>
>>> +#include <tls.h>
>>> +#include "tlsdesc.h"
>>> +
>>> +    .text
>>> +
>>> +    /* Compute the thread pointer offset for symbols in the static
>>> +       TLS block. The offset is the same for all threads.
>>> +       Prototype:
>>> +       _dl_tlsdesc_return (tlsdesc *);  */
>>> +    .hidden _dl_tlsdesc_return
>>> +    .global    _dl_tlsdesc_return
>>> +    .type    _dl_tlsdesc_return,%function
>>> +    cfi_startproc
>>> +    .align 2
>>> +_dl_tlsdesc_return:
>>> +    REG_L  a0, a0, 8
>>> +    RET
>>> +    cfi_endproc
>>> +    .size    _dl_tlsdesc_return, .-_dl_tlsdesc_return
>>> +
>>> +    /* Handler for undefined weak TLS symbols.
>>> +       Prototype:
>>> +       _dl_tlsdesc_undefweak (tlsdesc *);
>>> +
>>> +       The second word of the descriptor contains the addend.
>>> +       Return the addend minus the thread pointer. This ensures
>>> +       that when the caller adds on the thread pointer it gets back
>>> +       the addend.  */
>>> +    .hidden _dl_tlsdesc_undefweak
>>> +    .global    _dl_tlsdesc_undefweak
>>> +    .type    _dl_tlsdesc_undefweak,%function
>>> +    cfi_startproc
>>> +    .align  2
>>> +_dl_tlsdesc_undefweak:
>>> +    REG_L    a0, a0, 8
>>> +    sub.d    a0, a0, tp
>>> +    RET
>>> +    cfi_endproc
>>> +    .size    _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>>> +
>>> +
>>> +#ifdef SHARED
>>> +
>>> +#if !defined __loongarch_soft_float
>>> +
>>> +#define USE_LASX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>>> +#define Lret Lret_lasx
>>> +#define Lslow Lslow_lasx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LASX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#define USE_LSX
>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>>> +#define Lret Lret_lsx
>>> +#define Lslow Lslow_lsx
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +#undef FRAME_SIZE
>>> +#undef USE_LSX
>>> +#undef _dl_tlsdesc_dynamic
>>> +#undef Lret
>>> +#undef Lslow
>>> +
>>> +#endif
>>> +
>>> +#include "dl-tlsdesc-dynamic.h"
>>> +
>>> +#endif /* #ifdef SHARED  */
>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>>> new file mode 100644
>>> index 0000000000..988037a714
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>>> @@ -0,0 +1,53 @@
>>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>>> +   LoongArch version.
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#ifndef _DL_TLSDESC_H
>>> +#define _DL_TLSDESC_H
>>> +
>>> +#include <dl-tls.h>
>>> +
>>> +/* Type used to represent a TLS descriptor in the GOT.  */
>>> +struct tlsdesc
>>> +{
>>> +  ptrdiff_t (*entry) (struct tlsdesc *);
>>> +  void *arg;
>>> +};
>>> +
>>> +/* Type used as the argument in a TLS descriptor for a symbol that
>>> +   needs dynamic TLS offsets.  */
>>> +struct tlsdesc_dynamic_arg
>>> +{
>>> +  tls_index tlsinfo;
>>> +  size_t gen_count;
>>> +};
>>> +
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>>> +
>>> +# ifdef SHARED
>>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>>> +#if !defined __loongarch_soft_float
>> Minor style, usually for single tests we use '#ifndef' and add
>> attribute_hidden at the end of prototype.
>>
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>>> +#endif
>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>>> +#endif
>>> +
>>> +#endif
>>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>>> index 4d8737ee7f..9b1773634c 100644
>>> --- a/sysdeps/loongarch/linkmap.h
>>> +++ b/sysdeps/loongarch/linkmap.h
>>> @@ -19,4 +19,5 @@
>>>   struct link_map_machine
>>>   {
>>>     ElfW (Addr) plt; /* Address of .plt.  */
>>> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>>>   };
>>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>>> index 51521a7eb4..23c1d12914 100644
>>> --- a/sysdeps/loongarch/sys/asm.h
>>> +++ b/sysdeps/loongarch/sys/asm.h
>>> @@ -25,6 +25,7 @@
>>>   /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>>>   #define SZREG 8
>>>   #define SZFREG 8
>>> +#define SZFCSREG 4
>>>   #define SZVREG 16
>>>   #define SZXREG 32
>>>   #define REG_L ld.d
>>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>>> index f61ee25b25..80ce3e9c00 100644
>>> --- a/sysdeps/loongarch/sys/regdef.h
>>> +++ b/sysdeps/loongarch/sys/regdef.h
>>> @@ -97,6 +97,7 @@
>>>   #define fcc5 $fcc5
>>>   #define fcc6 $fcc6
>>>   #define fcc7 $fcc7
>>> +#define fcsr0 $fcsr0
>>>     #define vr0 $vr0
>>>   #define vr1 $vr1
>>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>>> new file mode 100644
>>> index 0000000000..a357e7619f
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/tlsdesc.c
>>> @@ -0,0 +1,39 @@
>>> +/* Manage TLS descriptors.  AArch64 version.
>>> +
>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>> Update Copyright years to 2024 and remove the 'AArch64'.
>>
>>
>>> +
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <ldsodefs.h>
>>> +#include <tls.h>
>>> +#include <dl-tlsdesc.h>
>>> +#include <dl-unmap-segments.h>
>>> +#include <tlsdeschtab.h>
>>> +
>>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>>> +   if there is one.  */
>>> +
>>> +void
>>> +_dl_unmap (struct link_map *map)
>>> +{
>>> +  _dl_unmap_segments (map);
>>> +
>>> +#ifdef SHARED
>>> +  if (map->l_mach.tlsdesc_table)
>>> +    htab_delete (map->l_mach.tlsdesc_table);
>>> +#endif
>>> +}
>>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>>> new file mode 100644
>>> index 0000000000..bcab218631
>>> --- /dev/null
>>> +++ b/sysdeps/loongarch/tlsdesc.sym
>>> @@ -0,0 +1,19 @@
>>> +#include <stddef.h>
>>> +#include <sysdep.h>
>>> +#include <tls.h>
>>> +#include <link.h>
>>> +#include <dl-tlsdesc.h>
>>> +
>>> +--
>>> +
>>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>>> +
>>> +TLSDESC_ARG        offsetof(struct tlsdesc, arg)
>>> +TLSDESC_GEN_COUNT    offsetof(struct tlsdesc_dynamic_arg, gen_count)
>>> +TLSDESC_MODID        offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>>> +TLSDESC_MODOFF        offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>>> +TCBHEAD_DTV        offsetof(tcbhead_t, dtv)
>>> +DTV_COUNTER        offsetof(dtv_t, counter)
>>> +TLS_DTV_UNALLOCATED    TLS_DTV_UNALLOCATED
>>> +TLS_DTV_OFFSET        TLS_DTV_OFFSET
>>> +SIZE_OF_DTV        sizeof(tcbhead_t)
>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> index 547b1c1b7f..ec32e6d13f 100644
>>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>> @@ -5,3 +5,5 @@ libc.so: calloc
>>>   libc.so: free
>>>   libc.so: malloc
>>>   libc.so: realloc
>>> +# The dynamic loader needs __tls_get_addr for TLS.
>>> +ld.so: __tls_get_addr
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
  2024-03-08 14:10     ` Adhemerval Zanella Netto
@ 2024-03-11  8:45       ` mengqinggang
  0 siblings, 0 replies; 8+ messages in thread
From: mengqinggang @ 2024-03-11  8:45 UTC (permalink / raw)
  To: Adhemerval Zanella Netto, libc-alpha
  Cc: xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail,
	maskray, luweining, wanglei, hejinyang

Thanks,  I will first complete the gcc patch as soon as possible.


在 2024/3/8 下午10:10, Adhemerval Zanella Netto 写道:
>
> On 08/03/24 04:53, mengqinggang wrote:
>> Thanks a lot for the review! A new v3 version patch has been sent.
>> https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html
>>
>>
>> And some reply below.
>>
>>
>  From the gcc enablement patch it seems that you are using the aarch64
> ABI naming, -mtls-dialect={desc,trad}.  So I would suggest to check if
> there is no regression with my patch to enable TLS descriptor for
> -mtls=desc [1].
>
> You might also extend the testing to check for possible vector register
> wrong save/restore, as I did to check for arm32 ones [2] (check
> the sysdeps/arm/tst-gnu2-tls2.h).
>
> Also, I think this patch should be pushed only after gcc enablement
> is installed.
>
>
> [1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/
> [2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/
>
>> 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道:
>>> On 28/02/24 22:43, mengqinggang wrote:
>>>> This is mostly based on AArch64 and RISC-V implementation.
>>>>
>>>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>>>>
>>>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
>>>> all vector registers.
>>>> ---
>>>> Changes v1 -> v2:
>>>> - Fix vr24-vr31, xr24-xr31 typo.
>>>> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic.
>>>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>>>>
>>>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>>> Patch looks ok, some comments below.
>>>
>>>>    elf/elf.h                                     |   2 +
>>>>    sysdeps/loongarch/Makefile                    |   6 +
>>>>    sysdeps/loongarch/dl-link.sym                 |   1 +
>>>>    sysdeps/loongarch/dl-machine.h                |  60 ++-
>>>>    sysdeps/loongarch/dl-tls.h                    |   9 +-
>>>>    sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>>>>    sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>>>>    sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>>>>    sysdeps/loongarch/linkmap.h                   |   1 +
>>>>    sysdeps/loongarch/sys/asm.h                   |   1 +
>>>>    sysdeps/loongarch/sys/regdef.h                |   1 +
>>>>    sysdeps/loongarch/tlsdesc.c                   |  39 ++
>>>>    sysdeps/loongarch/tlsdesc.sym                 |  19 +
>>>>    .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>>>>    14 files changed, 625 insertions(+), 3 deletions(-)
>>>>    create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>>    create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>>>>    create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>>>>    create mode 100644 sysdeps/loongarch/tlsdesc.c
>>>>    create mode 100644 sysdeps/loongarch/tlsdesc.sym
>>>>
>>>> diff --git a/elf/elf.h b/elf/elf.h
>>>> index f2206e5c06..eec24ea049 100644
>>>> --- a/elf/elf.h
>>>> +++ b/elf/elf.h
>>>> @@ -4237,6 +4237,8 @@ enum
>>>>    #define R_LARCH_TLS_TPREL32    10
>>>>    #define R_LARCH_TLS_TPREL64    11
>>>>    #define R_LARCH_IRELATIVE    12
>>>> +#define R_LARCH_TLS_DESC32    13
>>>> +#define R_LARCH_TLS_DESC64    14
>>>>      /* Reserved for future relocs that the dynamic linker must understand.  */
>>>>    diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
>>>> index 43d2f583cd..181389e787 100644
>>>> --- a/sysdeps/loongarch/Makefile
>>>> +++ b/sysdeps/loongarch/Makefile
>>>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>>>>    endif
>>>>      ifeq ($(subdir),elf)
>>>> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>>>>    gen-as-const-headers += dl-link.sym
>>>>    endif
>>>>    +ifeq ($(subdir),csu)
>>>> +gen-as-const-headers += tlsdesc.sym
>>>> +endif
>>>> +
>>>> +
>>>>    # LoongArch's assembler also needs to know about PIC as it changes the
>>>>    # definition of some assembler macros.
>>>>    ASFLAGS-.os += $(pic-ccflag)
>>>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
>>>> index b534968e30..fd81ef37d5 100644
>>>> --- a/sysdeps/loongarch/dl-link.sym
>>>> +++ b/sysdeps/loongarch/dl-link.sym
>>>> @@ -1,6 +1,7 @@
>>>>    #include <stddef.h>
>>>>    #include <sysdep.h>
>>>>    #include <link.h>
>>>> +#include <dl-tlsdesc.h>
>>>>      DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>>>>    DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
>>>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
>>>> index ab81b82d95..8ca6c224f6 100644
>>>> --- a/sysdeps/loongarch/dl-machine.h
>>>> +++ b/sysdeps/loongarch/dl-machine.h
>>>> @@ -25,7 +25,7 @@
>>>>    #include <entry.h>
>>>>    #include <elf/elf.h>
>>>>    #include <sys/asm.h>
>>>> -#include <dl-tls.h>
>>>> +#include <dl-tlsdesc.h>
>>>>    #include <dl-static-tls.h>
>>>>    #include <dl-machine-rel.h>
>>>>    @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>>>>          *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>>>>          break;
>>>>    +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
>>>> +      {
>>>> +    struct tlsdesc volatile *td =
>>>> +        (struct tlsdesc volatile *)addr_field;
>>>> +    if (! sym)
>>>> +      {
>>>> +        td->arg = (void*)reloc->r_addend;
>>>> +        td->entry = _dl_tlsdesc_undefweak;
>>>> +      }
>>>> +    else
>>>> +      {
>>>> +# ifndef SHARED
>>>> +        CHECK_STATIC_TLS (map, sym_map);
>>>> +# else
>>>> +        if (!TRY_STATIC_TLS (map, sym_map))
>>>> +          {
>>>> +        td->arg = _dl_make_tlsdesc_dynamic
>>>> +          (sym_map, sym->st_value + reloc->r_addend);
>>>> +# if !defined __loongarch_soft_float
>>>> +        if (SUPPORT_LASX)
>>>> +          td->entry = _dl_tlsdesc_dynamic_lasx;
>>>> +        else
>>>> +        if (SUPPORT_LSX)
>>>> +          td->entry = _dl_tlsdesc_dynamic_lsx;
>>>> +        else
>>>> +# endif
>>>> +          td->entry = _dl_tlsdesc_dynamic;
>>>> +          }
>>>> +        else
>>>> +# endif
>>>> +          {
>>>> +        td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
>>>> +                + reloc->r_addend);
>>>> +        td->entry = _dl_tlsdesc_return;
>>>> +          }
>>>> +      }
>>>> +    break;
>>>> +      }
>>>> +
>>>>        case R_LARCH_COPY:
>>>>          {
>>>>          if (sym == NULL)
>>>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>>>>          else
>>>>        *reloc_addr = map->l_mach.plt;
>>>>        }
>>>> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
>>> Use __glibc_likely here.
>>>
>>>> +    {
>>>> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
>>>> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
>>>> +      const ElfW (Sym) *sym = &symtab[symndx];
>>>> +      const struct r_found_version *version = NULL;
>>>> +
>>>> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
>>>> +    {
>>>> +      const ElfW (Half) *vernum =
>>>> +        (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
>>>> +      version = &map->l_versions[vernum[symndx] & 0x7fff];
>>>> +    }
>>>> +
>>>> +      /* Always initialize TLS descriptors completely, because lazy
>>>> +     initialization requires synchronization at every TLS access.  */
>>>> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
>>>> +            skip_ifunc);
>>>> +    }
>>>>      else
>>>>        _dl_reloc_bad_type (map, r_type, 1);
>>>>    }
>>>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
>>>> index 29924b866d..de593c002d 100644
>>>> --- a/sysdeps/loongarch/dl-tls.h
>>>> +++ b/sysdeps/loongarch/dl-tls.h
>>>> @@ -16,6 +16,9 @@
>>>>       License along with the GNU C Library.  If not, see
>>>>       <https://www.gnu.org/licenses/>.  */
>>>>    +#ifndef _DL_TLS_H
>>>> +#define _DL_TLS_H
>>>> +
>>>>    /* Type used for the representation of TLS information in the GOT.  */
>>>>    typedef struct
>>>>    {
>>>> @@ -23,6 +26,8 @@ typedef struct
>>>>      unsigned long int ti_offset;
>>>>    } tls_index;
>>>>    +extern void *__tls_get_addr (tls_index *ti);
>>>> +
>>>>    /* The thread pointer points to the first static TLS block.  */
>>>>    #define TLS_TP_OFFSET 0
>>>>    @@ -37,10 +42,10 @@ typedef struct
>>>>    /* Compute the value for a DTPREL reloc.  */
>>>>    #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>>>>    -extern void *__tls_get_addr (tls_index *ti);
>>>> -
>>> Why move the function prototype?
>>
>> Maybe just want to take it out of a bunch of macros.
>>
>>
>>>>    #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>>>>    #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>>>>      /* Value used for dtv entries for which the allocation is delayed.  */
>>>>    #define TLS_DTV_UNALLOCATED ((void *) -1l)
>>>> +
>>>> +#endif
>>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>> new file mode 100644
>>>> index 0000000000..0d8c9bb991
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
>>>> @@ -0,0 +1,341 @@
>>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>>> +   LoongArch version.
>>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> Update Copyright years to 2024.
>>>
>>>> +
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#ifdef USE_LASX
>>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
>>>> +#elif defined USE_LSX
>>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
>>>> +#elif !defined __loongarch_soft_float
>>>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
>>>> +#else
>>>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
>>>> +#endif
>>> I don't have a strong opinion, but another option that might be simpler it
>>> to provide only only one _dl_tlsdesc_dynamic implementation and check the
>>> required save/restore of vector register based on hwcap value.
>>
>> The v3 patch provides only one  _dl_tlsdesc_dynamic implementation.
>>
>>
>>>> +
>>>> +#ifdef SHARED
>>>> +    /* Handler for dynamic TLS symbols.
>>>> +       Prototype:
>>>> +       _dl_tlsdesc_dynamic (tlsdesc *) ;
>>>> +
>>>> +       The second word of the descriptor points to a
>>>> +       tlsdesc_dynamic_arg structure.
>>>> +
>>>> +       Returns the offset between the thread pointer and the
>>>> +       object referenced by the argument.
>>>> +
>>>> +       ptrdiff_t
>>>> +       __attribute__ ((__regparm__ (1)))
>>> Does this attribute really make sense for loongarch?
>>
>> This line has been deleted.
>>
>>
>>>> +       _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>>>> +       {
>>>> +         struct tlsdesc_dynamic_arg *td = tdp->arg;
>>>> +         dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
>>>> +         if (__builtin_expect (td->gen_count <= dtv[0].counter
>>> Use __glibc_unlikely or just remove the __builtin_expect for clarity.
>>>
>>>> +        && (dtv[td->tlsinfo.ti_module].pointer.val
>>>> +            != TLS_DTV_UNALLOCATED),
>>>> +        1))
>>>> +           return dtv[td->tlsinfo.ti_module].pointer.val
>>>> +        + td->tlsinfo.ti_offset
>>>> +        - __thread_pointer;
>>>> +
>>>> +         return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
>>>> +       }
>>>> +     */
>>>> +    .hidden _dl_tlsdesc_dynamic
>>>> +    .global    _dl_tlsdesc_dynamic
>>>> +    .type    _dl_tlsdesc_dynamic,%function
>>>> +    cfi_startproc
>>>> +    .align 2
>>>> +_dl_tlsdesc_dynamic:
>>>> +    /* Save just enough registers to support fast path, if we fall
>>>> +       into slow path we will save additional registers.  */
>>>> +    ADDI    sp, sp,-24
>>>> +    REG_S    t0, sp, 0
>>>> +    REG_S    t1, sp, 8
>>>> +    REG_S    t2, sp, 16
>>>> +
>>>> +    REG_L    t0, tp, -SIZE_OF_DTV      # dtv(t0) = tp + TCBHEAD_DTV dtv start
>>>> +    REG_L    a0, a0, TLSDESC_ARG      # td(a0) = tdp->arg
>>>> +    REG_L    t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
>>>> +    REG_L    t2, t0, DTV_COUNTER      # t2 = dtv[0].counter
>>>> +    bltu    t2, t1, Lslow
>>>> +
>>>> +    REG_L    t1, a0, TLSDESC_MODID      # t1 = td->tlsinfo.ti_module
>>>> +    slli.d    t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
>>>> +    add.d    t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
>>>> +    REG_L    t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
>>>> +    li.d    t2, TLS_DTV_UNALLOCATED
>>>> +    beq    t1, t2, Lslow
>>>> +    REG_L    t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
>>>> +    # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
>>>> +    add.d    a0, t1, t2
>>>> +Lret:
>>>> +    sub.d    a0, a0, tp
>>>> +    REG_L    t0, sp, 0
>>>> +    REG_L    t1, sp, 8
>>>> +    REG_L    t2, sp, 16
>>>> +    ADDI    sp, sp, 24
>>>> +    RET
>>>> +
>>>> +Lslow:
>>>> +    /* This is the slow path. We need to call __tls_get_addr() which
>>>> +       means we need to save and restore all the register that the
>>>> +       callee will trash.  */
>>>> +
>>>> +    /* Save the remaining registers that we must treat as caller save.  */
>>>> +    ADDI    sp, sp, -FRAME_SIZE
>>>> +    REG_S    ra, sp, 0 * SZREG
>>>> +    REG_S    a1, sp, 1 * SZREG
>>>> +    REG_S    a2, sp, 2 * SZREG
>>>> +    REG_S    a3, sp, 3 * SZREG
>>>> +    REG_S    a4, sp, 4 * SZREG
>>>> +    REG_S    a5, sp, 5 * SZREG
>>>> +    REG_S    a6, sp, 6 * SZREG
>>>> +    REG_S    a7, sp, 7 * SZREG
>>>> +    REG_S    t4, sp, 8 * SZREG
>>>> +    REG_S    t5, sp, 9 * SZREG
>>>> +    REG_S    t6, sp, 10 * SZREG
>>>> +    REG_S    t7, sp, 11 * SZREG
>>>> +    REG_S    t8, sp, 12 * SZREG
>>>> +
>>>> +#ifdef USE_LASX
>>>> +    xvst    xr0, sp, 13*SZREG + 0*SZXREG
>>>> +    xvst    xr1, sp, 13*SZREG + 1*SZXREG
>>>> +    xvst    xr2, sp, 13*SZREG + 2*SZXREG
>>>> +    xvst    xr3, sp, 13*SZREG + 3*SZXREG
>>>> +    xvst    xr4, sp, 13*SZREG + 4*SZXREG
>>>> +    xvst    xr5, sp, 13*SZREG + 5*SZXREG
>>>> +    xvst    xr6, sp, 13*SZREG + 6*SZXREG
>>>> +    xvst    xr7, sp, 13*SZREG + 7*SZXREG
>>>> +    xvst    xr8, sp, 13*SZREG + 8*SZXREG
>>>> +    xvst    xr9, sp, 13*SZREG + 9*SZXREG
>>>> +    xvst    xr10, sp, 13*SZREG + 10*SZXREG
>>>> +    xvst    xr11, sp, 13*SZREG + 11*SZXREG
>>>> +    xvst    xr12, sp, 13*SZREG + 12*SZXREG
>>>> +    xvst    xr13, sp, 13*SZREG + 13*SZXREG
>>>> +    xvst    xr14, sp, 13*SZREG + 14*SZXREG
>>>> +    xvst    xr15, sp, 13*SZREG + 15*SZXREG
>>>> +    xvst    xr16, sp, 13*SZREG + 16*SZXREG
>>>> +    xvst    xr17, sp, 13*SZREG + 17*SZXREG
>>>> +    xvst    xr18, sp, 13*SZREG + 18*SZXREG
>>>> +    xvst    xr19, sp, 13*SZREG + 19*SZXREG
>>>> +    xvst    xr20, sp, 13*SZREG + 20*SZXREG
>>>> +    xvst    xr21, sp, 13*SZREG + 21*SZXREG
>>>> +    xvst    xr22, sp, 13*SZREG + 22*SZXREG
>>>> +    xvst    xr23, sp, 13*SZREG + 23*SZXREG
>>>> +    xvst    xr24, sp, 13*SZREG + 24*SZXREG
>>>> +    xvst    xr25, sp, 13*SZREG + 25*SZXREG
>>>> +    xvst    xr26, sp, 13*SZREG + 26*SZXREG
>>>> +    xvst    xr27, sp, 13*SZREG + 27*SZXREG
>>>> +    xvst    xr28, sp, 13*SZREG + 28*SZXREG
>>>> +    xvst    xr29, sp, 13*SZREG + 29*SZXREG
>>>> +    xvst    xr30, sp, 13*SZREG + 30*SZXREG
>>>> +    xvst    xr31, sp, 13*SZREG + 31*SZXREG
>>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>>> +    # some fields in fcsr0
>>>> +    movfcsr2gr  t0, fcsr0
>>>> +    REG_S        t0, sp, 32*SZXREG
>>>> +#elif defined USE_LSX
>>>> +    vst    vr0, sp, 13*SZREG + 0*SZVREG
>>>> +    vst    vr1, sp, 13*SZREG + 1*SZVREG
>>>> +    vst    vr2, sp, 13*SZREG + 2*SZVREG
>>>> +    vst    vr3, sp, 13*SZREG + 3*SZVREG
>>>> +    vst    vr4, sp, 13*SZREG + 4*SZVREG
>>>> +    vst    vr5, sp, 13*SZREG + 5*SZVREG
>>>> +    vst    vr6, sp, 13*SZREG + 6*SZVREG
>>>> +    vst    vr7, sp, 13*SZREG + 7*SZVREG
>>>> +    vst    vr8, sp, 13*SZREG + 8*SZVREG
>>>> +    vst    vr9, sp, 13*SZREG + 9*SZVREG
>>>> +    vst    vr10, sp, 13*SZREG + 10*SZVREG
>>>> +    vst    vr11, sp, 13*SZREG + 11*SZVREG
>>>> +    vst    vr12, sp, 13*SZREG + 12*SZVREG
>>>> +    vst    vr13, sp, 13*SZREG + 13*SZVREG
>>>> +    vst    vr14, sp, 13*SZREG + 14*SZVREG
>>>> +    vst    vr15, sp, 13*SZREG + 15*SZVREG
>>>> +    vst    vr16, sp, 13*SZREG + 16*SZVREG
>>>> +    vst    vr17, sp, 13*SZREG + 17*SZVREG
>>>> +    vst    vr18, sp, 13*SZREG + 18*SZVREG
>>>> +    vst    vr19, sp, 13*SZREG + 19*SZVREG
>>>> +    vst    vr20, sp, 13*SZREG + 20*SZVREG
>>>> +    vst    vr21, sp, 13*SZREG + 21*SZVREG
>>>> +    vst    vr22, sp, 13*SZREG + 22*SZVREG
>>>> +    vst    vr23, sp, 13*SZREG + 23*SZVREG
>>>> +    vst    vr24, sp, 13*SZREG + 24*SZVREG
>>>> +    vst    vr25, sp, 13*SZREG + 25*SZVREG
>>>> +    vst    vr26, sp, 13*SZREG + 26*SZVREG
>>>> +    vst    vr27, sp, 13*SZREG + 27*SZVREG
>>>> +    vst    vr28, sp, 13*SZREG + 28*SZVREG
>>>> +    vst    vr29, sp, 13*SZREG + 29*SZVREG
>>>> +    vst    vr30, sp, 13*SZREG + 30*SZVREG
>>>> +    vst    vr31, sp, 13*SZREG + 31*SZVREG
>>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>>> +    # some fields in fcsr0
>>>> +    movfcsr2gr  t0, fcsr0
>>>> +    REG_S        t0, sp, 32*SZVREG
>>>> +#elif !defined __loongarch_soft_float
>>>> +    FREG_S    fa0, sp, 13*SZREG + 0*SZFREG
>>>> +    FREG_S    fa1, sp, 13*SZREG + 1*SZFREG
>>>> +    FREG_S    fa2, sp, 13*SZREG + 2*SZFREG
>>>> +    FREG_S    fa3, sp, 13*SZREG + 3*SZFREG
>>>> +    FREG_S    fa4, sp, 13*SZREG + 4*SZFREG
>>>> +    FREG_S    fa5, sp, 13*SZREG + 5*SZFREG
>>>> +    FREG_S    fa6, sp, 13*SZREG + 6*SZFREG
>>>> +    FREG_S    fa7, sp, 13*SZREG + 7*SZFREG
>>>> +    FREG_S    ft0, sp, 13*SZREG + 8*SZFREG
>>>> +    FREG_S    ft1, sp, 13*SZREG + 9*SZFREG
>>>> +    FREG_S    ft2, sp, 13*SZREG + 10*SZFREG
>>>> +    FREG_S    ft3, sp, 13*SZREG + 11*SZFREG
>>>> +    FREG_S    ft4, sp, 13*SZREG + 12*SZFREG
>>>> +    FREG_S    ft5, sp, 13*SZREG + 13*SZFREG
>>>> +    FREG_S    ft6, sp, 13*SZREG + 14*SZFREG
>>>> +    FREG_S    ft7, sp, 13*SZREG + 15*SZFREG
>>>> +    FREG_S    ft8, sp, 13*SZREG + 16*SZFREG
>>>> +    FREG_S    ft9, sp, 13*SZREG + 17*SZFREG
>>>> +    FREG_S    ft10, sp, 13*SZREG + 18*SZFREG
>>>> +    FREG_S    ft11, sp, 13*SZREG + 19*SZFREG
>>>> +    FREG_S    ft12, sp, 13*SZREG + 20*SZFREG
>>>> +    FREG_S    ft13, sp, 13*SZREG + 21*SZFREG
>>>> +    FREG_S    ft14, sp, 13*SZREG + 22*SZFREG
>>>> +    FREG_S    ft15, sp, 13*SZREG + 23*SZFREG
>>>> +    # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
>>>> +    # some fields in fcsr0
>>>> +    movfcsr2gr  t0, fcsr0
>>>> +    REG_S        t0, sp, 24*SZFREG
>>>> +#endif /* #ifdef USE_LASX  */
>>>> +
>>>> +    bl    __tls_get_addr
>>>> +    ADDI    a0, a0, -TLS_DTV_OFFSET
>>>> +
>>>> +    REG_L    ra, sp, 0
>>>> +    REG_L    a1, sp, 1 * 8
>>>> +    REG_L    a2, sp, 2 * 8
>>>> +    REG_L    a3, sp, 3 * 8
>>>> +    REG_L    a4, sp, 4 * 8
>>>> +    REG_L    a5, sp, 5 * 8
>>>> +    REG_L    a6, sp, 6 * 8
>>>> +    REG_L    a7, sp, 7 * 8
>>>> +    REG_L    t4, sp, 8 * 8
>>>> +    REG_L    t5, sp, 9 * 8
>>>> +    REG_L    t6, sp, 10 * 8
>>>> +    REG_L    t7, sp, 11 * 8
>>>> +    REG_L    t8, sp, 12 * 8
>>>> +
>>>> +#ifdef USE_LASX
>>>> +    xvld    xr0, sp, 13*SZREG + 0*SZXREG
>>>> +    xvld    xr1, sp, 13*SZREG + 1*SZXREG
>>>> +    xvld    xr2, sp, 13*SZREG + 2*SZXREG
>>>> +    xvld    xr3, sp, 13*SZREG + 3*SZXREG
>>>> +    xvld    xr4, sp, 13*SZREG + 4*SZXREG
>>>> +    xvld    xr5, sp, 13*SZREG + 5*SZXREG
>>>> +    xvld    xr6, sp, 13*SZREG + 6*SZXREG
>>>> +    xvld    xr7, sp, 13*SZREG + 7*SZXREG
>>>> +    xvld    xr8, sp, 13*SZREG + 8*SZXREG
>>>> +    xvld    xr9, sp, 13*SZREG + 9*SZXREG
>>>> +    xvld    xr10, sp, 13*SZREG + 10*SZXREG
>>>> +    xvld    xr11, sp, 13*SZREG + 11*SZXREG
>>>> +    xvld    xr12, sp, 13*SZREG + 12*SZXREG
>>>> +    xvld    xr13, sp, 13*SZREG + 13*SZXREG
>>>> +    xvld    xr14, sp, 13*SZREG + 14*SZXREG
>>>> +    xvld    xr15, sp, 13*SZREG + 15*SZXREG
>>>> +    xvld    xr16, sp, 13*SZREG + 16*SZXREG
>>>> +    xvld    xr17, sp, 13*SZREG + 17*SZXREG
>>>> +    xvld    xr18, sp, 13*SZREG + 18*SZXREG
>>>> +    xvld    xr19, sp, 13*SZREG + 19*SZXREG
>>>> +    xvld    xr20, sp, 13*SZREG + 20*SZXREG
>>>> +    xvld    xr21, sp, 13*SZREG + 21*SZXREG
>>>> +    xvld    xr22, sp, 13*SZREG + 22*SZXREG
>>>> +    xvld    xr23, sp, 13*SZREG + 23*SZXREG
>>>> +    xvld    xr24, sp, 13*SZREG + 24*SZXREG
>>>> +    xvld    xr25, sp, 13*SZREG + 25*SZXREG
>>>> +    xvld    xr26, sp, 13*SZREG + 26*SZXREG
>>>> +    xvld    xr27, sp, 13*SZREG + 27*SZXREG
>>>> +    xvld    xr28, sp, 13*SZREG + 28*SZXREG
>>>> +    xvld    xr29, sp, 13*SZREG + 29*SZXREG
>>>> +    xvld    xr30, sp, 13*SZREG + 30*SZXREG
>>>> +    xvld    xr31, sp, 13*SZREG + 31*SZXREG
>>>> +    REG_L    t0, sp, 32*SZXREG
>>>> +    movgr2fcsr  fcsr0, t0
>>>> +#elif defined USE_LSX
>>>> +    vld    vr0, sp, 13*SZREG + 0*SZVREG
>>>> +    vld    vr1, sp, 13*SZREG + 1*SZVREG
>>>> +    vld    vr2, sp, 13*SZREG + 2*SZVREG
>>>> +    vld    vr3, sp, 13*SZREG + 3*SZVREG
>>>> +    vld    vr4, sp, 13*SZREG + 4*SZVREG
>>>> +    vld    vr5, sp, 13*SZREG + 5*SZVREG
>>>> +    vld    vr6, sp, 13*SZREG + 6*SZVREG
>>>> +    vld    vr7, sp, 13*SZREG + 7*SZVREG
>>>> +    vld    vr8, sp, 13*SZREG + 8*SZVREG
>>>> +    vld    vr9, sp, 13*SZREG + 9*SZVREG
>>>> +    vld    vr10, sp, 13*SZREG + 10*SZVREG
>>>> +    vld    vr11, sp, 13*SZREG + 11*SZVREG
>>>> +    vld    vr12, sp, 13*SZREG + 12*SZVREG
>>>> +    vld    vr13, sp, 13*SZREG + 13*SZVREG
>>>> +    vld    vr14, sp, 13*SZREG + 14*SZVREG
>>>> +    vld    vr15, sp, 13*SZREG + 15*SZVREG
>>>> +    vld    vr16, sp, 13*SZREG + 16*SZVREG
>>>> +    vld    vr17, sp, 13*SZREG + 17*SZVREG
>>>> +    vld    vr18, sp, 13*SZREG + 18*SZVREG
>>>> +    vld    vr19, sp, 13*SZREG + 19*SZVREG
>>>> +    vld    vr20, sp, 13*SZREG + 20*SZVREG
>>>> +    vld    vr21, sp, 13*SZREG + 21*SZVREG
>>>> +    vld    vr22, sp, 13*SZREG + 22*SZVREG
>>>> +    vld    vr23, sp, 13*SZREG + 23*SZVREG
>>>> +    vld    vr24, sp, 13*SZREG + 24*SZVREG
>>>> +    vld    vr25, sp, 13*SZREG + 25*SZVREG
>>>> +    vld    vr26, sp, 13*SZREG + 26*SZVREG
>>>> +    vld    vr27, sp, 13*SZREG + 27*SZVREG
>>>> +    vld    vr28, sp, 13*SZREG + 28*SZVREG
>>>> +    vld    vr29, sp, 13*SZREG + 29*SZVREG
>>>> +    vld    vr30, sp, 13*SZREG + 30*SZVREG
>>>> +    vld    vr31, sp, 13*SZREG + 31*SZVREG
>>>> +    REG_L    t0, sp, 32*SZVREG
>>>> +    movgr2fcsr  fcsr0, t0
>>>> +#elif !defined __loongarch_soft_float
>>>> +    FREG_L    fa0, sp, 13*SZREG + 0*SZFREG
>>>> +    FREG_L    fa1, sp, 13*SZREG + 1*SZFREG
>>>> +    FREG_L    fa2, sp, 13*SZREG + 2*SZFREG
>>>> +    FREG_L    fa3, sp, 13*SZREG + 3*SZFREG
>>>> +    FREG_L    fa4, sp, 13*SZREG + 4*SZFREG
>>>> +    FREG_L    fa5, sp, 13*SZREG + 5*SZFREG
>>>> +    FREG_L    fa6, sp, 13*SZREG + 6*SZFREG
>>>> +    FREG_L    fa7, sp, 13*SZREG + 7*SZFREG
>>>> +    FREG_L    ft0, sp, 13*SZREG + 8*SZFREG
>>>> +    FREG_L    ft1, sp, 13*SZREG + 9*SZFREG
>>>> +    FREG_L    ft2, sp, 13*SZREG + 10*SZFREG
>>>> +    FREG_L    ft3, sp, 13*SZREG + 11*SZFREG
>>>> +    FREG_L    ft4, sp, 13*SZREG + 12*SZFREG
>>>> +    FREG_L    ft5, sp, 13*SZREG + 13*SZFREG
>>>> +    FREG_L    ft6, sp, 13*SZREG + 14*SZFREG
>>>> +    FREG_L    ft7, sp, 13*SZREG + 15*SZFREG
>>>> +    FREG_L    ft8, sp, 13*SZREG + 16*SZFREG
>>>> +    FREG_L    ft9, sp, 13*SZREG + 17*SZFREG
>>>> +    FREG_L    ft10, sp, 13*SZREG + 18*SZFREG
>>>> +    FREG_L    ft11, sp, 13*SZREG + 19*SZFREG
>>>> +    FREG_L    ft12, sp, 13*SZREG + 20*SZFREG
>>>> +    FREG_L    ft13, sp, 13*SZREG + 21*SZFREG
>>>> +    FREG_L    ft14, sp, 13*SZREG + 22*SZFREG
>>>> +    FREG_L    ft15, sp, 13*SZREG + 23*SZFREG
>>>> +    REG_L    t0, sp, 24*SZFREG
>>>> +    movgr2fcsr  fcsr0, t0
>>>> +#endif /* #ifdef USE_LASX  */
>>>> +
>>>> +    ADDI    sp, sp, FRAME_SIZE
>>>> +    b    Lret
>>>> +    cfi_endproc
>>>> +    .size    _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
>>>> +#endif /* #ifdef SHARED  */
>>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
>>>> new file mode 100644
>>>> index 0000000000..4a17079169
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S
>>>> @@ -0,0 +1,93 @@
>>>> +/* Thread-local storage handling in the ELF dynamic linker.
>>>> +   LoongArch version.
>>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> Update Copyright years to 2024.
>>>
>>>> +
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#include <sysdep.h>
>>>> +#include <tls.h>
>>>> +#include "tlsdesc.h"
>>>> +
>>>> +    .text
>>>> +
>>>> +    /* Compute the thread pointer offset for symbols in the static
>>>> +       TLS block. The offset is the same for all threads.
>>>> +       Prototype:
>>>> +       _dl_tlsdesc_return (tlsdesc *);  */
>>>> +    .hidden _dl_tlsdesc_return
>>>> +    .global    _dl_tlsdesc_return
>>>> +    .type    _dl_tlsdesc_return,%function
>>>> +    cfi_startproc
>>>> +    .align 2
>>>> +_dl_tlsdesc_return:
>>>> +    REG_L  a0, a0, 8
>>>> +    RET
>>>> +    cfi_endproc
>>>> +    .size    _dl_tlsdesc_return, .-_dl_tlsdesc_return
>>>> +
>>>> +    /* Handler for undefined weak TLS symbols.
>>>> +       Prototype:
>>>> +       _dl_tlsdesc_undefweak (tlsdesc *);
>>>> +
>>>> +       The second word of the descriptor contains the addend.
>>>> +       Return the addend minus the thread pointer. This ensures
>>>> +       that when the caller adds on the thread pointer it gets back
>>>> +       the addend.  */
>>>> +    .hidden _dl_tlsdesc_undefweak
>>>> +    .global    _dl_tlsdesc_undefweak
>>>> +    .type    _dl_tlsdesc_undefweak,%function
>>>> +    cfi_startproc
>>>> +    .align  2
>>>> +_dl_tlsdesc_undefweak:
>>>> +    REG_L    a0, a0, 8
>>>> +    sub.d    a0, a0, tp
>>>> +    RET
>>>> +    cfi_endproc
>>>> +    .size    _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>>>> +
>>>> +
>>>> +#ifdef SHARED
>>>> +
>>>> +#if !defined __loongarch_soft_float
>>>> +
>>>> +#define USE_LASX
>>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
>>>> +#define Lret Lret_lasx
>>>> +#define Lslow Lslow_lasx
>>>> +#include "dl-tlsdesc-dynamic.h"
>>>> +#undef FRAME_SIZE
>>>> +#undef USE_LASX
>>>> +#undef _dl_tlsdesc_dynamic
>>>> +#undef Lret
>>>> +#undef Lslow
>>>> +
>>>> +#define USE_LSX
>>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
>>>> +#define Lret Lret_lsx
>>>> +#define Lslow Lslow_lsx
>>>> +#include "dl-tlsdesc-dynamic.h"
>>>> +#undef FRAME_SIZE
>>>> +#undef USE_LSX
>>>> +#undef _dl_tlsdesc_dynamic
>>>> +#undef Lret
>>>> +#undef Lslow
>>>> +
>>>> +#endif
>>>> +
>>>> +#include "dl-tlsdesc-dynamic.h"
>>>> +
>>>> +#endif /* #ifdef SHARED  */
>>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
>>>> new file mode 100644
>>>> index 0000000000..988037a714
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h
>>>> @@ -0,0 +1,53 @@
>>>> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
>>>> +   LoongArch version.
>>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>>> +
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#ifndef _DL_TLSDESC_H
>>>> +#define _DL_TLSDESC_H
>>>> +
>>>> +#include <dl-tls.h>
>>>> +
>>>> +/* Type used to represent a TLS descriptor in the GOT.  */
>>>> +struct tlsdesc
>>>> +{
>>>> +  ptrdiff_t (*entry) (struct tlsdesc *);
>>>> +  void *arg;
>>>> +};
>>>> +
>>>> +/* Type used as the argument in a TLS descriptor for a symbol that
>>>> +   needs dynamic TLS offsets.  */
>>>> +struct tlsdesc_dynamic_arg
>>>> +{
>>>> +  tls_index tlsinfo;
>>>> +  size_t gen_count;
>>>> +};
>>>> +
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
>>>> +
>>>> +# ifdef SHARED
>>>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
>>>> +#if !defined __loongarch_soft_float
>>> Minor style, usually for single tests we use '#ifndef' and add
>>> attribute_hidden at the end of prototype.
>>>
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
>>>> +#endif
>>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
>>>> +#endif
>>>> +
>>>> +#endif
>>>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
>>>> index 4d8737ee7f..9b1773634c 100644
>>>> --- a/sysdeps/loongarch/linkmap.h
>>>> +++ b/sysdeps/loongarch/linkmap.h
>>>> @@ -19,4 +19,5 @@
>>>>    struct link_map_machine
>>>>    {
>>>>      ElfW (Addr) plt; /* Address of .plt.  */
>>>> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>>>>    };
>>>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
>>>> index 51521a7eb4..23c1d12914 100644
>>>> --- a/sysdeps/loongarch/sys/asm.h
>>>> +++ b/sysdeps/loongarch/sys/asm.h
>>>> @@ -25,6 +25,7 @@
>>>>    /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>>>>    #define SZREG 8
>>>>    #define SZFREG 8
>>>> +#define SZFCSREG 4
>>>>    #define SZVREG 16
>>>>    #define SZXREG 32
>>>>    #define REG_L ld.d
>>>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
>>>> index f61ee25b25..80ce3e9c00 100644
>>>> --- a/sysdeps/loongarch/sys/regdef.h
>>>> +++ b/sysdeps/loongarch/sys/regdef.h
>>>> @@ -97,6 +97,7 @@
>>>>    #define fcc5 $fcc5
>>>>    #define fcc6 $fcc6
>>>>    #define fcc7 $fcc7
>>>> +#define fcsr0 $fcsr0
>>>>      #define vr0 $vr0
>>>>    #define vr1 $vr1
>>>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
>>>> new file mode 100644
>>>> index 0000000000..a357e7619f
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/tlsdesc.c
>>>> @@ -0,0 +1,39 @@
>>>> +/* Manage TLS descriptors.  AArch64 version.
>>>> +
>>>> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
>>> Update Copyright years to 2024 and remove the 'AArch64'.
>>>
>>>
>>>> +
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#include <ldsodefs.h>
>>>> +#include <tls.h>
>>>> +#include <dl-tlsdesc.h>
>>>> +#include <dl-unmap-segments.h>
>>>> +#include <tlsdeschtab.h>
>>>> +
>>>> +/* Unmap the dynamic object, but also release its TLS descriptor table
>>>> +   if there is one.  */
>>>> +
>>>> +void
>>>> +_dl_unmap (struct link_map *map)
>>>> +{
>>>> +  _dl_unmap_segments (map);
>>>> +
>>>> +#ifdef SHARED
>>>> +  if (map->l_mach.tlsdesc_table)
>>>> +    htab_delete (map->l_mach.tlsdesc_table);
>>>> +#endif
>>>> +}
>>>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
>>>> new file mode 100644
>>>> index 0000000000..bcab218631
>>>> --- /dev/null
>>>> +++ b/sysdeps/loongarch/tlsdesc.sym
>>>> @@ -0,0 +1,19 @@
>>>> +#include <stddef.h>
>>>> +#include <sysdep.h>
>>>> +#include <tls.h>
>>>> +#include <link.h>
>>>> +#include <dl-tlsdesc.h>
>>>> +
>>>> +--
>>>> +
>>>> +-- Abuse tls.h macros to derive offsets relative to the thread register.
>>>> +
>>>> +TLSDESC_ARG        offsetof(struct tlsdesc, arg)
>>>> +TLSDESC_GEN_COUNT    offsetof(struct tlsdesc_dynamic_arg, gen_count)
>>>> +TLSDESC_MODID        offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
>>>> +TLSDESC_MODOFF        offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
>>>> +TCBHEAD_DTV        offsetof(tcbhead_t, dtv)
>>>> +DTV_COUNTER        offsetof(dtv_t, counter)
>>>> +TLS_DTV_UNALLOCATED    TLS_DTV_UNALLOCATED
>>>> +TLS_DTV_OFFSET        TLS_DTV_OFFSET
>>>> +SIZE_OF_DTV        sizeof(tcbhead_t)
>>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>>> index 547b1c1b7f..ec32e6d13f 100644
>>>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data
>>>> @@ -5,3 +5,5 @@ libc.so: calloc
>>>>    libc.so: free
>>>>    libc.so: malloc
>>>>    libc.so: realloc
>>>> +# The dynamic loader needs __tls_get_addr for TLS.
>>>> +ld.so: __tls_get_addr


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2024-03-11  8:45 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-29  1:43 [PATCH v2] LoongArch: Add support for TLS Descriptors mengqinggang
2024-02-29  2:56 ` caiyinyu
2024-03-04 15:42 ` H.J. Lu
2024-03-08  7:45   ` mengqinggang
2024-03-05 19:29 ` Adhemerval Zanella Netto
2024-03-08  7:53   ` mengqinggang
2024-03-08 14:10     ` Adhemerval Zanella Netto
2024-03-11  8:45       ` mengqinggang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).