public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: mengqinggang <mengqinggang@loongson.cn>
Cc: libc-alpha@sourceware.org, adhemerval.zanella@linaro.org,
	 xuchenghua@loongson.cn, caiyinyu@loongson.cn,
	chenglulu@loongson.cn,  cailulu@loongson.cn, xry111@xry111.site,
	i.swmail@xen0n.name,  maskray@google.com, luweining@loongson.cn,
	wanglei@loongson.cn,  hejinyang@loongson.cn
Subject: Re: [PATCH v2] LoongArch: Add support for TLS Descriptors
Date: Mon, 4 Mar 2024 07:42:01 -0800	[thread overview]
Message-ID: <CAMe9rOq3YH1stU1NsVrtyAqbot8O_na6eQ=Na-m0GEyeLDb7ow@mail.gmail.com> (raw)
In-Reply-To: <20240229014328.3559028-1-mengqinggang@loongson.cn>

On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote:
>
> This is mostly based on the AArch64 and RISC-V implementations.
>
> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations.
>
> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore
> all vector registers.
> ---
> Changes v1 -> v2:
> - Fix vr24-vr31, xr24-xr31 typo.
> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic.
> - Save and restore fcsr0 in _dl_tlsdesc_dynamic.
>
> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html
>
>  elf/elf.h                                     |   2 +
>  sysdeps/loongarch/Makefile                    |   6 +
>  sysdeps/loongarch/dl-link.sym                 |   1 +
>  sysdeps/loongarch/dl-machine.h                |  60 ++-
>  sysdeps/loongarch/dl-tls.h                    |   9 +-
>  sysdeps/loongarch/dl-tlsdesc-dynamic.h        | 341 ++++++++++++++++++
>  sysdeps/loongarch/dl-tlsdesc.S                |  93 +++++
>  sysdeps/loongarch/dl-tlsdesc.h                |  53 +++
>  sysdeps/loongarch/linkmap.h                   |   1 +
>  sysdeps/loongarch/sys/asm.h                   |   1 +
>  sysdeps/loongarch/sys/regdef.h                |   1 +
>  sysdeps/loongarch/tlsdesc.c                   |  39 ++
>  sysdeps/loongarch/tlsdesc.sym                 |  19 +
>  .../unix/sysv/linux/loongarch/localplt.data   |   2 +
>  14 files changed, 625 insertions(+), 3 deletions(-)
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.S
>  create mode 100644 sysdeps/loongarch/dl-tlsdesc.h
>  create mode 100644 sysdeps/loongarch/tlsdesc.c
>  create mode 100644 sysdeps/loongarch/tlsdesc.sym
>
> diff --git a/elf/elf.h b/elf/elf.h
> index f2206e5c06..eec24ea049 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -4237,6 +4237,8 @@ enum
>  #define R_LARCH_TLS_TPREL32    10
>  #define R_LARCH_TLS_TPREL64    11
>  #define R_LARCH_IRELATIVE      12
> +#define R_LARCH_TLS_DESC32     13
> +#define R_LARCH_TLS_DESC64     14
>
>  /* Reserved for future relocs that the dynamic linker must understand.  */
>
> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
> index 43d2f583cd..181389e787 100644
> --- a/sysdeps/loongarch/Makefile
> +++ b/sysdeps/loongarch/Makefile
> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h
>  endif
>
>  ifeq ($(subdir),elf)
> +sysdep-dl-routines += tlsdesc dl-tlsdesc
>  gen-as-const-headers += dl-link.sym
>  endif
>
> +ifeq ($(subdir),csu)
> +gen-as-const-headers += tlsdesc.sym
> +endif
> +
> +
>  # LoongArch's assembler also needs to know about PIC as it changes the
>  # definition of some assembler macros.
>  ASFLAGS-.os += $(pic-ccflag)
> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym
> index b534968e30..fd81ef37d5 100644
> --- a/sysdeps/loongarch/dl-link.sym
> +++ b/sysdeps/loongarch/dl-link.sym
> @@ -1,6 +1,7 @@
>  #include <stddef.h>
>  #include <sysdep.h>
>  #include <link.h>
> +#include <dl-tlsdesc.h>
>
>  DL_SIZEOF_RG            sizeof(struct La_loongarch_regs)
>  DL_SIZEOF_RV            sizeof(struct La_loongarch_retval)
> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
> index ab81b82d95..8ca6c224f6 100644
> --- a/sysdeps/loongarch/dl-machine.h
> +++ b/sysdeps/loongarch/dl-machine.h
> @@ -25,7 +25,7 @@
>  #include <entry.h>
>  #include <elf/elf.h>
>  #include <sys/asm.h>
> -#include <dl-tls.h>
> +#include <dl-tlsdesc.h>
>  #include <dl-static-tls.h>
>  #include <dl-machine-rel.h>
>
> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
>        *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend;
>        break;
>
> +    case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32:
> +      {
> +       struct tlsdesc volatile *td =
> +           (struct tlsdesc volatile *)addr_field;
> +       if (! sym)
> +         {
> +           td->arg = (void*)reloc->r_addend;
> +           td->entry = _dl_tlsdesc_undefweak;
> +         }
> +       else
> +         {
> +# ifndef SHARED
> +           CHECK_STATIC_TLS (map, sym_map);
> +# else
> +           if (!TRY_STATIC_TLS (map, sym_map))
> +             {
> +               td->arg = _dl_make_tlsdesc_dynamic
> +                 (sym_map, sym->st_value + reloc->r_addend);
> +# if !defined __loongarch_soft_float
> +               if (SUPPORT_LASX)
> +                 td->entry = _dl_tlsdesc_dynamic_lasx;
> +               else
> +               if (SUPPORT_LSX)
> +                 td->entry = _dl_tlsdesc_dynamic_lsx;
> +               else
> +# endif
> +                 td->entry = _dl_tlsdesc_dynamic;
> +             }
> +           else
> +# endif
> +             {
> +               td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym)
> +                           + reloc->r_addend);
> +               td->entry = _dl_tlsdesc_return;
> +             }
> +         }
> +       break;
> +      }
> +
>      case R_LARCH_COPY:
>        {
>           if (sym == NULL)
> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>        else
>         *reloc_addr = map->l_mach.plt;
>      }
> +  else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1))
> +    {
> +      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
> +      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
> +      const ElfW (Sym) *sym = &symtab[symndx];
> +      const struct r_found_version *version = NULL;
> +
> +      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
> +       {
> +         const ElfW (Half) *vernum =
> +           (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
> +         version = &map->l_versions[vernum[symndx] & 0x7fff];
> +       }
> +
> +      /* Always initialize TLS descriptors completely, because lazy
> +        initialization requires synchronization at every TLS access.  */
> +      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
> +                       skip_ifunc);
> +    }
>    else
>      _dl_reloc_bad_type (map, r_type, 1);
>  }
> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h
> index 29924b866d..de593c002d 100644
> --- a/sysdeps/loongarch/dl-tls.h
> +++ b/sysdeps/loongarch/dl-tls.h
> @@ -16,6 +16,9 @@
>     License along with the GNU C Library.  If not, see
>     <https://www.gnu.org/licenses/>.  */
>
> +#ifndef _DL_TLS_H
> +#define _DL_TLS_H
> +
>  /* Type used for the representation of TLS information in the GOT.  */
>  typedef struct
>  {
> @@ -23,6 +26,8 @@ typedef struct
>    unsigned long int ti_offset;
>  } tls_index;
>
> +extern void *__tls_get_addr (tls_index *ti);
> +
>  /* The thread pointer points to the first static TLS block.  */
>  #define TLS_TP_OFFSET 0
>
> @@ -37,10 +42,10 @@ typedef struct
>  /* Compute the value for a DTPREL reloc.  */
>  #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET)
>
> -extern void *__tls_get_addr (tls_index *ti);
> -
>  #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
>  #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
>
>  /* Value used for dtv entries for which the allocation is delayed.  */
>  #define TLS_DTV_UNALLOCATED ((void *) -1l)
> +
> +#endif
> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> new file mode 100644
> index 0000000000..0d8c9bb991
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
> @@ -0,0 +1,341 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifdef USE_LASX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK))
> +#elif defined USE_LSX
> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK))
> +#elif !defined __loongarch_soft_float
> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK))
> +#else
> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK))
> +#endif
> +
> +#ifdef SHARED
> +       /* Handler for dynamic TLS symbols.
> +          Prototype:
> +          _dl_tlsdesc_dynamic (tlsdesc *) ;
> +
> +          The second word of the descriptor points to a
> +          tlsdesc_dynamic_arg structure.
> +
> +          Returns the offset between the thread pointer and the
> +          object referenced by the argument.
> +
> +          ptrdiff_t
> +          __attribute__ ((__regparm__ (1)))
> +          _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
> +          {
> +            struct tlsdesc_dynamic_arg *td = tdp->arg;
> +            dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
> +            if (__builtin_expect (td->gen_count <= dtv[0].counter
> +               && (dtv[td->tlsinfo.ti_module].pointer.val
> +                   != TLS_DTV_UNALLOCATED),
> +               1))
> +              return dtv[td->tlsinfo.ti_module].pointer.val
> +               + td->tlsinfo.ti_offset
> +               - __thread_pointer;
> +
> +            return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
> +          }
> +        */
> +       .hidden _dl_tlsdesc_dynamic
> +       .global _dl_tlsdesc_dynamic
> +       .type   _dl_tlsdesc_dynamic,%function
> +       cfi_startproc
> +       .align 2
> +_dl_tlsdesc_dynamic:
> +       /* Save just enough registers to support fast path, if we fall
> +          into slow path we will save additional registers.  */
> +       ADDI    sp, sp,-24
> +       REG_S   t0, sp, 0
> +       REG_S   t1, sp, 8
> +       REG_S   t2, sp, 16
> +
> +       REG_L   t0, tp, -SIZE_OF_DTV      # dtv(t0) = tp + TCBHEAD_DTV dtv start
> +       REG_L   a0, a0, TLSDESC_ARG       # td(a0) = tdp->arg
> +       REG_L   t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count
> +       REG_L   t2, t0, DTV_COUNTER       # t2 = dtv[0].counter
> +       bltu    t2, t1, Lslow
> +
> +       REG_L   t1, a0, TLSDESC_MODID     # t1 = td->tlsinfo.ti_module
> +       slli.d  t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */
> +       add.d   t1, t1, t0    # t1 = dtv + ti_module * sizeof(dtv_t)
> +       REG_L   t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val
> +       li.d    t2, TLS_DTV_UNALLOCATED
> +       beq     t1, t2, Lslow
> +       REG_L   t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset
> +       # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
> +       add.d   a0, t1, t2
> +Lret:
> +       sub.d   a0, a0, tp
> +       REG_L   t0, sp, 0
> +       REG_L   t1, sp, 8
> +       REG_L   t2, sp, 16
> +       ADDI    sp, sp, 24
> +       RET
> +
> +Lslow:
> +       /* This is the slow path. We need to call __tls_get_addr(), which
> +          means we need to save and restore all the registers that the
> +          callee will clobber.  */
> +
> +       /* Save the remaining registers that we must treat as caller save.  */
> +       ADDI    sp, sp, -FRAME_SIZE
> +       REG_S   ra, sp, 0 * SZREG
> +       REG_S   a1, sp, 1 * SZREG
> +       REG_S   a2, sp, 2 * SZREG
> +       REG_S   a3, sp, 3 * SZREG
> +       REG_S   a4, sp, 4 * SZREG
> +       REG_S   a5, sp, 5 * SZREG
> +       REG_S   a6, sp, 6 * SZREG
> +       REG_S   a7, sp, 7 * SZREG
> +       REG_S   t4, sp, 8 * SZREG
> +       REG_S   t5, sp, 9 * SZREG
> +       REG_S   t6, sp, 10 * SZREG
> +       REG_S   t7, sp, 11 * SZREG
> +       REG_S   t8, sp, 12 * SZREG
> +
> +#ifdef USE_LASX
> +       xvst    xr0, sp, 13*SZREG + 0*SZXREG
> +       xvst    xr1, sp, 13*SZREG + 1*SZXREG
> +       xvst    xr2, sp, 13*SZREG + 2*SZXREG
> +       xvst    xr3, sp, 13*SZREG + 3*SZXREG
> +       xvst    xr4, sp, 13*SZREG + 4*SZXREG
> +       xvst    xr5, sp, 13*SZREG + 5*SZXREG
> +       xvst    xr6, sp, 13*SZREG + 6*SZXREG
> +       xvst    xr7, sp, 13*SZREG + 7*SZXREG
> +       xvst    xr8, sp, 13*SZREG + 8*SZXREG
> +       xvst    xr9, sp, 13*SZREG + 9*SZXREG
> +       xvst    xr10, sp, 13*SZREG + 10*SZXREG
> +       xvst    xr11, sp, 13*SZREG + 11*SZXREG
> +       xvst    xr12, sp, 13*SZREG + 12*SZXREG
> +       xvst    xr13, sp, 13*SZREG + 13*SZXREG
> +       xvst    xr14, sp, 13*SZREG + 14*SZXREG
> +       xvst    xr15, sp, 13*SZREG + 15*SZXREG
> +       xvst    xr16, sp, 13*SZREG + 16*SZXREG
> +       xvst    xr17, sp, 13*SZREG + 17*SZXREG
> +       xvst    xr18, sp, 13*SZREG + 18*SZXREG
> +       xvst    xr19, sp, 13*SZREG + 19*SZXREG
> +       xvst    xr20, sp, 13*SZREG + 20*SZXREG
> +       xvst    xr21, sp, 13*SZREG + 21*SZXREG
> +       xvst    xr22, sp, 13*SZREG + 22*SZXREG
> +       xvst    xr23, sp, 13*SZREG + 23*SZXREG
> +       xvst    xr24, sp, 13*SZREG + 24*SZXREG
> +       xvst    xr25, sp, 13*SZREG + 25*SZXREG
> +       xvst    xr26, sp, 13*SZREG + 26*SZXREG
> +       xvst    xr27, sp, 13*SZREG + 27*SZXREG
> +       xvst    xr28, sp, 13*SZREG + 28*SZXREG
> +       xvst    xr29, sp, 13*SZREG + 29*SZXREG
> +       xvst    xr30, sp, 13*SZREG + 30*SZXREG
> +       xvst    xr31, sp, 13*SZREG + 31*SZXREG
> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +       # some fields in fcsr0
> +       movfcsr2gr  t0, fcsr0
> +       REG_S       t0, sp, 32*SZXREG
> +#elif defined USE_LSX
> +       vst     vr0, sp, 13*SZREG + 0*SZVREG
> +       vst     vr1, sp, 13*SZREG + 1*SZVREG
> +       vst     vr2, sp, 13*SZREG + 2*SZVREG
> +       vst     vr3, sp, 13*SZREG + 3*SZVREG
> +       vst     vr4, sp, 13*SZREG + 4*SZVREG
> +       vst     vr5, sp, 13*SZREG + 5*SZVREG
> +       vst     vr6, sp, 13*SZREG + 6*SZVREG
> +       vst     vr7, sp, 13*SZREG + 7*SZVREG
> +       vst     vr8, sp, 13*SZREG + 8*SZVREG
> +       vst     vr9, sp, 13*SZREG + 9*SZVREG
> +       vst     vr10, sp, 13*SZREG + 10*SZVREG
> +       vst     vr11, sp, 13*SZREG + 11*SZVREG
> +       vst     vr12, sp, 13*SZREG + 12*SZVREG
> +       vst     vr13, sp, 13*SZREG + 13*SZVREG
> +       vst     vr14, sp, 13*SZREG + 14*SZVREG
> +       vst     vr15, sp, 13*SZREG + 15*SZVREG
> +       vst     vr16, sp, 13*SZREG + 16*SZVREG
> +       vst     vr17, sp, 13*SZREG + 17*SZVREG
> +       vst     vr18, sp, 13*SZREG + 18*SZVREG
> +       vst     vr19, sp, 13*SZREG + 19*SZVREG
> +       vst     vr20, sp, 13*SZREG + 20*SZVREG
> +       vst     vr21, sp, 13*SZREG + 21*SZVREG
> +       vst     vr22, sp, 13*SZREG + 22*SZVREG
> +       vst     vr23, sp, 13*SZREG + 23*SZVREG
> +       vst     vr24, sp, 13*SZREG + 24*SZVREG
> +       vst     vr25, sp, 13*SZREG + 25*SZVREG
> +       vst     vr26, sp, 13*SZREG + 26*SZVREG
> +       vst     vr27, sp, 13*SZREG + 27*SZVREG
> +       vst     vr28, sp, 13*SZREG + 28*SZVREG
> +       vst     vr29, sp, 13*SZREG + 29*SZVREG
> +       vst     vr30, sp, 13*SZREG + 30*SZVREG
> +       vst     vr31, sp, 13*SZREG + 31*SZVREG
> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +       # some fields in fcsr0
> +       movfcsr2gr  t0, fcsr0
> +       REG_S       t0, sp, 32*SZVREG
> +#elif !defined __loongarch_soft_float
> +       FREG_S  fa0, sp, 13*SZREG + 0*SZFREG
> +       FREG_S  fa1, sp, 13*SZREG + 1*SZFREG
> +       FREG_S  fa2, sp, 13*SZREG + 2*SZFREG
> +       FREG_S  fa3, sp, 13*SZREG + 3*SZFREG
> +       FREG_S  fa4, sp, 13*SZREG + 4*SZFREG
> +       FREG_S  fa5, sp, 13*SZREG + 5*SZFREG
> +       FREG_S  fa6, sp, 13*SZREG + 6*SZFREG
> +       FREG_S  fa7, sp, 13*SZREG + 7*SZFREG
> +       FREG_S  ft0, sp, 13*SZREG + 8*SZFREG
> +       FREG_S  ft1, sp, 13*SZREG + 9*SZFREG
> +       FREG_S  ft2, sp, 13*SZREG + 10*SZFREG
> +       FREG_S  ft3, sp, 13*SZREG + 11*SZFREG
> +       FREG_S  ft4, sp, 13*SZREG + 12*SZFREG
> +       FREG_S  ft5, sp, 13*SZREG + 13*SZFREG
> +       FREG_S  ft6, sp, 13*SZREG + 14*SZFREG
> +       FREG_S  ft7, sp, 13*SZREG + 15*SZFREG
> +       FREG_S  ft8, sp, 13*SZREG + 16*SZFREG
> +       FREG_S  ft9, sp, 13*SZREG + 17*SZFREG
> +       FREG_S  ft10, sp, 13*SZREG + 18*SZFREG
> +       FREG_S  ft11, sp, 13*SZREG + 19*SZFREG
> +       FREG_S  ft12, sp, 13*SZREG + 20*SZFREG
> +       FREG_S  ft13, sp, 13*SZREG + 21*SZFREG
> +       FREG_S  ft14, sp, 13*SZREG + 22*SZFREG
> +       FREG_S  ft15, sp, 13*SZREG + 23*SZFREG
> +       # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of
> +       # some fields in fcsr0
> +       movfcsr2gr  t0, fcsr0
> +       REG_S       t0, sp, 24*SZFREG
> +#endif /* #ifdef USE_LASX  */
> +
> +       bl      __tls_get_addr
> +       ADDI    a0, a0, -TLS_DTV_OFFSET
> +
> +       REG_L   ra, sp, 0
> +       REG_L   a1, sp, 1 * 8
> +       REG_L   a2, sp, 2 * 8
> +       REG_L   a3, sp, 3 * 8
> +       REG_L   a4, sp, 4 * 8
> +       REG_L   a5, sp, 5 * 8
> +       REG_L   a6, sp, 6 * 8
> +       REG_L   a7, sp, 7 * 8
> +       REG_L   t4, sp, 8 * 8
> +       REG_L   t5, sp, 9 * 8
> +       REG_L   t6, sp, 10 * 8
> +       REG_L   t7, sp, 11 * 8
> +       REG_L   t8, sp, 12 * 8
> +
> +#ifdef USE_LASX
> +       xvld    xr0, sp, 13*SZREG + 0*SZXREG
> +       xvld    xr1, sp, 13*SZREG + 1*SZXREG
> +       xvld    xr2, sp, 13*SZREG + 2*SZXREG
> +       xvld    xr3, sp, 13*SZREG + 3*SZXREG
> +       xvld    xr4, sp, 13*SZREG + 4*SZXREG
> +       xvld    xr5, sp, 13*SZREG + 5*SZXREG
> +       xvld    xr6, sp, 13*SZREG + 6*SZXREG
> +       xvld    xr7, sp, 13*SZREG + 7*SZXREG
> +       xvld    xr8, sp, 13*SZREG + 8*SZXREG
> +       xvld    xr9, sp, 13*SZREG + 9*SZXREG
> +       xvld    xr10, sp, 13*SZREG + 10*SZXREG
> +       xvld    xr11, sp, 13*SZREG + 11*SZXREG
> +       xvld    xr12, sp, 13*SZREG + 12*SZXREG
> +       xvld    xr13, sp, 13*SZREG + 13*SZXREG
> +       xvld    xr14, sp, 13*SZREG + 14*SZXREG
> +       xvld    xr15, sp, 13*SZREG + 15*SZXREG
> +       xvld    xr16, sp, 13*SZREG + 16*SZXREG
> +       xvld    xr17, sp, 13*SZREG + 17*SZXREG
> +       xvld    xr18, sp, 13*SZREG + 18*SZXREG
> +       xvld    xr19, sp, 13*SZREG + 19*SZXREG
> +       xvld    xr20, sp, 13*SZREG + 20*SZXREG
> +       xvld    xr21, sp, 13*SZREG + 21*SZXREG
> +       xvld    xr22, sp, 13*SZREG + 22*SZXREG
> +       xvld    xr23, sp, 13*SZREG + 23*SZXREG
> +       xvld    xr24, sp, 13*SZREG + 24*SZXREG
> +       xvld    xr25, sp, 13*SZREG + 25*SZXREG
> +       xvld    xr26, sp, 13*SZREG + 26*SZXREG
> +       xvld    xr27, sp, 13*SZREG + 27*SZXREG
> +       xvld    xr28, sp, 13*SZREG + 28*SZXREG
> +       xvld    xr29, sp, 13*SZREG + 29*SZXREG
> +       xvld    xr30, sp, 13*SZREG + 30*SZXREG
> +       xvld    xr31, sp, 13*SZREG + 31*SZXREG
> +       REG_L   t0, sp, 32*SZXREG
> +       movgr2fcsr  fcsr0, t0
> +#elif defined USE_LSX
> +       vld     vr0, sp, 13*SZREG + 0*SZVREG
> +       vld     vr1, sp, 13*SZREG + 1*SZVREG
> +       vld     vr2, sp, 13*SZREG + 2*SZVREG
> +       vld     vr3, sp, 13*SZREG + 3*SZVREG
> +       vld     vr4, sp, 13*SZREG + 4*SZVREG
> +       vld     vr5, sp, 13*SZREG + 5*SZVREG
> +       vld     vr6, sp, 13*SZREG + 6*SZVREG
> +       vld     vr7, sp, 13*SZREG + 7*SZVREG
> +       vld     vr8, sp, 13*SZREG + 8*SZVREG
> +       vld     vr9, sp, 13*SZREG + 9*SZVREG
> +       vld     vr10, sp, 13*SZREG + 10*SZVREG
> +       vld     vr11, sp, 13*SZREG + 11*SZVREG
> +       vld     vr12, sp, 13*SZREG + 12*SZVREG
> +       vld     vr13, sp, 13*SZREG + 13*SZVREG
> +       vld     vr14, sp, 13*SZREG + 14*SZVREG
> +       vld     vr15, sp, 13*SZREG + 15*SZVREG
> +       vld     vr16, sp, 13*SZREG + 16*SZVREG
> +       vld     vr17, sp, 13*SZREG + 17*SZVREG
> +       vld     vr18, sp, 13*SZREG + 18*SZVREG
> +       vld     vr19, sp, 13*SZREG + 19*SZVREG
> +       vld     vr20, sp, 13*SZREG + 20*SZVREG
> +       vld     vr21, sp, 13*SZREG + 21*SZVREG
> +       vld     vr22, sp, 13*SZREG + 22*SZVREG
> +       vld     vr23, sp, 13*SZREG + 23*SZVREG
> +       vld     vr24, sp, 13*SZREG + 24*SZVREG
> +       vld     vr25, sp, 13*SZREG + 25*SZVREG
> +       vld     vr26, sp, 13*SZREG + 26*SZVREG
> +       vld     vr27, sp, 13*SZREG + 27*SZVREG
> +       vld     vr28, sp, 13*SZREG + 28*SZVREG
> +       vld     vr29, sp, 13*SZREG + 29*SZVREG
> +       vld     vr30, sp, 13*SZREG + 30*SZVREG
> +       vld     vr31, sp, 13*SZREG + 31*SZVREG
> +       REG_L   t0, sp, 32*SZVREG
> +       movgr2fcsr  fcsr0, t0
> +#elif !defined __loongarch_soft_float
> +       FREG_L  fa0, sp, 13*SZREG + 0*SZFREG
> +       FREG_L  fa1, sp, 13*SZREG + 1*SZFREG
> +       FREG_L  fa2, sp, 13*SZREG + 2*SZFREG
> +       FREG_L  fa3, sp, 13*SZREG + 3*SZFREG
> +       FREG_L  fa4, sp, 13*SZREG + 4*SZFREG
> +       FREG_L  fa5, sp, 13*SZREG + 5*SZFREG
> +       FREG_L  fa6, sp, 13*SZREG + 6*SZFREG
> +       FREG_L  fa7, sp, 13*SZREG + 7*SZFREG
> +       FREG_L  ft0, sp, 13*SZREG + 8*SZFREG
> +       FREG_L  ft1, sp, 13*SZREG + 9*SZFREG
> +       FREG_L  ft2, sp, 13*SZREG + 10*SZFREG
> +       FREG_L  ft3, sp, 13*SZREG + 11*SZFREG
> +       FREG_L  ft4, sp, 13*SZREG + 12*SZFREG
> +       FREG_L  ft5, sp, 13*SZREG + 13*SZFREG
> +       FREG_L  ft6, sp, 13*SZREG + 14*SZFREG
> +       FREG_L  ft7, sp, 13*SZREG + 15*SZFREG
> +       FREG_L  ft8, sp, 13*SZREG + 16*SZFREG
> +       FREG_L  ft9, sp, 13*SZREG + 17*SZFREG
> +       FREG_L  ft10, sp, 13*SZREG + 18*SZFREG
> +       FREG_L  ft11, sp, 13*SZREG + 19*SZFREG
> +       FREG_L  ft12, sp, 13*SZREG + 20*SZFREG
> +       FREG_L  ft13, sp, 13*SZREG + 21*SZFREG
> +       FREG_L  ft14, sp, 13*SZREG + 22*SZFREG
> +       FREG_L  ft15, sp, 13*SZREG + 23*SZFREG
> +       REG_L   t0, sp, 24*SZFREG
> +       movgr2fcsr  fcsr0, t0
> +#endif /* #ifdef USE_LASX  */
> +
> +       ADDI    sp, sp, FRAME_SIZE
> +       b       Lret
> +       cfi_endproc
> +       .size   _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> new file mode 100644
> index 0000000000..4a17079169
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -0,0 +1,93 @@
> +/* Thread-local storage handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <tls.h>
> +#include "tlsdesc.h"
> +
> +       .text
> +
> +       /* Compute the thread pointer offset for symbols in the static
> +          TLS block. The offset is the same for all threads.
> +          Prototype:
> +          _dl_tlsdesc_return (tlsdesc *);  */
> +       .hidden _dl_tlsdesc_return
> +       .global _dl_tlsdesc_return
> +       .type   _dl_tlsdesc_return,%function
> +       cfi_startproc
> +       .align 2
> +_dl_tlsdesc_return:
> +       REG_L  a0, a0, 8
> +       RET
> +       cfi_endproc
> +       .size   _dl_tlsdesc_return, .-_dl_tlsdesc_return
> +
> +       /* Handler for undefined weak TLS symbols.
> +          Prototype:
> +          _dl_tlsdesc_undefweak (tlsdesc *);
> +
> +          The second word of the descriptor contains the addend.
> +          Return the addend minus the thread pointer. This ensures
> +          that when the caller adds on the thread pointer it gets back
> +          the addend.  */
> +       .hidden _dl_tlsdesc_undefweak
> +       .global _dl_tlsdesc_undefweak
> +       .type   _dl_tlsdesc_undefweak,%function
> +       cfi_startproc
> +       .align  2
> +_dl_tlsdesc_undefweak:
> +       REG_L   a0, a0, 8
> +       sub.d   a0, a0, tp
> +       RET
> +       cfi_endproc
> +       .size   _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
> +
> +
> +#ifdef SHARED
> +
> +#if !defined __loongarch_soft_float
> +
> +#define USE_LASX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
> +#define Lret Lret_lasx
> +#define Lslow Lslow_lasx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LASX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#define USE_LSX
> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
> +#define Lret Lret_lsx
> +#define Lslow Lslow_lsx
> +#include "dl-tlsdesc-dynamic.h"
> +#undef FRAME_SIZE
> +#undef USE_LSX
> +#undef _dl_tlsdesc_dynamic
> +#undef Lret
> +#undef Lslow
> +
> +#endif
> +
> +#include "dl-tlsdesc-dynamic.h"
> +
> +#endif /* #ifdef SHARED  */
> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
> new file mode 100644
> index 0000000000..988037a714
> --- /dev/null
> +++ b/sysdeps/loongarch/dl-tlsdesc.h
> @@ -0,0 +1,53 @@
> +/* Thread-local storage descriptor handling in the ELF dynamic linker.
> +   LoongArch version.
> +   Copyright (C) 2011-2023 Free Software Foundation, Inc.
> +
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _DL_TLSDESC_H
> +#define _DL_TLSDESC_H
> +
> +#include <dl-tls.h>
> +
> +/* Type used to represent a TLS descriptor in the GOT.  */
> +struct tlsdesc
> +{
> +  ptrdiff_t (*entry) (struct tlsdesc *);
> +  void *arg;
> +};
> +
> +/* Type used as the argument in a TLS descriptor for a symbol that
> +   needs dynamic TLS offsets.  */
> +struct tlsdesc_dynamic_arg
> +{
> +  tls_index tlsinfo;
> +  size_t gen_count;
> +};
> +
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
> +
> +# ifdef SHARED
> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
> +#if !defined __loongarch_soft_float
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
> +#endif
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
> +#endif
> +
> +#endif
> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h
> index 4d8737ee7f..9b1773634c 100644
> --- a/sysdeps/loongarch/linkmap.h
> +++ b/sysdeps/loongarch/linkmap.h
> @@ -19,4 +19,5 @@
>  struct link_map_machine
>  {
>    ElfW (Addr) plt; /* Address of .plt.  */
> +  void *tlsdesc_table;    /* Address of TLS descriptor hash table.  */
>  };
> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
> index 51521a7eb4..23c1d12914 100644
> --- a/sysdeps/loongarch/sys/asm.h
> +++ b/sysdeps/loongarch/sys/asm.h
> @@ -25,6 +25,7 @@
>  /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
>  #define SZREG 8
>  #define SZFREG 8
> +#define SZFCSREG 4
>  #define SZVREG 16
>  #define SZXREG 32
>  #define REG_L ld.d
> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h
> index f61ee25b25..80ce3e9c00 100644
> --- a/sysdeps/loongarch/sys/regdef.h
> +++ b/sysdeps/loongarch/sys/regdef.h
> @@ -97,6 +97,7 @@
>  #define fcc5 $fcc5
>  #define fcc6 $fcc6
>  #define fcc7 $fcc7
> +#define fcsr0 $fcsr0
>
>  #define vr0 $vr0
>  #define vr1 $vr1
> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c
> new file mode 100644
> index 0000000000..a357e7619f
> --- /dev/null
> +++ b/sysdeps/loongarch/tlsdesc.c
> @@ -0,0 +1,39 @@
> +/* Manage TLS descriptors.  AArch64 version.
                                                  Change it to "LoongArch version".
> +
>


-- 
H.J.

  parent reply	other threads:[~2024-03-04 15:42 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-29  1:43 mengqinggang
2024-02-29  2:56 ` caiyinyu
2024-03-04 15:42 ` H.J. Lu [this message]
2024-03-08  7:45   ` mengqinggang
2024-03-05 19:29 ` Adhemerval Zanella Netto
2024-03-08  7:53   ` mengqinggang
2024-03-08 14:10     ` Adhemerval Zanella Netto
2024-03-11  8:45       ` mengqinggang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAMe9rOq3YH1stU1NsVrtyAqbot8O_na6eQ=Na-m0GEyeLDb7ow@mail.gmail.com' \
    --to=hjl.tools@gmail.com \
    --cc=adhemerval.zanella@linaro.org \
    --cc=cailulu@loongson.cn \
    --cc=caiyinyu@loongson.cn \
    --cc=chenglulu@loongson.cn \
    --cc=hejinyang@loongson.cn \
    --cc=i.swmail@xen0n.name \
    --cc=libc-alpha@sourceware.org \
    --cc=luweining@loongson.cn \
    --cc=maskray@google.com \
    --cc=mengqinggang@loongson.cn \
    --cc=wanglei@loongson.cn \
    --cc=xry111@xry111.site \
    --cc=xuchenghua@loongson.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).