* [PATCH v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic @ 2024-06-26 6:34 mengqinggang 2024-07-01 9:27 ` mengqinggang 0 siblings, 1 reply; 4+ messages in thread From: mengqinggang @ 2024-06-26 6:34 UTC (permalink / raw) To: libc-alpha Cc: adhemerval.zanella, xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail, maskray, luweining, wanglei, hejinyang, mengqinggang Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. Conflicting cfi instructions can be distributed to the three functions. --- Changes v1 -> v2: - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. v1 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html sysdeps/loongarch/dl-machine.h | 7 + sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 +++++++++++++++++++++++++ sysdeps/loongarch/dl-tlsdesc.S | 386 ++--------------------- sysdeps/loongarch/dl-tlsdesc.h | 4 + 4 files changed, 436 insertions(+), 364 deletions(-) create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index ab6f1da7c0..04fabbf598 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], { td->arg = _dl_make_tlsdesc_dynamic (sym_map, sym->st_value + reloc->r_addend); +# ifndef __loongarch_soft_float + if (SUPPORT_LASX) + td->entry = _dl_tlsdesc_dynamic_lasx; + else if (SUPPORT_LSX) + td->entry = _dl_tlsdesc_dynamic_lsx; + else +# endif td->entry = _dl_tlsdesc_dynamic; } else diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h new file mode 100644 index 0000000000..5b1f43aaf4 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h @@ -0,0 +1,403 @@ +/* Thread-local storage handling in the ELF dynamic linker. + LoongArch version. 
+ Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) + + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); + if (__glibc_likely (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } */ + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. 
*/ + ADDI sp, sp, -32 + cfi_adjust_cfa_offset (32) + REG_S t0, sp, 0 + REG_S t1, sp, 8 + REG_S t2, sp, 16 + cfi_rel_offset (12, 0) + cfi_rel_offset (13, 8) + cfi_rel_offset (14, 16) + +/* Runtime Storage Layout of Thread-Local Storage + TP point to the start of TLS block. + + dtv +Low address TCB ----------------> dtv0(counter) + TP --> static_block0 <----- dtv1 + static_block1 <----- dtv2 + static_block2 <----- dtv3 + dynamic_block0 <----- dtv4 +Hign address dynamic_block1 <----- dtv5 */ + + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ + /* If dtv[0].counter < td->gen_count, goto slow path. */ + bltu t2, t1, .Lslow + + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ + slli.d t1, t1, 4 + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ + li.d t2, TLS_DTV_UNALLOCATED + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, + goto slow path. */ + beq t1, t2, .Lslow + + cfi_remember_state + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ + add.d a0, t1, t2 +.Lret: + sub.d a0, a0, tp + REG_L t0, sp, 0 + REG_L t1, sp, 8 + REG_L t2, sp, 16 + ADDI sp, sp, 32 + cfi_adjust_cfa_offset (-32) + RET + +.Lslow: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the register that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. 
*/ + cfi_restore_state + ADDI sp, sp, -FRAME_SIZE + cfi_adjust_cfa_offset (FRAME_SIZE) + REG_S ra, sp, 0 * SZREG + REG_S a1, sp, 1 * SZREG + REG_S a2, sp, 2 * SZREG + REG_S a3, sp, 3 * SZREG + REG_S a4, sp, 4 * SZREG + REG_S a5, sp, 5 * SZREG + REG_S a6, sp, 6 * SZREG + REG_S a7, sp, 7 * SZREG + REG_S t3, sp, 8 * SZREG + REG_S t4, sp, 9 * SZREG + REG_S t5, sp, 10 * SZREG + REG_S t6, sp, 11 * SZREG + REG_S t7, sp, 12 * SZREG + REG_S t8, sp, 13 * SZREG + cfi_rel_offset (1, 0 * SZREG) + cfi_rel_offset (5, 1 * SZREG) + cfi_rel_offset (6, 2 * SZREG) + cfi_rel_offset (7, 3 * SZREG) + cfi_rel_offset (8, 4 * SZREG) + cfi_rel_offset (9, 5 * SZREG) + cfi_rel_offset (10, 6 * SZREG) + cfi_rel_offset (11, 7 * SZREG) + cfi_rel_offset (15, 8 * SZREG) + cfi_rel_offset (16, 9 * SZREG) + cfi_rel_offset (17, 10 * SZREG) + cfi_rel_offset (18, 11 * SZREG) + cfi_rel_offset (19, 12 * SZREG) + cfi_rel_offset (20, 13 * SZREG) + +#ifndef __loongarch_soft_float + + /* Save fcsr0 register. + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases + of some fields in fcsr0. */ + movfcsr2gr t0, fcsr0 + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */ + +#ifdef USE_LASX + + /* Save 256-bit vector registers. + FIXME: Without vector ABI, save all vector registers. 
*/ + ADDI sp, sp, -FRAME_SIZE_LASX + cfi_adjust_cfa_offset (FRAME_SIZE_LASX) + xvst xr0, sp, 0*SZXREG + xvst xr1, sp, 1*SZXREG + xvst xr2, sp, 2*SZXREG + xvst xr3, sp, 3*SZXREG + xvst xr4, sp, 4*SZXREG + xvst xr5, sp, 5*SZXREG + xvst xr6, sp, 6*SZXREG + xvst xr7, sp, 7*SZXREG + xvst xr8, sp, 8*SZXREG + xvst xr9, sp, 9*SZXREG + xvst xr10, sp, 10*SZXREG + xvst xr11, sp, 11*SZXREG + xvst xr12, sp, 12*SZXREG + xvst xr13, sp, 13*SZXREG + xvst xr14, sp, 14*SZXREG + xvst xr15, sp, 15*SZXREG + xvst xr16, sp, 16*SZXREG + xvst xr17, sp, 17*SZXREG + xvst xr18, sp, 18*SZXREG + xvst xr19, sp, 19*SZXREG + xvst xr20, sp, 20*SZXREG + xvst xr21, sp, 21*SZXREG + xvst xr22, sp, 22*SZXREG + xvst xr23, sp, 23*SZXREG + xvst xr24, sp, 24*SZXREG + xvst xr25, sp, 25*SZXREG + xvst xr26, sp, 26*SZXREG + xvst xr27, sp, 27*SZXREG + xvst xr28, sp, 28*SZXREG + xvst xr29, sp, 29*SZXREG + xvst xr30, sp, 30*SZXREG + xvst xr31, sp, 31*SZXREG + +#elif defined USE_LSX + + /* Save 128-bit vector registers. */ + ADDI sp, sp, -FRAME_SIZE_LSX + cfi_adjust_cfa_offset (FRAME_SIZE_LSX) + vst vr0, sp, 0*SZVREG + vst vr1, sp, 1*SZVREG + vst vr2, sp, 2*SZVREG + vst vr3, sp, 3*SZVREG + vst vr4, sp, 4*SZVREG + vst vr5, sp, 5*SZVREG + vst vr6, sp, 6*SZVREG + vst vr7, sp, 7*SZVREG + vst vr8, sp, 8*SZVREG + vst vr9, sp, 9*SZVREG + vst vr10, sp, 10*SZVREG + vst vr11, sp, 11*SZVREG + vst vr12, sp, 12*SZVREG + vst vr13, sp, 13*SZVREG + vst vr14, sp, 14*SZVREG + vst vr15, sp, 15*SZVREG + vst vr16, sp, 16*SZVREG + vst vr17, sp, 17*SZVREG + vst vr18, sp, 18*SZVREG + vst vr19, sp, 19*SZVREG + vst vr20, sp, 20*SZVREG + vst vr21, sp, 21*SZVREG + vst vr22, sp, 22*SZVREG + vst vr23, sp, 23*SZVREG + vst vr24, sp, 24*SZVREG + vst vr25, sp, 25*SZVREG + vst vr26, sp, 26*SZVREG + vst vr27, sp, 27*SZVREG + vst vr28, sp, 28*SZVREG + vst vr29, sp, 29*SZVREG + vst vr30, sp, 30*SZVREG + vst vr31, sp, 31*SZVREG + +# else + + /* Save float registers. 
*/ + ADDI sp, sp, -FRAME_SIZE_FLOAT + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT) + FREG_S fa0, sp, 0*SZFREG + FREG_S fa1, sp, 1*SZFREG + FREG_S fa2, sp, 2*SZFREG + FREG_S fa3, sp, 3*SZFREG + FREG_S fa4, sp, 4*SZFREG + FREG_S fa5, sp, 5*SZFREG + FREG_S fa6, sp, 6*SZFREG + FREG_S fa7, sp, 7*SZFREG + FREG_S ft0, sp, 8*SZFREG + FREG_S ft1, sp, 9*SZFREG + FREG_S ft2, sp, 10*SZFREG + FREG_S ft3, sp, 11*SZFREG + FREG_S ft4, sp, 12*SZFREG + FREG_S ft5, sp, 13*SZFREG + FREG_S ft6, sp, 14*SZFREG + FREG_S ft7, sp, 15*SZFREG + FREG_S ft8, sp, 16*SZFREG + FREG_S ft9, sp, 17*SZFREG + FREG_S ft10, sp, 18*SZFREG + FREG_S ft11, sp, 19*SZFREG + FREG_S ft12, sp, 20*SZFREG + FREG_S ft13, sp, 21*SZFREG + FREG_S ft14, sp, 22*SZFREG + FREG_S ft15, sp, 23*SZFREG + +#endif /* #ifdef USE_LASX */ +#endif /* #ifndef __loongarch_soft_float */ + + bl HIDDEN_JUMPTARGET(__tls_get_addr) + ADDI a0, a0, -TLS_DTV_OFFSET + +#ifndef __loongarch_soft_float +#ifdef USE_LASX + + /* Restore 256-bit vector registers. */ + xvld xr0, sp, 0*SZXREG + xvld xr1, sp, 1*SZXREG + xvld xr2, sp, 2*SZXREG + xvld xr3, sp, 3*SZXREG + xvld xr4, sp, 4*SZXREG + xvld xr5, sp, 5*SZXREG + xvld xr6, sp, 6*SZXREG + xvld xr7, sp, 7*SZXREG + xvld xr8, sp, 8*SZXREG + xvld xr9, sp, 9*SZXREG + xvld xr10, sp, 10*SZXREG + xvld xr11, sp, 11*SZXREG + xvld xr12, sp, 12*SZXREG + xvld xr13, sp, 13*SZXREG + xvld xr14, sp, 14*SZXREG + xvld xr15, sp, 15*SZXREG + xvld xr16, sp, 16*SZXREG + xvld xr17, sp, 17*SZXREG + xvld xr18, sp, 18*SZXREG + xvld xr19, sp, 19*SZXREG + xvld xr20, sp, 20*SZXREG + xvld xr21, sp, 21*SZXREG + xvld xr22, sp, 22*SZXREG + xvld xr23, sp, 23*SZXREG + xvld xr24, sp, 24*SZXREG + xvld xr25, sp, 25*SZXREG + xvld xr26, sp, 26*SZXREG + xvld xr27, sp, 27*SZXREG + xvld xr28, sp, 28*SZXREG + xvld xr29, sp, 29*SZXREG + xvld xr30, sp, 30*SZXREG + xvld xr31, sp, 31*SZXREG + ADDI sp, sp, FRAME_SIZE_LASX + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX) + +#elif defined USE_LSX + + /* Restore 128-bit vector registers. 
*/ + vld vr0, sp, 0*SZVREG + vld vr1, sp, 1*SZVREG + vld vr2, sp, 2*SZVREG + vld vr3, sp, 3*SZVREG + vld vr4, sp, 4*SZVREG + vld vr5, sp, 5*SZVREG + vld vr6, sp, 6*SZVREG + vld vr7, sp, 7*SZVREG + vld vr8, sp, 8*SZVREG + vld vr9, sp, 9*SZVREG + vld vr10, sp, 10*SZVREG + vld vr11, sp, 11*SZVREG + vld vr12, sp, 12*SZVREG + vld vr13, sp, 13*SZVREG + vld vr14, sp, 14*SZVREG + vld vr15, sp, 15*SZVREG + vld vr16, sp, 16*SZVREG + vld vr17, sp, 17*SZVREG + vld vr18, sp, 18*SZVREG + vld vr19, sp, 19*SZVREG + vld vr20, sp, 20*SZVREG + vld vr21, sp, 21*SZVREG + vld vr22, sp, 22*SZVREG + vld vr23, sp, 23*SZVREG + vld vr24, sp, 24*SZVREG + vld vr25, sp, 25*SZVREG + vld vr26, sp, 26*SZVREG + vld vr27, sp, 27*SZVREG + vld vr28, sp, 28*SZVREG + vld vr29, sp, 29*SZVREG + vld vr30, sp, 30*SZVREG + vld vr31, sp, 31*SZVREG + ADDI sp, sp, FRAME_SIZE_LSX + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX) + +#else + + /* Restore float registers. */ + FREG_L fa0, sp, 0*SZFREG + FREG_L fa1, sp, 1*SZFREG + FREG_L fa2, sp, 2*SZFREG + FREG_L fa3, sp, 3*SZFREG + FREG_L fa4, sp, 4*SZFREG + FREG_L fa5, sp, 5*SZFREG + FREG_L fa6, sp, 6*SZFREG + FREG_L fa7, sp, 7*SZFREG + FREG_L ft0, sp, 8*SZFREG + FREG_L ft1, sp, 9*SZFREG + FREG_L ft2, sp, 10*SZFREG + FREG_L ft3, sp, 11*SZFREG + FREG_L ft4, sp, 12*SZFREG + FREG_L ft5, sp, 13*SZFREG + FREG_L ft6, sp, 14*SZFREG + FREG_L ft7, sp, 15*SZFREG + FREG_L ft8, sp, 16*SZFREG + FREG_L ft9, sp, 17*SZFREG + FREG_L ft10, sp, 18*SZFREG + FREG_L ft11, sp, 19*SZFREG + FREG_L ft12, sp, 20*SZFREG + FREG_L ft13, sp, 21*SZFREG + FREG_L ft14, sp, 22*SZFREG + FREG_L ft15, sp, 23*SZFREG + ADDI sp, sp, FRAME_SIZE_FLOAT + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT) + +#endif /* #ifdef USE_LASX */ + + /* Restore fcsr0 register. 
*/ + ld.w t0, sp, FRAME_SIZE + 24 + movgr2fcsr fcsr0, t0 + +#endif /* #ifndef __loongarch_soft_float */ + + REG_L ra, sp, 0 * SZREG + REG_L a1, sp, 1 * SZREG + REG_L a2, sp, 2 * SZREG + REG_L a3, sp, 3 * SZREG + REG_L a4, sp, 4 * SZREG + REG_L a5, sp, 5 * SZREG + REG_L a6, sp, 6 * SZREG + REG_L a7, sp, 7 * SZREG + REG_L t3, sp, 8 * SZREG + REG_L t4, sp, 9 * SZREG + REG_L t5, sp, 10 * SZREG + REG_L t6, sp, 11 * SZREG + REG_L t7, sp, 12 * SZREG + REG_L t8, sp, 13 * SZREG + ADDI sp, sp, FRAME_SIZE + cfi_adjust_cfa_offset (-FRAME_SIZE) + + b .Lret + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S index a6627cc754..b6cfd6121d 100644 --- a/sysdeps/loongarch/dl-tlsdesc.S +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak: cfi_endproc .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak - #ifdef SHARED -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) - - /* Handler for dynamic TLS symbols. - Prototype: - _dl_tlsdesc_dynamic (tlsdesc *) ; - - The second word of the descriptor points to a - tlsdesc_dynamic_arg structure. - - Returns the offset between the thread pointer and the - object referenced by the argument. 
- - ptrdiff_t - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) - { - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); - if (__glibc_likely (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val - + td->tlsinfo.ti_offset - - __thread_pointer; - - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; - } */ - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,%function - cfi_startproc - .align 2 -_dl_tlsdesc_dynamic: - /* Save just enough registers to support fast path, if we fall - into slow path we will save additional registers. */ - ADDI sp, sp, -32 - REG_S t0, sp, 0 - REG_S t1, sp, 8 - REG_S t2, sp, 16 - -/* Runtime Storage Layout of Thread-Local Storage - TP point to the start of TLS block. - - dtv -Low address TCB ----------------> dtv0(counter) - TP --> static_block0 <----- dtv1 - static_block1 <----- dtv2 - static_block2 <----- dtv3 - dynamic_block0 <----- dtv4 -Hign address dynamic_block1 <----- dtv5 */ - - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ - /* If dtv[0].counter < td->gen_count, goto slow path. */ - bltu t2, t1, .Lslow - - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ - slli.d t1, t1, 4 - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ - li.d t2, TLS_DTV_UNALLOCATED - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, - goto slow path. 
*/ - beq t1, t2, .Lslow - - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ - add.d a0, t1, t2 -.Lret: - sub.d a0, a0, tp - REG_L t0, sp, 0 - REG_L t1, sp, 8 - REG_L t2, sp, 16 - ADDI sp, sp, 32 - RET - -.Lslow: - /* This is the slow path. We need to call __tls_get_addr() which - means we need to save and restore all the register that the - callee will trash. */ - - /* Save the remaining registers that we must treat as caller save. */ - ADDI sp, sp, -FRAME_SIZE - REG_S ra, sp, 0 * SZREG - REG_S a1, sp, 1 * SZREG - REG_S a2, sp, 2 * SZREG - REG_S a3, sp, 3 * SZREG - REG_S a4, sp, 4 * SZREG - REG_S a5, sp, 5 * SZREG - REG_S a6, sp, 6 * SZREG - REG_S a7, sp, 7 * SZREG - REG_S t3, sp, 8 * SZREG - REG_S t4, sp, 9 * SZREG - REG_S t5, sp, 10 * SZREG - REG_S t6, sp, 11 * SZREG - REG_S t7, sp, 12 * SZREG - REG_S t8, sp, 13 * SZREG - #ifndef __loongarch_soft_float - /* Save fcsr0 register. - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases - of some fields in fcsr0. */ - movfcsr2gr t0, fcsr0 - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ - - /* Whether support LASX. */ - la.global t0, _rtld_global_ro - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET - andi t1, t0, HWCAP_LOONGARCH_LASX - beqz t1, .Llsx - - /* Save 256-bit vector registers. - FIXME: Without vector ABI, save all vector registers. 
*/ - ADDI sp, sp, -FRAME_SIZE_LASX - xvst xr0, sp, 0*SZXREG - xvst xr1, sp, 1*SZXREG - xvst xr2, sp, 2*SZXREG - xvst xr3, sp, 3*SZXREG - xvst xr4, sp, 4*SZXREG - xvst xr5, sp, 5*SZXREG - xvst xr6, sp, 6*SZXREG - xvst xr7, sp, 7*SZXREG - xvst xr8, sp, 8*SZXREG - xvst xr9, sp, 9*SZXREG - xvst xr10, sp, 10*SZXREG - xvst xr11, sp, 11*SZXREG - xvst xr12, sp, 12*SZXREG - xvst xr13, sp, 13*SZXREG - xvst xr14, sp, 14*SZXREG - xvst xr15, sp, 15*SZXREG - xvst xr16, sp, 16*SZXREG - xvst xr17, sp, 17*SZXREG - xvst xr18, sp, 18*SZXREG - xvst xr19, sp, 19*SZXREG - xvst xr20, sp, 20*SZXREG - xvst xr21, sp, 21*SZXREG - xvst xr22, sp, 22*SZXREG - xvst xr23, sp, 23*SZXREG - xvst xr24, sp, 24*SZXREG - xvst xr25, sp, 25*SZXREG - xvst xr26, sp, 26*SZXREG - xvst xr27, sp, 27*SZXREG - xvst xr28, sp, 28*SZXREG - xvst xr29, sp, 29*SZXREG - xvst xr30, sp, 30*SZXREG - xvst xr31, sp, 31*SZXREG - b .Ltga - -.Llsx: - /* Whether support LSX. */ - andi t1, t0, HWCAP_LOONGARCH_LSX - beqz t1, .Lfloat - - /* Save 128-bit vector registers. */ - ADDI sp, sp, -FRAME_SIZE_LSX - vst vr0, sp, 0*SZVREG - vst vr1, sp, 1*SZVREG - vst vr2, sp, 2*SZVREG - vst vr3, sp, 3*SZVREG - vst vr4, sp, 4*SZVREG - vst vr5, sp, 5*SZVREG - vst vr6, sp, 6*SZVREG - vst vr7, sp, 7*SZVREG - vst vr8, sp, 8*SZVREG - vst vr9, sp, 9*SZVREG - vst vr10, sp, 10*SZVREG - vst vr11, sp, 11*SZVREG - vst vr12, sp, 12*SZVREG - vst vr13, sp, 13*SZVREG - vst vr14, sp, 14*SZVREG - vst vr15, sp, 15*SZVREG - vst vr16, sp, 16*SZVREG - vst vr17, sp, 17*SZVREG - vst vr18, sp, 18*SZVREG - vst vr19, sp, 19*SZVREG - vst vr20, sp, 20*SZVREG - vst vr21, sp, 21*SZVREG - vst vr22, sp, 22*SZVREG - vst vr23, sp, 23*SZVREG - vst vr24, sp, 24*SZVREG - vst vr25, sp, 25*SZVREG - vst vr26, sp, 26*SZVREG - vst vr27, sp, 27*SZVREG - vst vr28, sp, 28*SZVREG - vst vr29, sp, 29*SZVREG - vst vr30, sp, 30*SZVREG - vst vr31, sp, 31*SZVREG - b .Ltga - -.Lfloat: - /* Save float registers. 
*/ - ADDI sp, sp, -FRAME_SIZE_FLOAT - FREG_S fa0, sp, 0*SZFREG - FREG_S fa1, sp, 1*SZFREG - FREG_S fa2, sp, 2*SZFREG - FREG_S fa3, sp, 3*SZFREG - FREG_S fa4, sp, 4*SZFREG - FREG_S fa5, sp, 5*SZFREG - FREG_S fa6, sp, 6*SZFREG - FREG_S fa7, sp, 7*SZFREG - FREG_S ft0, sp, 8*SZFREG - FREG_S ft1, sp, 9*SZFREG - FREG_S ft2, sp, 10*SZFREG - FREG_S ft3, sp, 11*SZFREG - FREG_S ft4, sp, 12*SZFREG - FREG_S ft5, sp, 13*SZFREG - FREG_S ft6, sp, 14*SZFREG - FREG_S ft7, sp, 15*SZFREG - FREG_S ft8, sp, 16*SZFREG - FREG_S ft9, sp, 17*SZFREG - FREG_S ft10, sp, 18*SZFREG - FREG_S ft11, sp, 19*SZFREG - FREG_S ft12, sp, 20*SZFREG - FREG_S ft13, sp, 21*SZFREG - FREG_S ft14, sp, 22*SZFREG - FREG_S ft15, sp, 23*SZFREG - -#endif /* #ifndef __loongarch_soft_float */ - -.Ltga: - bl HIDDEN_JUMPTARGET(__tls_get_addr) - ADDI a0, a0, -TLS_DTV_OFFSET - -#ifndef __loongarch_soft_float - - la.global t0, _rtld_global_ro - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET - andi t1, t0, HWCAP_LOONGARCH_LASX - beqz t1, .Llsx1 - - /* Restore 256-bit vector registers. 
*/ - xvld xr0, sp, 0*SZXREG - xvld xr1, sp, 1*SZXREG - xvld xr2, sp, 2*SZXREG - xvld xr3, sp, 3*SZXREG - xvld xr4, sp, 4*SZXREG - xvld xr5, sp, 5*SZXREG - xvld xr6, sp, 6*SZXREG - xvld xr7, sp, 7*SZXREG - xvld xr8, sp, 8*SZXREG - xvld xr9, sp, 9*SZXREG - xvld xr10, sp, 10*SZXREG - xvld xr11, sp, 11*SZXREG - xvld xr12, sp, 12*SZXREG - xvld xr13, sp, 13*SZXREG - xvld xr14, sp, 14*SZXREG - xvld xr15, sp, 15*SZXREG - xvld xr16, sp, 16*SZXREG - xvld xr17, sp, 17*SZXREG - xvld xr18, sp, 18*SZXREG - xvld xr19, sp, 19*SZXREG - xvld xr20, sp, 20*SZXREG - xvld xr21, sp, 21*SZXREG - xvld xr22, sp, 22*SZXREG - xvld xr23, sp, 23*SZXREG - xvld xr24, sp, 24*SZXREG - xvld xr25, sp, 25*SZXREG - xvld xr26, sp, 26*SZXREG - xvld xr27, sp, 27*SZXREG - xvld xr28, sp, 28*SZXREG - xvld xr29, sp, 29*SZXREG - xvld xr30, sp, 30*SZXREG - xvld xr31, sp, 31*SZXREG - ADDI sp, sp, FRAME_SIZE_LASX - b .Lfcsr - -.Llsx1: - andi t1, t0, HWCAP_LOONGARCH_LSX - beqz t1, .Lfloat1 - - /* Restore 128-bit vector registers. */ - vld vr0, sp, 0*SZVREG - vld vr1, sp, 1*SZVREG - vld vr2, sp, 2*SZVREG - vld vr3, sp, 3*SZVREG - vld vr4, sp, 4*SZVREG - vld vr5, sp, 5*SZVREG - vld vr6, sp, 6*SZVREG - vld vr7, sp, 7*SZVREG - vld vr8, sp, 8*SZVREG - vld vr9, sp, 9*SZVREG - vld vr10, sp, 10*SZVREG - vld vr11, sp, 11*SZVREG - vld vr12, sp, 12*SZVREG - vld vr13, sp, 13*SZVREG - vld vr14, sp, 14*SZVREG - vld vr15, sp, 15*SZVREG - vld vr16, sp, 16*SZVREG - vld vr17, sp, 17*SZVREG - vld vr18, sp, 18*SZVREG - vld vr19, sp, 19*SZVREG - vld vr20, sp, 20*SZVREG - vld vr21, sp, 21*SZVREG - vld vr22, sp, 22*SZVREG - vld vr23, sp, 23*SZVREG - vld vr24, sp, 24*SZVREG - vld vr25, sp, 25*SZVREG - vld vr26, sp, 26*SZVREG - vld vr27, sp, 27*SZVREG - vld vr28, sp, 28*SZVREG - vld vr29, sp, 29*SZVREG - vld vr30, sp, 30*SZVREG - vld vr31, sp, 31*SZVREG - ADDI sp, sp, FRAME_SIZE_LSX - b .Lfcsr - -.Lfloat1: - /* Restore float registers. 
*/ - FREG_L fa0, sp, 0*SZFREG - FREG_L fa1, sp, 1*SZFREG - FREG_L fa2, sp, 2*SZFREG - FREG_L fa3, sp, 3*SZFREG - FREG_L fa4, sp, 4*SZFREG - FREG_L fa5, sp, 5*SZFREG - FREG_L fa6, sp, 6*SZFREG - FREG_L fa7, sp, 7*SZFREG - FREG_L ft0, sp, 8*SZFREG - FREG_L ft1, sp, 9*SZFREG - FREG_L ft2, sp, 10*SZFREG - FREG_L ft3, sp, 11*SZFREG - FREG_L ft4, sp, 12*SZFREG - FREG_L ft5, sp, 13*SZFREG - FREG_L ft6, sp, 14*SZFREG - FREG_L ft7, sp, 15*SZFREG - FREG_L ft8, sp, 16*SZFREG - FREG_L ft9, sp, 17*SZFREG - FREG_L ft10, sp, 18*SZFREG - FREG_L ft11, sp, 19*SZFREG - FREG_L ft12, sp, 20*SZFREG - FREG_L ft13, sp, 21*SZFREG - FREG_L ft14, sp, 22*SZFREG - FREG_L ft15, sp, 23*SZFREG - ADDI sp, sp, FRAME_SIZE_FLOAT - -.Lfcsr: - /* Restore fcsr0 register. */ - ld.w t0, sp, FRAME_SIZE + 24 - movgr2fcsr fcsr0, t0 +#define USE_LASX +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx +#define Lret Lret_lasx +#define Lslow Lslow_lasx +#include "dl-tlsdesc-dynamic.h" +#undef FRAME_SIZE +#undef USE_LASX +#undef _dl_tlsdesc_dynamic +#undef Lret +#undef Lslow + +#define USE_LSX +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx +#define Lret Lret_lsx +#define Lslow Lslow_lsx +#include "dl-tlsdesc-dynamic.h" +#undef FRAME_SIZE +#undef USE_LSX +#undef _dl_tlsdesc_dynamic +#undef Lret +#undef Lslow #endif /* #ifndef __loongarch_soft_float */ - REG_L ra, sp, 0 * SZREG - REG_L a1, sp, 1 * SZREG - REG_L a2, sp, 2 * SZREG - REG_L a3, sp, 3 * SZREG - REG_L a4, sp, 4 * SZREG - REG_L a5, sp, 5 * SZREG - REG_L a6, sp, 6 * SZREG - REG_L a7, sp, 7 * SZREG - REG_L t3, sp, 8 * SZREG - REG_L t4, sp, 9 * SZREG - REG_L t5, sp, 10 * SZREG - REG_L t6, sp, 11 * SZREG - REG_L t7, sp, 12 * SZREG - REG_L t8, sp, 13 * SZREG - ADDI sp, sp, FRAME_SIZE - - b .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic - .hidden HIDDEN_JUMPTARGET(__tls_get_addr) +#include "dl-tlsdesc-dynamic.h" #endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h index 
ff8c69cb93..45c43a5b52 100644 --- a/sysdeps/loongarch/dl-tlsdesc.h +++ b/sysdeps/loongarch/dl-tlsdesc.h @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); #ifdef SHARED extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); +#ifndef __loongarch_soft_float +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); +#endif extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); #endif -- 2.38.1 ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic 2024-06-26 6:34 [PATCH v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic mengqinggang @ 2024-07-01 9:27 ` mengqinggang 2024-07-02 10:44 ` Jinyang He 0 siblings, 1 reply; 4+ messages in thread From: mengqinggang @ 2024-07-01 9:27 UTC (permalink / raw) To: libc-alpha Cc: adhemerval.zanella, xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail, maskray, luweining, wanglei, hejinyang Ping. The reason for splitting _dl_tlsdesc_dynamic into three functions: in a single _dl_tlsdesc_dynamic, there are three cfi_adjust_cfa_offset directives, one each for the Float/LSX/LASX paths. All three cfi_adjust_cfa_offset directives are always applied during stack unwinding, but only one of the corresponding instructions that move the stack pointer down is actually executed. This results in an incorrect CFA address. With three _dl_tlsdesc_dynamic functions, the three cfi_adjust_cfa_offset directives can be distributed across the three functions, so that each cfi directive corresponds to the stack-adjusting instruction that is actually executed. 在 2024/6/26 下午2:34, mengqinggang 写道: > Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, > _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. > Conflicting cfi instructions can be distributed to the > three functions. > --- > Changes v1 -> v2: > - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, > _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. 
> > v1 link: https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html > > sysdeps/loongarch/dl-machine.h | 7 + > sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 +++++++++++++++++++++++++ > sysdeps/loongarch/dl-tlsdesc.S | 386 ++--------------------- > sysdeps/loongarch/dl-tlsdesc.h | 4 + > 4 files changed, 436 insertions(+), 364 deletions(-) > create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h > > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index ab6f1da7c0..04fabbf598 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], > { > td->arg = _dl_make_tlsdesc_dynamic (sym_map, > sym->st_value + reloc->r_addend); > +# ifndef __loongarch_soft_float > + if (SUPPORT_LASX) > + td->entry = _dl_tlsdesc_dynamic_lasx; > + else if (SUPPORT_LSX) > + td->entry = _dl_tlsdesc_dynamic_lsx; > + else > +# endif > td->entry = _dl_tlsdesc_dynamic; > } > else > diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > new file mode 100644 > index 0000000000..5b1f43aaf4 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > @@ -0,0 +1,403 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2024 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) > +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) > +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) > +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) > + > + /* Handler for dynamic TLS symbols. > + Prototype: > + _dl_tlsdesc_dynamic (tlsdesc *) ; > + > + The second word of the descriptor points to a > + tlsdesc_dynamic_arg structure. > + > + Returns the offset between the thread pointer and the > + object referenced by the argument. > + > + ptrdiff_t > + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > + { > + struct tlsdesc_dynamic_arg *td = tdp->arg; > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); > + if (__glibc_likely (td->gen_count <= dtv[0].counter > + && (dtv[td->tlsinfo.ti_module].pointer.val > + != TLS_DTV_UNALLOCATED), > + 1)) > + return dtv[td->tlsinfo.ti_module].pointer.val > + + td->tlsinfo.ti_offset > + - __thread_pointer; > + > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > + } */ > + .hidden _dl_tlsdesc_dynamic > + .global _dl_tlsdesc_dynamic > + .type _dl_tlsdesc_dynamic,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_dynamic: > + /* Save just enough registers to support fast path, if we fall > + into slow path we will save additional registers. */ > + ADDI sp, sp, -32 > + cfi_adjust_cfa_offset (32) > + REG_S t0, sp, 0 > + REG_S t1, sp, 8 > + REG_S t2, sp, 16 > + cfi_rel_offset (12, 0) > + cfi_rel_offset (13, 8) > + cfi_rel_offset (14, 16) > + > +/* Runtime Storage Layout of Thread-Local Storage > + TP point to the start of TLS block. 
> + > + dtv > +Low address TCB ----------------> dtv0(counter) > + TP --> static_block0 <----- dtv1 > + static_block1 <----- dtv2 > + static_block2 <----- dtv3 > + dynamic_block0 <----- dtv4 > +Hign address dynamic_block1 <----- dtv5 */ > + > + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ > + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ > + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ > + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ > + /* If dtv[0].counter < td->gen_count, goto slow path. */ > + bltu t2, t1, .Lslow > + > + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ > + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ > + slli.d t1, t1, 4 > + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ > + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ > + li.d t2, TLS_DTV_UNALLOCATED > + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, > + goto slow path. */ > + beq t1, t2, .Lslow > + > + cfi_remember_state > + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ > + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ > + add.d a0, t1, t2 > +.Lret: > + sub.d a0, a0, tp > + REG_L t0, sp, 0 > + REG_L t1, sp, 8 > + REG_L t2, sp, 16 > + ADDI sp, sp, 32 > + cfi_adjust_cfa_offset (-32) > + RET > + > +.Lslow: > + /* This is the slow path. We need to call __tls_get_addr() which > + means we need to save and restore all the register that the > + callee will trash. */ > + > + /* Save the remaining registers that we must treat as caller save. 
*/ > + cfi_restore_state > + ADDI sp, sp, -FRAME_SIZE > + cfi_adjust_cfa_offset (FRAME_SIZE) > + REG_S ra, sp, 0 * SZREG > + REG_S a1, sp, 1 * SZREG > + REG_S a2, sp, 2 * SZREG > + REG_S a3, sp, 3 * SZREG > + REG_S a4, sp, 4 * SZREG > + REG_S a5, sp, 5 * SZREG > + REG_S a6, sp, 6 * SZREG > + REG_S a7, sp, 7 * SZREG > + REG_S t3, sp, 8 * SZREG > + REG_S t4, sp, 9 * SZREG > + REG_S t5, sp, 10 * SZREG > + REG_S t6, sp, 11 * SZREG > + REG_S t7, sp, 12 * SZREG > + REG_S t8, sp, 13 * SZREG > + cfi_rel_offset (1, 0 * SZREG) > + cfi_rel_offset (5, 1 * SZREG) > + cfi_rel_offset (6, 2 * SZREG) > + cfi_rel_offset (7, 3 * SZREG) > + cfi_rel_offset (8, 4 * SZREG) > + cfi_rel_offset (9, 5 * SZREG) > + cfi_rel_offset (10, 6 * SZREG) > + cfi_rel_offset (11, 7 * SZREG) > + cfi_rel_offset (15, 8 * SZREG) > + cfi_rel_offset (16, 9 * SZREG) > + cfi_rel_offset (17, 10 * SZREG) > + cfi_rel_offset (18, 11 * SZREG) > + cfi_rel_offset (19, 12 * SZREG) > + cfi_rel_offset (20, 13 * SZREG) > + > +#ifndef __loongarch_soft_float > + > + /* Save fcsr0 register. > + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases > + of some fields in fcsr0. */ > + movfcsr2gr t0, fcsr0 > + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */ > + > +#ifdef USE_LASX > + > + /* Save 256-bit vector registers. > + FIXME: Without vector ABI, save all vector registers. 
*/ > + ADDI sp, sp, -FRAME_SIZE_LASX > + cfi_adjust_cfa_offset (FRAME_SIZE_LASX) > + xvst xr0, sp, 0*SZXREG > + xvst xr1, sp, 1*SZXREG > + xvst xr2, sp, 2*SZXREG > + xvst xr3, sp, 3*SZXREG > + xvst xr4, sp, 4*SZXREG > + xvst xr5, sp, 5*SZXREG > + xvst xr6, sp, 6*SZXREG > + xvst xr7, sp, 7*SZXREG > + xvst xr8, sp, 8*SZXREG > + xvst xr9, sp, 9*SZXREG > + xvst xr10, sp, 10*SZXREG > + xvst xr11, sp, 11*SZXREG > + xvst xr12, sp, 12*SZXREG > + xvst xr13, sp, 13*SZXREG > + xvst xr14, sp, 14*SZXREG > + xvst xr15, sp, 15*SZXREG > + xvst xr16, sp, 16*SZXREG > + xvst xr17, sp, 17*SZXREG > + xvst xr18, sp, 18*SZXREG > + xvst xr19, sp, 19*SZXREG > + xvst xr20, sp, 20*SZXREG > + xvst xr21, sp, 21*SZXREG > + xvst xr22, sp, 22*SZXREG > + xvst xr23, sp, 23*SZXREG > + xvst xr24, sp, 24*SZXREG > + xvst xr25, sp, 25*SZXREG > + xvst xr26, sp, 26*SZXREG > + xvst xr27, sp, 27*SZXREG > + xvst xr28, sp, 28*SZXREG > + xvst xr29, sp, 29*SZXREG > + xvst xr30, sp, 30*SZXREG > + xvst xr31, sp, 31*SZXREG > + > +#elif defined USE_LSX > + > + /* Save 128-bit vector registers. 
*/ > + ADDI sp, sp, -FRAME_SIZE_LSX > + cfi_adjust_cfa_offset (FRAME_SIZE_LSX) > + vst vr0, sp, 0*SZVREG > + vst vr1, sp, 1*SZVREG > + vst vr2, sp, 2*SZVREG > + vst vr3, sp, 3*SZVREG > + vst vr4, sp, 4*SZVREG > + vst vr5, sp, 5*SZVREG > + vst vr6, sp, 6*SZVREG > + vst vr7, sp, 7*SZVREG > + vst vr8, sp, 8*SZVREG > + vst vr9, sp, 9*SZVREG > + vst vr10, sp, 10*SZVREG > + vst vr11, sp, 11*SZVREG > + vst vr12, sp, 12*SZVREG > + vst vr13, sp, 13*SZVREG > + vst vr14, sp, 14*SZVREG > + vst vr15, sp, 15*SZVREG > + vst vr16, sp, 16*SZVREG > + vst vr17, sp, 17*SZVREG > + vst vr18, sp, 18*SZVREG > + vst vr19, sp, 19*SZVREG > + vst vr20, sp, 20*SZVREG > + vst vr21, sp, 21*SZVREG > + vst vr22, sp, 22*SZVREG > + vst vr23, sp, 23*SZVREG > + vst vr24, sp, 24*SZVREG > + vst vr25, sp, 25*SZVREG > + vst vr26, sp, 26*SZVREG > + vst vr27, sp, 27*SZVREG > + vst vr28, sp, 28*SZVREG > + vst vr29, sp, 29*SZVREG > + vst vr30, sp, 30*SZVREG > + vst vr31, sp, 31*SZVREG > + > +# else > + > + /* Save float registers. 
*/ > + ADDI sp, sp, -FRAME_SIZE_FLOAT > + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT) > + FREG_S fa0, sp, 0*SZFREG > + FREG_S fa1, sp, 1*SZFREG > + FREG_S fa2, sp, 2*SZFREG > + FREG_S fa3, sp, 3*SZFREG > + FREG_S fa4, sp, 4*SZFREG > + FREG_S fa5, sp, 5*SZFREG > + FREG_S fa6, sp, 6*SZFREG > + FREG_S fa7, sp, 7*SZFREG > + FREG_S ft0, sp, 8*SZFREG > + FREG_S ft1, sp, 9*SZFREG > + FREG_S ft2, sp, 10*SZFREG > + FREG_S ft3, sp, 11*SZFREG > + FREG_S ft4, sp, 12*SZFREG > + FREG_S ft5, sp, 13*SZFREG > + FREG_S ft6, sp, 14*SZFREG > + FREG_S ft7, sp, 15*SZFREG > + FREG_S ft8, sp, 16*SZFREG > + FREG_S ft9, sp, 17*SZFREG > + FREG_S ft10, sp, 18*SZFREG > + FREG_S ft11, sp, 19*SZFREG > + FREG_S ft12, sp, 20*SZFREG > + FREG_S ft13, sp, 21*SZFREG > + FREG_S ft14, sp, 22*SZFREG > + FREG_S ft15, sp, 23*SZFREG > + > +#endif /* #ifdef USE_LASX */ > +#endif /* #ifndef __loongarch_soft_float */ > + > + bl HIDDEN_JUMPTARGET(__tls_get_addr) > + ADDI a0, a0, -TLS_DTV_OFFSET > + > +#ifndef __loongarch_soft_float > +#ifdef USE_LASX > + > + /* Restore 256-bit vector registers. 
*/ > + xvld xr0, sp, 0*SZXREG > + xvld xr1, sp, 1*SZXREG > + xvld xr2, sp, 2*SZXREG > + xvld xr3, sp, 3*SZXREG > + xvld xr4, sp, 4*SZXREG > + xvld xr5, sp, 5*SZXREG > + xvld xr6, sp, 6*SZXREG > + xvld xr7, sp, 7*SZXREG > + xvld xr8, sp, 8*SZXREG > + xvld xr9, sp, 9*SZXREG > + xvld xr10, sp, 10*SZXREG > + xvld xr11, sp, 11*SZXREG > + xvld xr12, sp, 12*SZXREG > + xvld xr13, sp, 13*SZXREG > + xvld xr14, sp, 14*SZXREG > + xvld xr15, sp, 15*SZXREG > + xvld xr16, sp, 16*SZXREG > + xvld xr17, sp, 17*SZXREG > + xvld xr18, sp, 18*SZXREG > + xvld xr19, sp, 19*SZXREG > + xvld xr20, sp, 20*SZXREG > + xvld xr21, sp, 21*SZXREG > + xvld xr22, sp, 22*SZXREG > + xvld xr23, sp, 23*SZXREG > + xvld xr24, sp, 24*SZXREG > + xvld xr25, sp, 25*SZXREG > + xvld xr26, sp, 26*SZXREG > + xvld xr27, sp, 27*SZXREG > + xvld xr28, sp, 28*SZXREG > + xvld xr29, sp, 29*SZXREG > + xvld xr30, sp, 30*SZXREG > + xvld xr31, sp, 31*SZXREG > + ADDI sp, sp, FRAME_SIZE_LASX > + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX) > + > +#elif defined USE_LSX > + > + /* Restore 128-bit vector registers. 
*/ > + vld vr0, sp, 0*SZVREG > + vld vr1, sp, 1*SZVREG > + vld vr2, sp, 2*SZVREG > + vld vr3, sp, 3*SZVREG > + vld vr4, sp, 4*SZVREG > + vld vr5, sp, 5*SZVREG > + vld vr6, sp, 6*SZVREG > + vld vr7, sp, 7*SZVREG > + vld vr8, sp, 8*SZVREG > + vld vr9, sp, 9*SZVREG > + vld vr10, sp, 10*SZVREG > + vld vr11, sp, 11*SZVREG > + vld vr12, sp, 12*SZVREG > + vld vr13, sp, 13*SZVREG > + vld vr14, sp, 14*SZVREG > + vld vr15, sp, 15*SZVREG > + vld vr16, sp, 16*SZVREG > + vld vr17, sp, 17*SZVREG > + vld vr18, sp, 18*SZVREG > + vld vr19, sp, 19*SZVREG > + vld vr20, sp, 20*SZVREG > + vld vr21, sp, 21*SZVREG > + vld vr22, sp, 22*SZVREG > + vld vr23, sp, 23*SZVREG > + vld vr24, sp, 24*SZVREG > + vld vr25, sp, 25*SZVREG > + vld vr26, sp, 26*SZVREG > + vld vr27, sp, 27*SZVREG > + vld vr28, sp, 28*SZVREG > + vld vr29, sp, 29*SZVREG > + vld vr30, sp, 30*SZVREG > + vld vr31, sp, 31*SZVREG > + ADDI sp, sp, FRAME_SIZE_LSX > + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX) > + > +#else > + > + /* Restore float registers. */ > + FREG_L fa0, sp, 0*SZFREG > + FREG_L fa1, sp, 1*SZFREG > + FREG_L fa2, sp, 2*SZFREG > + FREG_L fa3, sp, 3*SZFREG > + FREG_L fa4, sp, 4*SZFREG > + FREG_L fa5, sp, 5*SZFREG > + FREG_L fa6, sp, 6*SZFREG > + FREG_L fa7, sp, 7*SZFREG > + FREG_L ft0, sp, 8*SZFREG > + FREG_L ft1, sp, 9*SZFREG > + FREG_L ft2, sp, 10*SZFREG > + FREG_L ft3, sp, 11*SZFREG > + FREG_L ft4, sp, 12*SZFREG > + FREG_L ft5, sp, 13*SZFREG > + FREG_L ft6, sp, 14*SZFREG > + FREG_L ft7, sp, 15*SZFREG > + FREG_L ft8, sp, 16*SZFREG > + FREG_L ft9, sp, 17*SZFREG > + FREG_L ft10, sp, 18*SZFREG > + FREG_L ft11, sp, 19*SZFREG > + FREG_L ft12, sp, 20*SZFREG > + FREG_L ft13, sp, 21*SZFREG > + FREG_L ft14, sp, 22*SZFREG > + FREG_L ft15, sp, 23*SZFREG > + ADDI sp, sp, FRAME_SIZE_FLOAT > + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT) > + > +#endif /* #ifdef USE_LASX */ > + > + /* Restore fcsr0 register. 
*/ > + ld.w t0, sp, FRAME_SIZE + 24 > + movgr2fcsr fcsr0, t0 > + > +#endif /* #ifndef __loongarch_soft_float */ > + > + REG_L ra, sp, 0 * SZREG > + REG_L a1, sp, 1 * SZREG > + REG_L a2, sp, 2 * SZREG > + REG_L a3, sp, 3 * SZREG > + REG_L a4, sp, 4 * SZREG > + REG_L a5, sp, 5 * SZREG > + REG_L a6, sp, 6 * SZREG > + REG_L a7, sp, 7 * SZREG > + REG_L t3, sp, 8 * SZREG > + REG_L t4, sp, 9 * SZREG > + REG_L t5, sp, 10 * SZREG > + REG_L t6, sp, 11 * SZREG > + REG_L t7, sp, 12 * SZREG > + REG_L t8, sp, 13 * SZREG > + ADDI sp, sp, FRAME_SIZE > + cfi_adjust_cfa_offset (-FRAME_SIZE) > + > + b .Lret > + cfi_endproc > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > index a6627cc754..b6cfd6121d 100644 > --- a/sysdeps/loongarch/dl-tlsdesc.S > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak: > cfi_endproc > .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > > - > #ifdef SHARED > > -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) > -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) > -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) > -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) > - > - /* Handler for dynamic TLS symbols. > - Prototype: > - _dl_tlsdesc_dynamic (tlsdesc *) ; > - > - The second word of the descriptor points to a > - tlsdesc_dynamic_arg structure. > - > - Returns the offset between the thread pointer and the > - object referenced by the argument. 
> - > - ptrdiff_t > - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > - { > - struct tlsdesc_dynamic_arg *td = tdp->arg; > - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); > - if (__glibc_likely (td->gen_count <= dtv[0].counter > - && (dtv[td->tlsinfo.ti_module].pointer.val > - != TLS_DTV_UNALLOCATED), > - 1)) > - return dtv[td->tlsinfo.ti_module].pointer.val > - + td->tlsinfo.ti_offset > - - __thread_pointer; > - > - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > - } */ > - .hidden _dl_tlsdesc_dynamic > - .global _dl_tlsdesc_dynamic > - .type _dl_tlsdesc_dynamic,%function > - cfi_startproc > - .align 2 > -_dl_tlsdesc_dynamic: > - /* Save just enough registers to support fast path, if we fall > - into slow path we will save additional registers. */ > - ADDI sp, sp, -32 > - REG_S t0, sp, 0 > - REG_S t1, sp, 8 > - REG_S t2, sp, 16 > - > -/* Runtime Storage Layout of Thread-Local Storage > - TP point to the start of TLS block. > - > - dtv > -Low address TCB ----------------> dtv0(counter) > - TP --> static_block0 <----- dtv1 > - static_block1 <----- dtv2 > - static_block2 <----- dtv3 > - dynamic_block0 <----- dtv4 > -Hign address dynamic_block1 <----- dtv5 */ > - > - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ > - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ > - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ > - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ > - /* If dtv[0].counter < td->gen_count, goto slow path. */ > - bltu t2, t1, .Lslow > - > - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ > - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ > - slli.d t1, t1, 4 > - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ > - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ > - li.d t2, TLS_DTV_UNALLOCATED > - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, > - goto slow path. 
*/ > - beq t1, t2, .Lslow > - > - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ > - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ > - add.d a0, t1, t2 > -.Lret: > - sub.d a0, a0, tp > - REG_L t0, sp, 0 > - REG_L t1, sp, 8 > - REG_L t2, sp, 16 > - ADDI sp, sp, 32 > - RET > - > -.Lslow: > - /* This is the slow path. We need to call __tls_get_addr() which > - means we need to save and restore all the register that the > - callee will trash. */ > - > - /* Save the remaining registers that we must treat as caller save. */ > - ADDI sp, sp, -FRAME_SIZE > - REG_S ra, sp, 0 * SZREG > - REG_S a1, sp, 1 * SZREG > - REG_S a2, sp, 2 * SZREG > - REG_S a3, sp, 3 * SZREG > - REG_S a4, sp, 4 * SZREG > - REG_S a5, sp, 5 * SZREG > - REG_S a6, sp, 6 * SZREG > - REG_S a7, sp, 7 * SZREG > - REG_S t3, sp, 8 * SZREG > - REG_S t4, sp, 9 * SZREG > - REG_S t5, sp, 10 * SZREG > - REG_S t6, sp, 11 * SZREG > - REG_S t7, sp, 12 * SZREG > - REG_S t8, sp, 13 * SZREG > - > #ifndef __loongarch_soft_float > > - /* Save fcsr0 register. > - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases > - of some fields in fcsr0. */ > - movfcsr2gr t0, fcsr0 > - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ > - > - /* Whether support LASX. */ > - la.global t0, _rtld_global_ro > - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET > - andi t1, t0, HWCAP_LOONGARCH_LASX > - beqz t1, .Llsx > - > - /* Save 256-bit vector registers. > - FIXME: Without vector ABI, save all vector registers. 
*/ > - ADDI sp, sp, -FRAME_SIZE_LASX > - xvst xr0, sp, 0*SZXREG > - xvst xr1, sp, 1*SZXREG > - xvst xr2, sp, 2*SZXREG > - xvst xr3, sp, 3*SZXREG > - xvst xr4, sp, 4*SZXREG > - xvst xr5, sp, 5*SZXREG > - xvst xr6, sp, 6*SZXREG > - xvst xr7, sp, 7*SZXREG > - xvst xr8, sp, 8*SZXREG > - xvst xr9, sp, 9*SZXREG > - xvst xr10, sp, 10*SZXREG > - xvst xr11, sp, 11*SZXREG > - xvst xr12, sp, 12*SZXREG > - xvst xr13, sp, 13*SZXREG > - xvst xr14, sp, 14*SZXREG > - xvst xr15, sp, 15*SZXREG > - xvst xr16, sp, 16*SZXREG > - xvst xr17, sp, 17*SZXREG > - xvst xr18, sp, 18*SZXREG > - xvst xr19, sp, 19*SZXREG > - xvst xr20, sp, 20*SZXREG > - xvst xr21, sp, 21*SZXREG > - xvst xr22, sp, 22*SZXREG > - xvst xr23, sp, 23*SZXREG > - xvst xr24, sp, 24*SZXREG > - xvst xr25, sp, 25*SZXREG > - xvst xr26, sp, 26*SZXREG > - xvst xr27, sp, 27*SZXREG > - xvst xr28, sp, 28*SZXREG > - xvst xr29, sp, 29*SZXREG > - xvst xr30, sp, 30*SZXREG > - xvst xr31, sp, 31*SZXREG > - b .Ltga > - > -.Llsx: > - /* Whether support LSX. */ > - andi t1, t0, HWCAP_LOONGARCH_LSX > - beqz t1, .Lfloat > - > - /* Save 128-bit vector registers. 
*/ > - ADDI sp, sp, -FRAME_SIZE_LSX > - vst vr0, sp, 0*SZVREG > - vst vr1, sp, 1*SZVREG > - vst vr2, sp, 2*SZVREG > - vst vr3, sp, 3*SZVREG > - vst vr4, sp, 4*SZVREG > - vst vr5, sp, 5*SZVREG > - vst vr6, sp, 6*SZVREG > - vst vr7, sp, 7*SZVREG > - vst vr8, sp, 8*SZVREG > - vst vr9, sp, 9*SZVREG > - vst vr10, sp, 10*SZVREG > - vst vr11, sp, 11*SZVREG > - vst vr12, sp, 12*SZVREG > - vst vr13, sp, 13*SZVREG > - vst vr14, sp, 14*SZVREG > - vst vr15, sp, 15*SZVREG > - vst vr16, sp, 16*SZVREG > - vst vr17, sp, 17*SZVREG > - vst vr18, sp, 18*SZVREG > - vst vr19, sp, 19*SZVREG > - vst vr20, sp, 20*SZVREG > - vst vr21, sp, 21*SZVREG > - vst vr22, sp, 22*SZVREG > - vst vr23, sp, 23*SZVREG > - vst vr24, sp, 24*SZVREG > - vst vr25, sp, 25*SZVREG > - vst vr26, sp, 26*SZVREG > - vst vr27, sp, 27*SZVREG > - vst vr28, sp, 28*SZVREG > - vst vr29, sp, 29*SZVREG > - vst vr30, sp, 30*SZVREG > - vst vr31, sp, 31*SZVREG > - b .Ltga > - > -.Lfloat: > - /* Save float registers. */ > - ADDI sp, sp, -FRAME_SIZE_FLOAT > - FREG_S fa0, sp, 0*SZFREG > - FREG_S fa1, sp, 1*SZFREG > - FREG_S fa2, sp, 2*SZFREG > - FREG_S fa3, sp, 3*SZFREG > - FREG_S fa4, sp, 4*SZFREG > - FREG_S fa5, sp, 5*SZFREG > - FREG_S fa6, sp, 6*SZFREG > - FREG_S fa7, sp, 7*SZFREG > - FREG_S ft0, sp, 8*SZFREG > - FREG_S ft1, sp, 9*SZFREG > - FREG_S ft2, sp, 10*SZFREG > - FREG_S ft3, sp, 11*SZFREG > - FREG_S ft4, sp, 12*SZFREG > - FREG_S ft5, sp, 13*SZFREG > - FREG_S ft6, sp, 14*SZFREG > - FREG_S ft7, sp, 15*SZFREG > - FREG_S ft8, sp, 16*SZFREG > - FREG_S ft9, sp, 17*SZFREG > - FREG_S ft10, sp, 18*SZFREG > - FREG_S ft11, sp, 19*SZFREG > - FREG_S ft12, sp, 20*SZFREG > - FREG_S ft13, sp, 21*SZFREG > - FREG_S ft14, sp, 22*SZFREG > - FREG_S ft15, sp, 23*SZFREG > - > -#endif /* #ifndef __loongarch_soft_float */ > - > -.Ltga: > - bl HIDDEN_JUMPTARGET(__tls_get_addr) > - ADDI a0, a0, -TLS_DTV_OFFSET > - > -#ifndef __loongarch_soft_float > - > - la.global t0, _rtld_global_ro > - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET > - andi t1, t0, 
HWCAP_LOONGARCH_LASX > - beqz t1, .Llsx1 > - > - /* Restore 256-bit vector registers. */ > - xvld xr0, sp, 0*SZXREG > - xvld xr1, sp, 1*SZXREG > - xvld xr2, sp, 2*SZXREG > - xvld xr3, sp, 3*SZXREG > - xvld xr4, sp, 4*SZXREG > - xvld xr5, sp, 5*SZXREG > - xvld xr6, sp, 6*SZXREG > - xvld xr7, sp, 7*SZXREG > - xvld xr8, sp, 8*SZXREG > - xvld xr9, sp, 9*SZXREG > - xvld xr10, sp, 10*SZXREG > - xvld xr11, sp, 11*SZXREG > - xvld xr12, sp, 12*SZXREG > - xvld xr13, sp, 13*SZXREG > - xvld xr14, sp, 14*SZXREG > - xvld xr15, sp, 15*SZXREG > - xvld xr16, sp, 16*SZXREG > - xvld xr17, sp, 17*SZXREG > - xvld xr18, sp, 18*SZXREG > - xvld xr19, sp, 19*SZXREG > - xvld xr20, sp, 20*SZXREG > - xvld xr21, sp, 21*SZXREG > - xvld xr22, sp, 22*SZXREG > - xvld xr23, sp, 23*SZXREG > - xvld xr24, sp, 24*SZXREG > - xvld xr25, sp, 25*SZXREG > - xvld xr26, sp, 26*SZXREG > - xvld xr27, sp, 27*SZXREG > - xvld xr28, sp, 28*SZXREG > - xvld xr29, sp, 29*SZXREG > - xvld xr30, sp, 30*SZXREG > - xvld xr31, sp, 31*SZXREG > - ADDI sp, sp, FRAME_SIZE_LASX > - b .Lfcsr > - > -.Llsx1: > - andi t1, t0, HWCAP_LOONGARCH_LSX > - beqz t1, .Lfloat1 > - > - /* Restore 128-bit vector registers. 
*/ > - vld vr0, sp, 0*SZVREG > - vld vr1, sp, 1*SZVREG > - vld vr2, sp, 2*SZVREG > - vld vr3, sp, 3*SZVREG > - vld vr4, sp, 4*SZVREG > - vld vr5, sp, 5*SZVREG > - vld vr6, sp, 6*SZVREG > - vld vr7, sp, 7*SZVREG > - vld vr8, sp, 8*SZVREG > - vld vr9, sp, 9*SZVREG > - vld vr10, sp, 10*SZVREG > - vld vr11, sp, 11*SZVREG > - vld vr12, sp, 12*SZVREG > - vld vr13, sp, 13*SZVREG > - vld vr14, sp, 14*SZVREG > - vld vr15, sp, 15*SZVREG > - vld vr16, sp, 16*SZVREG > - vld vr17, sp, 17*SZVREG > - vld vr18, sp, 18*SZVREG > - vld vr19, sp, 19*SZVREG > - vld vr20, sp, 20*SZVREG > - vld vr21, sp, 21*SZVREG > - vld vr22, sp, 22*SZVREG > - vld vr23, sp, 23*SZVREG > - vld vr24, sp, 24*SZVREG > - vld vr25, sp, 25*SZVREG > - vld vr26, sp, 26*SZVREG > - vld vr27, sp, 27*SZVREG > - vld vr28, sp, 28*SZVREG > - vld vr29, sp, 29*SZVREG > - vld vr30, sp, 30*SZVREG > - vld vr31, sp, 31*SZVREG > - ADDI sp, sp, FRAME_SIZE_LSX > - b .Lfcsr > - > -.Lfloat1: > - /* Restore float registers. */ > - FREG_L fa0, sp, 0*SZFREG > - FREG_L fa1, sp, 1*SZFREG > - FREG_L fa2, sp, 2*SZFREG > - FREG_L fa3, sp, 3*SZFREG > - FREG_L fa4, sp, 4*SZFREG > - FREG_L fa5, sp, 5*SZFREG > - FREG_L fa6, sp, 6*SZFREG > - FREG_L fa7, sp, 7*SZFREG > - FREG_L ft0, sp, 8*SZFREG > - FREG_L ft1, sp, 9*SZFREG > - FREG_L ft2, sp, 10*SZFREG > - FREG_L ft3, sp, 11*SZFREG > - FREG_L ft4, sp, 12*SZFREG > - FREG_L ft5, sp, 13*SZFREG > - FREG_L ft6, sp, 14*SZFREG > - FREG_L ft7, sp, 15*SZFREG > - FREG_L ft8, sp, 16*SZFREG > - FREG_L ft9, sp, 17*SZFREG > - FREG_L ft10, sp, 18*SZFREG > - FREG_L ft11, sp, 19*SZFREG > - FREG_L ft12, sp, 20*SZFREG > - FREG_L ft13, sp, 21*SZFREG > - FREG_L ft14, sp, 22*SZFREG > - FREG_L ft15, sp, 23*SZFREG > - ADDI sp, sp, FRAME_SIZE_FLOAT > - > -.Lfcsr: > - /* Restore fcsr0 register. 
*/ > - ld.w t0, sp, FRAME_SIZE + 24 > - movgr2fcsr fcsr0, t0 > +#define USE_LASX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx > +#define Lret Lret_lasx > +#define Lslow Lslow_lasx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LASX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#define USE_LSX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx > +#define Lret Lret_lsx > +#define Lslow Lslow_lsx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LSX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > > #endif /* #ifndef __loongarch_soft_float */ > > - REG_L ra, sp, 0 * SZREG > - REG_L a1, sp, 1 * SZREG > - REG_L a2, sp, 2 * SZREG > - REG_L a3, sp, 3 * SZREG > - REG_L a4, sp, 4 * SZREG > - REG_L a5, sp, 5 * SZREG > - REG_L a6, sp, 6 * SZREG > - REG_L a7, sp, 7 * SZREG > - REG_L t3, sp, 8 * SZREG > - REG_L t4, sp, 9 * SZREG > - REG_L t5, sp, 10 * SZREG > - REG_L t6, sp, 11 * SZREG > - REG_L t7, sp, 12 * SZREG > - REG_L t8, sp, 13 * SZREG > - ADDI sp, sp, FRAME_SIZE > - > - b .Lret > - cfi_endproc > - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > - .hidden HIDDEN_JUMPTARGET(__tls_get_addr) > +#include "dl-tlsdesc-dynamic.h" > > #endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h > index ff8c69cb93..45c43a5b52 100644 > --- a/sysdeps/loongarch/dl-tlsdesc.h > +++ b/sysdeps/loongarch/dl-tlsdesc.h > @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); > > #ifdef SHARED > extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); > +#ifndef __loongarch_soft_float > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); > +#endif > extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); > #endif > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic 2024-07-01 9:27 ` mengqinggang @ 2024-07-02 10:44 ` Jinyang He 2024-07-02 11:48 ` mengqinggang 0 siblings, 1 reply; 4+ messages in thread From: Jinyang He @ 2024-07-02 10:44 UTC (permalink / raw) To: mengqinggang Cc: libc-alpha, adhemerval.zanella, xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail, maskray, luweining, wanglei On 2024-07-01 17:27, mengqinggang wrote: > Ping. > > > The reason for changing to three _dl_tlsdesc_dynamic functions: > In one _dl_tlsdesc_dynamic, there are three cfi_adjust_cfa_offset directives, one for each of the > Float/LSX/LASX paths. > All three cfi_adjust_cfa_offset directives are always applied during stack unwinding, > but only one stack-down > instruction is actually executed. This results in an incorrect CFA address. > > > With three _dl_tlsdesc_dynamic functions, the three cfi_adjust_cfa_offset directives > can be distributed to the three functions. > So the cfi instructions can correspond to the stack-down instructions. But in the old version, the code didn't set the other cfi_adjust_cfa_offset directives when executing the "stack down instructions", which may cause incorrect results. I think we can keep just one _dl_tlsdesc_dynamic if we set enough `cfi_*` directives. Of course, splitting it into three functions is OK because we then do not need to read HWCAP each time _dl_tlsdesc_dynamic is called. > > > > 在 2024/6/26 下午2:34, mengqinggang 写道: >> Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, >> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. >> Conflicting cfi instructions can be distributed to the >> three functions. >> --- >> Changes v1 -> v2: >> - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, >> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. 
>> >> v1 link: >> https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html >> >> sysdeps/loongarch/dl-machine.h | 7 + >> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 +++++++++++++++++++++++++ >> sysdeps/loongarch/dl-tlsdesc.S | 386 ++--------------------- >> sysdeps/loongarch/dl-tlsdesc.h | 4 + >> 4 files changed, 436 insertions(+), 364 deletions(-) >> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h >> >> diff --git a/sysdeps/loongarch/dl-machine.h >> b/sysdeps/loongarch/dl-machine.h >> index ab6f1da7c0..04fabbf598 100644 >> --- a/sysdeps/loongarch/dl-machine.h >> +++ b/sysdeps/loongarch/dl-machine.h >> @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct >> r_scope_elem *scope[], >> { >> td->arg = _dl_make_tlsdesc_dynamic (sym_map, >> sym->st_value + reloc->r_addend); >> +# ifndef __loongarch_soft_float >> + if (SUPPORT_LASX) Why "SUPPORT_LASX" rather than "RTLD_SUPPORT_LASX"? The old version read HWCAP at "GLRO_DL_HWCAP_OFFSET", it means the HWCAP is at "GLRO_offsetof (dl_hwcap)". But, #define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX) #define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) >> + td->entry = _dl_tlsdesc_dynamic_lasx; >> + else if (SUPPORT_LSX) >> + td->entry = _dl_tlsdesc_dynamic_lsx; >> + else >> +# endif >> td->entry = _dl_tlsdesc_dynamic; >> } >> else >> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> new file mode 100644 >> index 0000000000..5b1f43aaf4 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> @@ -0,0 +1,403 @@ >> +/* Thread-local storage handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2024 Free Software Foundation, Inc. >> + >> + This file is part of the GNU C Library. 
>> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) >> +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) >> +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) >> +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) >> + >> + /* Handler for dynamic TLS symbols. >> + Prototype: >> + _dl_tlsdesc_dynamic (tlsdesc *) ; >> + >> + The second word of the descriptor points to a >> + tlsdesc_dynamic_arg structure. >> + >> + Returns the offset between the thread pointer and the >> + object referenced by the argument. 
>> + >> + ptrdiff_t >> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >> + { >> + struct tlsdesc_dynamic_arg *td = tdp->arg; >> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - >> SIZE_OF_TCB); >> + if (__glibc_likely (td->gen_count <= dtv[0].counter >> + && (dtv[td->tlsinfo.ti_module].pointer.val >> + != TLS_DTV_UNALLOCATED), >> + 1)) >> + return dtv[td->tlsinfo.ti_module].pointer.val >> + + td->tlsinfo.ti_offset >> + - __thread_pointer; >> + >> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >> + } */ >> + .hidden _dl_tlsdesc_dynamic >> + .global _dl_tlsdesc_dynamic >> + .type _dl_tlsdesc_dynamic,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_dynamic: >> + /* Save just enough registers to support fast path, if we fall >> + into slow path we will save additional registers. */ >> + ADDI sp, sp, -32 >> + cfi_adjust_cfa_offset (32) >> + REG_S t0, sp, 0 >> + REG_S t1, sp, 8 >> + REG_S t2, sp, 16 >> + cfi_rel_offset (12, 0) >> + cfi_rel_offset (13, 8) >> + cfi_rel_offset (14, 16) >> + >> +/* Runtime Storage Layout of Thread-Local Storage >> + TP point to the start of TLS block. >> + >> + dtv >> +Low address TCB ----------------> dtv0(counter) >> + TP --> static_block0 <----- dtv1 >> + static_block1 <----- dtv2 >> + static_block2 <----- dtv3 >> + dynamic_block0 <----- dtv4 >> +Hign address dynamic_block1 <----- dtv5 */ >> + >> + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ >> + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ >> + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ >> + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ >> + /* If dtv[0].counter < td->gen_count, goto slow path. 
*/ >> + bltu t2, t1, .Lslow >> + >> + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ >> + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ >> + slli.d t1, t1, 4 >> + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ >> + REG_L t1, t1, 0 /* t1 = >> dtv[td->tlsinfo.ti_module].pointer.val */ >> + li.d t2, TLS_DTV_UNALLOCATED >> + /* If dtv[td->tlsinfo.ti_module].pointer.val is >> TLS_DTV_UNALLOCATED, >> + goto slow path. */ >> + beq t1, t2, .Lslow >> + >> + cfi_remember_state >> + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ >> + /* dtv[td->tlsinfo.ti_module].pointer.val + >> td->tlsinfo.ti_offset */ >> + add.d a0, t1, t2 >> +.Lret: >> + sub.d a0, a0, tp >> + REG_L t0, sp, 0 >> + REG_L t1, sp, 8 >> + REG_L t2, sp, 16 >> + ADDI sp, sp, 32 >> + cfi_adjust_cfa_offset (-32) >> + RET >> + >> +.Lslow: >> + /* This is the slow path. We need to call __tls_get_addr() which >> + means we need to save and restore all the register that the >> + callee will trash. */ >> + >> + /* Save the remaining registers that we must treat as caller >> save. 
*/ >> + cfi_restore_state >> + ADDI sp, sp, -FRAME_SIZE >> + cfi_adjust_cfa_offset (FRAME_SIZE) >> + REG_S ra, sp, 0 * SZREG >> + REG_S a1, sp, 1 * SZREG >> + REG_S a2, sp, 2 * SZREG >> + REG_S a3, sp, 3 * SZREG >> + REG_S a4, sp, 4 * SZREG >> + REG_S a5, sp, 5 * SZREG >> + REG_S a6, sp, 6 * SZREG >> + REG_S a7, sp, 7 * SZREG >> + REG_S t3, sp, 8 * SZREG >> + REG_S t4, sp, 9 * SZREG >> + REG_S t5, sp, 10 * SZREG >> + REG_S t6, sp, 11 * SZREG >> + REG_S t7, sp, 12 * SZREG >> + REG_S t8, sp, 13 * SZREG >> + cfi_rel_offset (1, 0 * SZREG) >> + cfi_rel_offset (5, 1 * SZREG) >> + cfi_rel_offset (6, 2 * SZREG) >> + cfi_rel_offset (7, 3 * SZREG) >> + cfi_rel_offset (8, 4 * SZREG) >> + cfi_rel_offset (9, 5 * SZREG) >> + cfi_rel_offset (10, 6 * SZREG) >> + cfi_rel_offset (11, 7 * SZREG) >> + cfi_rel_offset (15, 8 * SZREG) >> + cfi_rel_offset (16, 9 * SZREG) >> + cfi_rel_offset (17, 10 * SZREG) >> + cfi_rel_offset (18, 11 * SZREG) >> + cfi_rel_offset (19, 12 * SZREG) >> + cfi_rel_offset (20, 13 * SZREG) >> + >> +#ifndef __loongarch_soft_float >> + >> + /* Save fcsr0 register. >> + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases >> + of some fields in fcsr0. */ >> + movfcsr2gr t0, fcsr0 >> + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */ >> + >> +#ifdef USE_LASX If we define macros as follows, we can reduce the amount of code. // An example. 
#if defined(USE_LASX) #define V_REG_S xvst #define V_REG_L xvld #define V_SPACE FRAME_SIZE_LASX #define V_REG(n) $xr##n #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 #define V_REGSZ SZXREG #elif defined(USE_LSX) #define V_REG_S vst #define V_REG_L vld #define V_SPACE FRAME_SIZE_LSX #define V_REG(n) $vr##n #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 #define V_REGSZ SZVREG #else #define V_REG_S fst.d #define V_REG_L fld.d #define V_SPACE FRAME_SIZE_FLOAT #define V_REG(n) $f##n #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 #define V_REGSZ SZFREG #endif ADDI sp, sp, -V_SPACE cfi_adjust_cfa_offset (V_SPACE) .irp i,V_REGS V_REG_S V_REG(\i), sp, \i * V_REGSZ .endr .irp i,V_REGS V_REG_L V_REG(\i), sp, \i * V_REGSZ .endr ADDI sp, sp, V_SPACE cfi_adjust_cfa_offset (-V_SPACE) Thanks. Jinyang >> + >> + /* Save 256-bit vector registers. >> + FIXME: Without vector ABI, save all vector registers. 
*/ >> + ADDI sp, sp, -FRAME_SIZE_LASX >> + cfi_adjust_cfa_offset (FRAME_SIZE_LASX) >> + xvst xr0, sp, 0*SZXREG >> + xvst xr1, sp, 1*SZXREG >> + xvst xr2, sp, 2*SZXREG >> + xvst xr3, sp, 3*SZXREG >> + xvst xr4, sp, 4*SZXREG >> + xvst xr5, sp, 5*SZXREG >> + xvst xr6, sp, 6*SZXREG >> + xvst xr7, sp, 7*SZXREG >> + xvst xr8, sp, 8*SZXREG >> + xvst xr9, sp, 9*SZXREG >> + xvst xr10, sp, 10*SZXREG >> + xvst xr11, sp, 11*SZXREG >> + xvst xr12, sp, 12*SZXREG >> + xvst xr13, sp, 13*SZXREG >> + xvst xr14, sp, 14*SZXREG >> + xvst xr15, sp, 15*SZXREG >> + xvst xr16, sp, 16*SZXREG >> + xvst xr17, sp, 17*SZXREG >> + xvst xr18, sp, 18*SZXREG >> + xvst xr19, sp, 19*SZXREG >> + xvst xr20, sp, 20*SZXREG >> + xvst xr21, sp, 21*SZXREG >> + xvst xr22, sp, 22*SZXREG >> + xvst xr23, sp, 23*SZXREG >> + xvst xr24, sp, 24*SZXREG >> + xvst xr25, sp, 25*SZXREG >> + xvst xr26, sp, 26*SZXREG >> + xvst xr27, sp, 27*SZXREG >> + xvst xr28, sp, 28*SZXREG >> + xvst xr29, sp, 29*SZXREG >> + xvst xr30, sp, 30*SZXREG >> + xvst xr31, sp, 31*SZXREG >> + >> +#elif defined USE_LSX >> + >> + /* Save 128-bit vector registers. 
*/ >> + ADDI sp, sp, -FRAME_SIZE_LSX >> + cfi_adjust_cfa_offset (FRAME_SIZE_LSX) >> + vst vr0, sp, 0*SZVREG >> + vst vr1, sp, 1*SZVREG >> + vst vr2, sp, 2*SZVREG >> + vst vr3, sp, 3*SZVREG >> + vst vr4, sp, 4*SZVREG >> + vst vr5, sp, 5*SZVREG >> + vst vr6, sp, 6*SZVREG >> + vst vr7, sp, 7*SZVREG >> + vst vr8, sp, 8*SZVREG >> + vst vr9, sp, 9*SZVREG >> + vst vr10, sp, 10*SZVREG >> + vst vr11, sp, 11*SZVREG >> + vst vr12, sp, 12*SZVREG >> + vst vr13, sp, 13*SZVREG >> + vst vr14, sp, 14*SZVREG >> + vst vr15, sp, 15*SZVREG >> + vst vr16, sp, 16*SZVREG >> + vst vr17, sp, 17*SZVREG >> + vst vr18, sp, 18*SZVREG >> + vst vr19, sp, 19*SZVREG >> + vst vr20, sp, 20*SZVREG >> + vst vr21, sp, 21*SZVREG >> + vst vr22, sp, 22*SZVREG >> + vst vr23, sp, 23*SZVREG >> + vst vr24, sp, 24*SZVREG >> + vst vr25, sp, 25*SZVREG >> + vst vr26, sp, 26*SZVREG >> + vst vr27, sp, 27*SZVREG >> + vst vr28, sp, 28*SZVREG >> + vst vr29, sp, 29*SZVREG >> + vst vr30, sp, 30*SZVREG >> + vst vr31, sp, 31*SZVREG >> + >> +# else >> + >> + /* Save float registers. 
*/ >> + ADDI sp, sp, -FRAME_SIZE_FLOAT >> + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT) >> + FREG_S fa0, sp, 0*SZFREG >> + FREG_S fa1, sp, 1*SZFREG >> + FREG_S fa2, sp, 2*SZFREG >> + FREG_S fa3, sp, 3*SZFREG >> + FREG_S fa4, sp, 4*SZFREG >> + FREG_S fa5, sp, 5*SZFREG >> + FREG_S fa6, sp, 6*SZFREG >> + FREG_S fa7, sp, 7*SZFREG >> + FREG_S ft0, sp, 8*SZFREG >> + FREG_S ft1, sp, 9*SZFREG >> + FREG_S ft2, sp, 10*SZFREG >> + FREG_S ft3, sp, 11*SZFREG >> + FREG_S ft4, sp, 12*SZFREG >> + FREG_S ft5, sp, 13*SZFREG >> + FREG_S ft6, sp, 14*SZFREG >> + FREG_S ft7, sp, 15*SZFREG >> + FREG_S ft8, sp, 16*SZFREG >> + FREG_S ft9, sp, 17*SZFREG >> + FREG_S ft10, sp, 18*SZFREG >> + FREG_S ft11, sp, 19*SZFREG >> + FREG_S ft12, sp, 20*SZFREG >> + FREG_S ft13, sp, 21*SZFREG >> + FREG_S ft14, sp, 22*SZFREG >> + FREG_S ft15, sp, 23*SZFREG >> + >> +#endif /* #ifdef USE_LASX */ >> +#endif /* #ifndef __loongarch_soft_float */ >> + >> + bl HIDDEN_JUMPTARGET(__tls_get_addr) >> + ADDI a0, a0, -TLS_DTV_OFFSET >> + >> +#ifndef __loongarch_soft_float >> +#ifdef USE_LASX >> + >> + /* Restore 256-bit vector registers. 
*/ >> + xvld xr0, sp, 0*SZXREG >> + xvld xr1, sp, 1*SZXREG >> + xvld xr2, sp, 2*SZXREG >> + xvld xr3, sp, 3*SZXREG >> + xvld xr4, sp, 4*SZXREG >> + xvld xr5, sp, 5*SZXREG >> + xvld xr6, sp, 6*SZXREG >> + xvld xr7, sp, 7*SZXREG >> + xvld xr8, sp, 8*SZXREG >> + xvld xr9, sp, 9*SZXREG >> + xvld xr10, sp, 10*SZXREG >> + xvld xr11, sp, 11*SZXREG >> + xvld xr12, sp, 12*SZXREG >> + xvld xr13, sp, 13*SZXREG >> + xvld xr14, sp, 14*SZXREG >> + xvld xr15, sp, 15*SZXREG >> + xvld xr16, sp, 16*SZXREG >> + xvld xr17, sp, 17*SZXREG >> + xvld xr18, sp, 18*SZXREG >> + xvld xr19, sp, 19*SZXREG >> + xvld xr20, sp, 20*SZXREG >> + xvld xr21, sp, 21*SZXREG >> + xvld xr22, sp, 22*SZXREG >> + xvld xr23, sp, 23*SZXREG >> + xvld xr24, sp, 24*SZXREG >> + xvld xr25, sp, 25*SZXREG >> + xvld xr26, sp, 26*SZXREG >> + xvld xr27, sp, 27*SZXREG >> + xvld xr28, sp, 28*SZXREG >> + xvld xr29, sp, 29*SZXREG >> + xvld xr30, sp, 30*SZXREG >> + xvld xr31, sp, 31*SZXREG >> + ADDI sp, sp, FRAME_SIZE_LASX >> + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX) >> + >> +#elif defined USE_LSX >> + >> + /* Restore 128-bit vector registers. 
*/ >> + vld vr0, sp, 0*SZVREG >> + vld vr1, sp, 1*SZVREG >> + vld vr2, sp, 2*SZVREG >> + vld vr3, sp, 3*SZVREG >> + vld vr4, sp, 4*SZVREG >> + vld vr5, sp, 5*SZVREG >> + vld vr6, sp, 6*SZVREG >> + vld vr7, sp, 7*SZVREG >> + vld vr8, sp, 8*SZVREG >> + vld vr9, sp, 9*SZVREG >> + vld vr10, sp, 10*SZVREG >> + vld vr11, sp, 11*SZVREG >> + vld vr12, sp, 12*SZVREG >> + vld vr13, sp, 13*SZVREG >> + vld vr14, sp, 14*SZVREG >> + vld vr15, sp, 15*SZVREG >> + vld vr16, sp, 16*SZVREG >> + vld vr17, sp, 17*SZVREG >> + vld vr18, sp, 18*SZVREG >> + vld vr19, sp, 19*SZVREG >> + vld vr20, sp, 20*SZVREG >> + vld vr21, sp, 21*SZVREG >> + vld vr22, sp, 22*SZVREG >> + vld vr23, sp, 23*SZVREG >> + vld vr24, sp, 24*SZVREG >> + vld vr25, sp, 25*SZVREG >> + vld vr26, sp, 26*SZVREG >> + vld vr27, sp, 27*SZVREG >> + vld vr28, sp, 28*SZVREG >> + vld vr29, sp, 29*SZVREG >> + vld vr30, sp, 30*SZVREG >> + vld vr31, sp, 31*SZVREG >> + ADDI sp, sp, FRAME_SIZE_LSX >> + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX) >> + >> +#else >> + >> + /* Restore float registers. */ >> + FREG_L fa0, sp, 0*SZFREG >> + FREG_L fa1, sp, 1*SZFREG >> + FREG_L fa2, sp, 2*SZFREG >> + FREG_L fa3, sp, 3*SZFREG >> + FREG_L fa4, sp, 4*SZFREG >> + FREG_L fa5, sp, 5*SZFREG >> + FREG_L fa6, sp, 6*SZFREG >> + FREG_L fa7, sp, 7*SZFREG >> + FREG_L ft0, sp, 8*SZFREG >> + FREG_L ft1, sp, 9*SZFREG >> + FREG_L ft2, sp, 10*SZFREG >> + FREG_L ft3, sp, 11*SZFREG >> + FREG_L ft4, sp, 12*SZFREG >> + FREG_L ft5, sp, 13*SZFREG >> + FREG_L ft6, sp, 14*SZFREG >> + FREG_L ft7, sp, 15*SZFREG >> + FREG_L ft8, sp, 16*SZFREG >> + FREG_L ft9, sp, 17*SZFREG >> + FREG_L ft10, sp, 18*SZFREG >> + FREG_L ft11, sp, 19*SZFREG >> + FREG_L ft12, sp, 20*SZFREG >> + FREG_L ft13, sp, 21*SZFREG >> + FREG_L ft14, sp, 22*SZFREG >> + FREG_L ft15, sp, 23*SZFREG >> + ADDI sp, sp, FRAME_SIZE_FLOAT >> + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT) >> + >> +#endif /* #ifdef USE_LASX */ >> + >> + /* Restore fcsr0 register. 
*/ >> + ld.w t0, sp, FRAME_SIZE + 24 >> + movgr2fcsr fcsr0, t0 >> + >> +#endif /* #ifndef __loongarch_soft_float */ >> + >> + REG_L ra, sp, 0 * SZREG >> + REG_L a1, sp, 1 * SZREG >> + REG_L a2, sp, 2 * SZREG >> + REG_L a3, sp, 3 * SZREG >> + REG_L a4, sp, 4 * SZREG >> + REG_L a5, sp, 5 * SZREG >> + REG_L a6, sp, 6 * SZREG >> + REG_L a7, sp, 7 * SZREG >> + REG_L t3, sp, 8 * SZREG >> + REG_L t4, sp, 9 * SZREG >> + REG_L t5, sp, 10 * SZREG >> + REG_L t6, sp, 11 * SZREG >> + REG_L t7, sp, 12 * SZREG >> + REG_L t8, sp, 13 * SZREG >> + ADDI sp, sp, FRAME_SIZE >> + cfi_adjust_cfa_offset (-FRAME_SIZE) >> + >> + b .Lret >> + cfi_endproc >> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >> + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) >> diff --git a/sysdeps/loongarch/dl-tlsdesc.S >> b/sysdeps/loongarch/dl-tlsdesc.S >> index a6627cc754..b6cfd6121d 100644 >> --- a/sysdeps/loongarch/dl-tlsdesc.S >> +++ b/sysdeps/loongarch/dl-tlsdesc.S >> @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak: >> cfi_endproc >> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak >> - >> #ifdef SHARED >> -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) >> -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) >> -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) >> -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) >> - >> - /* Handler for dynamic TLS symbols. >> - Prototype: >> - _dl_tlsdesc_dynamic (tlsdesc *) ; >> - >> - The second word of the descriptor points to a >> - tlsdesc_dynamic_arg structure. >> - >> - Returns the offset between the thread pointer and the >> - object referenced by the argument. 
>> - >> - ptrdiff_t >> - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >> - { >> - struct tlsdesc_dynamic_arg *td = tdp->arg; >> - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - >> SIZE_OF_TCB); >> - if (__glibc_likely (td->gen_count <= dtv[0].counter >> - && (dtv[td->tlsinfo.ti_module].pointer.val >> - != TLS_DTV_UNALLOCATED), >> - 1)) >> - return dtv[td->tlsinfo.ti_module].pointer.val >> - + td->tlsinfo.ti_offset >> - - __thread_pointer; >> - >> - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >> - } */ >> - .hidden _dl_tlsdesc_dynamic >> - .global _dl_tlsdesc_dynamic >> - .type _dl_tlsdesc_dynamic,%function >> - cfi_startproc >> - .align 2 >> -_dl_tlsdesc_dynamic: >> - /* Save just enough registers to support fast path, if we fall >> - into slow path we will save additional registers. */ >> - ADDI sp, sp, -32 >> - REG_S t0, sp, 0 >> - REG_S t1, sp, 8 >> - REG_S t2, sp, 16 >> - >> -/* Runtime Storage Layout of Thread-Local Storage >> - TP point to the start of TLS block. >> - >> - dtv >> -Low address TCB ----------------> dtv0(counter) >> - TP --> static_block0 <----- dtv1 >> - static_block1 <----- dtv2 >> - static_block2 <----- dtv3 >> - dynamic_block0 <----- dtv4 >> -Hign address dynamic_block1 <----- dtv5 */ >> - >> - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ >> - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ >> - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ >> - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ >> - /* If dtv[0].counter < td->gen_count, goto slow path. */ >> - bltu t2, t1, .Lslow >> - >> - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ >> - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ >> - slli.d t1, t1, 4 >> - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ >> - REG_L t1, t1, 0 /* t1 = >> dtv[td->tlsinfo.ti_module].pointer.val */ >> - li.d t2, TLS_DTV_UNALLOCATED >> - /* If dtv[td->tlsinfo.ti_module].pointer.val is >> TLS_DTV_UNALLOCATED, >> - goto slow path. 
*/ >> - beq t1, t2, .Lslow >> - >> - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ >> - /* dtv[td->tlsinfo.ti_module].pointer.val + >> td->tlsinfo.ti_offset */ >> - add.d a0, t1, t2 >> -.Lret: >> - sub.d a0, a0, tp >> - REG_L t0, sp, 0 >> - REG_L t1, sp, 8 >> - REG_L t2, sp, 16 >> - ADDI sp, sp, 32 >> - RET >> - >> -.Lslow: >> - /* This is the slow path. We need to call __tls_get_addr() which >> - means we need to save and restore all the register that the >> - callee will trash. */ >> - >> - /* Save the remaining registers that we must treat as caller >> save. */ >> - ADDI sp, sp, -FRAME_SIZE >> - REG_S ra, sp, 0 * SZREG >> - REG_S a1, sp, 1 * SZREG >> - REG_S a2, sp, 2 * SZREG >> - REG_S a3, sp, 3 * SZREG >> - REG_S a4, sp, 4 * SZREG >> - REG_S a5, sp, 5 * SZREG >> - REG_S a6, sp, 6 * SZREG >> - REG_S a7, sp, 7 * SZREG >> - REG_S t3, sp, 8 * SZREG >> - REG_S t4, sp, 9 * SZREG >> - REG_S t5, sp, 10 * SZREG >> - REG_S t6, sp, 11 * SZREG >> - REG_S t7, sp, 12 * SZREG >> - REG_S t8, sp, 13 * SZREG >> - >> #ifndef __loongarch_soft_float >> - /* Save fcsr0 register. >> - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases >> - of some fields in fcsr0. */ >> - movfcsr2gr t0, fcsr0 >> - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ >> - >> - /* Whether support LASX. */ >> - la.global t0, _rtld_global_ro >> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET >> - andi t1, t0, HWCAP_LOONGARCH_LASX >> - beqz t1, .Llsx >> - >> - /* Save 256-bit vector registers. >> - FIXME: Without vector ABI, save all vector registers. 
*/ >> - ADDI sp, sp, -FRAME_SIZE_LASX >> - xvst xr0, sp, 0*SZXREG >> - xvst xr1, sp, 1*SZXREG >> - xvst xr2, sp, 2*SZXREG >> - xvst xr3, sp, 3*SZXREG >> - xvst xr4, sp, 4*SZXREG >> - xvst xr5, sp, 5*SZXREG >> - xvst xr6, sp, 6*SZXREG >> - xvst xr7, sp, 7*SZXREG >> - xvst xr8, sp, 8*SZXREG >> - xvst xr9, sp, 9*SZXREG >> - xvst xr10, sp, 10*SZXREG >> - xvst xr11, sp, 11*SZXREG >> - xvst xr12, sp, 12*SZXREG >> - xvst xr13, sp, 13*SZXREG >> - xvst xr14, sp, 14*SZXREG >> - xvst xr15, sp, 15*SZXREG >> - xvst xr16, sp, 16*SZXREG >> - xvst xr17, sp, 17*SZXREG >> - xvst xr18, sp, 18*SZXREG >> - xvst xr19, sp, 19*SZXREG >> - xvst xr20, sp, 20*SZXREG >> - xvst xr21, sp, 21*SZXREG >> - xvst xr22, sp, 22*SZXREG >> - xvst xr23, sp, 23*SZXREG >> - xvst xr24, sp, 24*SZXREG >> - xvst xr25, sp, 25*SZXREG >> - xvst xr26, sp, 26*SZXREG >> - xvst xr27, sp, 27*SZXREG >> - xvst xr28, sp, 28*SZXREG >> - xvst xr29, sp, 29*SZXREG >> - xvst xr30, sp, 30*SZXREG >> - xvst xr31, sp, 31*SZXREG >> - b .Ltga >> - >> -.Llsx: >> - /* Whether support LSX. */ >> - andi t1, t0, HWCAP_LOONGARCH_LSX >> - beqz t1, .Lfloat >> - >> - /* Save 128-bit vector registers. 
*/ >> - ADDI sp, sp, -FRAME_SIZE_LSX >> - vst vr0, sp, 0*SZVREG >> - vst vr1, sp, 1*SZVREG >> - vst vr2, sp, 2*SZVREG >> - vst vr3, sp, 3*SZVREG >> - vst vr4, sp, 4*SZVREG >> - vst vr5, sp, 5*SZVREG >> - vst vr6, sp, 6*SZVREG >> - vst vr7, sp, 7*SZVREG >> - vst vr8, sp, 8*SZVREG >> - vst vr9, sp, 9*SZVREG >> - vst vr10, sp, 10*SZVREG >> - vst vr11, sp, 11*SZVREG >> - vst vr12, sp, 12*SZVREG >> - vst vr13, sp, 13*SZVREG >> - vst vr14, sp, 14*SZVREG >> - vst vr15, sp, 15*SZVREG >> - vst vr16, sp, 16*SZVREG >> - vst vr17, sp, 17*SZVREG >> - vst vr18, sp, 18*SZVREG >> - vst vr19, sp, 19*SZVREG >> - vst vr20, sp, 20*SZVREG >> - vst vr21, sp, 21*SZVREG >> - vst vr22, sp, 22*SZVREG >> - vst vr23, sp, 23*SZVREG >> - vst vr24, sp, 24*SZVREG >> - vst vr25, sp, 25*SZVREG >> - vst vr26, sp, 26*SZVREG >> - vst vr27, sp, 27*SZVREG >> - vst vr28, sp, 28*SZVREG >> - vst vr29, sp, 29*SZVREG >> - vst vr30, sp, 30*SZVREG >> - vst vr31, sp, 31*SZVREG >> - b .Ltga >> - >> -.Lfloat: >> - /* Save float registers. 
*/ >> - ADDI sp, sp, -FRAME_SIZE_FLOAT >> - FREG_S fa0, sp, 0*SZFREG >> - FREG_S fa1, sp, 1*SZFREG >> - FREG_S fa2, sp, 2*SZFREG >> - FREG_S fa3, sp, 3*SZFREG >> - FREG_S fa4, sp, 4*SZFREG >> - FREG_S fa5, sp, 5*SZFREG >> - FREG_S fa6, sp, 6*SZFREG >> - FREG_S fa7, sp, 7*SZFREG >> - FREG_S ft0, sp, 8*SZFREG >> - FREG_S ft1, sp, 9*SZFREG >> - FREG_S ft2, sp, 10*SZFREG >> - FREG_S ft3, sp, 11*SZFREG >> - FREG_S ft4, sp, 12*SZFREG >> - FREG_S ft5, sp, 13*SZFREG >> - FREG_S ft6, sp, 14*SZFREG >> - FREG_S ft7, sp, 15*SZFREG >> - FREG_S ft8, sp, 16*SZFREG >> - FREG_S ft9, sp, 17*SZFREG >> - FREG_S ft10, sp, 18*SZFREG >> - FREG_S ft11, sp, 19*SZFREG >> - FREG_S ft12, sp, 20*SZFREG >> - FREG_S ft13, sp, 21*SZFREG >> - FREG_S ft14, sp, 22*SZFREG >> - FREG_S ft15, sp, 23*SZFREG >> - >> -#endif /* #ifndef __loongarch_soft_float */ >> - >> -.Ltga: >> - bl HIDDEN_JUMPTARGET(__tls_get_addr) >> - ADDI a0, a0, -TLS_DTV_OFFSET >> - >> -#ifndef __loongarch_soft_float >> - >> - la.global t0, _rtld_global_ro >> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET >> - andi t1, t0, HWCAP_LOONGARCH_LASX >> - beqz t1, .Llsx1 >> - >> - /* Restore 256-bit vector registers. 
*/ >> - xvld xr0, sp, 0*SZXREG >> - xvld xr1, sp, 1*SZXREG >> - xvld xr2, sp, 2*SZXREG >> - xvld xr3, sp, 3*SZXREG >> - xvld xr4, sp, 4*SZXREG >> - xvld xr5, sp, 5*SZXREG >> - xvld xr6, sp, 6*SZXREG >> - xvld xr7, sp, 7*SZXREG >> - xvld xr8, sp, 8*SZXREG >> - xvld xr9, sp, 9*SZXREG >> - xvld xr10, sp, 10*SZXREG >> - xvld xr11, sp, 11*SZXREG >> - xvld xr12, sp, 12*SZXREG >> - xvld xr13, sp, 13*SZXREG >> - xvld xr14, sp, 14*SZXREG >> - xvld xr15, sp, 15*SZXREG >> - xvld xr16, sp, 16*SZXREG >> - xvld xr17, sp, 17*SZXREG >> - xvld xr18, sp, 18*SZXREG >> - xvld xr19, sp, 19*SZXREG >> - xvld xr20, sp, 20*SZXREG >> - xvld xr21, sp, 21*SZXREG >> - xvld xr22, sp, 22*SZXREG >> - xvld xr23, sp, 23*SZXREG >> - xvld xr24, sp, 24*SZXREG >> - xvld xr25, sp, 25*SZXREG >> - xvld xr26, sp, 26*SZXREG >> - xvld xr27, sp, 27*SZXREG >> - xvld xr28, sp, 28*SZXREG >> - xvld xr29, sp, 29*SZXREG >> - xvld xr30, sp, 30*SZXREG >> - xvld xr31, sp, 31*SZXREG >> - ADDI sp, sp, FRAME_SIZE_LASX >> - b .Lfcsr >> - >> -.Llsx1: >> - andi t1, t0, HWCAP_LOONGARCH_LSX >> - beqz t1, .Lfloat1 >> - >> - /* Restore 128-bit vector registers. 
*/ >> - vld vr0, sp, 0*SZVREG >> - vld vr1, sp, 1*SZVREG >> - vld vr2, sp, 2*SZVREG >> - vld vr3, sp, 3*SZVREG >> - vld vr4, sp, 4*SZVREG >> - vld vr5, sp, 5*SZVREG >> - vld vr6, sp, 6*SZVREG >> - vld vr7, sp, 7*SZVREG >> - vld vr8, sp, 8*SZVREG >> - vld vr9, sp, 9*SZVREG >> - vld vr10, sp, 10*SZVREG >> - vld vr11, sp, 11*SZVREG >> - vld vr12, sp, 12*SZVREG >> - vld vr13, sp, 13*SZVREG >> - vld vr14, sp, 14*SZVREG >> - vld vr15, sp, 15*SZVREG >> - vld vr16, sp, 16*SZVREG >> - vld vr17, sp, 17*SZVREG >> - vld vr18, sp, 18*SZVREG >> - vld vr19, sp, 19*SZVREG >> - vld vr20, sp, 20*SZVREG >> - vld vr21, sp, 21*SZVREG >> - vld vr22, sp, 22*SZVREG >> - vld vr23, sp, 23*SZVREG >> - vld vr24, sp, 24*SZVREG >> - vld vr25, sp, 25*SZVREG >> - vld vr26, sp, 26*SZVREG >> - vld vr27, sp, 27*SZVREG >> - vld vr28, sp, 28*SZVREG >> - vld vr29, sp, 29*SZVREG >> - vld vr30, sp, 30*SZVREG >> - vld vr31, sp, 31*SZVREG >> - ADDI sp, sp, FRAME_SIZE_LSX >> - b .Lfcsr >> - >> -.Lfloat1: >> - /* Restore float registers. */ >> - FREG_L fa0, sp, 0*SZFREG >> - FREG_L fa1, sp, 1*SZFREG >> - FREG_L fa2, sp, 2*SZFREG >> - FREG_L fa3, sp, 3*SZFREG >> - FREG_L fa4, sp, 4*SZFREG >> - FREG_L fa5, sp, 5*SZFREG >> - FREG_L fa6, sp, 6*SZFREG >> - FREG_L fa7, sp, 7*SZFREG >> - FREG_L ft0, sp, 8*SZFREG >> - FREG_L ft1, sp, 9*SZFREG >> - FREG_L ft2, sp, 10*SZFREG >> - FREG_L ft3, sp, 11*SZFREG >> - FREG_L ft4, sp, 12*SZFREG >> - FREG_L ft5, sp, 13*SZFREG >> - FREG_L ft6, sp, 14*SZFREG >> - FREG_L ft7, sp, 15*SZFREG >> - FREG_L ft8, sp, 16*SZFREG >> - FREG_L ft9, sp, 17*SZFREG >> - FREG_L ft10, sp, 18*SZFREG >> - FREG_L ft11, sp, 19*SZFREG >> - FREG_L ft12, sp, 20*SZFREG >> - FREG_L ft13, sp, 21*SZFREG >> - FREG_L ft14, sp, 22*SZFREG >> - FREG_L ft15, sp, 23*SZFREG >> - ADDI sp, sp, FRAME_SIZE_FLOAT >> - >> -.Lfcsr: >> - /* Restore fcsr0 register. 
*/ >> - ld.w t0, sp, FRAME_SIZE + 24 >> - movgr2fcsr fcsr0, t0 >> +#define USE_LASX >> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx >> +#define Lret Lret_lasx >> +#define Lslow Lslow_lasx >> +#include "dl-tlsdesc-dynamic.h" >> +#undef FRAME_SIZE >> +#undef USE_LASX >> +#undef _dl_tlsdesc_dynamic >> +#undef Lret >> +#undef Lslow >> + >> +#define USE_LSX >> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx >> +#define Lret Lret_lsx >> +#define Lslow Lslow_lsx >> +#include "dl-tlsdesc-dynamic.h" >> +#undef FRAME_SIZE >> +#undef USE_LSX >> +#undef _dl_tlsdesc_dynamic >> +#undef Lret >> +#undef Lslow >> #endif /* #ifndef __loongarch_soft_float */ >> - REG_L ra, sp, 0 * SZREG >> - REG_L a1, sp, 1 * SZREG >> - REG_L a2, sp, 2 * SZREG >> - REG_L a3, sp, 3 * SZREG >> - REG_L a4, sp, 4 * SZREG >> - REG_L a5, sp, 5 * SZREG >> - REG_L a6, sp, 6 * SZREG >> - REG_L a7, sp, 7 * SZREG >> - REG_L t3, sp, 8 * SZREG >> - REG_L t4, sp, 9 * SZREG >> - REG_L t5, sp, 10 * SZREG >> - REG_L t6, sp, 11 * SZREG >> - REG_L t7, sp, 12 * SZREG >> - REG_L t8, sp, 13 * SZREG >> - ADDI sp, sp, FRAME_SIZE >> - >> - b .Lret >> - cfi_endproc >> - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >> - .hidden HIDDEN_JUMPTARGET(__tls_get_addr) >> +#include "dl-tlsdesc-dynamic.h" >> #endif /* #ifdef SHARED */ >> diff --git a/sysdeps/loongarch/dl-tlsdesc.h >> b/sysdeps/loongarch/dl-tlsdesc.h >> index ff8c69cb93..45c43a5b52 100644 >> --- a/sysdeps/loongarch/dl-tlsdesc.h >> +++ b/sysdeps/loongarch/dl-tlsdesc.h >> @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden >> _dl_tlsdesc_undefweak (struct tlsdesc *); >> #ifdef SHARED >> extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); >> +#ifndef __loongarch_soft_float >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct >> tlsdesc *); >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct >> tlsdesc *); >> +#endif >> extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct >> tlsdesc *); >> #endif 
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic 2024-07-02 10:44 ` Jinyang He @ 2024-07-02 11:48 ` mengqinggang 0 siblings, 0 replies; 4+ messages in thread From: mengqinggang @ 2024-07-02 11:48 UTC (permalink / raw) To: Jinyang He Cc: libc-alpha, adhemerval.zanella, xuchenghua, caiyinyu, chenglulu, cailulu, xry111, i.swmail, maskray, luweining, wanglei 在 2024/7/2 下午6:44, Jinyang He 写道: > > On 2024-07-01 17:27, mengqinggang wrote: >> Ping. >> >> >> The reason for changing to three _dl_tlsdesc_dynamic functions: >> In a single _dl_tlsdesc_dynamic, there are three cfi_adjust_cfa_offset >> directives, one each for the Float/LSX/LASX paths. >> All three cfi_adjust_cfa_offset directives are always applied during stack unwinding, >> but only one stack-down >> instruction is actually executed. This results in an incorrect CFA address. >> >> >> With three _dl_tlsdesc_dynamic functions, the three cfi_adjust_cfa_offset directives >> can be distributed across the three functions, >> so the cfi instructions correspond to the stack-down instructions. > But in the old version, the code didn't set the other cfi_adjust_cfa_offset > when doing the "stack down instructions", which may be wrong. I think we can > keep just one _dl_tlsdesc_dynamic if we set enough `cfi_*`. Of course, I don't know how to write the cfi instructions if there is only one _dl_tlsdesc_dynamic. > splitting it into three funcs is OK because we do not need to read HWCAP each > time we call _dl_tlsdesc_dynamic. >> >> >> >> 在 2024/6/26 下午2:34, mengqinggang 写道: >>> Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, >>> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx. >>> Conflicting cfi instructions can be distributed to the >>> three functions. >>> --- >>> Changes v1 -> v2: >>> - Change _dl_tlsdesc_dynamic to _dl_tlsdesc_dynamic, >>> _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
>>> >>> v1 link: >>> https://sourceware.org/pipermail/libc-alpha/2024-June/157270.html >>> >>> sysdeps/loongarch/dl-machine.h | 7 + >>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 403 >>> +++++++++++++++++++++++++ >>> sysdeps/loongarch/dl-tlsdesc.S | 386 ++--------------------- >>> sysdeps/loongarch/dl-tlsdesc.h | 4 + >>> 4 files changed, 436 insertions(+), 364 deletions(-) >>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> >>> diff --git a/sysdeps/loongarch/dl-machine.h >>> b/sysdeps/loongarch/dl-machine.h >>> index ab6f1da7c0..04fabbf598 100644 >>> --- a/sysdeps/loongarch/dl-machine.h >>> +++ b/sysdeps/loongarch/dl-machine.h >>> @@ -223,6 +223,13 @@ elf_machine_rela (struct link_map *map, struct >>> r_scope_elem *scope[], >>> { >>> td->arg = _dl_make_tlsdesc_dynamic (sym_map, >>> sym->st_value + reloc->r_addend); >>> +# ifndef __loongarch_soft_float >>> + if (SUPPORT_LASX) > Why "SUPPORT_LASX" rather than "RTLD_SUPPORT_LASX"? > The old version read HWCAP at "GLRO_DL_HWCAP_OFFSET", it means > the HWCAP is at "GLRO_offsetof (dl_hwcap)". > But, > #define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & > HWCAP_LOONGARCH_LASX) > #define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) > >>> + td->entry = _dl_tlsdesc_dynamic_lasx; >>> + else if (SUPPORT_LSX) >>> + td->entry = _dl_tlsdesc_dynamic_lsx; >>> + else >>> +# endif >>> td->entry = _dl_tlsdesc_dynamic; >>> } >>> else >>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> new file mode 100644 >>> index 0000000000..5b1f43aaf4 >>> --- /dev/null >>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> @@ -0,0 +1,403 @@ >>> +/* Thread-local storage handling in the ELF dynamic linker. >>> + LoongArch version. >>> + Copyright (C) 2024 Free Software Foundation, Inc. >>> + >>> + This file is part of the GNU C Library. 
>>> + >>> + The GNU C Library is free software; you can redistribute it and/or >>> + modify it under the terms of the GNU Lesser General Public >>> + License as published by the Free Software Foundation; either >>> + version 2.1 of the License, or (at your option) any later version. >>> + >>> + The GNU C Library is distributed in the hope that it will be >>> useful, >>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> + Lesser General Public License for more details. >>> + >>> + You should have received a copy of the GNU Lesser General Public >>> + License along with the GNU C Library; if not, see >>> + <https://www.gnu.org/licenses/>. */ >>> + >>> +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) >>> +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) >>> +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) >>> +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) >>> + >>> + /* Handler for dynamic TLS symbols. >>> + Prototype: >>> + _dl_tlsdesc_dynamic (tlsdesc *) ; >>> + >>> + The second word of the descriptor points to a >>> + tlsdesc_dynamic_arg structure. >>> + >>> + Returns the offset between the thread pointer and the >>> + object referenced by the argument. 
>>> + >>> + ptrdiff_t >>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >>> + { >>> + struct tlsdesc_dynamic_arg *td = tdp->arg; >>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - >>> SIZE_OF_TCB); >>> + if (__glibc_likely (td->gen_count <= dtv[0].counter >>> + && (dtv[td->tlsinfo.ti_module].pointer.val >>> + != TLS_DTV_UNALLOCATED), >>> + 1)) >>> + return dtv[td->tlsinfo.ti_module].pointer.val >>> + + td->tlsinfo.ti_offset >>> + - __thread_pointer; >>> + >>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >>> + } */ >>> + .hidden _dl_tlsdesc_dynamic >>> + .global _dl_tlsdesc_dynamic >>> + .type _dl_tlsdesc_dynamic,%function >>> + cfi_startproc >>> + .align 2 >>> +_dl_tlsdesc_dynamic: >>> + /* Save just enough registers to support fast path, if we fall >>> + into slow path we will save additional registers. */ >>> + ADDI sp, sp, -32 >>> + cfi_adjust_cfa_offset (32) >>> + REG_S t0, sp, 0 >>> + REG_S t1, sp, 8 >>> + REG_S t2, sp, 16 >>> + cfi_rel_offset (12, 0) >>> + cfi_rel_offset (13, 8) >>> + cfi_rel_offset (14, 16) >>> + >>> +/* Runtime Storage Layout of Thread-Local Storage >>> + TP point to the start of TLS block. >>> + >>> + dtv >>> +Low address TCB ----------------> dtv0(counter) >>> + TP --> static_block0 <----- dtv1 >>> + static_block1 <----- dtv2 >>> + static_block2 <----- dtv3 >>> + dynamic_block0 <----- dtv4 >>> +Hign address dynamic_block1 <----- dtv5 */ >>> + >>> + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ >>> + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ >>> + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ >>> + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ >>> + /* If dtv[0].counter < td->gen_count, goto slow path. 
*/ >>> + bltu t2, t1, .Lslow >>> + >>> + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ >>> + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ >>> + slli.d t1, t1, 4 >>> + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ >>> + REG_L t1, t1, 0 /* t1 = >>> dtv[td->tlsinfo.ti_module].pointer.val */ >>> + li.d t2, TLS_DTV_UNALLOCATED >>> + /* If dtv[td->tlsinfo.ti_module].pointer.val is >>> TLS_DTV_UNALLOCATED, >>> + goto slow path. */ >>> + beq t1, t2, .Lslow >>> + >>> + cfi_remember_state >>> + REG_L t2, a0, TLSDESC_MODOFF /* t2 = >>> td->tlsinfo.ti_offset */ >>> + /* dtv[td->tlsinfo.ti_module].pointer.val + >>> td->tlsinfo.ti_offset */ >>> + add.d a0, t1, t2 >>> +.Lret: >>> + sub.d a0, a0, tp >>> + REG_L t0, sp, 0 >>> + REG_L t1, sp, 8 >>> + REG_L t2, sp, 16 >>> + ADDI sp, sp, 32 >>> + cfi_adjust_cfa_offset (-32) >>> + RET >>> + >>> +.Lslow: >>> + /* This is the slow path. We need to call __tls_get_addr() which >>> + means we need to save and restore all the register that the >>> + callee will trash. */ >>> + >>> + /* Save the remaining registers that we must treat as caller >>> save. 
*/ >>> + cfi_restore_state >>> + ADDI sp, sp, -FRAME_SIZE >>> + cfi_adjust_cfa_offset (FRAME_SIZE) >>> + REG_S ra, sp, 0 * SZREG >>> + REG_S a1, sp, 1 * SZREG >>> + REG_S a2, sp, 2 * SZREG >>> + REG_S a3, sp, 3 * SZREG >>> + REG_S a4, sp, 4 * SZREG >>> + REG_S a5, sp, 5 * SZREG >>> + REG_S a6, sp, 6 * SZREG >>> + REG_S a7, sp, 7 * SZREG >>> + REG_S t3, sp, 8 * SZREG >>> + REG_S t4, sp, 9 * SZREG >>> + REG_S t5, sp, 10 * SZREG >>> + REG_S t6, sp, 11 * SZREG >>> + REG_S t7, sp, 12 * SZREG >>> + REG_S t8, sp, 13 * SZREG >>> + cfi_rel_offset (1, 0 * SZREG) >>> + cfi_rel_offset (5, 1 * SZREG) >>> + cfi_rel_offset (6, 2 * SZREG) >>> + cfi_rel_offset (7, 3 * SZREG) >>> + cfi_rel_offset (8, 4 * SZREG) >>> + cfi_rel_offset (9, 5 * SZREG) >>> + cfi_rel_offset (10, 6 * SZREG) >>> + cfi_rel_offset (11, 7 * SZREG) >>> + cfi_rel_offset (15, 8 * SZREG) >>> + cfi_rel_offset (16, 9 * SZREG) >>> + cfi_rel_offset (17, 10 * SZREG) >>> + cfi_rel_offset (18, 11 * SZREG) >>> + cfi_rel_offset (19, 12 * SZREG) >>> + cfi_rel_offset (20, 13 * SZREG) >>> + >>> +#ifndef __loongarch_soft_float >>> + >>> + /* Save fcsr0 register. >>> + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases >>> + of some fields in fcsr0. */ >>> + movfcsr2gr t0, fcsr0 >>> + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above >>> t2. */ >>> + >>> +#ifdef USE_LASX > If we define macros like follows, we can reduce some codes. > > // An example. 
> #if defined(USE_LASX) > #define V_REG_S xvst > #define V_REG_L xvld > #define V_SPACE FRAME_SIZE_LASX > #define V_REG(n) $xr##n > #define V_REGS > 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 > #define V_REGSZ SZXREG > #elif defined(USE_LSX) > #define V_REG_S vst > #define V_REG_L vld > #define V_SPACE FRAME_SIZE_LSX > #define V_REG(n) $vr##n > #define V_REGS > 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 > #define V_REGSZ SZVREG > #else > #define V_REG_S fst.d > #define V_REG_L fld.d > #define V_SPACE FRAME_SIZE_FLOAT > #define V_REG(n) $f##n > #define V_REGS > 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 > #define V_REGSZ SZFREG > #endif > > ADDI sp, sp, -V_SPACE > > cfi_adjust_cfa_offset (V_SPACE) > .irp i,V_REGS > V_REG_S V_REG(\i), sp, \i * V_REGSZ > .endr > > .irp i,V_REGS > V_REG_L V_REG(\i), sp, \i * V_REGSZ > .endr > ADDI sp, sp, V_SPACE > cfi_adjust_cfa_offset (-V_SPACE) > > Thanks. > > Jinyang > >>> + >>> + /* Save 256-bit vector registers. >>> + FIXME: Without vector ABI, save all vector registers. 
*/ >>> + ADDI sp, sp, -FRAME_SIZE_LASX >>> + cfi_adjust_cfa_offset (FRAME_SIZE_LASX) >>> + xvst xr0, sp, 0*SZXREG >>> + xvst xr1, sp, 1*SZXREG >>> + xvst xr2, sp, 2*SZXREG >>> + xvst xr3, sp, 3*SZXREG >>> + xvst xr4, sp, 4*SZXREG >>> + xvst xr5, sp, 5*SZXREG >>> + xvst xr6, sp, 6*SZXREG >>> + xvst xr7, sp, 7*SZXREG >>> + xvst xr8, sp, 8*SZXREG >>> + xvst xr9, sp, 9*SZXREG >>> + xvst xr10, sp, 10*SZXREG >>> + xvst xr11, sp, 11*SZXREG >>> + xvst xr12, sp, 12*SZXREG >>> + xvst xr13, sp, 13*SZXREG >>> + xvst xr14, sp, 14*SZXREG >>> + xvst xr15, sp, 15*SZXREG >>> + xvst xr16, sp, 16*SZXREG >>> + xvst xr17, sp, 17*SZXREG >>> + xvst xr18, sp, 18*SZXREG >>> + xvst xr19, sp, 19*SZXREG >>> + xvst xr20, sp, 20*SZXREG >>> + xvst xr21, sp, 21*SZXREG >>> + xvst xr22, sp, 22*SZXREG >>> + xvst xr23, sp, 23*SZXREG >>> + xvst xr24, sp, 24*SZXREG >>> + xvst xr25, sp, 25*SZXREG >>> + xvst xr26, sp, 26*SZXREG >>> + xvst xr27, sp, 27*SZXREG >>> + xvst xr28, sp, 28*SZXREG >>> + xvst xr29, sp, 29*SZXREG >>> + xvst xr30, sp, 30*SZXREG >>> + xvst xr31, sp, 31*SZXREG >>> + >>> +#elif defined USE_LSX >>> + >>> + /* Save 128-bit vector registers. 
*/ >>> + ADDI sp, sp, -FRAME_SIZE_LSX >>> + cfi_adjust_cfa_offset (FRAME_SIZE_LSX) >>> + vst vr0, sp, 0*SZVREG >>> + vst vr1, sp, 1*SZVREG >>> + vst vr2, sp, 2*SZVREG >>> + vst vr3, sp, 3*SZVREG >>> + vst vr4, sp, 4*SZVREG >>> + vst vr5, sp, 5*SZVREG >>> + vst vr6, sp, 6*SZVREG >>> + vst vr7, sp, 7*SZVREG >>> + vst vr8, sp, 8*SZVREG >>> + vst vr9, sp, 9*SZVREG >>> + vst vr10, sp, 10*SZVREG >>> + vst vr11, sp, 11*SZVREG >>> + vst vr12, sp, 12*SZVREG >>> + vst vr13, sp, 13*SZVREG >>> + vst vr14, sp, 14*SZVREG >>> + vst vr15, sp, 15*SZVREG >>> + vst vr16, sp, 16*SZVREG >>> + vst vr17, sp, 17*SZVREG >>> + vst vr18, sp, 18*SZVREG >>> + vst vr19, sp, 19*SZVREG >>> + vst vr20, sp, 20*SZVREG >>> + vst vr21, sp, 21*SZVREG >>> + vst vr22, sp, 22*SZVREG >>> + vst vr23, sp, 23*SZVREG >>> + vst vr24, sp, 24*SZVREG >>> + vst vr25, sp, 25*SZVREG >>> + vst vr26, sp, 26*SZVREG >>> + vst vr27, sp, 27*SZVREG >>> + vst vr28, sp, 28*SZVREG >>> + vst vr29, sp, 29*SZVREG >>> + vst vr30, sp, 30*SZVREG >>> + vst vr31, sp, 31*SZVREG >>> + >>> +# else >>> + >>> + /* Save float registers. 
*/ >>> + ADDI sp, sp, -FRAME_SIZE_FLOAT >>> + cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT) >>> + FREG_S fa0, sp, 0*SZFREG >>> + FREG_S fa1, sp, 1*SZFREG >>> + FREG_S fa2, sp, 2*SZFREG >>> + FREG_S fa3, sp, 3*SZFREG >>> + FREG_S fa4, sp, 4*SZFREG >>> + FREG_S fa5, sp, 5*SZFREG >>> + FREG_S fa6, sp, 6*SZFREG >>> + FREG_S fa7, sp, 7*SZFREG >>> + FREG_S ft0, sp, 8*SZFREG >>> + FREG_S ft1, sp, 9*SZFREG >>> + FREG_S ft2, sp, 10*SZFREG >>> + FREG_S ft3, sp, 11*SZFREG >>> + FREG_S ft4, sp, 12*SZFREG >>> + FREG_S ft5, sp, 13*SZFREG >>> + FREG_S ft6, sp, 14*SZFREG >>> + FREG_S ft7, sp, 15*SZFREG >>> + FREG_S ft8, sp, 16*SZFREG >>> + FREG_S ft9, sp, 17*SZFREG >>> + FREG_S ft10, sp, 18*SZFREG >>> + FREG_S ft11, sp, 19*SZFREG >>> + FREG_S ft12, sp, 20*SZFREG >>> + FREG_S ft13, sp, 21*SZFREG >>> + FREG_S ft14, sp, 22*SZFREG >>> + FREG_S ft15, sp, 23*SZFREG >>> + >>> +#endif /* #ifdef USE_LASX */ >>> +#endif /* #ifndef __loongarch_soft_float */ >>> + >>> + bl HIDDEN_JUMPTARGET(__tls_get_addr) >>> + ADDI a0, a0, -TLS_DTV_OFFSET >>> + >>> +#ifndef __loongarch_soft_float >>> +#ifdef USE_LASX >>> + >>> + /* Restore 256-bit vector registers. 
*/ >>> + xvld xr0, sp, 0*SZXREG >>> + xvld xr1, sp, 1*SZXREG >>> + xvld xr2, sp, 2*SZXREG >>> + xvld xr3, sp, 3*SZXREG >>> + xvld xr4, sp, 4*SZXREG >>> + xvld xr5, sp, 5*SZXREG >>> + xvld xr6, sp, 6*SZXREG >>> + xvld xr7, sp, 7*SZXREG >>> + xvld xr8, sp, 8*SZXREG >>> + xvld xr9, sp, 9*SZXREG >>> + xvld xr10, sp, 10*SZXREG >>> + xvld xr11, sp, 11*SZXREG >>> + xvld xr12, sp, 12*SZXREG >>> + xvld xr13, sp, 13*SZXREG >>> + xvld xr14, sp, 14*SZXREG >>> + xvld xr15, sp, 15*SZXREG >>> + xvld xr16, sp, 16*SZXREG >>> + xvld xr17, sp, 17*SZXREG >>> + xvld xr18, sp, 18*SZXREG >>> + xvld xr19, sp, 19*SZXREG >>> + xvld xr20, sp, 20*SZXREG >>> + xvld xr21, sp, 21*SZXREG >>> + xvld xr22, sp, 22*SZXREG >>> + xvld xr23, sp, 23*SZXREG >>> + xvld xr24, sp, 24*SZXREG >>> + xvld xr25, sp, 25*SZXREG >>> + xvld xr26, sp, 26*SZXREG >>> + xvld xr27, sp, 27*SZXREG >>> + xvld xr28, sp, 28*SZXREG >>> + xvld xr29, sp, 29*SZXREG >>> + xvld xr30, sp, 30*SZXREG >>> + xvld xr31, sp, 31*SZXREG >>> + ADDI sp, sp, FRAME_SIZE_LASX >>> + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX) >>> + >>> +#elif defined USE_LSX >>> + >>> + /* Restore 128-bit vector registers. 
*/ >>> + vld vr0, sp, 0*SZVREG >>> + vld vr1, sp, 1*SZVREG >>> + vld vr2, sp, 2*SZVREG >>> + vld vr3, sp, 3*SZVREG >>> + vld vr4, sp, 4*SZVREG >>> + vld vr5, sp, 5*SZVREG >>> + vld vr6, sp, 6*SZVREG >>> + vld vr7, sp, 7*SZVREG >>> + vld vr8, sp, 8*SZVREG >>> + vld vr9, sp, 9*SZVREG >>> + vld vr10, sp, 10*SZVREG >>> + vld vr11, sp, 11*SZVREG >>> + vld vr12, sp, 12*SZVREG >>> + vld vr13, sp, 13*SZVREG >>> + vld vr14, sp, 14*SZVREG >>> + vld vr15, sp, 15*SZVREG >>> + vld vr16, sp, 16*SZVREG >>> + vld vr17, sp, 17*SZVREG >>> + vld vr18, sp, 18*SZVREG >>> + vld vr19, sp, 19*SZVREG >>> + vld vr20, sp, 20*SZVREG >>> + vld vr21, sp, 21*SZVREG >>> + vld vr22, sp, 22*SZVREG >>> + vld vr23, sp, 23*SZVREG >>> + vld vr24, sp, 24*SZVREG >>> + vld vr25, sp, 25*SZVREG >>> + vld vr26, sp, 26*SZVREG >>> + vld vr27, sp, 27*SZVREG >>> + vld vr28, sp, 28*SZVREG >>> + vld vr29, sp, 29*SZVREG >>> + vld vr30, sp, 30*SZVREG >>> + vld vr31, sp, 31*SZVREG >>> + ADDI sp, sp, FRAME_SIZE_LSX >>> + cfi_adjust_cfa_offset (-FRAME_SIZE_LSX) >>> + >>> +#else >>> + >>> + /* Restore float registers. 
*/ >>> + FREG_L fa0, sp, 0*SZFREG >>> + FREG_L fa1, sp, 1*SZFREG >>> + FREG_L fa2, sp, 2*SZFREG >>> + FREG_L fa3, sp, 3*SZFREG >>> + FREG_L fa4, sp, 4*SZFREG >>> + FREG_L fa5, sp, 5*SZFREG >>> + FREG_L fa6, sp, 6*SZFREG >>> + FREG_L fa7, sp, 7*SZFREG >>> + FREG_L ft0, sp, 8*SZFREG >>> + FREG_L ft1, sp, 9*SZFREG >>> + FREG_L ft2, sp, 10*SZFREG >>> + FREG_L ft3, sp, 11*SZFREG >>> + FREG_L ft4, sp, 12*SZFREG >>> + FREG_L ft5, sp, 13*SZFREG >>> + FREG_L ft6, sp, 14*SZFREG >>> + FREG_L ft7, sp, 15*SZFREG >>> + FREG_L ft8, sp, 16*SZFREG >>> + FREG_L ft9, sp, 17*SZFREG >>> + FREG_L ft10, sp, 18*SZFREG >>> + FREG_L ft11, sp, 19*SZFREG >>> + FREG_L ft12, sp, 20*SZFREG >>> + FREG_L ft13, sp, 21*SZFREG >>> + FREG_L ft14, sp, 22*SZFREG >>> + FREG_L ft15, sp, 23*SZFREG >>> + ADDI sp, sp, FRAME_SIZE_FLOAT >>> + cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT) >>> + >>> +#endif /* #ifdef USE_LASX */ >>> + >>> + /* Restore fcsr0 register. */ >>> + ld.w t0, sp, FRAME_SIZE + 24 >>> + movgr2fcsr fcsr0, t0 >>> + >>> +#endif /* #ifndef __loongarch_soft_float */ >>> + >>> + REG_L ra, sp, 0 * SZREG >>> + REG_L a1, sp, 1 * SZREG >>> + REG_L a2, sp, 2 * SZREG >>> + REG_L a3, sp, 3 * SZREG >>> + REG_L a4, sp, 4 * SZREG >>> + REG_L a5, sp, 5 * SZREG >>> + REG_L a6, sp, 6 * SZREG >>> + REG_L a7, sp, 7 * SZREG >>> + REG_L t3, sp, 8 * SZREG >>> + REG_L t4, sp, 9 * SZREG >>> + REG_L t5, sp, 10 * SZREG >>> + REG_L t6, sp, 11 * SZREG >>> + REG_L t7, sp, 12 * SZREG >>> + REG_L t8, sp, 13 * SZREG >>> + ADDI sp, sp, FRAME_SIZE >>> + cfi_adjust_cfa_offset (-FRAME_SIZE) >>> + >>> + b .Lret >>> + cfi_endproc >>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >>> + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) >>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S >>> b/sysdeps/loongarch/dl-tlsdesc.S >>> index a6627cc754..b6cfd6121d 100644 >>> --- a/sysdeps/loongarch/dl-tlsdesc.S >>> +++ b/sysdeps/loongarch/dl-tlsdesc.S >>> @@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak: >>> cfi_endproc >>> .size _dl_tlsdesc_undefweak, 
.-_dl_tlsdesc_undefweak >>> - >>> #ifdef SHARED >>> -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) >>> -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) >>> -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) >>> -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) >>> - >>> - /* Handler for dynamic TLS symbols. >>> - Prototype: >>> - _dl_tlsdesc_dynamic (tlsdesc *) ; >>> - >>> - The second word of the descriptor points to a >>> - tlsdesc_dynamic_arg structure. >>> - >>> - Returns the offset between the thread pointer and the >>> - object referenced by the argument. >>> - >>> - ptrdiff_t >>> - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >>> - { >>> - struct tlsdesc_dynamic_arg *td = tdp->arg; >>> - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - >>> SIZE_OF_TCB); >>> - if (__glibc_likely (td->gen_count <= dtv[0].counter >>> - && (dtv[td->tlsinfo.ti_module].pointer.val >>> - != TLS_DTV_UNALLOCATED), >>> - 1)) >>> - return dtv[td->tlsinfo.ti_module].pointer.val >>> - + td->tlsinfo.ti_offset >>> - - __thread_pointer; >>> - >>> - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >>> - } */ >>> - .hidden _dl_tlsdesc_dynamic >>> - .global _dl_tlsdesc_dynamic >>> - .type _dl_tlsdesc_dynamic,%function >>> - cfi_startproc >>> - .align 2 >>> -_dl_tlsdesc_dynamic: >>> - /* Save just enough registers to support fast path, if we fall >>> - into slow path we will save additional registers. */ >>> - ADDI sp, sp, -32 >>> - REG_S t0, sp, 0 >>> - REG_S t1, sp, 8 >>> - REG_S t2, sp, 16 >>> - >>> -/* Runtime Storage Layout of Thread-Local Storage >>> - TP point to the start of TLS block. 
>>> - >>> - dtv >>> -Low address TCB ----------------> dtv0(counter) >>> - TP --> static_block0 <----- dtv1 >>> - static_block1 <----- dtv2 >>> - static_block2 <----- dtv3 >>> - dynamic_block0 <----- dtv4 >>> -Hign address dynamic_block1 <----- dtv5 */ >>> - >>> - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ >>> - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ >>> - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ >>> - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ >>> - /* If dtv[0].counter < td->gen_count, goto slow path. */ >>> - bltu t2, t1, .Lslow >>> - >>> - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ >>> - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ >>> - slli.d t1, t1, 4 >>> - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ >>> - REG_L t1, t1, 0 /* t1 = >>> dtv[td->tlsinfo.ti_module].pointer.val */ >>> - li.d t2, TLS_DTV_UNALLOCATED >>> - /* If dtv[td->tlsinfo.ti_module].pointer.val is >>> TLS_DTV_UNALLOCATED, >>> - goto slow path. */ >>> - beq t1, t2, .Lslow >>> - >>> - REG_L t2, a0, TLSDESC_MODOFF /* t2 = >>> td->tlsinfo.ti_offset */ >>> - /* dtv[td->tlsinfo.ti_module].pointer.val + >>> td->tlsinfo.ti_offset */ >>> - add.d a0, t1, t2 >>> -.Lret: >>> - sub.d a0, a0, tp >>> - REG_L t0, sp, 0 >>> - REG_L t1, sp, 8 >>> - REG_L t2, sp, 16 >>> - ADDI sp, sp, 32 >>> - RET >>> - >>> -.Lslow: >>> - /* This is the slow path. We need to call __tls_get_addr() which >>> - means we need to save and restore all the register that the >>> - callee will trash. */ >>> - >>> - /* Save the remaining registers that we must treat as caller >>> save. 
*/ >>> - ADDI sp, sp, -FRAME_SIZE >>> - REG_S ra, sp, 0 * SZREG >>> - REG_S a1, sp, 1 * SZREG >>> - REG_S a2, sp, 2 * SZREG >>> - REG_S a3, sp, 3 * SZREG >>> - REG_S a4, sp, 4 * SZREG >>> - REG_S a5, sp, 5 * SZREG >>> - REG_S a6, sp, 6 * SZREG >>> - REG_S a7, sp, 7 * SZREG >>> - REG_S t3, sp, 8 * SZREG >>> - REG_S t4, sp, 9 * SZREG >>> - REG_S t5, sp, 10 * SZREG >>> - REG_S t6, sp, 11 * SZREG >>> - REG_S t7, sp, 12 * SZREG >>> - REG_S t8, sp, 13 * SZREG >>> - >>> #ifndef __loongarch_soft_float >>> - /* Save fcsr0 register. >>> - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases >>> - of some fields in fcsr0. */ >>> - movfcsr2gr t0, fcsr0 >>> - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ >>> - >>> - /* Whether support LASX. */ >>> - la.global t0, _rtld_global_ro >>> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET >>> - andi t1, t0, HWCAP_LOONGARCH_LASX >>> - beqz t1, .Llsx >>> - >>> - /* Save 256-bit vector registers. >>> - FIXME: Without vector ABI, save all vector registers. 
*/ >>> - ADDI sp, sp, -FRAME_SIZE_LASX >>> - xvst xr0, sp, 0*SZXREG >>> - xvst xr1, sp, 1*SZXREG >>> - xvst xr2, sp, 2*SZXREG >>> - xvst xr3, sp, 3*SZXREG >>> - xvst xr4, sp, 4*SZXREG >>> - xvst xr5, sp, 5*SZXREG >>> - xvst xr6, sp, 6*SZXREG >>> - xvst xr7, sp, 7*SZXREG >>> - xvst xr8, sp, 8*SZXREG >>> - xvst xr9, sp, 9*SZXREG >>> - xvst xr10, sp, 10*SZXREG >>> - xvst xr11, sp, 11*SZXREG >>> - xvst xr12, sp, 12*SZXREG >>> - xvst xr13, sp, 13*SZXREG >>> - xvst xr14, sp, 14*SZXREG >>> - xvst xr15, sp, 15*SZXREG >>> - xvst xr16, sp, 16*SZXREG >>> - xvst xr17, sp, 17*SZXREG >>> - xvst xr18, sp, 18*SZXREG >>> - xvst xr19, sp, 19*SZXREG >>> - xvst xr20, sp, 20*SZXREG >>> - xvst xr21, sp, 21*SZXREG >>> - xvst xr22, sp, 22*SZXREG >>> - xvst xr23, sp, 23*SZXREG >>> - xvst xr24, sp, 24*SZXREG >>> - xvst xr25, sp, 25*SZXREG >>> - xvst xr26, sp, 26*SZXREG >>> - xvst xr27, sp, 27*SZXREG >>> - xvst xr28, sp, 28*SZXREG >>> - xvst xr29, sp, 29*SZXREG >>> - xvst xr30, sp, 30*SZXREG >>> - xvst xr31, sp, 31*SZXREG >>> - b .Ltga >>> - >>> -.Llsx: >>> - /* Whether support LSX. */ >>> - andi t1, t0, HWCAP_LOONGARCH_LSX >>> - beqz t1, .Lfloat >>> - >>> - /* Save 128-bit vector registers. 
*/ >>> - ADDI sp, sp, -FRAME_SIZE_LSX >>> - vst vr0, sp, 0*SZVREG >>> - vst vr1, sp, 1*SZVREG >>> - vst vr2, sp, 2*SZVREG >>> - vst vr3, sp, 3*SZVREG >>> - vst vr4, sp, 4*SZVREG >>> - vst vr5, sp, 5*SZVREG >>> - vst vr6, sp, 6*SZVREG >>> - vst vr7, sp, 7*SZVREG >>> - vst vr8, sp, 8*SZVREG >>> - vst vr9, sp, 9*SZVREG >>> - vst vr10, sp, 10*SZVREG >>> - vst vr11, sp, 11*SZVREG >>> - vst vr12, sp, 12*SZVREG >>> - vst vr13, sp, 13*SZVREG >>> - vst vr14, sp, 14*SZVREG >>> - vst vr15, sp, 15*SZVREG >>> - vst vr16, sp, 16*SZVREG >>> - vst vr17, sp, 17*SZVREG >>> - vst vr18, sp, 18*SZVREG >>> - vst vr19, sp, 19*SZVREG >>> - vst vr20, sp, 20*SZVREG >>> - vst vr21, sp, 21*SZVREG >>> - vst vr22, sp, 22*SZVREG >>> - vst vr23, sp, 23*SZVREG >>> - vst vr24, sp, 24*SZVREG >>> - vst vr25, sp, 25*SZVREG >>> - vst vr26, sp, 26*SZVREG >>> - vst vr27, sp, 27*SZVREG >>> - vst vr28, sp, 28*SZVREG >>> - vst vr29, sp, 29*SZVREG >>> - vst vr30, sp, 30*SZVREG >>> - vst vr31, sp, 31*SZVREG >>> - b .Ltga >>> - >>> -.Lfloat: >>> - /* Save float registers. 
*/ >>> - ADDI sp, sp, -FRAME_SIZE_FLOAT >>> - FREG_S fa0, sp, 0*SZFREG >>> - FREG_S fa1, sp, 1*SZFREG >>> - FREG_S fa2, sp, 2*SZFREG >>> - FREG_S fa3, sp, 3*SZFREG >>> - FREG_S fa4, sp, 4*SZFREG >>> - FREG_S fa5, sp, 5*SZFREG >>> - FREG_S fa6, sp, 6*SZFREG >>> - FREG_S fa7, sp, 7*SZFREG >>> - FREG_S ft0, sp, 8*SZFREG >>> - FREG_S ft1, sp, 9*SZFREG >>> - FREG_S ft2, sp, 10*SZFREG >>> - FREG_S ft3, sp, 11*SZFREG >>> - FREG_S ft4, sp, 12*SZFREG >>> - FREG_S ft5, sp, 13*SZFREG >>> - FREG_S ft6, sp, 14*SZFREG >>> - FREG_S ft7, sp, 15*SZFREG >>> - FREG_S ft8, sp, 16*SZFREG >>> - FREG_S ft9, sp, 17*SZFREG >>> - FREG_S ft10, sp, 18*SZFREG >>> - FREG_S ft11, sp, 19*SZFREG >>> - FREG_S ft12, sp, 20*SZFREG >>> - FREG_S ft13, sp, 21*SZFREG >>> - FREG_S ft14, sp, 22*SZFREG >>> - FREG_S ft15, sp, 23*SZFREG >>> - >>> -#endif /* #ifndef __loongarch_soft_float */ >>> - >>> -.Ltga: >>> - bl HIDDEN_JUMPTARGET(__tls_get_addr) >>> - ADDI a0, a0, -TLS_DTV_OFFSET >>> - >>> -#ifndef __loongarch_soft_float >>> - >>> - la.global t0, _rtld_global_ro >>> - REG_L t0, t0, GLRO_DL_HWCAP_OFFSET >>> - andi t1, t0, HWCAP_LOONGARCH_LASX >>> - beqz t1, .Llsx1 >>> - >>> - /* Restore 256-bit vector registers. 
*/ >>> - xvld xr0, sp, 0*SZXREG >>> - xvld xr1, sp, 1*SZXREG >>> - xvld xr2, sp, 2*SZXREG >>> - xvld xr3, sp, 3*SZXREG >>> - xvld xr4, sp, 4*SZXREG >>> - xvld xr5, sp, 5*SZXREG >>> - xvld xr6, sp, 6*SZXREG >>> - xvld xr7, sp, 7*SZXREG >>> - xvld xr8, sp, 8*SZXREG >>> - xvld xr9, sp, 9*SZXREG >>> - xvld xr10, sp, 10*SZXREG >>> - xvld xr11, sp, 11*SZXREG >>> - xvld xr12, sp, 12*SZXREG >>> - xvld xr13, sp, 13*SZXREG >>> - xvld xr14, sp, 14*SZXREG >>> - xvld xr15, sp, 15*SZXREG >>> - xvld xr16, sp, 16*SZXREG >>> - xvld xr17, sp, 17*SZXREG >>> - xvld xr18, sp, 18*SZXREG >>> - xvld xr19, sp, 19*SZXREG >>> - xvld xr20, sp, 20*SZXREG >>> - xvld xr21, sp, 21*SZXREG >>> - xvld xr22, sp, 22*SZXREG >>> - xvld xr23, sp, 23*SZXREG >>> - xvld xr24, sp, 24*SZXREG >>> - xvld xr25, sp, 25*SZXREG >>> - xvld xr26, sp, 26*SZXREG >>> - xvld xr27, sp, 27*SZXREG >>> - xvld xr28, sp, 28*SZXREG >>> - xvld xr29, sp, 29*SZXREG >>> - xvld xr30, sp, 30*SZXREG >>> - xvld xr31, sp, 31*SZXREG >>> - ADDI sp, sp, FRAME_SIZE_LASX >>> - b .Lfcsr >>> - >>> -.Llsx1: >>> - andi t1, t0, HWCAP_LOONGARCH_LSX >>> - beqz t1, .Lfloat1 >>> - >>> - /* Restore 128-bit vector registers. 
*/ >>> - vld vr0, sp, 0*SZVREG >>> - vld vr1, sp, 1*SZVREG >>> - vld vr2, sp, 2*SZVREG >>> - vld vr3, sp, 3*SZVREG >>> - vld vr4, sp, 4*SZVREG >>> - vld vr5, sp, 5*SZVREG >>> - vld vr6, sp, 6*SZVREG >>> - vld vr7, sp, 7*SZVREG >>> - vld vr8, sp, 8*SZVREG >>> - vld vr9, sp, 9*SZVREG >>> - vld vr10, sp, 10*SZVREG >>> - vld vr11, sp, 11*SZVREG >>> - vld vr12, sp, 12*SZVREG >>> - vld vr13, sp, 13*SZVREG >>> - vld vr14, sp, 14*SZVREG >>> - vld vr15, sp, 15*SZVREG >>> - vld vr16, sp, 16*SZVREG >>> - vld vr17, sp, 17*SZVREG >>> - vld vr18, sp, 18*SZVREG >>> - vld vr19, sp, 19*SZVREG >>> - vld vr20, sp, 20*SZVREG >>> - vld vr21, sp, 21*SZVREG >>> - vld vr22, sp, 22*SZVREG >>> - vld vr23, sp, 23*SZVREG >>> - vld vr24, sp, 24*SZVREG >>> - vld vr25, sp, 25*SZVREG >>> - vld vr26, sp, 26*SZVREG >>> - vld vr27, sp, 27*SZVREG >>> - vld vr28, sp, 28*SZVREG >>> - vld vr29, sp, 29*SZVREG >>> - vld vr30, sp, 30*SZVREG >>> - vld vr31, sp, 31*SZVREG >>> - ADDI sp, sp, FRAME_SIZE_LSX >>> - b .Lfcsr >>> - >>> -.Lfloat1: >>> - /* Restore float registers. */ >>> - FREG_L fa0, sp, 0*SZFREG >>> - FREG_L fa1, sp, 1*SZFREG >>> - FREG_L fa2, sp, 2*SZFREG >>> - FREG_L fa3, sp, 3*SZFREG >>> - FREG_L fa4, sp, 4*SZFREG >>> - FREG_L fa5, sp, 5*SZFREG >>> - FREG_L fa6, sp, 6*SZFREG >>> - FREG_L fa7, sp, 7*SZFREG >>> - FREG_L ft0, sp, 8*SZFREG >>> - FREG_L ft1, sp, 9*SZFREG >>> - FREG_L ft2, sp, 10*SZFREG >>> - FREG_L ft3, sp, 11*SZFREG >>> - FREG_L ft4, sp, 12*SZFREG >>> - FREG_L ft5, sp, 13*SZFREG >>> - FREG_L ft6, sp, 14*SZFREG >>> - FREG_L ft7, sp, 15*SZFREG >>> - FREG_L ft8, sp, 16*SZFREG >>> - FREG_L ft9, sp, 17*SZFREG >>> - FREG_L ft10, sp, 18*SZFREG >>> - FREG_L ft11, sp, 19*SZFREG >>> - FREG_L ft12, sp, 20*SZFREG >>> - FREG_L ft13, sp, 21*SZFREG >>> - FREG_L ft14, sp, 22*SZFREG >>> - FREG_L ft15, sp, 23*SZFREG >>> - ADDI sp, sp, FRAME_SIZE_FLOAT >>> - >>> -.Lfcsr: >>> - /* Restore fcsr0 register. 
*/ >>> - ld.w t0, sp, FRAME_SIZE + 24 >>> - movgr2fcsr fcsr0, t0 >>> +#define USE_LASX >>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx >>> +#define Lret Lret_lasx >>> +#define Lslow Lslow_lasx >>> +#include "dl-tlsdesc-dynamic.h" >>> +#undef FRAME_SIZE >>> +#undef USE_LASX >>> +#undef _dl_tlsdesc_dynamic >>> +#undef Lret >>> +#undef Lslow >>> + >>> +#define USE_LSX >>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx >>> +#define Lret Lret_lsx >>> +#define Lslow Lslow_lsx >>> +#include "dl-tlsdesc-dynamic.h" >>> +#undef FRAME_SIZE >>> +#undef USE_LSX >>> +#undef _dl_tlsdesc_dynamic >>> +#undef Lret >>> +#undef Lslow >>> #endif /* #ifndef __loongarch_soft_float */ >>> - REG_L ra, sp, 0 * SZREG >>> - REG_L a1, sp, 1 * SZREG >>> - REG_L a2, sp, 2 * SZREG >>> - REG_L a3, sp, 3 * SZREG >>> - REG_L a4, sp, 4 * SZREG >>> - REG_L a5, sp, 5 * SZREG >>> - REG_L a6, sp, 6 * SZREG >>> - REG_L a7, sp, 7 * SZREG >>> - REG_L t3, sp, 8 * SZREG >>> - REG_L t4, sp, 9 * SZREG >>> - REG_L t5, sp, 10 * SZREG >>> - REG_L t6, sp, 11 * SZREG >>> - REG_L t7, sp, 12 * SZREG >>> - REG_L t8, sp, 13 * SZREG >>> - ADDI sp, sp, FRAME_SIZE >>> - >>> - b .Lret >>> - cfi_endproc >>> - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >>> - .hidden HIDDEN_JUMPTARGET(__tls_get_addr) >>> +#include "dl-tlsdesc-dynamic.h" >>> #endif /* #ifdef SHARED */ >>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h >>> b/sysdeps/loongarch/dl-tlsdesc.h >>> index ff8c69cb93..45c43a5b52 100644 >>> --- a/sysdeps/loongarch/dl-tlsdesc.h >>> +++ b/sysdeps/loongarch/dl-tlsdesc.h >>> @@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden >>> _dl_tlsdesc_undefweak (struct tlsdesc *); >>> #ifdef SHARED >>> extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); >>> +#ifndef __loongarch_soft_float >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct >>> tlsdesc *); >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct >>> tlsdesc *); >>> +#endif >>> extern ptrdiff_t 
attribute_hidden _dl_tlsdesc_dynamic (struct >>> tlsdesc *); >>> #endif ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-07-02 11:48 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-06-26 6:34 [PATCH v2] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic mengqinggang 2024-07-01 9:27 ` mengqinggang 2024-07-02 10:44 ` Jinyang He 2024-07-02 11:48 ` mengqinggang
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).