From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi1-x22a.google.com (mail-oi1-x22a.google.com [IPv6:2607:f8b0:4864:20::22a]) by sourceware.org (Postfix) with ESMTPS id 59E833858D3C for ; Tue, 12 Sep 2023 13:37:09 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 59E833858D3C Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=linaro.org Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=linaro.org Received: by mail-oi1-x22a.google.com with SMTP id 5614622812f47-3aa1446066aso3865333b6e.1 for ; Tue, 12 Sep 2023 06:37:09 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; t=1694525828; x=1695130628; darn=sourceware.org; h=content-transfer-encoding:in-reply-to:organization:from:references :cc:to:content-language:subject:user-agent:mime-version:date :message-id:from:to:cc:subject:date:message-id:reply-to; bh=YQWnEBzC/ooy2B/yUd++yH6f4s9u/h/z71+ynHhAmhc=; b=usXI+t7/il48wJdeHkGnMaJGp0fXod4mVUw5os6IGz6RKR0g4CrmdFy7b8l6N8ZUHN tFYZRbeF+l5phGZxWXLkX06fvo6hxK/xAVqDw4avxvlvzsIwjTp+IfJ8dnk0tuegHLZX w8svcYZqmkhdhYhcnQlF4nR+vXbbTABqSjLQXgAuRHUt26/ZcswchH7lDu6hMYTb4sI4 mml024TkiUmOwsM7VTRPTOMeD8wMCnbXXRd97T5M6t60feoGl9s84xtqTt0EHlblz1ob ecjvOYuHRqdXmZ1F1o4KmrDpWdb+rCcezZOeU5sksx+nC63D0iyXhSkvkK8tuu/HkfkW Bbdw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1694525828; x=1695130628; h=content-transfer-encoding:in-reply-to:organization:from:references :cc:to:content-language:subject:user-agent:mime-version:date :message-id:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=YQWnEBzC/ooy2B/yUd++yH6f4s9u/h/z71+ynHhAmhc=; b=CZOTedAq/fQ6wbRIUwypWJ3kpMsukS5h725KCZXv3MteFiVPfpUrtEHVOJg9Y3Sksa ouCEIsQSqeIEF4zgCbQdnff8HKofoGUpSUQ+1r/AFMFKuzQ8jROCXe/JKU60mFvOQgTM s0XV80U1Yg/OmuKCUQYFclEmDO7Qs8j9uJE/ERgRwdcuOvH0PXwZ6UkqKW4yfXbnySOj nZ0NwJj8JwsD27Zp6a1spyg1Bi4N4jF4yaRN6/LanZg2vdL8YywAUi6rB/fz0tRn6Sam 
DdTs0rA+n4eC1oVutOlH44gcx5n4Uf8aWdxnol315QsWwMIlAV63vL1Rm4YJb8ea3h8q GxsQ== X-Gm-Message-State: AOJu0Yzk/ZeAIQGphvD4n8h9A6In5zkw4MOM35NqxtsZhJ2Zw9igeZWr G5JVZkoLc1UB0iYSfg1WGbndpAwt8PB7/z6ktHGsRg== X-Google-Smtp-Source: AGHT+IFXhwUvUY81Zw6jxgsfTEHq7hVOtQl8kgjTk/E6pLdLfm/bQQmaiBoVGX0RIgfdioUOVOfzuQ== X-Received: by 2002:a05:6808:356:b0:3a7:39f6:3f2c with SMTP id j22-20020a056808035600b003a739f63f2cmr14836590oie.55.1694525828501; Tue, 12 Sep 2023 06:37:08 -0700 (PDT) Received: from ?IPV6:2804:1b3:a7c0:91cb:1977:7e4f:e638:7fad? ([2804:1b3:a7c0:91cb:1977:7e4f:e638:7fad]) by smtp.gmail.com with ESMTPSA id es12-20020a056808278c00b003a8560a9d34sm4174847oib.25.2023.09.12.06.37.06 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Tue, 12 Sep 2023 06:37:07 -0700 (PDT) Message-ID: <8e96c9e5-2e50-3f06-2c4d-24838dc177a5@linaro.org> Date: Tue, 12 Sep 2023 10:37:04 -0300 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:102.0) Gecko/20100101 Thunderbird/102.15.0 Subject: Re: [PATCH] LoongArch: Add lasx/lsx support for _dl_runtime_profile. 
Content-Language: en-US To: caiyinyu , libc-alpha@sourceware.org Cc: xry111@xry111.site References: <20230911103401.2485168-1-caiyinyu@loongson.cn> From: Adhemerval Zanella Netto Organization: Linaro In-Reply-To: <20230911103401.2485168-1-caiyinyu@loongson.cn> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-13.0 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,NICE_REPLY_A,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: I think this qualifies as an ABI break, so it should follow what aarch64 did when we fixed BZ #26643 (ce9a68c57c260c8417afc93972849ac9ad243ec4) and bump LAV_CURRENT (it was not done on this specific commit, but rather on 32612615c58b394c3eb09f020f31310797ad3854 to fix BZ #23734). So add a loongarch link_lavcurrent.h with a value of 3 for !__loongarch_soft_float. On 11/09/23 07:34, caiyinyu wrote: > --- > sysdeps/loongarch/bits/link.h | 24 ++- > sysdeps/loongarch/dl-link.sym | 8 +- > sysdeps/loongarch/dl-machine.h | 11 +- > sysdeps/loongarch/dl-trampoline.S | 175 +-------------------- > sysdeps/loongarch/dl-trampoline.h | 242 ++++++++++++++++++++++++++++++ > 5 files changed, 283 insertions(+), 177 deletions(-) > > diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h > index 7fa6131280..00f6f25f2d 100644 > --- a/sysdeps/loongarch/bits/link.h > +++ b/sysdeps/loongarch/bits/link.h > @@ -20,10 +20,26 @@ > #error "Never include directly; use instead." 
> #endif > > +#ifndef __loongarch_soft_float > +typedef float La_loongarch_vr > + __attribute__ ((__vector_size__ (16), __aligned__ (16))); > +typedef float La_loongarch_xr > + __attribute__ ((__vector_size__ (32), __aligned__ (16))); > + > +typedef union > +{ > + double fpreg[4]; > + La_loongarch_vr vr[2]; > + La_loongarch_xr xr[1]; > +} La_loongarch_vector __attribute__ ((__aligned__ (16))); > +#endif > + > typedef struct La_loongarch_regs > { > unsigned long int lr_reg[8]; /* a0 - a7 */ > - double lr_fpreg[8]; /* fa0 - fa7 */ > +#ifndef __loongarch_soft_float > + La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ > +#endif > unsigned long int lr_ra; > unsigned long int lr_sp; > } La_loongarch_regs; > @@ -33,8 +49,10 @@ typedef struct La_loongarch_retval > { > unsigned long int lrv_a0; > unsigned long int lrv_a1; > - double lrv_fa0; > - double lrv_fa1; > +#ifndef __loongarch_soft_float > + La_loongarch_vector lrv_vec0; > + La_loongarch_vector lrv_vec1; > +#endif > } La_loongarch_retval; > > __BEGIN_DECLS > diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym > index 868ab7c6eb..b534968e30 100644 > --- a/sysdeps/loongarch/dl-link.sym > +++ b/sysdeps/loongarch/dl-link.sym > @@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) > DL_SIZEOF_RV sizeof(struct La_loongarch_retval) > > DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) > -DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) > +#ifndef __loongarch_soft_float > +DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) > +#endif > DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) > DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) > > DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) > -DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) > +#ifndef __loongarch_soft_float > +DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) > +#endif > diff --git a/sysdeps/loongarch/dl-machine.h 
b/sysdeps/loongarch/dl-machine.h > index 066bb233ac..8a2db9de3c 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], > #if !defined __loongarch_soft_float > extern void _dl_runtime_resolve_lasx (void) attribute_hidden; > extern void _dl_runtime_resolve_lsx (void) attribute_hidden; > + extern void _dl_runtime_profile_lasx (void) attribute_hidden; > + extern void _dl_runtime_profile_lsx (void) attribute_hidden; > #endif > extern void _dl_runtime_resolve (void) attribute_hidden; > extern void _dl_runtime_profile (void) attribute_hidden; > @@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], > end in this function. */ > if (profile != 0) > { > - gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; > +#if !defined __loongarch_soft_float > + if (SUPPORT_LASX) > + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; > + else if (SUPPORT_LSX) > + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; > + else > +#endif > + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; > > if (GLRO(dl_profile) != NULL > && _dl_name_match_p (GLRO(dl_profile), l)) > diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S > index 8fd9146978..dce1c2f122 100644 > --- a/sysdeps/loongarch/dl-trampoline.S > +++ b/sysdeps/loongarch/dl-trampoline.S > @@ -22,190 +22,23 @@ > #if !defined __loongarch_soft_float > #define USE_LASX > #define _dl_runtime_resolve _dl_runtime_resolve_lasx > +#define _dl_runtime_profile _dl_runtime_profile_lasx > #include "dl-trampoline.h" > #undef FRAME_SIZE > #undef USE_LASX > #undef _dl_runtime_resolve > +#undef _dl_runtime_profile > > #define USE_LSX > #define _dl_runtime_resolve _dl_runtime_resolve_lsx > +#define _dl_runtime_profile _dl_runtime_profile_lsx > #include "dl-trampoline.h" > #undef FRAME_SIZE > #undef USE_LSX > #undef _dl_runtime_resolve > +#undef _dl_runtime_profile > 
#endif > > #include "dl-trampoline.h" > > -#include "dl-link.h" > > -ENTRY (_dl_runtime_profile) > - /* LoongArch we get called with: > - t0 linkr_map pointer > - t1 the scaled offset stored in t0, which can be used > - to calculate the offset of the current symbol in .rela.plt > - t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function > - t3 dl resolver entry point, no use in this function > - > - Stack frame layout: > - [sp, #96] La_loongarch_regs > - [sp, #48] La_loongarch_retval > - [sp, #40] frame size return from pltenter > - [sp, #32] dl_profile_call saved a1 > - [sp, #24] dl_profile_call saved a0 > - [sp, #16] T1 > - [sp, #0] ra, fp <- fp > - */ > - > -# define OFFSET_T1 16 > -# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 > -# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 > -# define OFFSET_RV OFFSET_FS + 8 > -# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV > - > -# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) > - > - /* Save arguments to stack. */ > - ADDI sp, sp, -SF_SIZE > - REG_S ra, sp, 0 > - REG_S fp, sp, 8 > - > - or fp, sp, zero > - > - REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > - REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > - REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > - REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > - REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > - REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > - REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > - REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > - > -#ifndef __loongarch_soft_float > - FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG > - FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG > - FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG > - FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG > - FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG > - FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG > - FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG > - FREG_S fa7, 
fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG > -#endif > - > - /* Update .got.plt and obtain runtime address of callee. */ > - SLLI a1, t1, 1 > - or a0, t0, zero > - ADD a1, a1, t1 > - or a2, ra, zero /* return addr */ > - ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ > - ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ > - > - REG_S a0, fp, OFFSET_SAVED_CALL_A0 > - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > - > - la t2, _dl_profile_fixup > - jirl ra, t2, 0 > - > - REG_L t3, fp, OFFSET_FS > - bge t3, zero, 1f > - > - /* Save the return. */ > - or t4, v0, zero > - > - /* Restore arguments from stack. */ > - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > - > -#ifndef __loongarch_soft_float > - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG > - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG > - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG > - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG > - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG > - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG > - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG > - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG > -#endif > - > - REG_L ra, fp, 0 > - REG_L fp, fp, SZREG > - > - ADDI sp, sp, SF_SIZE > - jirl zero, t4, 0 > - > -1: > - /* The new frame size is in t3. 
*/ > - SUB sp, fp, t3 > - BSTRINS sp, zero, 3, 0 > - > - REG_S a0, fp, OFFSET_T1 > - > - or a0, sp, zero > - ADDI a1, fp, SF_SIZE > - or a2, t3, zero > - la t5, memcpy > - jirl ra, t5, 0 > - > - REG_L t6, fp, OFFSET_T1 > - > - /* Call the function. */ > - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > - > -#ifndef __loongarch_soft_float > - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG > - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG > - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG > - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG > - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG > - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG > - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG > - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG > -#endif > - jirl ra, t6, 0 > - > - REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 > - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG > - > -#ifndef __loongarch_soft_float > - FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 > - FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG > -#endif > - > - /* Setup call to pltexit. 
*/ > - REG_L a0, fp, OFFSET_SAVED_CALL_A0 > - REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > - ADDI a2, fp, OFFSET_RG > - ADDI a3, fp, OFFSET_RV > - la t7, _dl_audit_pltexit > - jirl ra, t7, 0 > - > - REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 > - REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG > - > -#ifndef __loongarch_soft_float > - FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 > - FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG > -#endif > - > - /* RA from within La_loongarch_reg. */ > - REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA > - or sp, fp, zero > - ADDI sp, sp, SF_SIZE > - REG_S fp, fp, SZREG > - > - jirl zero, ra, 0 > - > -END (_dl_runtime_profile) > diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h > index 02375286f8..cb4a287c65 100644 > --- a/sysdeps/loongarch/dl-trampoline.h > +++ b/sysdeps/loongarch/dl-trampoline.h > @@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) > /* Invoke the callee. */ > jirl zero, t1, 0 > END (_dl_runtime_resolve) > + > +#include "dl-link.h" > + > +ENTRY (_dl_runtime_profile) > + /* LoongArch we get called with: > + t0 linkr_map pointer > + t1 the scaled offset stored in t0, which can be used > + to calculate the offset of the current symbol in .rela.plt > + t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function > + t3 dl resolver entry point, no use in this function > + > + Stack frame layout: > + [sp, #208] La_loongarch_regs > + [sp, #128] La_loongarch_retval // align: 16 > + [sp, #112] frame size return from pltenter > + [sp, #80 ] dl_profile_call saved vec1 > + [sp, #48 ] dl_profile_call saved vec0 // align: 16 > + [sp, #32 ] dl_profile_call saved a1 > + [sp, #24 ] dl_profile_call saved a0 > + [sp, #16 ] T1 > + [sp, #0 ] ra, fp <- fp > + */ > + > +# define OFFSET_T1 16 > +# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 > +# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 > +# define OFFSET_RV OFFSET_FS + 8 + 8 > +# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV > + > +# define SF_SIZE 
(-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) > + > + /* Save arguments to stack. */ > + ADDI sp, sp, -SF_SIZE > + REG_S ra, sp, 0 > + REG_S fp, sp, 8 > + > + or fp, sp, zero > + > + REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > + REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > + REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > + REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > + REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > + REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > + REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > + REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > + > +#ifdef USE_LASX > + xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG > + xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG > + xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG > + xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG > + xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG > + xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG > + xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG > + xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG > +#elif defined USE_LSX > + vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG > + vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG > + vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG > + vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG > + vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG > + vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG > + vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG > + vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG > + FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG > + FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG > + FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG > + FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG > + FREG_S fa5, fp, OFFSET_RG + 
DL_OFFSET_RG_VEC0 + 5*SZFREG > + FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG > + FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG > +#endif > + > + /* Update .got.plt and obtain runtime address of callee. */ > + SLLI a1, t1, 1 > + or a0, t0, zero > + ADD a1, a1, t1 > + or a2, ra, zero /* return addr */ > + ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ > + ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ > + > + REG_S a0, fp, OFFSET_SAVED_CALL_A0 > + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > + > + la t2, _dl_profile_fixup > + jirl ra, t2, 0 > + > + REG_L t3, fp, OFFSET_FS > + bge t3, zero, 1f > + > + /* Save the return. */ > + or t4, v0, zero > + > + /* Restore arguments from stack. */ > + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > + > +#ifdef USE_LASX > + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG > + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG > + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG > + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG > + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG > + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG > + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG > + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG > +#elif defined USE_LSX > + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG > + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG > + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG > + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG > + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG > 
+ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG > + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG > + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG > + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG > + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG > + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG > + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG > + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG > + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG > + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG > +#endif > + > + REG_L ra, fp, 0 > + REG_L fp, fp, SZREG > + > + ADDI sp, sp, SF_SIZE > + jirl zero, t4, 0 > + > +1: > + /* The new frame size is in t3. */ > + SUB sp, fp, t3 > + BSTRINS sp, zero, 3, 0 > + > + REG_S a0, fp, OFFSET_T1 > + > + or a0, sp, zero > + ADDI a1, fp, SF_SIZE > + or a2, t3, zero > + la t5, memcpy > + jirl ra, t5, 0 > + > + REG_L t6, fp, OFFSET_T1 > + > + /* Call the function. 
*/ > + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > + > +#ifdef USE_LASX > + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG > + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG > + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG > + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG > + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG > + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG > + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG > + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG > +#elif defined USE_LSX > + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG > + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG > + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG > + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG > + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG > + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG > + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG > + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG > + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG > + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG > + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG > + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG > + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG > + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG > + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG > +#endif > + > + jirl ra, t6, 0 > 
+ > + REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 > + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG > + > +#ifdef USE_LASX > + xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG > +#elif defined USE_LSX > + vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG > +#endif > + > + /* Setup call to pltexit. */ > + REG_L a0, fp, OFFSET_SAVED_CALL_A0 > + REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > + ADDI a2, fp, OFFSET_RG > + ADDI a3, fp, OFFSET_RV > + la t7, _dl_audit_pltexit > + jirl ra, t7, 0 > + > + REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 > + REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG > + > +#ifdef USE_LASX > + xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG > +#elif defined USE_LSX > + vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG > +#elif !defined __loongarch_soft_float > + FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG > +#endif > + > + /* RA from within La_loongarch_reg. */ > + REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA > + or sp, fp, zero > + ADDI sp, sp, SF_SIZE > + REG_S fp, fp, SZREG > + > + jirl zero, ra, 0 > + > +END (_dl_runtime_profile)