From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1062) id 6176C3856DE0; Mon, 11 Apr 2022 01:30:30 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6176C3856DE0 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Alan Modra To: glibc-cvs@sourceware.org Subject: [glibc] powerpc64: Use medium model toc accesses throughout X-Act-Checkin: glibc X-Git-Author: Alan Modra X-Git-Refname: refs/heads/master X-Git-Oldrev: 30afd8c44d6a0a8b0eddbadecb02c9b9dad3facf X-Git-Newrev: d6efcc118e406a1cfeb309f835d7118df53419bb Message-Id: <20220411013030.6176C3856DE0@sourceware.org> Date: Mon, 11 Apr 2022 01:30:30 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 11 Apr 2022 01:30:30 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d6efcc118e406a1cfeb309f835d7118df53419bb commit d6efcc118e406a1cfeb309f835d7118df53419bb Author: Alan Modra Date: Sat Jan 22 11:48:50 2022 +1030 powerpc64: Use medium model toc accesses throughout The PowerPC64 linker edits medium model toc-indirect code to toc-pointer relative: addis r9,r2,tc_entry_for_var@toc@ha ld r9,tc_entry_for_var@toc@l(r9) becomes addis r9,r2,(var-.TOC.)@ha addi r9,r9,(var-.TOC.)@l when "var" is known to be local to the binary. This isn't done for small-model toc-indirect code, because "var" is almost guaranteed to be too far away from .TOC. for a 16-bit signed offset. And, because the analysis of which .toc entry can be removed becomes much more complicated in objects that mix code models, they aren't removed if any small-model toc sequence appears in an object file. Unfortunately, glibc's build of ld.so smashes the needed objects together in a ld -r linking stage. This means the GOT/TOC is left with a whole lot of relative relocations which is untidy, but in itself is not a serious problem. However, static-pie on powerpc64 bombs due to a segfault caused by one of the small-model accesses before _dl_relocate_static_pie. (The very first one in rcrt1.o passing start_addresses in r8 to __libc_start_main.) So this patch makes all the toc/got accesses in assembly medium code model, and a couple of functions hidden. By itself this is not enough to give us working static-pie, but it is useful in isolation to enable better linker optimisation. There's a serious problem in libgcc too. libgcc ifuncs access the AT_HWCAP words stored in the tcb with an offset from the thread pointer (r13), but r13 isn't set at the time _dl_relocate_static_pie. A followup patch will fix that. Reviewed-by: Tulio Magno Quites Machado Filho Diff: --- sysdeps/powerpc/powerpc64/__longjmp-common.S | 8 +++++--- sysdeps/powerpc/powerpc64/dl-machine.h | 12 ++++++++---- sysdeps/powerpc/powerpc64/dl-trampoline.S | 8 ++++++-- sysdeps/powerpc/powerpc64/setjmp-common.S | 8 +++++--- sysdeps/powerpc/powerpc64/start.S | 3 ++- sysdeps/powerpc/powerpc64/sysdep.h | 6 ++++-- 6 files changed, 30 insertions(+), 15 deletions(-) diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S index 4d71b9e102..5f629e1e0f 100644 --- a/sysdeps/powerpc/powerpc64/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -47,12 +47,14 @@ ENTRY (__longjmp) CALL_MCOUNT 2 #ifndef __NO_VMX__ - ld r5,.LC__dl_hwcap@toc(r2) + addis r5,r2,.LC__dl_hwcap@toc@ha + ld r5,.LC__dl_hwcap@toc@l(r5) # ifdef SHARED /* Load _rtld-global._dl_hwcap. */ - ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) # else - ld r5,0(r5) /* Load extern _dl_hwcap. */ + /* Load extern _dl_hwcap. */ + ld r5,0(r5) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index 5da5de7a0a..6fab5cbe81 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -175,9 +175,12 @@ BODY_PREFIX "_dl_start_user:\n" \ /* the address of _start in r30. */ \ " mr 30,3\n" \ /* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ -" ld 28,.LC__rtld_local@toc(2)\n" \ -" ld 29,.LC__dl_argc@toc(2)\n" \ -" ld 27,.LC__dl_argv@toc(2)\n" \ +" addis 28,2,.LC__rtld_local@toc@ha\n" \ +" ld 28,.LC__rtld_local@toc@l(28)\n" \ +" addis 29,2,.LC__dl_argc@toc@ha\n" \ +" ld 29,.LC__dl_argc@toc@l(29)\n" \ +" addis 27,2,.LC__dl_argv@toc@ha\n" \ +" ld 27,.LC__dl_argv@toc@l(27)\n" \ /* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). */ \ " ld 3,0(28)\n" \ " lwa 4,0(29)\n" \ @@ -204,7 +207,8 @@ BODY_PREFIX "_dl_start_user:\n" \ " addi 6,6,8\n" \ /* Pass a termination function pointer (in this case _dl_fini) in \ r7. */ \ -" ld 7,.LC__dl_fini@toc(2)\n" \ +" addis 7,2,.LC__dl_fini@toc@ha\n" \ +" ld 7,.LC__dl_fini@toc@l(7)\n" \ /* Pass the stack pointer in r1 (so far so good), pointing to a NULL \ value. This lets our startup code distinguish between a program \ linked statically, which linux will call with argc on top of the \ diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S index 23debc2faf..45b821607b 100644 --- a/sysdeps/powerpc/powerpc64/dl-trampoline.S +++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S @@ -32,6 +32,7 @@ because gcc as of 2010/05 doesn't allocate a proper stack frame for a function that makes no calls except for __tls_get_addr and we might be here resolving the __tls_get_addr call. */ + .hidden _dl_runtime_resolve #define INT_PARMS FRAME_MIN_SIZE ENTRY (_dl_runtime_resolve, 4) stdu r1,-FRAME_SIZE(r1) @@ -195,6 +196,7 @@ END(_dl_runtime_resolve) parm1 (r3) and the index (r0) needs to be converted to an offset (index * 24) in parm2 (r4). */ #ifndef PROF + .hidden _dl_profile_resolve ENTRY (_dl_profile_resolve, 4) /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we need to call _dl_audit_pltexit. */ @@ -225,12 +227,14 @@ ENTRY (_dl_profile_resolve, 4) std r9,INT_PARMS+48(r1) std r10,INT_PARMS+56(r1) std r8,CALLING_SP(r1) - ld r12,.LC__dl_hwcap@toc(r2) + addis r12,r2,.LC__dl_hwcap@toc@ha + ld r12,.LC__dl_hwcap@toc@l(r12) #ifdef SHARED /* Load _rtld_local_ro._dl_hwcap. */ ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12) #else - ld r12,0(r12) /* Load extern _dl_hwcap. */ + /* Load extern _dl_hwcap. */ + ld r12,0(r12) #endif andis. r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(saveFP) diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S index 41812e3427..19e76d59ee 100644 --- a/sysdeps/powerpc/powerpc64/setjmp-common.S +++ b/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -132,12 +132,14 @@ JUMPTARGET(GLUE(__sigsetjmp_symbol,_ent)): std r31,((JB_GPRS+17)*8)(3) stfd fp31,((JB_FPRS+17)*8)(3) #ifndef __NO_VMX__ - ld r6,.LC__dl_hwcap@toc(r2) + addis r6,r2,.LC__dl_hwcap@toc@ha + ld r6,.LC__dl_hwcap@toc@l(r6) # ifdef SHARED /* Load _rtld-global._dl_hwcap. */ - ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) + ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) # else - ld r6,0(r6) /* Load extern _dl_hwcap. */ + /* Load extern _dl_hwcap. */ + ld r6,0(r6) # endif andis. r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/powerpc/powerpc64/start.S b/sysdeps/powerpc/powerpc64/start.S index 4319dc8d3e..244d9da07b 100644 --- a/sysdeps/powerpc/powerpc64/start.S +++ b/sysdeps/powerpc/powerpc64/start.S @@ -74,7 +74,8 @@ ENTRY (_start) /* put the address of start_addresses in r8... ** ** PPC64 ABI uses R13 for thread local, so we leave it alone */ - ld r8,.L01@toc(r2) + addis r8,r2,.L01@toc@ha + ld r8,.L01@toc@l(r8) /* and continue in libc-start, in glibc. */ b JUMPTARGET(__libc_start_main) diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h index 3fec06e0df..011068b290 100644 --- a/sysdeps/powerpc/powerpc64/sysdep.h +++ b/sysdeps/powerpc/powerpc64/sysdep.h @@ -469,14 +469,16 @@ LT_LABELSUFFIX(name,_name_end): ; \ .tc _rtld_global_ro[TC],_rtld_global_ro # endif # define __GLRO(rOUT, var, offset) \ - ld rOUT,.LC__ ## var@toc(r2); \ + addis rOUT,r2,.LC__ ## var@toc@ha; \ + ld rOUT,.LC__ ## var@toc@l(rOUT); \ lwz rOUT,offset(rOUT) #else # define __GLRO_DEF(var) \ .LC__ ## var: \ .tc _ ## var[TC],_ ## var # define __GLRO(rOUT, var, offset) \ - ld rOUT,.LC__ ## var@toc(r2); \ + addis rOUT,r2,.LC__ ## var@toc@ha; \ + ld rOUT,.LC__ ## var@toc@l(rOUT); \ lwz rOUT,0(rOUT) #endif