public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Alan Modra <amodra@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc] powerpc64: Use medium model toc accesses throughout
Date: Mon, 11 Apr 2022 01:30:30 +0000 (GMT)	[thread overview]
Message-ID: <20220411013030.6176C3856DE0@sourceware.org> (raw)

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d6efcc118e406a1cfeb309f835d7118df53419bb

commit d6efcc118e406a1cfeb309f835d7118df53419bb
Author: Alan Modra <amodra@gmail.com>
Date:   Sat Jan 22 11:48:50 2022 +1030

    powerpc64: Use medium model toc accesses throughout
    
    The PowerPC64 linker edits medium model toc-indirect code to toc-pointer
    relative:
            addis r9,r2,tc_entry_for_var@toc@ha
            ld r9,tc_entry_for_var@toc@l(r9)
    becomes
            addis r9,r2,(var-.TOC.)@ha
            addi r9,r9,(var-.TOC.)@l
    when "var" is known to be local to the binary.  This isn't done for
    small-model toc-indirect code, because "var" is almost guaranteed to
    be too far away from .TOC. for a 16-bit signed offset.  And, because
    the analysis of which .toc entry can be removed becomes much more
    complicated in objects that mix code models, they aren't removed if
    any small-model toc sequence appears in an object file.
    
    Unfortunately, glibc's build of ld.so smashes the needed objects
    together in a ld -r linking stage.  This means the GOT/TOC is left
    with a whole lot of relative relocations which is untidy, but in
    itself is not a serious problem.  However, static-pie on powerpc64
    bombs due to a segfault caused by one of the small-model accesses
    before _dl_relocate_static_pie.  (The very first one in rcrt1.o
    passing start_addresses in r8 to __libc_start_main.)
    
    So this patch makes all the toc/got accesses in assembly medium code
    model, and a couple of functions hidden.  By itself this is not
    enough to give us working static-pie, but it is useful in isolation to
    enable better linker optimisation.
    
    There's a serious problem in libgcc too.  libgcc ifuncs access the
    AT_HWCAP words stored in the tcb with an offset from the thread
    pointer (r13), but r13 isn't set at the time _dl_relocate_static_pie.
    A followup patch will fix that.
    
    Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>

Diff:
---
 sysdeps/powerpc/powerpc64/__longjmp-common.S |  8 +++++---
 sysdeps/powerpc/powerpc64/dl-machine.h       | 12 ++++++++----
 sysdeps/powerpc/powerpc64/dl-trampoline.S    |  8 ++++++--
 sysdeps/powerpc/powerpc64/setjmp-common.S    |  8 +++++---
 sysdeps/powerpc/powerpc64/start.S            |  3 ++-
 sysdeps/powerpc/powerpc64/sysdep.h           |  6 ++++--
 6 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S
index 4d71b9e102..5f629e1e0f 100644
--- a/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -47,12 +47,14 @@
 ENTRY (__longjmp)
 	CALL_MCOUNT 2
 #ifndef __NO_VMX__
-	ld    r5,.LC__dl_hwcap@toc(r2)
+	addis	r5,r2,.LC__dl_hwcap@toc@ha
+	ld	r5,.LC__dl_hwcap@toc@l(r5)
 # ifdef SHARED
 	/* Load _rtld-global._dl_hwcap.  */
-	ld    r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5)
+	ld	r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5)
 # else
-	ld    r5,0(r5) /* Load extern _dl_hwcap.  */
+	/* Load extern _dl_hwcap.  */
+	ld	r5,0(r5)
 # endif
 	andis.  r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
 	beq	L(no_vmx)
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index 5da5de7a0a..6fab5cbe81 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -175,9 +175,12 @@ BODY_PREFIX "_dl_start_user:\n"						\
 /* the address of _start in r30.  */					\
 "	mr	30,3\n"							\
 /* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28.  */		\
-"	ld	28,.LC__rtld_local@toc(2)\n"				\
-"	ld	29,.LC__dl_argc@toc(2)\n"				\
-"	ld	27,.LC__dl_argv@toc(2)\n"				\
+"	addis	28,2,.LC__rtld_local@toc@ha\n"				\
+"	ld	28,.LC__rtld_local@toc@l(28)\n"				\
+"	addis	29,2,.LC__dl_argc@toc@ha\n"				\
+"	ld	29,.LC__dl_argc@toc@l(29)\n"				\
+"	addis	27,2,.LC__dl_argv@toc@ha\n"				\
+"	ld	27,.LC__dl_argv@toc@l(27)\n"				\
 /* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1).  */	\
 "	ld	3,0(28)\n"						\
 "	lwa	4,0(29)\n"						\
@@ -204,7 +207,8 @@ BODY_PREFIX "_dl_start_user:\n"						\
 "	addi	6,6,8\n"						\
 /* Pass a termination function pointer (in this case _dl_fini) in	\
    r7.  */								\
-"	ld	7,.LC__dl_fini@toc(2)\n"				\
+"	addis	7,2,.LC__dl_fini@toc@ha\n"				\
+"	ld	7,.LC__dl_fini@toc@l(7)\n"				\
 /* Pass the stack pointer in r1 (so far so good), pointing to a NULL	\
    value.  This lets our startup code distinguish between a program	\
    linked statically, which linux will call with argc on top of the	\
diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S
index 23debc2faf..45b821607b 100644
--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S
+++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S
@@ -32,6 +32,7 @@
    because gcc as of 2010/05 doesn't allocate a proper stack frame for
    a function that makes no calls except for __tls_get_addr and we
    might be here resolving the __tls_get_addr call.  */
+	.hidden _dl_runtime_resolve
 #define INT_PARMS FRAME_MIN_SIZE
 ENTRY (_dl_runtime_resolve, 4)
 	stdu	r1,-FRAME_SIZE(r1)
@@ -195,6 +196,7 @@ END(_dl_runtime_resolve)
    parm1 (r3) and the index (r0) needs to be converted to an offset
    (index * 24) in parm2 (r4).  */
 #ifndef PROF
+	.hidden _dl_profile_resolve
 ENTRY (_dl_profile_resolve, 4)
 /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we
    need to call _dl_audit_pltexit.  */
@@ -225,12 +227,14 @@ ENTRY (_dl_profile_resolve, 4)
 	std	r9,INT_PARMS+48(r1)
 	std	r10,INT_PARMS+56(r1)
 	std	r8,CALLING_SP(r1)
-	ld	r12,.LC__dl_hwcap@toc(r2)
+	addis   r12,r2,.LC__dl_hwcap@toc@ha
+	ld	r12,.LC__dl_hwcap@toc@l(r12)
 #ifdef SHARED
 	/* Load _rtld_local_ro._dl_hwcap.  */
 	ld	r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12)
 #else
-	ld	r12,0(r12) /* Load extern _dl_hwcap.  */
+	/* Load extern _dl_hwcap.  */
+	ld	r12,0(r12)
 #endif
 	andis.  r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16)
 	beq	L(saveFP)
diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S
index 41812e3427..19e76d59ee 100644
--- a/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -132,12 +132,14 @@ JUMPTARGET(GLUE(__sigsetjmp_symbol,_ent)):
 	std  r31,((JB_GPRS+17)*8)(3)
 	stfd fp31,((JB_FPRS+17)*8)(3)
 #ifndef __NO_VMX__
-	ld    r6,.LC__dl_hwcap@toc(r2)
+	addis	r6,r2,.LC__dl_hwcap@toc@ha
+	ld	r6,.LC__dl_hwcap@toc@l(r6)
 # ifdef SHARED
 	/* Load _rtld-global._dl_hwcap.  */
-	ld    r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6)
+	ld	r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6)
 # else
-	ld    r6,0(r6) /* Load extern _dl_hwcap.  */
+	/* Load extern _dl_hwcap.  */
+	ld	r6,0(r6)
 # endif
 	andis.  r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16)
 	beq	L(no_vmx)
diff --git a/sysdeps/powerpc/powerpc64/start.S b/sysdeps/powerpc/powerpc64/start.S
index 4319dc8d3e..244d9da07b 100644
--- a/sysdeps/powerpc/powerpc64/start.S
+++ b/sysdeps/powerpc/powerpc64/start.S
@@ -74,7 +74,8 @@ ENTRY (_start)
 
  /* put the address of start_addresses in r8...  **
 ** PPC64 ABI uses R13 for thread local, so we leave it alone */
-	ld	r8,.L01@toc(r2)
+	addis	r8,r2,.L01@toc@ha
+	ld	r8,.L01@toc@l(r8)
 
  /* and continue in libc-start, in glibc.  */
 	b	JUMPTARGET(__libc_start_main)
diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
index 3fec06e0df..011068b290 100644
--- a/sysdeps/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/powerpc/powerpc64/sysdep.h
@@ -469,14 +469,16 @@ LT_LABELSUFFIX(name,_name_end): ; \
 	.tc _rtld_global_ro[TC],_rtld_global_ro
 # endif
 # define __GLRO(rOUT, var, offset)		\
-	ld	rOUT,.LC__ ## var@toc(r2);	\
+	addis	rOUT,r2,.LC__ ## var@toc@ha;	\
+	ld	rOUT,.LC__ ## var@toc@l(rOUT);	\
 	lwz	rOUT,offset(rOUT)
 #else
 # define __GLRO_DEF(var)			\
 .LC__ ## var:					\
 	.tc _ ## var[TC],_ ## var
 # define __GLRO(rOUT, var, offset)		\
-	ld	rOUT,.LC__ ## var@toc(r2);	\
+	addis	rOUT,r2,.LC__ ## var@toc@ha;	\
+	ld	rOUT,.LC__ ## var@toc@l(rOUT);	\
 	lwz	rOUT,0(rOUT)
 #endif


                 reply	other threads:[~2022-04-11  1:30 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220411013030.6176C3856DE0@sourceware.org \
    --to=amodra@sourceware.org \
    --cc=glibc-cvs@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).