From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pl1-x631.google.com (mail-pl1-x631.google.com [IPv6:2607:f8b0:4864:20::631]) by sourceware.org (Postfix) with ESMTPS id 9B7D23858C60 for ; Mon, 28 Feb 2022 06:41:39 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 9B7D23858C60 Received: by mail-pl1-x631.google.com with SMTP id z2so9865860plg.8 for ; Sun, 27 Feb 2022 22:41:39 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=3/cVKz/wRhxlIkf9kiCzKeLbBxlwFYhLlCKLCVy+JSc=; b=tMVF6ZKosj+lYB+HnYXZyJSrep729ESVp8WAaJsp+ozjxBfVCp4WInSSfLGzoICfRg 7EnLf3CW4l8zuGzZImSbIfLirAgM7embmZrw8MPB/7ffa4mZfiR6nBzyGCdes59SlEke 5arRWC8+T3ZcXjQB3fSoBByjyMYIEzOwOeIYADUCpOpH0dK5t9/ItD4HMpeJmMFWdWmK wuLiMcAAIIvP3gmw+PIf/WNSMVobNlz2BO3orVP15kbiViC6DUe3dSSKSve615wNrcYE 3HkP8Wiy0vEIcdf5TNgUqefbgttoV6YlIf7HfGNEJv8kjd7zSIqgrhvErTA2bSEvffg6 Kk1g== X-Gm-Message-State: AOAM530nLkfXE99YNAwIVuFP2LXewWCouvUoUGHVBf73A1sCYhlH0R3I hbZVXyuU9Knhc4JSzouVe8fxqooAPFY= X-Google-Smtp-Source: ABdhPJzW4b3s/mBcIJ203ijJWN42wt5u6Ch8ZIKOAI/kVeVHfw8to5a5ZwIPe/xbWlRTjOC3lwXm6A== X-Received: by 2002:a17:902:d643:b0:151:6f2c:cfb4 with SMTP id y3-20020a170902d64300b001516f2ccfb4mr1441511plh.120.1646030498301; Sun, 27 Feb 2022 22:41:38 -0800 (PST) Received: from squeak.grove.modra.org ([2406:3400:51d:8cc0:f31d:e338:6c7b:8cfa]) by smtp.gmail.com with ESMTPSA id q15-20020a63504f000000b0037425262293sm9130249pgl.43.2022.02.27.22.41.36 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 27 Feb 2022 22:41:37 -0800 (PST) From: Alan Modra To: libc-alpha@sourceware.org Cc: Tulio Magno Quites Machado Filho , Alan Modra Subject: [PATCH v2 1/4] powerpc64: Use medium model toc accesses throughout Date: Mon, 28 Feb 2022 17:10:49 +1030 Message-Id: <20220228064052.3413334-2-amodra@gmail.com> X-Mailer: git-send-email 
2.30.2 In-Reply-To: <20220228064052.3413334-1-amodra@gmail.com> References: <20220228064052.3413334-1-amodra@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-3036.3 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 28 Feb 2022 06:41:41 -0000 The PowerPC64 linker edits medium model toc-indirect code to toc-pointer relative: addis r9,r2,tc_entry_for_var@toc@ha ld r9,tc_entry_for_var@toc@l(r9) becomes addis r9,r2,(var-.TOC.)@ha addi r9,r9,(var-.TOC.)@l when "var" is known to be local to the binary. This isn't done for small-model toc-indirect code, because "var" is almost guaranteed to be too far away from .TOC. for a 16-bit signed offset. And, because the analysis of which .toc entries can be removed becomes much more complicated in objects that mix code models, they aren't removed if any small-model toc sequence appears in an object file. Unfortunately, glibc's build of ld.so smashes the needed objects together in an ld -r linking stage. This means the GOT/TOC is left with a whole lot of relative relocations, which is untidy, but in itself is not a serious problem. However, static-pie on powerpc64 bombs due to a segfault caused by one of the small-model accesses before _dl_relocate_static_pie. (The very first one in rcrt1.o passing start_addresses in r8 to __libc_start_main.) So this patch converts all the toc/got accesses in assembly to the medium code model, and makes a couple of functions hidden. 
By itself this is not enough to give us working static-pie, but it is useful in isolation to enable better linker optimisation. There's a serious problem in libgcc too. libgcc ifuncs access the AT_HWCAP words stored in the tcb with an offset from the thread pointer (r13), but r13 isn't set at the time _dl_relocate_static_pie runs. A followup patch will fix that. diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S index 4d71b9e102..5f629e1e0f 100644 --- a/sysdeps/powerpc/powerpc64/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -47,12 +47,14 @@ ENTRY (__longjmp) CALL_MCOUNT 2 #ifndef __NO_VMX__ - ld r5,.LC__dl_hwcap@toc(r2) + addis r5,r2,.LC__dl_hwcap@toc@ha + ld r5,.LC__dl_hwcap@toc@l(r5) # ifdef SHARED /* Load _rtld-global._dl_hwcap. */ - ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) # else - ld r5,0(r5) /* Load extern _dl_hwcap. */ + /* Load extern _dl_hwcap. */ + ld r5,0(r5) # endif andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index 5da5de7a0a..6fab5cbe81 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -175,9 +175,12 @@ BODY_PREFIX "_dl_start_user:\n" \ /* the address of _start in r30. */ \ " mr 30,3\n" \ /* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ -" ld 28,.LC__rtld_local@toc(2)\n" \ -" ld 29,.LC__dl_argc@toc(2)\n" \ -" ld 27,.LC__dl_argv@toc(2)\n" \ +" addis 28,2,.LC__rtld_local@toc@ha\n" \ +" ld 28,.LC__rtld_local@toc@l(28)\n" \ +" addis 29,2,.LC__dl_argc@toc@ha\n" \ +" ld 29,.LC__dl_argc@toc@l(29)\n" \ +" addis 27,2,.LC__dl_argv@toc@ha\n" \ +" ld 27,.LC__dl_argv@toc@l(27)\n" \ /* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). 
*/ \ " ld 3,0(28)\n" \ " lwa 4,0(29)\n" \ @@ -204,7 +207,8 @@ BODY_PREFIX "_dl_start_user:\n" \ " addi 6,6,8\n" \ /* Pass a termination function pointer (in this case _dl_fini) in \ r7. */ \ -" ld 7,.LC__dl_fini@toc(2)\n" \ +" addis 7,2,.LC__dl_fini@toc@ha\n" \ +" ld 7,.LC__dl_fini@toc@l(7)\n" \ /* Pass the stack pointer in r1 (so far so good), pointing to a NULL \ value. This lets our startup code distinguish between a program \ linked statically, which linux will call with argc on top of the \ diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S index 23debc2faf..45b821607b 100644 --- a/sysdeps/powerpc/powerpc64/dl-trampoline.S +++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S @@ -32,6 +32,7 @@ because gcc as of 2010/05 doesn't allocate a proper stack frame for a function that makes no calls except for __tls_get_addr and we might be here resolving the __tls_get_addr call. */ + .hidden _dl_runtime_resolve #define INT_PARMS FRAME_MIN_SIZE ENTRY (_dl_runtime_resolve, 4) stdu r1,-FRAME_SIZE(r1) @@ -195,6 +196,7 @@ END(_dl_runtime_resolve) parm1 (r3) and the index (r0) needs to be converted to an offset (index * 24) in parm2 (r4). */ #ifndef PROF + .hidden _dl_profile_resolve ENTRY (_dl_profile_resolve, 4) /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we need to call _dl_audit_pltexit. */ @@ -225,12 +227,14 @@ ENTRY (_dl_profile_resolve, 4) std r9,INT_PARMS+48(r1) std r10,INT_PARMS+56(r1) std r8,CALLING_SP(r1) - ld r12,.LC__dl_hwcap@toc(r2) + addis r12,r2,.LC__dl_hwcap@toc@ha + ld r12,.LC__dl_hwcap@toc@l(r12) #ifdef SHARED /* Load _rtld_local_ro._dl_hwcap. */ ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12) #else - ld r12,0(r12) /* Load extern _dl_hwcap. */ + /* Load extern _dl_hwcap. */ + ld r12,0(r12) #endif andis. 
r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(saveFP) diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S index 41812e3427..19e76d59ee 100644 --- a/sysdeps/powerpc/powerpc64/setjmp-common.S +++ b/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -132,12 +132,14 @@ JUMPTARGET(GLUE(__sigsetjmp_symbol,_ent)): std r31,((JB_GPRS+17)*8)(3) stfd fp31,((JB_FPRS+17)*8)(3) #ifndef __NO_VMX__ - ld r6,.LC__dl_hwcap@toc(r2) + addis r6,r2,.LC__dl_hwcap@toc@ha + ld r6,.LC__dl_hwcap@toc@l(r6) # ifdef SHARED /* Load _rtld-global._dl_hwcap. */ - ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) + ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) # else - ld r6,0(r6) /* Load extern _dl_hwcap. */ + /* Load extern _dl_hwcap. */ + ld r6,0(r6) # endif andis. r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) diff --git a/sysdeps/powerpc/powerpc64/start.S b/sysdeps/powerpc/powerpc64/start.S index 4319dc8d3e..244d9da07b 100644 --- a/sysdeps/powerpc/powerpc64/start.S +++ b/sysdeps/powerpc/powerpc64/start.S @@ -74,7 +74,8 @@ ENTRY (_start) /* put the address of start_addresses in r8... ** ** PPC64 ABI uses R13 for thread local, so we leave it alone */ - ld r8,.L01@toc(r2) + addis r8,r2,.L01@toc@ha + ld r8,.L01@toc@l(r8) /* and continue in libc-start, in glibc. 
*/ b JUMPTARGET(__libc_start_main) diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h index 3fec06e0df..011068b290 100644 --- a/sysdeps/powerpc/powerpc64/sysdep.h +++ b/sysdeps/powerpc/powerpc64/sysdep.h @@ -469,14 +469,16 @@ LT_LABELSUFFIX(name,_name_end): ; \ .tc _rtld_global_ro[TC],_rtld_global_ro # endif # define __GLRO(rOUT, var, offset) \ - ld rOUT,.LC__ ## var@toc(r2); \ + addis rOUT,r2,.LC__ ## var@toc@ha; \ + ld rOUT,.LC__ ## var@toc@l(rOUT); \ lwz rOUT,offset(rOUT) #else # define __GLRO_DEF(var) \ .LC__ ## var: \ .tc _ ## var[TC],_ ## var # define __GLRO(rOUT, var, offset) \ - ld rOUT,.LC__ ## var@toc(r2); \ + addis rOUT,r2,.LC__ ## var@toc@ha; \ + ld rOUT,.LC__ ## var@toc@l(rOUT); \ lwz rOUT,0(rOUT) #endif