From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 2471 invoked by alias); 23 Dec 2019 18:48:25 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Received: (qmail 2453 invoked by uid 89); 23 Dec 2019 18:48:24 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-20.5 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,KAM_SHORT,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.1 spammy=establishing, PIC, lwz, lbz X-HELO: mx0a-001b2d01.pphosted.com From: Tulio Magno Quites Machado Filho To: libc-alpha@sourceware.org Cc: Andreas Schwab , Joseph Myers , fw@deneb.enyo.de Subject: [PATCH] powerpc: Move cache line size to rtld_global_ro Date: Mon, 23 Dec 2019 18:48:00 -0000 Message-Id: <20191223184523.70113-1-tuliom@linux.ibm.com> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SW-Source: 2019-12/txt/msg00738.txt.bz2 GCC 10.0 enabled -fno-common by default and this started to point that __cache_line_size had been implemented in 2 different places: loader and libc. In order to avoid this duplication, the libc variable has been removed and the loader variable is moved to rtld_global_ro. File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order to reuse code for both static and dynamic linking scenarios. Signed-off-by: Tulio Magno Quites Machado Filho --- sysdeps/powerpc/dl-procinfo.c | 17 ++++++++++++ sysdeps/powerpc/powerpc32/a2/memcpy.S | 23 ++++++++-------- sysdeps/powerpc/powerpc32/dl-machine.c | 11 ++------ sysdeps/powerpc/powerpc32/memset.S | 29 +++++++++----------- sysdeps/powerpc/powerpc32/sysdep.h | 26 ++++++++++++++++++ sysdeps/powerpc/powerpc64/a2/memcpy.S | 21 +++++++++++--- sysdeps/powerpc/powerpc64/memset.S | 24 ++++++++++++++-- sysdeps/powerpc/rtld-global-offsets.sym | 1 + sysdeps/unix/sysv/linux/powerpc/dl-auxv.h | 24 ++++++++++++++++ sysdeps/unix/sysv/linux/powerpc/dl-support.c | 23 ++++++++++++++++ sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c | 12 +------- sysdeps/unix/sysv/linux/powerpc/libc-start.c | 10 ++----- 12 files changed, 160 insertions(+), 61 deletions(-) create mode 100644 sysdeps/unix/sysv/linux/powerpc/dl-auxv.h create mode 100644 sysdeps/unix/sysv/linux/powerpc/dl-support.c diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c index 20706a648a..94b33d0c16 100644 --- a/sysdeps/powerpc/dl-procinfo.c +++ b/sysdeps/powerpc/dl-procinfo.c @@ -89,5 +89,22 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15] , #endif +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_cache_line_size +# else +PROCINFO_CLASS int _dl_cache_line_size +# endif +# ifndef PROCINFO_DECL + = 0 +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif + + #undef PROCINFO_DECL #undef PROCINFO_CLASS diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S index 9bc91a8df1..ecfea5c832 100644 --- a/sysdeps/powerpc/powerpc32/a2/memcpy.S +++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S @@ -18,6 +18,7 @@ . */ #include +#include #define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ #define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ @@ -106,25 +107,23 @@ EALIGN (memcpy, 5, 0) L(dst_aligned): -#ifdef SHARED +#ifdef PIC mflr r0 -/* Establishes GOT addressability so we can load __cache_line_size - from static. This value was set from the aux vector during startup. */ +/* Establishes GOT addressability so we can load the cache line size + from rtld_global_ro. This value was set from the aux vector during + startup. */ SETUP_GOT_ACCESS(r9,got_label) - addis r9,r9,__cache_line_size-got_label@ha - lwz r9,__cache_line_size-got_label@l(r9) - mtlr r0 -#else -/* Load __cache_line_size from static. This value was set from the - aux vector during startup. */ - lis r9,__cache_line_size@ha - lwz r9,__cache_line_size@l(r9) + addis r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@l + mtlr r0 #endif + __GLRO(r9, r9, _dl_cache_line_size, + RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET) cmplwi cr5, r9, 0 bne+ cr5,L(cachelineset) -/* __cache_line_size not set: generic byte copy without much optimization */ +/* Cache line size not set: generic byte copy without much optimization */ andi. r0,r5,1 /* If length is odd copy one byte. */ beq L(cachelinenotset_align) lbz r7,0(r4) /* Read one byte from source. */ diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c index a90cbc1ae3..740ff8f2ec 100644 --- a/sysdeps/powerpc/powerpc32/dl-machine.c +++ b/sysdeps/powerpc/powerpc32/dl-machine.c @@ -25,11 +25,6 @@ #include #include <_itoa.h> -/* The value __cache_line_size is defined in dl-sysdep.c and is initialised - by _dl_sysdep_start via DL_PLATFORM_INIT. */ -extern int __cache_line_size attribute_hidden; - - /* Stuff for the PLT. */ #define PLT_INITIAL_ENTRY_WORDS 18 #define PLT_LONGBRANCH_ENTRY_WORDS 0 @@ -309,14 +304,14 @@ __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) Assumes that dcbst and icbi apply to lines of 16 bytes or more. Current known line sizes are 16, 32, and 128 bytes. - The following gets the __cache_line_size, when available. */ + The following gets the cache line size, when available. */ /* Default minimum 4 words per cache line. */ int line_size_words = 4; - if (lazy && __cache_line_size != 0) + if (lazy && GLRO(dl_cache_line_size) != 0) /* Convert bytes to words. */ - line_size_words = __cache_line_size / 4; + line_size_words = GLRO(dl_cache_line_size) / 4; size_modified = lazy ? rel_offset_words : 6; for (i = 0; i < size_modified; i += line_size_words) diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S index fc8b1a2547..ea96913e4e 100644 --- a/sysdeps/powerpc/powerpc32/memset.S +++ b/sysdeps/powerpc/powerpc32/memset.S @@ -17,12 +17,13 @@ . */ #include +#include /* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5])); Returns 's'. The memset is done in four sizes: byte (8 bits), word (32 bits), - 32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits). + 32-byte blocks (256 bits) and cache line size (128, 256, 1024 bits). There is a special case for setting whole cache lines to 0, which takes advantage of the dcbz instruction. */ @@ -95,7 +96,7 @@ L(caligned): /* Check if we can use the special case for clearing memory using dcbz. This requires that we know the correct cache line size for this - processor. Getting the __cache_line_size may require establishing GOT + processor. Getting the cache line size may require establishing GOT addressability, so branch out of line to set this up. */ beq cr1, L(checklinesize) @@ -230,26 +231,22 @@ L(medium_28t): blr L(checklinesize): -#ifdef SHARED - mflr rTMP /* If the remaining length is less the 32 bytes then don't bother getting the cache line size. */ beq L(medium) -/* Establishes GOT addressability so we can load __cache_line_size - from static. This value was set from the aux vector during startup. */ +#ifdef PIC + mflr rTMP +/* Establishes GOT addressability so we can load the cache line size + from rtld_global_ro. This value was set from the aux vector during + startup. */ SETUP_GOT_ACCESS(rGOT,got_label) - addis rGOT,rGOT,__cache_line_size-got_label@ha - lwz rCLS,__cache_line_size-got_label@l(rGOT) + addis rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@l mtlr rTMP -#else -/* Load __cache_line_size from static. This value was set from the - aux vector during startup. */ - lis rCLS,__cache_line_size@ha -/* If the remaining length is less the 32 bytes then don't bother getting - the cache line size. */ - beq L(medium) - lwz rCLS,__cache_line_size@l(rCLS) #endif +/* Load rtld_global_ro._dl_cache_line_size. */ + __GLRO(rCLS, rGOT, _dl_cache_line_size, + RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET) /* If the cache line size was not set then goto to L(nondcbz), which is safe for any cache line size. */ diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h index c21ea87f5a..4605276ebf 100644 --- a/sysdeps/powerpc/powerpc32/sysdep.h +++ b/sysdeps/powerpc/powerpc32/sysdep.h @@ -157,4 +157,30 @@ GOT_LABEL: ; \ /* Label in text section. */ #define C_TEXT(name) name +/* Read the value of member from rtld_global_ro. */ +#ifdef PIC +# ifdef SHARED +# if IS_IN (rtld) +/* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ +# define __GLRO(rOUT, rGOT, member, offset) \ + lwz rOUT,_rtld_local_ro@got(rGOT); \ + lwz rOUT,offset(rOUT) +# else +# define __GLRO(rOUT, rGOT, member, offset) \ + lwz rOUT,_rtld_global_ro@got(rGOT); \ + lwz rOUT,offset(rOUT) +# endif +# else +# define __GLRO(rOUT, rGOT, member, offset) \ + lwz rOUT,member@got(rGOT); \ + lwz rOUT,0(rOUT) +# endif +#else +/* Position-dependent code does not require access to the GOT. */ +# define __GLRO(rOUT, rGOT, member, offset) \ + lis rOUT,(member+LOWORD)@ha \ + lwz rOUT,(member+LOWORD)@l(rOUT) +#endif /* PIC */ + #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S index 6d5c5afddb..e515255126 100644 --- a/sysdeps/powerpc/powerpc64/a2/memcpy.S +++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S @@ -18,6 +18,7 @@ . */ #include +#include #ifndef MEMCPY # define MEMCPY memcpy @@ -27,8 +28,19 @@ #define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ .section ".toc","aw" -.LC0: - .tc __cache_line_size[TC],__cache_line_size +.LC__dl_cache_line_size: +#ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +#else + .tc _dl_cache_line_size[TC],_dl_cache_line_size +#endif + .section ".text" .align 2 @@ -55,7 +67,8 @@ ENTRY (MEMCPY, 5) */ neg r8,r3 /* LS 4 bits = # bytes to 8-byte dest bdry */ - ld r9,.LC0@toc(r2) /* Get cache line size (part 1) */ + /* Get cache line size (part 1) */ + ld r9,.LC__dl_cache_line_size@toc(r2) clrldi r8,r8,64-4 /* align to 16byte boundary */ sub r7,r4,r3 /* compute offset to src from dest */ lwz r9,0(r9) /* Get cache line size (part 2) */ @@ -121,7 +134,7 @@ L(dst_aligned): cmpdi cr0,r9,0 /* Cache line size set? */ bne+ cr0,L(cachelineset) -/* __cache_line_size not set: generic byte copy without much optimization */ +/* Cache line size not set: generic byte copy without much optimization */ clrldi. r0,r5,63 /* If length is odd copy one byte */ beq L(cachelinenotset_align) lbz r7,0(r4) /* Read one byte from source */ diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S index 5d96696e87..d49ef102d4 100644 --- a/sysdeps/powerpc/powerpc64/memset.S +++ b/sysdeps/powerpc/powerpc64/memset.S @@ -17,10 +17,22 @@ . */ #include +#include .section ".toc","aw" -.LC0: - .tc __cache_line_size[TC],__cache_line_size +.LC__dl_cache_line_size: +#ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +#else + .tc _dl_cache_line_size[TC],_dl_cache_line_size +#endif + .section ".text" .align 2 @@ -146,8 +158,14 @@ L(zloopstart): /* If the remaining length is less the 32 bytes, don't bother getting the cache line size. */ beq L(medium) - ld rCLS,.LC0@toc(r2) + ld rCLS,.LC__dl_cache_line_size@toc(r2) +# ifdef SHARED + /* Load _rtld_global_ro._dl_cache_line_size. */ + lwz rCLS,RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET(rCLS) +# else + /* Load _dl_cache_line_size. */ lwz rCLS,0(rCLS) +# endif /* If the cache line size was not set just goto to L(nondcbz) which is safe for any cache line size. */ cmpldi cr1,rCLS,0 diff --git a/sysdeps/powerpc/rtld-global-offsets.sym b/sysdeps/powerpc/rtld-global-offsets.sym index f5ea5a1466..6b348fd522 100644 --- a/sysdeps/powerpc/rtld-global-offsets.sym +++ b/sysdeps/powerpc/rtld-global-offsets.sym @@ -6,3 +6,4 @@ RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap) RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET rtld_global_ro_offsetof (_dl_hwcap2) +RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET rtld_global_ro_offsetof (_dl_cache_line_size) diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h new file mode 100644 index 0000000000..44bea09974 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h @@ -0,0 +1,24 @@ +/* Auxiliary vector processing. Linux/PPC version. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it + to dl_cache_line_size. */ +#define DL_PLATFORM_AUXV \ + case AT_DCACHEBSIZE: \ + GLRO(dl_cache_line_size) = av->a_un.a_val; \ + break; diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c new file mode 100644 index 0000000000..2a792d7092 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c @@ -0,0 +1,23 @@ +/* Support for dynamic linking code in static libc. Linux/PPC version. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "dl-auxv.h" +#include +int GLRO(dl_cache_line_size); + +#include diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c index fa19cc66c0..6d51d6061e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c +++ b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c @@ -18,16 +18,6 @@ #include #include - -int __cache_line_size attribute_hidden; - -/* Scan the Aux Vector for the "Data Cache Block Size" entry. If found - verify that the static extern __cache_line_size is defined by checking - for not NULL. If it is defined then assign the cache block size - value to __cache_line_size. */ -#define DL_PLATFORM_AUXV \ - case AT_DCACHEBSIZE: \ - __cache_line_size = av->a_un.a_val; \ - break; +#include #include diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c index a659a9144f..3a5eb0e952 100644 --- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c +++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c @@ -24,7 +24,6 @@ #include #endif -int __cache_line_size attribute_hidden; /* The main work is done in the generic function. */ #define LIBC_START_MAIN generic_start_main #define LIBC_START_DISABLE_INLINE @@ -71,15 +70,12 @@ __libc_start_main (int argc, char **argv, rtld_fini = NULL; } - /* Initialize the __cache_line_size variable from the aux vector. For the - static case, we also need _dl_hwcap, _dl_hwcap2 and _dl_platform, so we - can call __tcb_parse_hwcap_and_convert_at_platform (). */ for (ElfW (auxv_t) * av = auxvec; av->a_type != AT_NULL; ++av) switch (av->a_type) { - case AT_DCACHEBSIZE: - __cache_line_size = av->a_un.a_val; - break; + /* For the static case, we also need _dl_hwcap, _dl_hwcap2 and + _dl_platform, so we can call + __tcb_parse_hwcap_and_convert_at_platform (). */ #ifndef SHARED case AT_HWCAP: _dl_hwcap = (unsigned long int) av->a_un.a_val; -- 2.24.1