Subject: Re: [PATCH, AArch64 v4 4/6] aarch64: Add out-of-line functions for LSE atomics
To: Richard Henderson, gcc-patches@gcc.gnu.org
Cc: Wilco.Dijkstra@arm.com, Marcus.Shawcroft@arm.com, James.Greenhalgh@arm.com
References: <20190918015817.24408-1-richard.henderson@linaro.org> <20190918015817.24408-5-richard.henderson@linaro.org>
From: Kyrill Tkachov
Message-ID: <1d905132-5067-c966-73ca-f5822d517b56@foss.arm.com>
Date: Wed, 18 Sep 2019 12:58:00 -0000
In-Reply-To: <20190918015817.24408-5-richard.henderson@linaro.org>

On 9/18/19 2:58 AM, Richard Henderson wrote:
> This is the libgcc part of the interface -- providing the functions.
> Rationale is provided at the top of libgcc/config/aarch64/lse.S.
>
> 	* config/aarch64/lse-init.c: New file.
> 	* config/aarch64/lse.S: New file.
> 	* config/aarch64/t-lse: New file.
> 	* config.host: Add t-lse to all aarch64 tuples.
> ---
>  libgcc/config/aarch64/lse-init.c |  45 ++++++
>  libgcc/config.host               |   4 +
>  libgcc/config/aarch64/lse.S      | 235 +++++++++++++++++++++++++++++++
>  libgcc/config/aarch64/t-lse      |  44 ++++++
>  4 files changed, 328 insertions(+)
>  create mode 100644 libgcc/config/aarch64/lse-init.c
>  create mode 100644 libgcc/config/aarch64/lse.S
>  create mode 100644 libgcc/config/aarch64/t-lse
>
> diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c
> new file mode 100644
> index 00000000000..51fb21d45c9
> --- /dev/null
> +++ b/libgcc/config/aarch64/lse-init.c
> @@ -0,0 +1,45 @@
> +/* Out-of-line LSE atomics for AArch64 architecture, Init.
> + Copyright (C) 2018 Free Software Foundation, Inc.
> + Contributed by Linaro Ltd.
> +

This, and the other new files, will need an updated copyright date now.

Thanks,
Kyrill

> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> +for more details.
> +
> +Under Section 7 of GPL version 3, you are granted additional
> +permissions described in the GCC Runtime Library Exception, version
> +3.1, as published by the Free Software Foundation.
> +
> +You should have received a copy of the GNU General Public License and
> +a copy of the GCC Runtime Library Exception along with this program;
> +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> +<http://www.gnu.org/licenses/>. */
> +
> +/* Define the symbol gating the LSE implementations. */
> +_Bool __aarch64_have_lse_atomics
> + __attribute__((visibility("hidden"), nocommon));
> +
> +/* Disable initialization of __aarch64_have_lse_atomics during bootstrap. */
> +#ifndef inhibit_libc
> +# include <sys/auxv.h>
> +
> +/* Disable initialization if the system headers are too old. */
> +# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
> +
> +static void __attribute__((constructor))
> +init_have_lse_atomics (void)
> +{
> + unsigned long hwcap = getauxval (AT_HWCAP);
> + __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
> +}
> +
> +# endif /* HWCAP */
> +#endif /* inhibit_libc */
> diff --git a/libgcc/config.host b/libgcc/config.host
> index 728e543ea39..122113fc519 100644
> --- a/libgcc/config.host
> +++ b/libgcc/config.host
> @@ -350,12 +350,14 @@ aarch64*-*-elf | aarch64*-*-rtems*)
> extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
> extra_parts="$extra_parts crtfastmath.o"
> tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
> md_unwind_header=aarch64/aarch64-unwind.h
> ;;
> aarch64*-*-freebsd*)
> extra_parts="$extra_parts crtfastmath.o"
> tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
> md_unwind_header=aarch64/freebsd-unwind.h
> ;;
> @@ -367,12 +369,14 @@ aarch64*-*-netbsd*)
> ;;
> aarch64*-*-fuchsia*)
> tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
> ;;
> aarch64*-*-linux*)
> extra_parts="$extra_parts crtfastmath.o"
> md_unwind_header=aarch64/linux-unwind.h
> tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
> + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
> tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
> ;;
> alpha*-*-linux*)
> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
> new file mode 100644
> index 00000000000..c24a39242ca
> --- /dev/null
> +++ b/libgcc/config/aarch64/lse.S
> @@ -0,0 +1,235 @@
> +/* Out-of-line LSE atomics for AArch64 architecture.
> + Copyright (C) 2018 Free Software Foundation, Inc.
> + Contributed by Linaro Ltd.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> +for more details.
> +
> +Under Section 7 of GPL version 3, you are granted additional
> +permissions described in the GCC Runtime Library Exception, version
> +3.1, as published by the Free Software Foundation.
> +
> +You should have received a copy of the GNU General Public License and
> +a copy of the GCC Runtime Library Exception along with this program;
> +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> +<http://www.gnu.org/licenses/>. */
> +
> +/*
> + * The problem that we are trying to solve is operating system deployment
> + * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
> + *
> + * There are a number of potential solutions for this problem which have
> + * been proposed and rejected for various reasons. To recap:
> + *
> + * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/
> + * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
> + * However, not all Linux distributions are happy with multiple builds,
> + * and anyway it has no effect on main applications.
> + *
> + * (2) IFUNC. We could put these functions into libgcc_s.so, and have
> + * a single copy of each function for all DSOs. However, ARM is concerned
> + * that the branch-to-indirect-branch that is implied by using a PLT,
> + * as required by IFUNC, is too much overhead for smaller cpus.
> + *
> + * (3) Statically predicted direct branches. This is the approach that
> + * is taken here. These functions are linked into every DSO that uses them.
> + * All of the symbols are hidden, so that the functions are called via a
> + * direct branch. The choice of LSE vs non-LSE is done via one byte load
> + * followed by a well-predicted direct branch. The functions are compiled
> + * separately to minimize code size.
> + */
> +
> +/* Tell the assembler to accept LSE instructions. */
> + .arch armv8-a+lse
> +
> +/* Declare the symbol gating the LSE implementations. */
> + .hidden __aarch64_have_lse_atomics
> +
> +/* Turn size and memory model defines into mnemonic fragments. */
> +#if SIZE == 1
> +# define S b
> +# define UXT uxtb
> +#elif SIZE == 2
> +# define S h
> +# define UXT uxth
> +#elif SIZE == 4 || SIZE == 8 || SIZE == 16
> +# define S
> +# define UXT mov
> +#else
> +# error
> +#endif
> +
> +#if MODEL == 1
> +# define SUFF _relax
> +# define A
> +# define L
> +#elif MODEL == 2
> +# define SUFF _acq
> +# define A a
> +# define L
> +#elif MODEL == 3
> +# define SUFF _rel
> +# define A
> +# define L l
> +#elif MODEL == 4
> +# define SUFF _acq_rel
> +# define A a
> +# define L l
> +#else
> +# error
> +#endif
> +
> +/* Concatenate symbols. */
> +#define glue2_(A, B) A ## B
> +#define glue2(A, B) glue2_(A, B)
> +#define glue3_(A, B, C) A ## B ## C
> +#define glue3(A, B, C) glue3_(A, B, C)
> +#define glue4_(A, B, C, D) A ## B ## C ## D
> +#define glue4(A, B, C, D) glue4_(A, B, C, D)
> +
> +/* Select the size of a register, given a regno. */
> +#define x(N) glue2(x, N)
> +#define w(N) glue2(w, N)
> +#if SIZE < 8
> +# define s(N) w(N)
> +#else
> +# define s(N) x(N)
> +#endif
> +
> +#define NAME(BASE) glue4(__aarch64_, BASE, SIZE, SUFF)
> +#define LDXR glue4(ld, A, xr, S)
> +#define STXR glue4(st, L, xr, S)
> +
> +/* Temporary registers used. Other than these, only the return value
> + register (x0) and the flags are modified. */
> +#define tmp0 16
> +#define tmp1 17
> +#define tmp2 15
> +
> +/* Start and end a function. */
> +.macro STARTFN name
> + .text
> + .balign 16
> + .globl \name
> + .hidden \name
> + .type \name, %function
> + .cfi_startproc
> +\name:
> +.endm
> +
> +.macro ENDFN name
> + .cfi_endproc
> + .size \name, . - \name
> +.endm
> +
> +/* Branch to LABEL if LSE is disabled. */
> +.macro JUMP_IF_NOT_LSE label
> + adrp x(tmp0), __aarch64_have_lse_atomics
> + ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
> + cbz w(tmp0), \label
> +.endm
> +
> +#ifdef L_cas
> +
> +STARTFN NAME(cas)
> + JUMP_IF_NOT_LSE 8f
> +
> +#if SIZE < 16
> +#define CAS glue4(cas, A, L, S)
> +
> + CAS s(0), s(1), [x2]
> + ret
> +
> +8: UXT s(tmp0), s(0)
> +0: LDXR s(0), [x2]
> + cmp s(0), s(tmp0)
> + bne 1f
> + STXR w(tmp1), s(1), [x2]
> + cbnz w(tmp1), 0b
> +1: ret
> +
> +#else
> +#define LDXP glue3(ld, A, xp)
> +#define STXP glue3(st, L, xp)
> +#define CASP glue3(casp, A, L)
> +
> + CASP x0, x1, x2, x3, [x4]
> + ret
> +
> +8: mov x(tmp0), x0
> + mov x(tmp1), x1
> +0: LDXP x0, x1, [x4]
> + cmp x0, x(tmp0)
> + ccmp x1, x(tmp1), #0, eq
> + bne 1f
> + STXP w(tmp2), x(tmp0), x(tmp1), [x4]
> + cbnz w(tmp2), 0b
> +1: ret
> +
> +#endif
> +
> +ENDFN NAME(cas)
> +#endif
> +
> +#ifdef L_swp
> +#define SWP glue4(swp, A, L, S)
> +
> +STARTFN NAME(swp)
> + JUMP_IF_NOT_LSE 8f
> +
> + SWP s(0), s(0), [x1]
> + ret
> +
> +8: mov s(tmp0), s(0)
> +0: LDXR s(0), [x1]
> + STXR w(tmp1), s(tmp0), [x1]
> + cbnz w(tmp1), 0b
> + ret
> +
> +ENDFN NAME(swp)
> +#endif
> +
> +#if defined(L_ldadd) || defined(L_ldclr) \
> + || defined(L_ldeor) || defined(L_ldset)
> +
> +#ifdef L_ldadd
> +#define LDNM ldadd
> +#define OP add
> +#elif defined(L_ldclr)
> +#define LDNM ldclr
> +#define OP bic
> +#elif defined(L_ldeor)
> +#define LDNM ldeor
> +#define OP eor
> +#elif defined(L_ldset)
> +#define LDNM ldset
> +#define OP orr
> +#else
> +#error
> +#endif
> +#define LDOP glue4(LDNM, A, L, S)
> +
> +STARTFN NAME(LDNM)
> + JUMP_IF_NOT_LSE 8f
> +
> + LDOP s(0), s(0), [x1]
> + ret
> +
> +8: mov s(tmp0), s(0)
> +0: LDXR s(0), [x1]
> + OP s(tmp1), s(0), s(tmp0)
> + STXR w(tmp1), s(tmp1), [x1]
> + cbnz w(tmp1), 0b
> + ret
> +
> +ENDFN NAME(LDNM)
> +#endif
> diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
> new file mode 100644
> index 00000000000..c7f4223cd45
> --- /dev/null
> +++ b/libgcc/config/aarch64/t-lse
> @@ -0,0 +1,44 @@
> +# Out-of-line LSE atomics for AArch64 architecture.
> +# Copyright (C) 2018 Free Software Foundation, Inc.
> +# Contributed by Linaro Ltd.
> +#
> +# This file is part of GCC.
> +#
> +# GCC is free software; you can redistribute it and/or modify it
> +# under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3, or (at your option)
> +# any later version.
> +#
> +# GCC is distributed in the hope that it will be useful, but
> +# WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with GCC; see the file COPYING3. If not see
> +# <http://www.gnu.org/licenses/>.
> +
> +# Compare-and-swap has 5 sizes and 4 memory models.
> +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
> +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
> +
> +# Swap, Load-and-operate have 4 sizes and 4 memory models
> +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
> +O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
> +
> +LSE_OBJS := $(O0) $(O1)
> +
> +libgcc-objects += $(LSE_OBJS) lse-init$(objext)
> +
> +empty =
> +space = $(empty) $(empty)
> +PAT_SPLIT = $(subst _,$(space),$(*F))
> +PAT_BASE = $(word 1,$(PAT_SPLIT))
> +PAT_N = $(word 2,$(PAT_SPLIT))
> +PAT_M = $(word 3,$(PAT_SPLIT))
> +
> +lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c
> +	$(gcc_compile) -c $<
> +
> +$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S
> +	$(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<
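
P.S. For anyone following along with the naming scheme: as I read the NAME
macro above, each helper is called __aarch64_<op><size><suffix>, so for
example a 4-byte acq_rel compare-and-swap lands in __aarch64_cas4_acq_rel,
built from lse.S by the t-lse rule with -DL_cas -DSIZE=4 -DMODEL=4.  A rough
sketch of the kind of caller that would end up using it, once the compiler
side of this series is enabled (the function name below is my own
illustration, not part of the patch):

  /* Illustrative only: a 4-byte compare-exchange with acq_rel semantics.
     With the out-of-line atomics in use, the compiler calls the hidden
     helper __aarch64_cas4_acq_rel instead of expanding an LL/SC loop
     inline; the helper then picks the LSE CAS or the LDXR/STXR fallback
     based on __aarch64_have_lse_atomics, set up by lse-init.c.  */
  int
  try_update (int *p, int expected, int desired)
  {
    return __atomic_compare_exchange_n (p, &expected, desired, 0,
                                        __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
  }

Since the helpers are all hidden, that call is a plain bl with no PLT
indirection, which is the point of approach (3) in the rationale.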