From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1895) id AA67C3896C19; Tue, 15 Nov 2022 15:08:56 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AA67C3896C19 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668524936; bh=VY4rzNvrXFL6j8pa7lppdngCNGXciCBCznimWsgdX2c=; h=From:To:Subject:Date:From; b=cJ48mQwrNfTj/MQeV6mgjqe0vQA05RaA7fZDtdwIC6NcQVHY0nV6DT6nnRsWHGddo TlV9r4gAW1NTffcNnnIYTYGfGj5oMx62gVy+EEyZiensEhJglQ1sWvCWqB4WBPjCdM gKaedZoGeDB8Hb1c85oD8dOcuSvQZrSqTsfZeBeY= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Wilco Dijkstra To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-4064] libatomic: Add support for LSE and LSE2 X-Act-Checkin: gcc X-Git-Author: Wilco Dijkstra X-Git-Refname: refs/heads/master X-Git-Oldrev: 5925f0ec54ab5ed773935eec09a602f58fa0ca2c X-Git-Newrev: d1288d850944f69a795e4ff444a427eba3fec11b Message-Id: <20221115150856.AA67C3896C19@sourceware.org> Date: Tue, 15 Nov 2022 15:08:56 +0000 (GMT) List-Id: https://gcc.gnu.org/g:d1288d850944f69a795e4ff444a427eba3fec11b commit r13-4064-gd1288d850944f69a795e4ff444a427eba3fec11b Author: Wilco Dijkstra Date: Tue Nov 15 14:38:55 2022 +0000 libatomic: Add support for LSE and LSE2 Add support for AArch64 LSE and LSE2 to libatomic. Disable outline atomics, and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics. On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks. Note this is safe since we swap all 16-byte atomics using the same ifunc, so they either use locks or LSE2 atomics, but never a mix. This also improves ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with the new libatomic if LSE2 is supported. libatomic/ * Makefile.in: Regenerated with automake 1.15.1. * Makefile.am: Add atomic_16.S for AArch64. * configure.tgt: Disable outline atomics in AArch64 build. 
* config/linux/aarch64/atomic_16.S: New file - implementation of ifuncs for 16-byte atomics. * config/linux/aarch64/host-config.h: Enable ifuncs, use LSE (HWCAP_ATOMICS) for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for 16-byte atomics. Diff: --- libatomic/Makefile.am | 2 + libatomic/Makefile.in | 62 +++- libatomic/config/linux/aarch64/atomic_16.S | 462 +++++++++++++++++++++++++++ libatomic/config/linux/aarch64/host-config.h | 18 +- libatomic/configure.tgt | 1 + 5 files changed, 527 insertions(+), 18 deletions(-) diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am index d88515e4a03..41e5da28512 100644 --- a/libatomic/Makefile.am +++ b/libatomic/Makefile.am @@ -127,6 +127,8 @@ if HAVE_IFUNC if ARCH_AARCH64_LINUX IFUNC_OPTIONS = -march=armv8-a+lse libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) +libatomic_la_SOURCES += atomic_16.S + endif if ARCH_ARM_LINUX IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64 diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in index 80d25653dc7..89e29fc60a7 100644 --- a/libatomic/Makefile.in +++ b/libatomic/Makefile.in @@ -90,13 +90,14 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) -@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \ +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S +@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS)) -@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS)) -@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \ +@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix 
_8_1_.lo,$(SIZEOBJS)) +@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \ @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS)) subdir = . @@ -154,8 +155,11 @@ am__uninstall_files_from_dir = { \ } am__installdirs = "$(DESTDIR)$(toolexeclibdir)" LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES) +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 = \ +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ atomic_16.lo am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \ - glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo + glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \ + $(am__objects_1) libatomic_la_OBJECTS = $(am_libatomic_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -165,9 +169,9 @@ libatomic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libatomic_la_LDFLAGS) $(LDFLAGS) -o $@ libatomic_convenience_la_DEPENDENCIES = $(libatomic_la_LIBADD) -am__objects_1 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \ - init.lo fenv.lo fence.lo flag.lo -am_libatomic_convenience_la_OBJECTS = $(am__objects_1) +am__objects_2 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \ + init.lo fenv.lo fence.lo flag.lo $(am__objects_1) +am_libatomic_convenience_la_OBJECTS = $(am__objects_2) libatomic_convenience_la_OBJECTS = \ $(am_libatomic_convenience_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) @@ -185,6 +189,16 @@ am__v_at_1 = depcomp = $(SHELL) $(top_srcdir)/../depcomp am__depfiles_maybe = depfiles am__mv = mv -f +CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) +LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CCASFLAGS) $(CCASFLAGS) +AM_V_CPPAS = $(am__v_CPPAS_@AM_V@) +am__v_CPPAS_ = 
$(am__v_CPPAS_@AM_DEFAULT_V@) +am__v_CPPAS_0 = @echo " CPPAS " $@; +am__v_CPPAS_1 = COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ @@ -369,6 +383,7 @@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ +runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ @@ -404,9 +419,8 @@ noinst_LTLIBRARIES = libatomic_convenience.la @LIBAT_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBAT_BUILD_VERSIONED_SHLIB_TRUE@libatomic_version_dep = libatomic.map-sun libatomic_version_info = -version-info $(libtool_VERSION) libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) $(lt_host_flags) -libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c init.c \ - fenv.c fence.c flag.c - +libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \ + init.c fenv.c fence.c flag.c $(am__append_2) SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS)) libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep) @@ -432,8 +446,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c)) # Then sort through them to find the one we want, and select the first. 
M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files))) libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \ - _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \ - $(am__append_3) $(am__append_4) + _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \ + $(am__append_4) $(am__append_5) @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586 @@ -450,7 +464,7 @@ all: auto-config.h $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: -.SUFFIXES: .c .lo .o .obj +.SUFFIXES: .S .c .lo .o .obj am--refresh: Makefile @: $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/../multilib.am $(am__configure_deps) @@ -559,6 +573,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fence.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fenv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flag.Plo@am__quote@ @@ -570,6 +585,27 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ +.S.o: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $< + +.S.obj: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.S.lo: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $< + .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S new file mode 100644 index 00000000000..bced7290dc1 --- /dev/null +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -0,0 +1,462 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + + This file is part of the GNU Atomic Library (libatomic). + + Libatomic is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + + .arch armv8-a+lse + +#define ENTRY(name) \ + .global name; \ + .hidden name; \ + .type name,%function; \ + .p2align 4; \ +name: \ + .cfi_startproc; \ + hint 34 // bti c + +#define END(name) \ + .cfi_endproc; \ + .size name, .-name; + +#define res0 x0 +#define res1 x1 +#define in0 x2 +#define in1 x3 +#define tmp0 x6 +#define tmp1 x7 +#define exp0 x8 +#define exp1 x9 + +#ifdef __AARCH64EB__ +# define reslo x1 +# define reshi x0 +# define inlo x3 +# define inhi x2 +# define tmplo x7 +# define tmphi x6 +#else +# define reslo x0 +# define reshi x1 +# define inlo x2 +# define inhi x3 +# define tmplo x6 +# define tmphi x7 +#endif + +#define RELAXED 0 +#define CONSUME 1 +#define ACQUIRE 2 +#define RELEASE 3 +#define ACQ_REL 4 +#define SEQ_CST 5 + + +ENTRY (libat_load_16_i1) + cbnz w1, 1f + ldp res0, res1, [x0] + ret +1: + cmp w1, ACQUIRE + b.hi 2f + ldp res0, res1, [x0] + dmb ishld + ret +2: + ldp res0, res1, [x0] + dmb ish + ret +END (libat_load_16_i1) + + +ENTRY (libat_store_16_i1) + cbnz w4, 1f + stp in0, in1, [x0] + ret +1: + dmb ish + stp in0, in1, [x0] + cmp w4, SEQ_CST + beq 2f + ret +2: + dmb ish + ret +END (libat_store_16_i1) + + +ENTRY (libat_exchange_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + stxp w4, in0, in1, [x5] + cbnz w4, 1b + ret +2: + cmp w4, ACQUIRE + b.hi 4f +3: + ldaxp res0, res1, [x5] + stxp w4, in0, in1, [x5] + cbnz w4, 3b + ret +4: + cmp w4, RELEASE + b.ne 6f +5: + ldxp res0, res1, [x5] + stlxp w4, in0, in1, [x5] + cbnz w4, 5b + ret +6: + ldaxp res0, res1, [x5] + stlxp w4, in0, in1, [x5] + cbnz w4, 6b + ret +END (libat_exchange_16_i1) + + +ENTRY (libat_compare_exchange_16_i1) + ldp exp0, exp1, [x1] + mov tmp0, exp0 + mov tmp1, exp1 + cbz w4, 2f + cmp w4, RELEASE + b.hs 3f + caspa exp0, exp1, in0, in1, 
[x0] +0: + cmp exp0, tmp0 + ccmp exp1, tmp1, 0, eq + bne 1f + mov x0, 1 + ret +1: + stp exp0, exp1, [x1] + mov x0, 0 + ret +2: + casp exp0, exp1, in0, in1, [x0] + b 0b +3: + b.hi 4f + caspl exp0, exp1, in0, in1, [x0] + b 0b +4: + caspal exp0, exp1, in0, in1, [x0] + b 0b +END (libat_compare_exchange_16_i1) + + +ENTRY (libat_fetch_add_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + adds tmplo, reslo, inlo + adc tmphi, reshi, inhi + stxp w4, tmp0, tmp1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + adds tmplo, reslo, inlo + adc tmphi, reshi, inhi + stlxp w4, tmp0, tmp1, [x5] + cbnz w4, 2b + ret +END (libat_fetch_add_16_i1) + + +ENTRY (libat_add_fetch_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + adds reslo, reslo, inlo + adc reshi, reshi, inhi + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + adds reslo, reslo, inlo + adc reshi, reshi, inhi + stlxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_add_fetch_16_i1) + + +ENTRY (libat_fetch_sub_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + subs tmplo, reslo, inlo + sbc tmphi, reshi, inhi + stxp w4, tmp0, tmp1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + subs tmplo, reslo, inlo + sbc tmphi, reshi, inhi + stlxp w4, tmp0, tmp1, [x5] + cbnz w4, 2b + ret +END (libat_fetch_sub_16_i1) + + +ENTRY (libat_sub_fetch_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + subs reslo, reslo, inlo + sbc reshi, reshi, inhi + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + subs reslo, reslo, inlo + sbc reshi, reshi, inhi + stlxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_sub_fetch_16_i1) + + +ENTRY (libat_fetch_or_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + orr tmp0, res0, in0 + orr tmp1, res1, in1 + stxp w4, tmp0, tmp1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + orr tmp0, res0, in0 + orr tmp1, res1, in1 + stlxp w4, tmp0, tmp1, [x5] + cbnz w4, 2b + ret 
+END (libat_fetch_or_16_i1) + + +ENTRY (libat_or_fetch_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + orr res0, res0, in0 + orr res1, res1, in1 + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + orr res0, res0, in0 + orr res1, res1, in1 + stlxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_or_fetch_16_i1) + + +ENTRY (libat_fetch_and_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + and tmp0, res0, in0 + and tmp1, res1, in1 + stxp w4, tmp0, tmp1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + and tmp0, res0, in0 + and tmp1, res1, in1 + stlxp w4, tmp0, tmp1, [x5] + cbnz w4, 2b + ret +END (libat_fetch_and_16_i1) + + +ENTRY (libat_and_fetch_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + and res0, res0, in0 + and res1, res1, in1 + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + and res0, res0, in0 + and res1, res1, in1 + stlxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_and_fetch_16_i1) + + +ENTRY (libat_fetch_xor_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + eor tmp0, res0, in0 + eor tmp1, res1, in1 + stxp w4, tmp0, tmp1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + eor tmp0, res0, in0 + eor tmp1, res1, in1 + stlxp w4, tmp0, tmp1, [x5] + cbnz w4, 2b + ret +END (libat_fetch_xor_16_i1) + + +ENTRY (libat_xor_fetch_16_i1) + mov x5, x0 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + eor res0, res0, in0 + eor res1, res1, in1 + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + eor res0, res0, in0 + eor res1, res1, in1 + stlxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_xor_fetch_16_i1) + + +ENTRY (libat_fetch_nand_16_i1) + mov x5, x0 + mvn in0, in0 + mvn in1, in1 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + orn tmp0, in0, res0 + orn tmp1, in1, res1 + stxp w4, tmp0, tmp1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + orn tmp0, in0, res0 + orn tmp1, in1, res1 + stlxp w4, tmp0, 
tmp1, [x5] + cbnz w4, 2b + ret +END (libat_fetch_nand_16_i1) + + +ENTRY (libat_nand_fetch_16_i1) + mov x5, x0 + mvn in0, in0 + mvn in1, in1 + cbnz w4, 2f +1: + ldxp res0, res1, [x5] + orn res0, in0, res0 + orn res1, in1, res1 + stxp w4, res0, res1, [x5] + cbnz w4, 1b + ret +2: + ldaxp res0, res1, [x5] + orn res0, in0, res0 + orn res1, in1, res1 + stlxp w4, res0, res1, [x5] + cbnz w4, 2b + ret +END (libat_nand_fetch_16_i1) + + +ENTRY (libat_test_and_set_16_i1) + mov w2, 1 + cbnz w1, 2f + swpb w0, w2, [x0] + ret + +2: swpalb w0, w2, [x0] + ret +END (libat_test_and_set_16_i1) + + +/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */ +#define FEATURE_1_AND 0xc0000000 +#define FEATURE_1_BTI 1 +#define FEATURE_1_PAC 2 + +/* Supported features based on the code generation options. */ +#if defined(__ARM_FEATURE_BTI_DEFAULT) +# define BTI_FLAG FEATURE_1_BTI +#else +# define BTI_FLAG 0 +#endif + +#if __ARM_FEATURE_PAC_DEFAULT & 3 +# define PAC_FLAG FEATURE_1_PAC +#else +# define PAC_FLAG 0 +#endif + +/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ +#define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word 5; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; + +#if defined(__linux__) || defined(__FreeBSD__) +.section .note.GNU-stack, "", %progbits + +/* Add GNU property note if built with branch protection. */ +# if (BTI_FLAG|PAC_FLAG) != 0 +GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) +# endif +#endif diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index 769ba6edc60..d9b5ab31bc8 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -22,14 +22,22 @@ . 
*/ #if HAVE_IFUNC -#include <stdlib.h> +#include <sys/auxv.h> -# ifdef HWCAP_ATOMICS -# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) +#ifdef HWCAP_USCAT +# if N == 16 +# define IFUNC_COND_1 (hwcap & HWCAP_USCAT) # else -# define IFUNC_COND_1 (false) +# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) # endif -# define IFUNC_NCOND(N) (1) +#else +# define IFUNC_COND_1 (false) +#endif +#define IFUNC_NCOND(N) (1) + +#if N == 16 && IFUNC_ALT != 0 +# define DONE 1 +#endif #endif /* HAVE_IFUNC */ diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt index 86a59475b6e..57f093603bb 100644 --- a/libatomic/configure.tgt +++ b/libatomic/configure.tgt @@ -49,6 +49,7 @@ case "${target_cpu}" in fi ;; esac + XCFLAGS="${XCFLAGS} -mno-outline-atomics" ;; arm*) ARCH=arm