public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-4064] libatomic: Add support for LSE and LSE2
@ 2022-11-15 15:08 Wilco Dijkstra
  0 siblings, 0 replies; only message in thread
From: Wilco Dijkstra @ 2022-11-15 15:08 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d1288d850944f69a795e4ff444a427eba3fec11b

commit r13-4064-gd1288d850944f69a795e4ff444a427eba3fec11b
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date:   Tue Nov 15 14:38:55 2022 +0000

    libatomic: Add support for LSE and LSE2
    
    Add support for AArch64 LSE and LSE2 to libatomic.  Disable outline atomics,
    and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics.
    On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks.
    
    Note this is safe since we swap all 16-byte atomics using the same ifunc,
    so they either use locks or LSE2 atomics, but never a mix. This also improves
    ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with
    the new libatomic if LSE2 is supported.
    
    libatomic/
            * Makefile.in: Regenerated with automake 1.15.1.
            * Makefile.am: Add atomic_16.S for AArch64.
            * configure.tgt: Disable outline atomics in AArch64 build.
            * config/linux/aarch64/atomic_16.S: New file - implementation of
            ifuncs for 16-byte atomics.
            * config/linux/aarch64/host-config.h: Enable ifuncs, use LSE
            (HWCAP_ATOMICS) for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for
            16-byte atomics.

Diff:
---
 libatomic/Makefile.am                        |   2 +
 libatomic/Makefile.in                        |  62 +++-
 libatomic/config/linux/aarch64/atomic_16.S   | 462 +++++++++++++++++++++++++++
 libatomic/config/linux/aarch64/host-config.h |  18 +-
 libatomic/configure.tgt                      |   1 +
 5 files changed, 527 insertions(+), 18 deletions(-)

diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index d88515e4a03..41e5da28512 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -127,6 +127,8 @@ if HAVE_IFUNC
 if ARCH_AARCH64_LINUX
 IFUNC_OPTIONS	     = -march=armv8-a+lse
 libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
+libatomic_la_SOURCES += atomic_16.S
+
 endif
 if ARCH_ARM_LINUX
 IFUNC_OPTIONS	     = -march=armv7-a+fp -DHAVE_KERNEL64
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index 80d25653dc7..89e29fc60a7 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -90,13 +90,14 @@ build_triplet = @build@
 host_triplet = @host@
 target_triplet = @target@
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
-@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
+@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	s,$(SIZES),$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	_$(s)_1_.lo,$(SIZEOBJS))) \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	_8_2_.lo,$(SIZEOBJS))
-@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
 @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@		       $(addsuffix _16_2_.lo,$(SIZEOBJS))
 
 subdir = .
@@ -154,8 +155,11 @@ am__uninstall_files_from_dir = { \
   }
 am__installdirs = "$(DESTDIR)$(toolexeclibdir)"
 LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 =  \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@	atomic_16.lo
 am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \
-	glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo
+	glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \
+	$(am__objects_1)
 libatomic_la_OBJECTS = $(am_libatomic_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -165,9 +169,9 @@ libatomic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
 	$(libatomic_la_LDFLAGS) $(LDFLAGS) -o $@
 libatomic_convenience_la_DEPENDENCIES = $(libatomic_la_LIBADD)
-am__objects_1 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
-	init.lo fenv.lo fence.lo flag.lo
-am_libatomic_convenience_la_OBJECTS = $(am__objects_1)
+am__objects_2 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
+	init.lo fenv.lo fence.lo flag.lo $(am__objects_1)
+am_libatomic_convenience_la_OBJECTS = $(am__objects_2)
 libatomic_convenience_la_OBJECTS =  \
 	$(am_libatomic_convenience_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
@@ -185,6 +189,16 @@ am__v_at_1 =
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
 am__depfiles_maybe = depfiles
 am__mv = mv -f
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CCASFLAGS) $(CCASFLAGS)
+AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
+am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
+am__v_CPPAS_0 = @echo "  CPPAS   " $@;
+am__v_CPPAS_1 = 
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
 	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
 LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -369,6 +383,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
@@ -404,9 +419,8 @@ noinst_LTLIBRARIES = libatomic_convenience.la
 @LIBAT_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBAT_BUILD_VERSIONED_SHLIB_TRUE@libatomic_version_dep = libatomic.map-sun
 libatomic_version_info = -version-info $(libtool_VERSION)
 libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) $(lt_host_flags)
-libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c init.c \
-	fenv.c fence.c flag.c
-
+libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
+	init.c fenv.c fence.c flag.c $(am__append_2)
 SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
 EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
 libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
@@ -432,8 +446,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c))
 # Then sort through them to find the one we want, and select the first.
 M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
 libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
-	_$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \
-	$(am__append_3) $(am__append_4)
+	_$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
+	$(am__append_4) $(am__append_5)
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
@@ -450,7 +464,7 @@ all: auto-config.h
 	$(MAKE) $(AM_MAKEFLAGS) all-recursive
 
 .SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .S .c .lo .o .obj
 am--refresh: Makefile
 	@:
 $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/../multilib.am $(am__configure_deps)
@@ -559,6 +573,7 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic_16.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fence.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fenv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flag.Plo@am__quote@
@@ -570,6 +585,27 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
 
+.S.o:
+@am__fastdepCCAS_TRUE@	$(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@	$(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@	$(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
+
 .c.o:
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
new file mode 100644
index 00000000000..bced7290dc1
--- /dev/null
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -0,0 +1,462 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+	.arch	armv8-a+lse	/* Let the assembler accept the casp/swp instructions used below.  */
+
+#define ENTRY(name)		/* Open a hidden global function: 2^4-byte aligned label, CFI start, BTI landing pad.  */ \
+	.global name;		\
+	.hidden name;		\
+	.type name,%function;	\
+	.p2align 4;		\
+name:				\
+	.cfi_startproc;		\
+	hint	34	// bti c
+
+#define END(name)		/* Close the CFI region opened by ENTRY and record the symbol size.  */ \
+	.cfi_endproc;		\
+	.size name, .-name;
+
+#define res0 x0	/* Result pair, in the register order used by ldp/ldxp/stxp.  */
+#define res1 x1
+#define in0  x2	/* Input operand pair.  */
+#define in1  x3
+#define tmp0 x6	/* Scratch pair for computed values.  */
+#define tmp1 x7
+#define exp0 x8	/* Expected-value pair used by compare-exchange.  */
+#define exp1 x9
+
+#ifdef __AARCH64EB__	/* Big-endian: the first register of each pair is the high half.  */
+# define reslo x1
+# define reshi x0
+# define inlo  x3
+# define inhi  x2
+# define tmplo x7
+# define tmphi x6
+#else	/* Little-endian: the first register of each pair is the low half.  */
+# define reslo x0
+# define reshi x1
+# define inlo  x2
+# define inhi  x3
+# define tmplo x6
+# define tmphi x7
+#endif
+
+#define RELAXED 0	/* Memory-order values the routines below test their order argument against.  */
+#define CONSUME 1
+#define ACQUIRE 2
+#define RELEASE 3
+#define ACQ_REL 4
+#define SEQ_CST 5
+
+
+ENTRY (libat_load_16_i1)	/* Atomic 128-bit load from [x0]; w1 is the memory order; result in res0/res1.  */
+	cbnz	w1, 1f			/* Non-zero order (not RELAXED) needs a barrier.  */
+	ldp	res0, res1, [x0]	/* RELAXED: bare load pair.  */
+	ret
+1:
+	cmp	w1, ACQUIRE
+	b.hi	2f			/* Orders above ACQUIRE take the strongest path.  */
+	ldp	res0, res1, [x0]	/* CONSUME/ACQUIRE: load, then order it before later accesses.  */
+	dmb	ishld
+	ret
+2:	ldar	tmp0, [x0]		/* SEQ_CST: Load-Acquire first so the ldp cannot be reordered before an earlier Store-Release.  */
+	ldp	res0, res1, [x0]	/* The value actually returned; the ldar result in tmp0 is discarded.  */
+	dmb	ish
+	ret
+END (libat_load_16_i1)
+
+
+ENTRY (libat_store_16_i1)	/* Atomic 128-bit store of in0/in1 to [x0]; w4 selects the ordering.  */
+	cbnz	w4, 1f			/* Non-zero order (not RELAXED): take the barrier path.  */
+	stp	in0, in1, [x0]		/* RELAXED: bare store pair.  */
+	ret
+1:
+	dmb	ish			/* Every non-relaxed order gets a barrier before the store.  */
+	stp	in0, in1, [x0]
+	cmp	w4, SEQ_CST
+	beq	2f			/* Only SEQ_CST also gets a trailing barrier.  */
+	ret
+2:
+	dmb	ish
+	ret
+END (libat_store_16_i1)
+
+
+ENTRY (libat_exchange_16_i1)	/* Atomic 128-bit swap: write in0/in1 to [x0], return the old value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 is the memory order; zero is RELAXED.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	stxp	w4, in0, in1, [x5]	/* w4 is reused as store status; the order was already dispatched.  */
+	cbnz	w4, 1b			/* Non-zero status: the exclusive store failed, retry.  */
+	ret
+2:
+	cmp	w4, ACQUIRE
+	b.hi	4f
+3:					/* CONSUME/ACQUIRE: acquire on the load only.  */
+	ldaxp	res0, res1, [x5]
+	stxp	w4, in0, in1, [x5]
+	cbnz	w4, 3b
+	ret
+4:
+	cmp	w4, RELEASE
+	b.ne	6f
+5:					/* RELEASE: release on the store only.  */
+	ldxp	res0, res1, [x5]
+	stlxp	w4, in0, in1, [x5]
+	cbnz	w4, 5b
+	ret
+6:					/* ACQ_REL/SEQ_CST: acquire load and release store.  */
+	ldaxp	res0, res1, [x5]
+	stlxp	w4, in0, in1, [x5]
+	cbnz	w4, 6b
+	ret
+END (libat_exchange_16_i1)
+
+
+ENTRY (libat_compare_exchange_16_i1)	/* 128-bit CAS on [x0]: expected value at [x1], desired in in0/in1, order in w4.  */
+	ldp	exp0, exp1, [x1]	/* Fetch the caller's expected value.  */
+	mov	tmp0, exp0		/* Keep a copy: casp replaces exp0/exp1 with the value it read.  */
+	mov	tmp1, exp1
+	cbz	w4, 2f			/* RELAXED.  */
+	cmp	w4, RELEASE
+	b.hs	3f			/* RELEASE and above are handled at 3.  */
+	caspa	exp0, exp1, in0, in1, [x0]	/* CONSUME/ACQUIRE.  */
+0:					/* Common tail: did memory hold the expected value?  */
+	cmp	exp0, tmp0
+	ccmp	exp1, tmp1, 0, eq	/* Compare the second halves only if the first halves matched.  */
+	bne	1f
+	mov	x0, 1			/* Success: return 1.  */
+	ret
+1:
+	stp	exp0, exp1, [x1]	/* Failure: write the observed value back to *x1...  */
+	mov	x0, 0			/* ...and return 0.  */
+	ret
+2:
+	casp	exp0, exp1, in0, in1, [x0]
+	b	0b
+3:
+	b.hi	4f			/* Flags still come from the cmp against RELEASE above.  */
+	caspl	exp0, exp1, in0, in1, [x0]	/* RELEASE.  */
+	b	0b
+4:
+	caspal	exp0, exp1, in0, in1, [x0]	/* ACQ_REL/SEQ_CST.  */
+	b	0b
+END (libat_compare_exchange_16_i1)
+
+
+ENTRY (libat_fetch_add_16_i1)	/* Atomic 128-bit add of in0/in1 to [x0]; returns the previous value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	adds	tmplo, reslo, inlo	/* Add the low halves and set carry...  */
+	adc	tmphi, reshi, inhi	/* ...then fold the carry into the high halves.  */
+	stxp	w4, tmp0, tmp1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	adds	tmplo, reslo, inlo
+	adc	tmphi, reshi, inhi
+	stlxp	w4, tmp0, tmp1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_fetch_add_16_i1)
+
+
+ENTRY (libat_add_fetch_16_i1)	/* Atomic 128-bit add of in0/in1 to [x0]; returns the new value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	adds	reslo, reslo, inlo	/* Sum is built in the result registers so it is also the return value.  */
+	adc	reshi, reshi, inhi
+	stxp	w4, res0, res1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	adds	reslo, reslo, inlo
+	adc	reshi, reshi, inhi
+	stlxp	w4, res0, res1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_add_fetch_16_i1)
+
+
+ENTRY (libat_fetch_sub_16_i1)	/* Atomic 128-bit subtract of in0/in1 from [x0]; returns the previous value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	subs	tmplo, reslo, inlo	/* Subtract the low halves and set the borrow flag...  */
+	sbc	tmphi, reshi, inhi	/* ...then apply the borrow to the high halves.  */
+	stxp	w4, tmp0, tmp1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	subs	tmplo, reslo, inlo
+	sbc	tmphi, reshi, inhi
+	stlxp	w4, tmp0, tmp1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_fetch_sub_16_i1)
+
+
+ENTRY (libat_sub_fetch_16_i1)	/* Atomic 128-bit subtract of in0/in1 from [x0]; returns the new value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	subs	reslo, reslo, inlo	/* Difference is built in the result registers so it is also the return value.  */
+	sbc	reshi, reshi, inhi
+	stxp	w4, res0, res1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	subs	reslo, reslo, inlo
+	sbc	reshi, reshi, inhi
+	stlxp	w4, res0, res1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_sub_fetch_16_i1)
+
+
+ENTRY (libat_fetch_or_16_i1)	/* Atomic 128-bit OR of in0/in1 into [x0]; returns the previous value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	orr	tmp0, res0, in0		/* Bitwise op works register by register, so no lo/hi aliases are needed.  */
+	orr	tmp1, res1, in1
+	stxp	w4, tmp0, tmp1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	orr	tmp0, res0, in0
+	orr	tmp1, res1, in1
+	stlxp	w4, tmp0, tmp1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_fetch_or_16_i1)
+
+
+ENTRY (libat_or_fetch_16_i1)	/* Atomic 128-bit OR of in0/in1 into [x0]; returns the new value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	orr	res0, res0, in0		/* Computed in place so the stored value is also the return value.  */
+	orr	res1, res1, in1
+	stxp	w4, res0, res1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	orr	res0, res0, in0
+	orr	res1, res1, in1
+	stlxp	w4, res0, res1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_or_fetch_16_i1)
+
+
+ENTRY (libat_fetch_and_16_i1)	/* Atomic 128-bit AND of in0/in1 into [x0]; returns the previous value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	and	tmp0, res0, in0		/* New value goes to scratch so res0/res1 keep the old value.  */
+	and	tmp1, res1, in1
+	stxp	w4, tmp0, tmp1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	and	tmp0, res0, in0
+	and	tmp1, res1, in1
+	stlxp	w4, tmp0, tmp1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_fetch_and_16_i1)
+
+
+ENTRY (libat_and_fetch_16_i1)	/* Atomic 128-bit AND of in0/in1 into [x0]; returns the new value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	and	res0, res0, in0		/* Computed in place so the stored value is also the return value.  */
+	and	res1, res1, in1
+	stxp	w4, res0, res1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	and	res0, res0, in0
+	and	res1, res1, in1
+	stlxp	w4, res0, res1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_and_fetch_16_i1)
+
+
+ENTRY (libat_fetch_xor_16_i1)	/* Atomic 128-bit XOR of in0/in1 into [x0]; returns the previous value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	eor	tmp0, res0, in0		/* New value goes to scratch so res0/res1 keep the old value.  */
+	eor	tmp1, res1, in1
+	stxp	w4, tmp0, tmp1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	eor	tmp0, res0, in0
+	eor	tmp1, res1, in1
+	stlxp	w4, tmp0, tmp1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_fetch_xor_16_i1)
+
+
+ENTRY (libat_xor_fetch_16_i1)	/* Atomic 128-bit XOR of in0/in1 into [x0]; returns the new value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	eor	res0, res0, in0		/* Computed in place so the stored value is also the return value.  */
+	eor	res1, res1, in1
+	stxp	w4, res0, res1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	eor	res0, res0, in0
+	eor	res1, res1, in1
+	stlxp	w4, res0, res1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_xor_fetch_16_i1)
+
+
+ENTRY (libat_fetch_nand_16_i1)	/* Atomic 128-bit NAND of [x0] with in0/in1; returns the previous value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	mvn	in0, in0		/* Invert the operand once, outside the retry loop.  */
+	mvn	in1, in1
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	orn	tmp0, in0, res0		/* (~in) | ~old, which equals ~(in & old).  */
+	orn	tmp1, in1, res1
+	stxp	w4, tmp0, tmp1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	orn	tmp0, in0, res0
+	orn	tmp1, in1, res1
+	stlxp	w4, tmp0, tmp1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_fetch_nand_16_i1)
+
+
+ENTRY (libat_nand_fetch_16_i1)	/* Atomic 128-bit NAND of [x0] with in0/in1; returns the new value in res0/res1.  */
+	mov	x5, x0			/* Keep the address in x5: the result overwrites x0/x1.  */
+	mvn	in0, in0		/* Invert the operand once, outside the retry loop.  */
+	mvn	in1, in1
+	cbnz	w4, 2f			/* w4 non-zero (not RELAXED): use the acquire/release loop.  */
+1:					/* RELAXED: plain exclusive pair.  */
+	ldxp	res0, res1, [x5]
+	orn	res0, in0, res0		/* (~in) | ~old, which equals ~(in & old); computed in place as the return value.  */
+	orn	res1, in1, res1
+	stxp	w4, res0, res1, [x5]	/* w4 is reused as the store status.  */
+	cbnz	w4, 1b			/* Retry if the exclusive store failed.  */
+	ret
+2:					/* Every other order: load-acquire plus store-release.  */
+	ldaxp	res0, res1, [x5]
+	orn	res0, in0, res0
+	orn	res1, in1, res1
+	stlxp	w4, res0, res1, [x5]
+	cbnz	w4, 2b
+	ret
+END (libat_nand_fetch_16_i1)
+
+
+ENTRY (libat_test_and_set_16_i1)	/* Atomically set the byte at [x0] to 1; returns its previous value in w0.  */
+	mov	w2, 1			/* Value to store.  */
+	cbnz	w1, 2f			/* w1 is the memory order; non-zero (not RELAXED) takes the ordered swap.  */
+	swpb	w2, w0, [x0]		/* SWP stores its first operand and loads the old byte into its second.  */
+	ret
+
+2:	swpalb	w2, w0, [x0]		/* Same swap with acquire and release semantics.  */
+	ret
+END (libat_test_and_set_16_i1)
+
+
+/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
+#define FEATURE_1_AND 0xc0000000	/* Property type passed to GNU_PROPERTY below.  */
+#define FEATURE_1_BTI 1	/* Bit set in the property value when BTI is enabled.  */
+#define FEATURE_1_PAC 2	/* Bit set in the property value when PAC is enabled.  */
+
+/* Supported features based on the code generation options.  */
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+# define BTI_FLAG FEATURE_1_BTI
+#else
+# define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3	/* An undefined macro evaluates to 0 here, giving PAC_FLAG 0.  */
+# define PAC_FLAG FEATURE_1_PAC
+#else
+# define PAC_FLAG 0
+#endif
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
+#define GNU_PROPERTY(type, value)	\
+  .section .note.gnu.property, "a";     \
+  .p2align 3;				\
+  .word 4;				\
+  .word 16;				\
+  .word 5;				\
+  .asciz "GNU";				\
+  .word type;				\
+  .word 4;				\
+  .word value;				\
+  .word 0;
+
+#if defined(__linux__) || defined(__FreeBSD__)
+.section .note.GNU-stack, "", %progbits	/* Empty section; NOTE(review): presumably marks the stack non-executable - confirm.  */
+
+/* Add GNU property note if built with branch protection.  */
+# if (BTI_FLAG|PAC_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# endif
+#endif
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index 769ba6edc60..d9b5ab31bc8 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -22,14 +22,22 @@
    <http://www.gnu.org/licenses/>.  */
 
 #if HAVE_IFUNC
-#include <stdlib.h>
+#include <sys/auxv.h>
 
-# ifdef HWCAP_ATOMICS
-#  define IFUNC_COND_1	(hwcap & HWCAP_ATOMICS)
+#ifdef HWCAP_USCAT
+# if N == 16
+#  define IFUNC_COND_1	(hwcap & HWCAP_USCAT)
 # else
-#  define IFUNC_COND_1	(false)
+#  define IFUNC_COND_1	(hwcap & HWCAP_ATOMICS)
 # endif
-# define IFUNC_NCOND(N)	(1)
+#else
+#  define IFUNC_COND_1	(false)
+#endif
+#define IFUNC_NCOND(N)	(1)
+
+#if N == 16 && IFUNC_ALT != 0
+# define DONE 1
+#endif
 
 #endif /* HAVE_IFUNC */
 
diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index 86a59475b6e..57f093603bb 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -49,6 +49,7 @@ case "${target_cpu}" in
 		fi
 		;;
 	esac
+	XCFLAGS="${XCFLAGS} -mno-outline-atomics"
 	;;
   arm*)
 	ARCH=arm

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-11-15 15:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-15 15:08 [gcc r13-4064] libatomic: Add support for LSE and LSE2 Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).