public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] libatomic: Add rcpc3 128-bit atomic operations for AArch64
@ 2023-11-13 11:47 Victor Do Nascimento
  2023-12-08 14:59 ` Szabolcs Nagy
  0 siblings, 1 reply; 3+ messages in thread
From: Victor Do Nascimento @ 2023-11-13 11:47 UTC (permalink / raw)
  To: gcc-patches
  Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw,
	Victor Do Nascimento

Continuing on from previously-proposed Libatomic enablement work [1],
the introduction of the optional RCPC3 architectural extension for
Armv8.2-A upwards provides additional support for the release
consistency model, introducing both the Load-Acquire RCpc Pair
Ordered and Store-Release Pair Ordered operations in the form of
LDIAPP and STILP.

These operations are single-copy atomic on cores which also implement
LSE2 and, as such, support for these operations is added to Libatomic
and employed accordingly when the LSE2 and RCPC3 features are detected
in a given core at runtime.

The possibility that a core implements (beyond LSE & LSE2) both the
LSE128 and RCPC3 features has also required that support for up to 4
ifuncs (up from 3 before) be added, so that the lse128+rcpc3 option is
available for selection at runtime.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636287.html

libatomic/ChangeLog:

	  * libatomic_i.h (GEN_SELECTOR): Define for
	  IFUNC_NCOND(N) == 4.
	  * configure.ac: Add call to LIBAT_TEST_FEAT_LRCPC3() test.
	  * configure: Regenerate.
	  * config/linux/aarch64/host-config.h (HAS_LRCPC3): New.
	  (has_rcpc3): Likewise.
	  * config/linux/aarch64/atomic_16.S (libat_load_16): Add
	  LRCPC3 variant.
	  (libat_store_16): Likewise.
	  * acinclude.m4 (LIBAT_TEST_FEAT_LRCPC3): New.
	  (HAVE_FEAT_LRCPC3): Likewise.
	  (ARCH_AARCH64_HAVE_LRCPC3): Likewise.
	  * Makefile.am (AM_CPPFLAGS): Conditionally append
	  -DHAVE_FEAT_LRCPC3 flag.
---
 libatomic/Makefile.am                        |  6 +-
 libatomic/Makefile.in                        | 22 +++--
 libatomic/acinclude.m4                       | 19 ++++
 libatomic/auto-config.h.in                   |  3 +
 libatomic/config/linux/aarch64/atomic_16.S   | 94 +++++++++++++++++++-
 libatomic/config/linux/aarch64/host-config.h | 26 +++++-
 libatomic/configure                          | 59 +++++++++++-
 libatomic/configure.ac                       |  1 +
 libatomic/libatomic_i.h                      | 18 ++++
 9 files changed, 230 insertions(+), 18 deletions(-)

diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index 24e843db67d..dee38e46af9 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -130,8 +130,12 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix _$(s)_.lo,$(SIZEOBJS)))
 ## On a target-specific basis, include alternates to be selected by IFUNC.
 if HAVE_IFUNC
 if ARCH_AARCH64_LINUX
+AM_CPPFLAGS	      =
 if ARCH_AARCH64_HAVE_LSE128
-AM_CPPFLAGS	     = -DHAVE_FEAT_LSE128
+AM_CPPFLAGS	     += -DHAVE_FEAT_LSE128
+endif
+if ARCH_AARCH64_HAVE_LRCPC3
+AM_CPPFLAGS	    += -DHAVE_FEAT_LRCPC3
 endif
 IFUNC_OPTIONS	     = -march=armv8-a+lse
 libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index cd48fa21334..8e87d12907a 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -89,15 +89,17 @@ POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
 target_triplet = @target@
-@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
-@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
-@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
+@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = -DHAVE_FEAT_LSE128
+@ARCH_AARCH64_HAVE_LRCPC3_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = -DHAVE_FEAT_LRCPC3
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = atomic_16.S
+@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(foreach \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	s,$(SIZES),$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	_$(s)_1_.lo,$(SIZEOBJS))) \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	$(addsuffix \
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@	_8_2_.lo,$(SIZEOBJS))
-@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_6 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_7 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
 @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@		       $(addsuffix _16_2_.lo,$(SIZEOBJS))
 
 subdir = .
@@ -424,7 +426,7 @@ libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) \
 	$(lt_host_flags) $(libatomic_darwin_rpath)
 
 libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
-	init.c fenv.c fence.c flag.c $(am__append_2)
+	init.c fenv.c fence.c flag.c $(am__append_4)
 SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
 EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
 libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
@@ -450,9 +452,11 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c))
 # Then sort through them to find the one we want, and select the first.
 M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
 libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
-	_$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
-	$(am__append_4) $(am__append_5)
-@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS = -DHAVE_FEAT_LSE128
+	_$(s)_.lo,$(SIZEOBJS))) $(am__append_3) $(am__append_5) \
+	$(am__append_6) $(am__append_7)
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS =  \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@	$(am__append_1) \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@	$(am__append_2)
 @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
 @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4
index 4197db8f404..ba8671ebb8e 100644
--- a/libatomic/acinclude.m4
+++ b/libatomic/acinclude.m4
@@ -102,6 +102,25 @@ AC_DEFUN([LIBAT_TEST_FEAT_LSE128],[
   AM_CONDITIONAL([ARCH_AARCH64_HAVE_LSE128], [test x$libat_cv_have_feat_lse128 = xyes])
 ])
 
+dnl
+dnl Test if the host assembler supports armv8.2-a RCPC3 insns.
+dnl
+AC_DEFUN([LIBAT_TEST_FEAT_LRCPC3],[
+  AC_CACHE_CHECK([for armv8.2-a LRCPC3 insn support],
+    [libat_cv_have_feat_lrcpc3],[
+    AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv8.2-a+rcpc3")])])
+    if AC_TRY_EVAL(ac_link); then
+      eval libat_cv_have_feat_lrcpc3=yes
+    else
+      eval libat_cv_have_feat_lrcpc3=no
+    fi
+    rm -f conftest*
+  ])
+  LIBAT_DEFINE_YESNO([HAVE_FEAT_LRCPC3], [$libat_cv_have_feat_lrcpc3],
+	[Have LRCPC3 support for 16 byte integers.])
+  AM_CONDITIONAL([ARCH_AARCH64_HAVE_LRCPC3], [test x$libat_cv_have_feat_lrcpc3 = xyes])
+])
+
 dnl
 dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2
 dnl
diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in
index 7c78933b07d..26d56e7da67 100644
--- a/libatomic/auto-config.h.in
+++ b/libatomic/auto-config.h.in
@@ -105,6 +105,9 @@
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H
 
+/* Have LRCPC3 support for 16 byte integers. */
+#undef HAVE_FEAT_LRCPC3
+
 /* Have LSE128 support for 16 byte integers. */
 #undef HAVE_FEAT_LSE128
 
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 44a773031f8..31608663c6a 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -38,7 +38,13 @@
    The libat_<op>_16_i2 entry points are used when LSE2 is available.  */
 
 #if HAVE_FEAT_LSE128
+# if HAVE_FEAT_LRCPC3
+	.arch	armv8-a+lse128+rcpc3
+# else
 	.arch	armv8-a+lse128
+# endif
+#elif HAVE_FEAT_LRCPC3
+	.arch	armv8-a+lse+rcpc3
 #else
 	.arch	armv8-a+lse
 #endif
@@ -70,8 +76,10 @@ name##feat:				\
 	.set alias##from, alias##to;
 
 #define CORE
-#define LSE128	_i1
-#define LSE2	_i2
+#define LSE128_LRCPC3	_i1
+#define LSE128		_i2
+#define LRCPC3		_i3
+#define LSE2		_i4
 
 #define res0 x0
 #define res1 x1
@@ -124,6 +132,29 @@ ENTRY (libat_load_16, CORE)
 END (libat_load_16, CORE)
 
 
+#if HAVE_FEAT_LRCPC3
+ENTRY (libat_load_16, LRCPC3)
+	cbnz	w1, 1f
+
+	/* RELAXED.  */
+	ldp	res0, res1, [x0]
+	ret
+1:
+	cmp	w1, SEQ_CST
+	b.eq	2f
+
+	/* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
+	ldiapp	res0, res1, [x0]
+	ret
+
+	/* SEQ_CST.  */
+2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr.  */
+	ldiapp	res0, res1, [x0]
+	ret
+END (libat_load_16, LRCPC3)
+#endif
+
+
 ENTRY (libat_load_16, LSE2)
 	cbnz	w1, 1f
 
@@ -164,6 +195,21 @@ ENTRY (libat_store_16, CORE)
 END (libat_store_16, CORE)
 
 
+#if HAVE_FEAT_LRCPC3
+ENTRY (libat_store_16, LRCPC3)
+	cbnz	w4, 1f
+
+	/* RELAXED.  */
+	stp	in0, in1, [x0]
+	ret
+
+	/* RELEASE/SEQ_CST.  */
+1:	stilp	in0, in1, [x0]
+	ret
+END (libat_store_16, LRCPC3)
+#endif
+
+
 ENTRY (libat_store_16, LSE2)
 	cbnz	w4, 1f
 
@@ -702,6 +748,27 @@ ENTRY (libat_test_and_set_16, CORE)
 END (libat_test_and_set_16, CORE)
 
 
+/* Alias all LSE128_LRCPC3 ifuncs to their specific implementations,
+   that is, map them to LSE128, LRCPC3 or LSE2 as appropriate.  */
+
+ALIAS (libat_exchange_16, LSE128_LRCPC3, LSE128)
+ALIAS (libat_fetch_or_16, LSE128_LRCPC3, LSE128)
+ALIAS (libat_fetch_and_16, LSE128_LRCPC3, LSE128)
+ALIAS (libat_or_fetch_16, LSE128_LRCPC3, LSE128)
+ALIAS (libat_and_fetch_16, LSE128_LRCPC3, LSE128)
+ALIAS (libat_load_16, LSE128_LRCPC3, LRCPC3)
+ALIAS (libat_store_16, LSE128_LRCPC3, LRCPC3)
+ALIAS (libat_compare_exchange_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_fetch_add_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_add_fetch_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_fetch_sub_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_sub_fetch_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_fetch_xor_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_xor_fetch_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_fetch_nand_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_nand_fetch_16, LSE128_LRCPC3, LSE2)
+ALIAS (libat_test_and_set_16, LSE128_LRCPC3, LSE2)
+
 /* Alias entry points which are the same in LSE2 and LSE128.  */
 
 #if !HAVE_FEAT_LSE128
@@ -724,6 +791,29 @@ ALIAS (libat_fetch_nand_16, LSE128, LSE2)
 ALIAS (libat_nand_fetch_16, LSE128, LSE2)
 ALIAS (libat_test_and_set_16, LSE128, LSE2)
 
+
+/* Alias entry points which are the same in LRCPC3 and LSE2.  */
+
+#if !HAVE_FEAT_LRCPC3
+ALIAS (libat_load_16, LRCPC3, LSE2)
+ALIAS (libat_store_16, LRCPC3, LSE2)
+#endif
+ALIAS (libat_exchange_16, LRCPC3, LSE2)
+ALIAS (libat_fetch_or_16, LRCPC3, LSE2)
+ALIAS (libat_fetch_and_16, LRCPC3, LSE2)
+ALIAS (libat_or_fetch_16, LRCPC3, LSE2)
+ALIAS (libat_and_fetch_16, LRCPC3, LSE2)
+ALIAS (libat_compare_exchange_16, LRCPC3, LSE2)
+ALIAS (libat_fetch_add_16, LRCPC3, LSE2)
+ALIAS (libat_add_fetch_16, LRCPC3, LSE2)
+ALIAS (libat_fetch_sub_16, LRCPC3, LSE2)
+ALIAS (libat_sub_fetch_16, LRCPC3, LSE2)
+ALIAS (libat_fetch_xor_16, LRCPC3, LSE2)
+ALIAS (libat_xor_fetch_16, LRCPC3, LSE2)
+ALIAS (libat_fetch_nand_16, LRCPC3, LSE2)
+ALIAS (libat_nand_fetch_16, LRCPC3, LSE2)
+ALIAS (libat_test_and_set_16, LRCPC3, LSE2)
+
 /* Alias entry points which are the same in baseline and LSE2.  */
 
 ALIAS (libat_exchange_16, LSE2, CORE)
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index d873e91b1c9..445003217bf 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -26,11 +26,13 @@
 
 #ifdef HWCAP_USCAT
 # if N == 16
-#  define IFUNC_COND_1		(has_lse128 (hwcap))
-#  define IFUNC_COND_2		(has_lse2 (hwcap))
-#  define IFUNC_NCOND(N)	2
+#  define IFUNC_COND_1		(has_lse128 (hwcap) && has_rcpc3 (hwcap))
+#  define IFUNC_COND_2		(has_lse128 (hwcap))
+#  define IFUNC_COND_3		(has_rcpc3  (hwcap))
+#  define IFUNC_COND_4		(has_lse2   (hwcap))
+#  define IFUNC_NCOND(N)	4
 # else
-#  define IFUNC_COND_1		(hwcap & HWCAP_ATOMICS)
+#  define IFUNC_COND_1	(hwcap & HWCAP_ATOMICS)
 #  define IFUNC_NCOND(N)	1
 # endif
 #else
@@ -81,6 +83,14 @@ has_lse2 (unsigned long hwcap)
   (val & 0xf00000) >= 0x300000;					\
     })
 
+/* LRCPC3 support is encoded in ID_AA64ISAR1_EL1.LRCPC,
+   bits[23:20].  A value of 0b0011 or above is expected.  Check that.  */
+#define HAS_LRCPC3() ({						\
+  unsigned long val;						\
+  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (val));	\
+  (val & 0xf00000) >= 0x300000;					\
+    })
+
 static inline bool
 has_lse128 (unsigned long hwcap)
 {
@@ -88,6 +98,14 @@ has_lse128 (unsigned long hwcap)
     return true;
   return false;
 }
+
+static inline bool
+has_rcpc3 (unsigned long hwcap)
+{
+  if (has_lse2 (hwcap) && HAS_LRCPC3 ())
+    return true;
+  return false;
+}
 #endif
 
 #include_next <host-config.h>
diff --git a/libatomic/configure b/libatomic/configure
index ee3bbb97d69..248ac215dac 100755
--- a/libatomic/configure
+++ b/libatomic/configure
@@ -657,6 +657,8 @@ LIBAT_BUILD_VERSIONED_SHLIB_TRUE
 OPT_LDFLAGS
 SECTION_LDFLAGS
 SYSROOT_CFLAGS_FOR_TARGET
+ARCH_AARCH64_HAVE_LRCPC3_FALSE
+ARCH_AARCH64_HAVE_LRCPC3_TRUE
 ARCH_AARCH64_HAVE_LSE128_FALSE
 ARCH_AARCH64_HAVE_LSE128_TRUE
 enable_aarch64_lse
@@ -11458,7 +11460,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11461 "configure"
+#line 11463 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11564,7 +11566,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11567 "configure"
+#line 11569 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11977,6 +11979,55 @@ else
 fi
 
 
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv8.2-a LRCPC3 insn support" >&5
+$as_echo_n "checking for armv8.2-a LRCPC3 insn support... " >&6; }
+if ${libat_cv_have_feat_lrcpc3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+asm(".arch armv8.2-a+rcpc3")
+  ;
+  return 0;
+}
+_ACEOF
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+      eval libat_cv_have_feat_lrcpc3=yes
+    else
+      eval libat_cv_have_feat_lrcpc3=no
+    fi
+    rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lrcpc3" >&5
+$as_echo "$libat_cv_have_feat_lrcpc3" >&6; }
+
+  yesno=`echo $libat_cv_have_feat_lrcpc3 | tr 'yesno' '1  0 '`
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_FEAT_LRCPC3 $yesno
+_ACEOF
+
+
+   if test x$libat_cv_have_feat_lrcpc3 = xyes; then
+  ARCH_AARCH64_HAVE_LRCPC3_TRUE=
+  ARCH_AARCH64_HAVE_LRCPC3_FALSE='#'
+else
+  ARCH_AARCH64_HAVE_LRCPC3_TRUE='#'
+  ARCH_AARCH64_HAVE_LRCPC3_FALSE=
+fi
+
+
     ;;
 esac
 
@@ -16044,6 +16095,10 @@ if test -z "${ARCH_AARCH64_HAVE_LSE128_TRUE}" && test -z "${ARCH_AARCH64_HAVE_LS
   as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LSE128\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${ARCH_AARCH64_HAVE_LRCPC3_TRUE}" && test -z "${ARCH_AARCH64_HAVE_LRCPC3_FALSE}"; then
+  as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LRCPC3\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 
 if test -z "${LIBAT_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBAT_BUILD_VERSIONED_SHLIB_FALSE}"; then
   as_fn_error $? "conditional \"LIBAT_BUILD_VERSIONED_SHLIB\" was never defined.
diff --git a/libatomic/configure.ac b/libatomic/configure.ac
index b2fe68d7d0f..77cec927f86 100644
--- a/libatomic/configure.ac
+++ b/libatomic/configure.ac
@@ -170,6 +170,7 @@ case "$target" in
  *aarch64*)
     ACX_PROG_CC_WARNING_OPTS([-march=armv8-a+lse],[enable_aarch64_lse])
     LIBAT_TEST_FEAT_LSE128()
+    LIBAT_TEST_FEAT_LRCPC3()
     ;;
 esac
 
diff --git a/libatomic/libatomic_i.h b/libatomic/libatomic_i.h
index 1b474e79bf0..605ddba81e3 100644
--- a/libatomic/libatomic_i.h
+++ b/libatomic/libatomic_i.h
@@ -275,6 +275,24 @@ bool libat_is_lock_free (size_t, void *) MAN(is_lock_free);
 	    return C3(libat_,X,_i3);				\
 	  return C2(libat_,X);					\
 	}
+# elif IFUNC_NCOND(N) == 4
+#  define GEN_SELECTOR(X)					\
+	extern typeof(C2(libat_,X)) C3(libat_,X,_i1) HIDDEN;	\
+	extern typeof(C2(libat_,X)) C3(libat_,X,_i2) HIDDEN;	\
+	extern typeof(C2(libat_,X)) C3(libat_,X,_i3) HIDDEN;	\
+	extern typeof(C2(libat_,X)) C3(libat_,X,_i4) HIDDEN;	\
+	static typeof(C2(libat_,X)) * C2(select_,X) (IFUNC_RESOLVER_ARGS) \
+	{							\
+	  if (IFUNC_COND_1)					\
+	    return C3(libat_,X,_i1);				\
+	  if (IFUNC_COND_2)					\
+	    return C3(libat_,X,_i2);				\
+	  if (IFUNC_COND_3)					\
+	    return C3(libat_,X,_i3);				\
+	  if (IFUNC_COND_4)					\
+	    return C3(libat_,X,_i4);				\
+	  return C2(libat_,X);					\
+	}
 # else
 #  error "Unsupported number of ifunc alternatives."
 # endif
-- 
2.42.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] libatomic: Add rcpc3 128-bit atomic operations for AArch64
  2023-11-13 11:47 [PATCH] libatomic: Add rcpc3 128-bit atomic operations for AArch64 Victor Do Nascimento
@ 2023-12-08 14:59 ` Szabolcs Nagy
  0 siblings, 0 replies; 3+ messages in thread
From: Szabolcs Nagy @ 2023-12-08 14:59 UTC (permalink / raw)
  To: Victor Do Nascimento, gcc-patches
  Cc: kyrylo.tkachov, richard.sandiford, Richard.Earnshaw

The 11/13/2023 11:47, Victor Do Nascimento wrote:
> +/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic,
> +   bits[23:20].  The expected value is 0b0011.  Check that.  */
> +#define HAS_LRCPC3() ({						\
> +  unsigned long val;						\
> +  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (val));	\
> +  (val & 0xf00000) >= 0x300000;					\
> +    })

same comment as for the lse128 patch: use hwcaps
(and wait for linux release before committing).

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] libatomic: Add rcpc3 128-bit atomic operations for AArch64
@ 2024-05-16 13:51 Victor Do Nascimento
  0 siblings, 0 replies; 3+ messages in thread
From: Victor Do Nascimento @ 2024-05-16 13:51 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, Richard.Earnshaw, Victor Do Nascimento

The introduction of the optional RCPC3 architectural extension for
Armv8.2-A upwards provides additional support for the release
consistency model, introducing the Load-Acquire RCpc Pair Ordered, and
Store-Release Pair Ordered operations in the form of LDIAPP and STILP.

These operations are single-copy atomic on cores which also implement
LSE2 and, as such, support for these operations is added to Libatomic
and employed accordingly when the LSE2 and RCPC3 features are detected
in a given core at runtime.

libatomic/ChangeLog:

	* configure.ac: Add call to LIBAT_TEST_FEAT_AARCH64_LRCPC3() test.
	* configure: Regenerate.
	* config/linux/aarch64/host-config.h (has_rcpc3): New.
	(HWCAP2_LRCPC3): Likewise.
	(LSE2_LRCPC3_ATOP): Likewise.
	* config/linux/aarch64/atomic_16.S: New +rcpc3 .arch
	directives.
	* config/linux/aarch64/atomic_16.S (libat_load_16): Add LRCPC3
	variant.
	(libat_store_16): Likewise.
	* acinclude.m4 (LIBAT_TEST_FEAT_AARCH64_LRCPC3): New.
	(HAVE_FEAT_LRCPC3): Likewise.
	(ARCH_AARCH64_HAVE_LRCPC3): Likewise.
	* auto-config.h.in (HAVE_FEAT_LRCPC3): New.
---
 libatomic/acinclude.m4                       | 18 +++++++
 libatomic/auto-config.h.in                   |  3 ++
 libatomic/config/linux/aarch64/atomic_16.S   | 55 +++++++++++++++++++-
 libatomic/config/linux/aarch64/host-config.h | 39 ++++++++++++--
 libatomic/configure                          | 41 +++++++++++++++
 libatomic/configure.ac                       |  1 +
 6 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4
index 6d2e0b1c355..628275b9945 100644
--- a/libatomic/acinclude.m4
+++ b/libatomic/acinclude.m4
@@ -101,6 +101,24 @@ AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LSE128],[
 	[Have LSE128 support for 16 byte integers.])
 ])
 
+dnl
+dnl Test if the host assembler supports armv8.2-a RCPC3 insns.
+dnl
+AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LRCPC3],[
+  AC_CACHE_CHECK([for armv8.2-a LRCPC3 insn support],
+    [libat_cv_have_feat_lrcpc3],[
+    AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv8.2-a+rcpc3")])])
+    if AC_TRY_EVAL(ac_link); then
+      eval libat_cv_have_feat_lrcpc3=yes
+    else
+      eval libat_cv_have_feat_lrcpc3=no
+    fi
+    rm -f conftest*
+  ])
+  LIBAT_DEFINE_YESNO([HAVE_FEAT_LRCPC3], [$libat_cv_have_feat_lrcpc3],
+	[Have LRCPC3 support for 16 byte integers.])
+])
+
 dnl
 dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2
 dnl
diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in
index 7c78933b07d..a925686effa 100644
--- a/libatomic/auto-config.h.in
+++ b/libatomic/auto-config.h.in
@@ -108,6 +108,9 @@
 /* Have LSE128 support for 16 byte integers. */
 #undef HAVE_FEAT_LSE128
 
+/* Have LRCPC3 support for 16 byte integers. */
+#undef HAVE_FEAT_LRCPC3
+
 /* Define to 1 if you have the <fenv.h> header file. */
 #undef HAVE_FENV_H
 
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 27363f82b75..47ceb7301c9 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -42,7 +42,13 @@
 
 #if HAVE_IFUNC
 # if HAVE_FEAT_LSE128
+#  if HAVE_FEAT_LRCPC3
+	.arch	armv9-a+lse128+rcpc3
+#  else
 	.arch	armv9-a+lse128
+#  endif
+# elif HAVE_FEAT_LRCPC3
+	.arch	armv8-a+lse+rcpc3
 # else
 	.arch	armv8-a+lse
 # endif
@@ -50,9 +56,20 @@
 	.arch	armv8-a+lse
 #endif
 
+/* There is overlap in some atomic instructions being implemented in both RCPC3
+   and LSE2 extensions, so both _i1 and _i2 suffixes are needed in such
+   situations.  Otherwise, all extension-specific implementations are mapped
+   to _i1.  */
+
+#if HAVE_FEAT_LRCPC3
+# define LRCPC3(NAME)	libat_##NAME##_i1
+# define LSE2(NAME)	libat_##NAME##_i2
+#else
+# define LSE2(NAME)	libat_##NAME##_i1
+#endif
+
 #define LSE128(NAME)	libat_##NAME##_i1
 #define LSE(NAME)	libat_##NAME##_i1
-#define LSE2(NAME)	libat_##NAME##_i1
 #define CORE(NAME)	libat_##NAME
 #define ATOMIC(NAME)	__atomic_##NAME
 
@@ -722,6 +739,42 @@ ENTRY_FEAT (and_fetch_16, LSE128)
 	ret
 END_FEAT (and_fetch_16, LSE128)
 #endif /* HAVE_FEAT_LSE128 */
+
+
+#if HAVE_FEAT_LRCPC3
+ENTRY_FEAT (load_16, LRCPC3)
+	cbnz	w1, 1f
+
+	/* RELAXED.  */
+	ldp	res0, res1, [x0]
+	ret
+1:
+	cmp	w1, SEQ_CST
+	b.eq	2f
+
+	/* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
+	ldiapp	res0, res1, [x0]
+	ret
+
+	/* SEQ_CST.  */
+2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr.  */
+	ldiapp	res0, res1, [x0]
+	ret
+END_FEAT (load_16, LRCPC3)
+
+
+ENTRY_FEAT (store_16, LRCPC3)
+	cbnz	w4, 1f
+
+	/* RELAXED.  */
+	stp	in0, in1, [x0]
+	ret
+
+	/* RELEASE/SEQ_CST.  */
+1:	stilp	in0, in1, [x0]
+	ret
+END_FEAT (store_16, LRCPC3)
+#endif /* HAVE_FEAT_LRCPC3 */
 #endif /* HAVE_IFUNC */
 
 
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index 6e010594a6c..dce472d26d1 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -33,6 +33,9 @@
 #ifndef HWCAP_USCAT
 # define HWCAP_USCAT	(1 << 25)
 #endif
+#ifndef HWCAP2_LRCPC3
+# define HWCAP2_LRCPC3	(1UL << 46)
+#endif
 #ifndef HWCAP2_LSE128
 # define HWCAP2_LSE128	(1UL << 47)
 #endif
@@ -54,7 +57,7 @@ typedef struct __ifunc_arg_t {
 #if defined (LAT_CAS_N)
 # define LSE_ATOP
 #elif defined (LAT_LOAD_N) || defined (LAT_STORE_N)
-# define LSE2_ATOP
+# define LSE2_LRCPC3_ATOP
 #elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N)
 # define LSE128_ATOP
 #endif
@@ -63,9 +66,15 @@ typedef struct __ifunc_arg_t {
 #  if defined (LSE_ATOP)
 #   define IFUNC_NCOND(N)	1
 #   define IFUNC_COND_1	(hwcap & HWCAP_ATOMICS)
-#  elif defined (LSE2_ATOP)
-#   define IFUNC_NCOND(N)	1
-#   define IFUNC_COND_1	(has_lse2 (hwcap, features))
+#  elif defined (LSE2_LRCPC3_ATOP)
+#   if HAVE_FEAT_LRCPC3
+#    define IFUNC_NCOND(N)	2
+#    define IFUNC_COND_1	(has_rcpc3 (hwcap, features))
+#    define IFUNC_COND_2	(has_lse2  (hwcap, features))
+#   else
+#    define IFUNC_NCOND(N)	1
+#    define IFUNC_COND_1	(has_lse2 (hwcap, features))
+#   endif
 #  elif  HAVE_FEAT_LSE128 && defined (LSE128_ATOP)
 #   define IFUNC_NCOND(N)	1
 #   define IFUNC_COND_1	(has_lse128 (hwcap, features))
@@ -131,6 +140,28 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
   return false;
 }
 
+/* LRCPC3 support is encoded in ID_AA64ISAR1_EL1.LRCPC, bits[23:20].  A value
+   of 0b0011 or above is expected.  Check that.  */
+
+static inline bool
+has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
+{
+  if (hwcap & _IFUNC_ARG_HWCAP
+      && features->_hwcap2 & HWCAP2_LRCPC3)
+    return true;
+  /* Try fallback feature check method to guarantee LRCPC3 is not implemented.
+
+     In the absence of HWCAP_CPUID, we are unable to check for RCPC3, so return
+     false.  Otherwise, check the LSE2 prerequisite before proceeding.  */
+  if (!(hwcap & HWCAP_CPUID)  || !(hwcap & HWCAP_USCAT))
+    return false;
+  unsigned long isar1;
+  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
+  if (AT_FEAT_FIELD (isar1) >= 3)
+    return true;
+  return false;
+}
+
 #endif /* HAVE_IFUNC */
 
 /* All 128-bit atomic functions are defined in aarch64/atomic_16.S.  */
diff --git a/libatomic/configure b/libatomic/configure
index 32cb3ecac26..ce3a1d9ce67 100755
--- a/libatomic/configure
+++ b/libatomic/configure
@@ -14740,6 +14740,47 @@ _ACEOF
 
 
 
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv8.2-a LRCPC3 insn support" >&5
+$as_echo_n "checking for armv8.2-a LRCPC3 insn support... " >&6; }
+if ${libat_cv_have_feat_lrcpc3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+asm(".arch armv8.2-a+rcpc3")
+  ;
+  return 0;
+}
+_ACEOF
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+      eval libat_cv_have_feat_lrcpc3=yes
+    else
+      eval libat_cv_have_feat_lrcpc3=no
+    fi
+    rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lrcpc3" >&5
+$as_echo "$libat_cv_have_feat_lrcpc3" >&6; }
+
+  yesno=`echo $libat_cv_have_feat_lrcpc3 | tr 'yesno' '1  0 '`
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_FEAT_LRCPC3 $yesno
+_ACEOF
+
+
+
+
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
 $as_echo_n "checking whether byte ordering is bigendian... " >&6; }
 if ${ac_cv_c_bigendian+:} false; then :
diff --git a/libatomic/configure.ac b/libatomic/configure.ac
index 85824fa7614..8fd20e183a6 100644
--- a/libatomic/configure.ac
+++ b/libatomic/configure.ac
@@ -208,6 +208,7 @@ LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_OP])
 
 # Check for target-specific assembly-level support for atomic operations.
 LIBAT_TEST_FEAT_AARCH64_LSE128()
+LIBAT_TEST_FEAT_AARCH64_LRCPC3()
 
 AC_C_BIGENDIAN
 # I don't like the default behaviour of WORDS_BIGENDIAN undefined for LE.
-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-05-16 13:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-13 11:47 [PATCH] libatomic: Add rcpc3 128-bit atomic operations for AArch64 Victor Do Nascimento
2023-12-08 14:59 ` Szabolcs Nagy
2024-05-16 13:51 Victor Do Nascimento

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).