public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-4139] aarch64: Fix up LDAPR codegen
@ 2022-11-18  9:08 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2022-11-18  9:08 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:33de7b37463fd3b846f76e86ed55aaa46870f92e

commit r13-4139-g33de7b37463fd3b846f76e86ed55aaa46870f92e
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Fri Nov 18 09:06:38 2022 +0000

    aarch64: Fix up LDAPR codegen
    
    Upon some further inspection I realised I had misunderstood some intricacies of the extending loads of the RCPC feature.
    This patch fixes up the recent GCC support accordingly. In particular:
    * The sign-extending forms are a form of LDAPURS* and are actually part of FEAT_RCPC2
    that is enabled with Armv8.4-a rather than the base Armv8.3-a FEAT_RCPC.
    The patch introduces a TARGET_RCPC2 macro and gates this combine pattern accordingly.
    * The assembly output for the zero-extending LDAPR instruction should always use %w formatting for its destination register.
    
    The testcase is split into zero-extending and sign-extending parts since they require different architecture pragmas.
    It's also straightforward to add the rest of the FEAT_RCPC2 codegen
    (with immediate offset addressing modes) but that can be done as a separate patch.
    Apologies for not catching this sooner, but it hasn't been in trunk long, so no harm done.
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.h (TARGET_RCPC2): Define.
            * config/aarch64/atomics.md (*aarch64_atomic_load<ALLX:mode>_rcpc_zext):
            Adjust output template.
            (*aarch64_atomic_load<ALLX:mode>_rcpc_sext): Guard on TARGET_RCPC2.
            Adjust output template.
            * config/aarch64/iterators.md (w_sx): New mode attr.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/ldapr-ext.c: Rename to...
            * gcc.target/aarch64/ldapr-zext.c: ... This.  Fix expected assembly.
            * gcc.target/aarch64/ldapr-sext.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.h                       |  4 ++
 gcc/config/aarch64/atomics.md                      |  6 +-
 gcc/config/aarch64/iterators.md                    |  4 ++
 gcc/testsuite/gcc.target/aarch64/ldapr-sext.c      | 67 ++++++++++++++++++++++
 .../aarch64/{ldapr-ext.c => ldapr-zext.c}          | 37 ++----------
 5 files changed, 83 insertions(+), 35 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 070466d9129..dcb1ecef71a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -336,6 +336,10 @@ enum class aarch64_feature : unsigned char {
 /* RCPC loads from Armv8.3-a.  */
 #define TARGET_RCPC (AARCH64_ISA_RCPC)
 
+/* The RCPC2 extensions from Armv8.4-a that allow immediate offsets to LDAPR
+   and sign-extending versions.  */
+#define TARGET_RCPC2 (AARCH64_ISA_RCPC8_4)
+
 /* Apply the workaround for Cortex-A53 erratum 835769.  */
 #define TARGET_FIX_ERR_A53_835769	\
   ((aarch64_fix_a53_err835769 == 2)	\
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 1805012c9e9..b6eac4e31fb 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -712,7 +712,7 @@
          (match_operand:SI 2 "const_int_operand")]			;; model
        UNSPECV_LDAP)))]
   "TARGET_RCPC && (<GPI:sizen> > <ALLX:sizen>)"
-  "ldapr<ALLX:atomic_sfx>\t%<GPI:w>0, %1"
+  "ldapr<ALLX:atomic_sfx>\t%w0, %1"
 )
 
 (define_insn "*aarch64_atomic_load<ALLX:mode>_rcpc_sext"
@@ -722,8 +722,8 @@
         [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q")
          (match_operand:SI 2 "const_int_operand")]			;; model
        UNSPECV_LDAP)))]
-  "TARGET_RCPC && (<GPI:sizen> > <ALLX:sizen>)"
-  "ldaprs<ALLX:atomic_sfx>\t%<GPI:w>0, %1"
+  "TARGET_RCPC2 && (<GPI:sizen> > <ALLX:sizen>)"
+  "ldapurs<ALLX:size>\t%<ALLX:w_sx>0, %1"
 )
 
 (define_insn "atomic_store<mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7c7fcbbc24b..a3e40758e7b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1012,6 +1012,10 @@
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; Similar to w above, but used for sign-extending loads where we want to
+;; use %x0 for SImode.
+(define_mode_attr w_sx [(QI "w") (HI "w") (SI "x")])
+
 ;; The size of access, in bytes.
 (define_mode_attr ldst_sz [(SI "4") (DI "8")])
 ;; Likewise for load/store pair.
diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr-sext.c b/gcc/testsuite/gcc.target/aarch64/ldapr-sext.c
new file mode 100644
index 00000000000..292c55d9e6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldapr-sext.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+#include <stdatomic.h>
+
+#pragma GCC target "arch=armv8.4-a"
+
+atomic_ullong u64;
+atomic_llong s64;
+atomic_uint u32;
+atomic_int s32;
+atomic_ushort u16;
+atomic_short s16;
+atomic_uchar u8;
+atomic_schar s8;
+
+#define TEST(name, ldsize, rettype)				\
+rettype								\
+test_##name (void)						\
+{								\
+  return atomic_load_explicit (&ldsize, memory_order_acquire);	\
+}
+
+/*
+**test_s8_s64:
+**...
+**	ldapursb	w0, \[x[0-9]+\]
+**	ret
+*/
+
+TEST(s8_s64, s8, long long)
+
+/*
+**test_s16_s64:
+**...
+**	ldapursh	w0, \[x[0-9]+\]
+**	ret
+*/
+
+TEST(s16_s64, s16, long long)
+
+/*
+**test_s32_s64:
+**...
+**	ldapursw	x0, \[x[0-9]+\]
+**	ret
+*/
+
+TEST(s32_s64, s32, long long)
+
+/*
+**test_s8_s32:
+**...
+**	ldapursb	w0, \[x[0-9]+\]
+**	ret
+*/
+
+TEST(s8_s32, s8, int)
+
+/*
+**test_s16_s32:
+**...
+**	ldapursh	w0, \[x[0-9]+\]
+**	ret
+*/
+
+TEST(s16_s32, s16, int)
diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr-ext.c b/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c
similarity index 67%
rename from gcc/testsuite/gcc.target/aarch64/ldapr-ext.c
rename to gcc/testsuite/gcc.target/aarch64/ldapr-zext.c
index aed27e06235..6f448eee1d9 100644
--- a/gcc/testsuite/gcc.target/aarch64/ldapr-ext.c
+++ b/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c
@@ -24,38 +24,29 @@ test_##name (void)						\
 /*
 **test_u8_u64:
 **...
-**	ldaprb	x0, \[x[0-9]+\]
+**	ldaprb	w0, \[x[0-9]+\]
 **	ret
 */
 
 TEST(u8_u64, u8, unsigned long long)
 
-/*
-**test_s8_s64:
-**...
-**	ldaprsb	x0, \[x[0-9]+\]
-**	ret
-*/
-
-TEST(s8_s64, s8, long long)
-
 /*
 **test_u16_u64:
 **...
-**	ldaprh	x0, \[x[0-9]+\]
+**	ldaprh	w0, \[x[0-9]+\]
 **	ret
 */
 
 TEST(u16_u64, u16, unsigned long long)
 
 /*
-**test_s16_s64:
+**test_u32_u64:
 **...
-**	ldaprsh	x0, \[x[0-9]+\]
+**	ldapr	w0, \[x[0-9]+\]
 **	ret
 */
 
-TEST(s16_s64, s16, long long)
+TEST(u32_u64, u32, unsigned long long)
 
 /*
 **test_u8_u32:
@@ -66,29 +57,11 @@ TEST(s16_s64, s16, long long)
 
 TEST(u8_u32, u8, unsigned)
 
-/*
-**test_s8_s32:
-**...
-**	ldaprsb	w0, \[x[0-9]+\]
-**	ret
-*/
-
-TEST(s8_s32, s8, int)
-
 /*
 **test_u16_u32:
 **...
 **	ldaprh	w0, \[x[0-9]+\]
 **	ret
 */
-
 TEST(u16_u32, u16, unsigned)
 
-/*
-**test_s16_s32:
-**...
-**	ldaprsh	w0, \[x[0-9]+\]
-**	ret
-*/
-
-TEST(s16_s32, s16, int)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-11-18  9:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-18  9:08 [gcc r13-4139] aarch64: Fix up LDAPR codegen Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).