public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
@ 2021-11-11 22:39 Przemyslaw Wirkus
  2021-11-15 13:43 ` Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: Przemyslaw Wirkus @ 2021-11-11 22:39 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Sandiford, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

[-- Attachment #1: Type: text/plain, Size: 4012 bytes --]

Hi,

This patch is adding support for LS64 (Armv8.7-A Load/Store 64 Byte extension)
which is part of Armv8.7-A architecture. Changes include missing plumbing for
TARGET_LS64, LS64 data structure and intrinsics defined in ACLE [0]. Machine
description of intrinsics is using new V8DI mode added in a separate patch.
__ARM_FEATURE_LS64 is defined if the Armv8.7-A LS64 instructions for atomic
64-byte access to device memory are supported.

New compiler internal type is added wrapping ACLE struct data512_t [0]:

typedef struct {
  uint64_t val[8];
} __arm_data512_t;

Please note that command line support for this feature was already added [1].

  [0] https://github.com/ARM-software/acle/blob/main/main/acle.rst#load-store-64-byte-intrinsics
  [1] commit e159c0aa10e50c292a534535c73f38d22b6129a8 (AArch64: Add command-line
      support for Armv8.7-a)

For below C code see example snippets of generated code:

#include <arm_acle.h>

void
func(const void * addr, data512_t *data) {
  *data = __arm_ld64b (addr);
}

func:
	ld64b	x8, [x0]
	stp	x8, x9, [x1]
	sub	sp, sp, #64
	stp	x10, x11, [x1, 16]
	stp	x12, x13, [x1, 32]
	stp	x14, x15, [x1, 48]
	add	sp, sp, 64
	ret
~~~

#include <arm_acle.h>

uint64_t
func(void *addr, data512_t value) {
    return  __arm_st64bv (addr, value);
}

func:
	ldp	x8, x9, [x1]
	ldp	x10, x11, [x1, 16]
	ldp	x12, x13, [x1, 32]
	ldp	x14, x15, [x1, 48]
	st64bv	x1, x8, [x0]
	mov	x0, x1
	ret

~~~

uint64_t
ls64_store_v0(const data512_t *input, void *addr)
{
    uint64_t status;
    __asm__ volatile ("st64bv0 %0, %2, [%1]"
                      : "=r" (status), "=r" (addr)
                      : "r" (*input)
                      : "memory");
    return status;
}

ls64_store_v0:
	ldp	x8, x9, [x0]
	ldp	x10, x11, [x0, 16]
	ldp	x12, x13, [x0, 32]
	ldp	x14, x15, [x0, 48]
	st64bv0 x0, x8, [x1]
	ret

Regtested on aarch64-elf cross and no issues.

OK for master?

gcc/ChangeLog:

2021-11-11  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>

	* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
	Define AARCH64_LS64_BUILTIN_LD64B, AARCH64_LS64_BUILTIN_ST64B,
	AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
	(aarch64_init_ls64_builtin_decl): Helper function.
	(aarch64_init_ls64_builtins): Helper function.
	(aarch64_init_ls64_builtins_types): Helper function.
	(aarch64_general_init_builtins): Init LS64 intrisics for
	TARGET_LS64.
	(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
	(aarch64_general_expand_builtin): Handle aarch64_expand_builtin_ls64.
	(ls64_builtins_data): New helper struct.
	(v8di_UP): New define.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
	__ARM_FEATURE_LS64.
	* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
	(AARCH64_ISA_V8_7): New define.
	(TARGET_LS64): New define.
	* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
	UNSPEC_ST64BV and UNSPEC_ST64BV0.
	(ld64b): New define_insn.
	(st64b): New define_insn.
	(st64bv): New define_insn.
	(st64bv0): New define_insn.
	* config/aarch64/arm_acle.h (target):
	(data512_t): New type derived from __arm_data512_t.
	(__arm_data512_t): New internal type.
	(__arm_ld64b): New intrinsic.
	(__arm_st64b): New intrinsic.
	(__arm_st64bv): New intrinsic.
	(__arm_st64bv0): New intrinsic.
	* config/arm/types.md: Add new type ls64.

gcc/testsuite/ChangeLog:

2021-11-11  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>

	* gcc.target/aarch64/acle/ls64_asm.c: New test.
	* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
	* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
	* gcc.target/aarch64/acle/ls64_st64b.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
	* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
	for __ARM_FEATURE_LS64.

[-- Attachment #2: rb14982.patch --]
[-- Type: application/octet-stream, Size: 31392 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 5053bf0f8fd6638bf84a6df06c0987a0216b69e7..d4a82eec3b26bfd1cb976d0870d60ee7d10b689a 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -49,6 +49,7 @@
 #include "gimple-fold.h"
 
 #define v8qi_UP  E_V8QImode
+#define v8di_UP  E_V8DImode
 #define v4hi_UP  E_V4HImode
 #define v4hf_UP  E_V4HFmode
 #define v2si_UP  E_V2SImode
@@ -615,6 +616,11 @@ enum aarch64_builtins
   AARCH64_MEMTAG_BUILTIN_SET_TAG,
   AARCH64_MEMTAG_BUILTIN_GET_TAG,
   AARCH64_MEMTAG_BUILTIN_END,
+  /* LS64 builtins.  */
+  AARCH64_LS64_BUILTIN_LD64B,
+  AARCH64_LS64_BUILTIN_ST64B,
+  AARCH64_LS64_BUILTIN_ST64BV,
+  AARCH64_LS64_BUILTIN_ST64BV0,
   AARCH64_BUILTIN_MAX
 };
 
@@ -1579,6 +1585,71 @@ aarch64_init_memtag_builtins (void)
 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
 }
 
+/* Add builtins for Load/store 64 Byte instructions.  */
+
+typedef struct
+{
+  const char *name;
+  unsigned int code;
+  tree type;
+} ls64_builtins_data;
+
+static void
+aarch64_init_ls64_builtin_decl (const char *name, tree type, unsigned int code)
+{
+  aarch64_builtin_decls[code] = aarch64_general_add_builtin (name, type, code);
+}
+
+static GTY(()) tree ls64_arm_data_t = NULL_TREE;
+
+static void
+aarch64_init_ls64_builtins_types (void)
+{
+  /* Synthesize the name of the user-visible vector tuple type.  */
+  char *tuple_type_name = "__arm_data512_t";
+  tree node_type = get_typenode_from_name (UINT64_TYPE);
+  tree array_type = build_array_type_nelts (node_type, 8);
+  SET_TYPE_MODE (array_type, V8DImode);
+  unsigned int alignment = known_eq (GET_MODE_SIZE (E_V8DImode), 64) ? 64 : 0;
+
+  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
+  gcc_assert (TYPE_ALIGN (array_type) == alignment);
+
+  tree field = build_decl (input_location, FIELD_DECL,
+                           get_identifier ("val"), array_type);
+
+  ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
+                         tuple_type_name,
+                         make_array_slice (&field, 1));
+
+  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
+  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == alignment);
+}
+
+static void
+aarch64_init_ls64_builtins (void)
+{
+  ls64_builtins_data data[4] = {
+    {"__builtin_aarch64_ld64b", AARCH64_LS64_BUILTIN_LD64B,
+     build_function_type_list (aarch64_simd_intXI_type_node,
+                               const_ptr_type_node, NULL_TREE)},
+    {"__builtin_aarch64_st64b", AARCH64_LS64_BUILTIN_ST64B,
+     build_function_type_list (void_type_node, ptr_type_node,
+                               aarch64_simd_intXI_type_node, NULL_TREE)},
+    {"__builtin_aarch64_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
+     build_function_type_list (uint64_type_node, ptr_type_node,
+                               aarch64_simd_intXI_type_node, NULL_TREE)},
+    {"__builtin_aarch64_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
+     build_function_type_list (uint64_type_node, ptr_type_node,
+                               aarch64_simd_intXI_type_node, NULL_TREE)},
+  };
+
+  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
+    aarch64_init_ls64_builtin_decl (data[i].name, data[i].type, data[i].code);
+
+  aarch64_init_ls64_builtins_types ();
+}
+
 /* Initialize fpsr fpcr getters and setters.  */
 
 static void
@@ -1668,6 +1739,9 @@ aarch64_general_init_builtins (void)
 
   if (TARGET_MEMTAG)
     aarch64_init_memtag_builtins ();
+
+  if (TARGET_LS64)
+    aarch64_init_ls64_builtins ();
 }
 
 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
@@ -2138,6 +2212,60 @@ aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
     return target;
 }
 
+/* Function to expand an expression EXP which calls one of the Load/Store
+   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.
+   If IGNORE is true the return value is ignored.  */
+static rtx
+aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target, int ignore)
+{
+  switch (fcode)
+    {
+    case AARCH64_LS64_BUILTIN_LD64B:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        op0 = force_reg (Pmode, op0);
+        /* Do not emit instruction when return value is ignored.  */
+        if (ignore)
+          return target;
+        target = convert_to_mode (V8DImode, target, true);
+        emit_insn (GEN_FCN (CODE_FOR_ld64b) (target, op0));
+        break;
+      }
+    case AARCH64_LS64_BUILTIN_ST64B:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        op0 = force_reg (Pmode, op0);
+        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+        op1 = convert_to_mode (V8DImode, op1, true);
+        emit_insn (GEN_FCN (CODE_FOR_st64b) (op0, op1));
+        break;
+      }
+    case AARCH64_LS64_BUILTIN_ST64BV:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        op0 = force_reg (Pmode, op0);
+        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+        op1 = convert_to_mode (V8DImode, op1, true);
+        target = ignore ? gen_reg_rtx(DImode) : force_reg (DImode, target);
+        emit_insn (GEN_FCN (CODE_FOR_st64bv) (target, op0, op1));
+        break;
+      }
+    case AARCH64_LS64_BUILTIN_ST64BV0:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        op0 = force_reg (Pmode, op0);
+        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+        op1 = convert_to_mode (V8DImode, op1, true);
+        target = ignore ? gen_reg_rtx(DImode) : force_reg (DImode, target);
+        emit_insn (GEN_FCN (CODE_FOR_st64bv0) (target, op0, op1));
+        break;
+      }
+    default :
+      gcc_unreachable ();
+    }
+    return target;
+}
+
 /* Expand a random number builtin EXP with code FCODE, putting the result
    int TARGET.  If IGNORE is true the return value is ignored.  */
 
@@ -2396,6 +2524,12 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
       || fcode == AARCH64_TME_BUILTIN_TCANCEL)
     return aarch64_expand_builtin_tme (fcode, exp, target);
 
+  if (fcode == AARCH64_LS64_BUILTIN_LD64B
+      || fcode == AARCH64_LS64_BUILTIN_ST64B
+      || fcode == AARCH64_LS64_BUILTIN_ST64BV
+      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
+    return aarch64_expand_builtin_ls64 (fcode, exp, target, ignore);
+
   if (fcode >= AARCH64_MEMTAG_BUILTIN_START
       && fcode <= AARCH64_MEMTAG_BUILTIN_END)
     return aarch64_expand_builtin_memtag (fcode, exp, target);
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index d6653e474dec9bcddde2106f36ceb22f1d43375c..3af3e5c96daf674648dbc008b15ade0e303b66f8 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -200,6 +200,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 			"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
   aarch64_def_or_undef (TARGET_BF16_FP,
 			"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
+  aarch64_def_or_undef (TARGET_LS64,
+			"__ARM_FEATURE_LS64", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
      target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2792bb29adbbb5b3145b3f767615af8edbc30b08..426ad5ac77376f561c92d3e35b627939eb481773 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -310,6 +310,8 @@ extern unsigned aarch64_architecture_version;
 #define AARCH64_ISA_V8_R	   (aarch64_isa_flags & AARCH64_FL_V8_R)
 #define AARCH64_ISA_PAUTH	   (aarch64_isa_flags & AARCH64_FL_PAUTH)
 #define AARCH64_ISA_V9		   (aarch64_isa_flags & AARCH64_FL_V9)
+#define AARCH64_ISA_LS64	   (aarch64_isa_flags & AARCH64_FL_LS64)
+#define AARCH64_ISA_V8_7	   (aarch64_isa_flags & AARCH64_FL_V8_7)
 
 /* Crypto is an optional extension to AdvSIMD.  */
 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
@@ -401,6 +403,9 @@ extern unsigned aarch64_architecture_version;
 /* PAUTH instructions are enabled through +pauth.  */
 #define TARGET_PAUTH (AARCH64_ISA_PAUTH)
 
+/* LS64 instructions are enabled through +ls64.  */
+#define TARGET_LS64 (AARCH64_ISA_LS64)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4035e061706793849c68ae09bcb2e4b9580ab7b6..bf4a23c8682767ae706ba3879938aed08f394cc2 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -187,6 +187,10 @@ (define_c_enum "unspec" [
     UNSPEC_LD2_LANE
     UNSPEC_LD3_LANE
     UNSPEC_LD4_LANE
+    UNSPEC_LD64B
+    UNSPEC_ST64B
+    UNSPEC_ST64BV
+    UNSPEC_ST64BV0
     UNSPEC_MB
     UNSPEC_NOP
     UNSPEC_PACIA1716
@@ -7499,6 +7503,45 @@ (define_insn "stg"
   [(set_attr "type" "memtag")]
 )
 
+;; Load/Store 64-bit (LS64) instructions.
+(define_insn "ld64b"
+  [(set (match_operand:V8DI 0 "register_operand" "=r")
+        (unspec:V8DI [(mem:V8DI (match_operand:DI 1 "register_operand" "r"))] UNSPEC_LD64B)
+  )]
+  "TARGET_LS64"
+  "ld64b\\t%0, [%1]"
+  [(set_attr "type" "ls64")]
+)
+
+(define_insn "st64b"
+  [(set (mem:V8DI (match_operand:DI 0 "register_operand" "=r"))
+        (unspec:V8DI [(match_operand:V8DI 1 "register_operand" "r")] UNSPEC_ST64B)
+  )]
+  "TARGET_LS64"
+  "st64b\\t%1, [%0]"
+  [(set_attr "type" "ls64")]
+)
+
+(define_insn "st64bv"
+  [(clobber (match_operand:DI 0 "register_operand" "=r"))
+   (set (mem:V8DI (match_operand:DI 1 "register_operand" "=r"))
+        (unspec:V8DI [(match_operand:V8DI 2 "register_operand" "r")] UNSPEC_ST64BV)
+  )]
+  "TARGET_LS64"
+  "st64bv\\t%0, %2, [%1]"
+  [(set_attr "type" "ls64")]
+)
+
+(define_insn "st64bv0"
+  [(clobber (match_operand:DI 0 "register_operand" "=r"))
+   (set (mem:V8DI (match_operand:DI 1 "register_operand" "=r"))
+        (unspec:V8DI [(match_operand:V8DI 2 "register_operand" "r")] UNSPEC_ST64BV0)
+  )]
+  "TARGET_LS64"
+  "st64bv0\\t%0, %2, [%1]"
+  [(set_attr "type" "ls64")]
+)
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 13f23632474b260122f30a3c566033664b0b5963..26d886949a34f77a65f55fbf3b4cc01884bfd883 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -214,6 +214,57 @@ __ttest (void)
 #pragma GCC pop_options
 #endif
 
+#ifdef __ARM_FEATURE_LS64
+#pragma GCC push_options
+#pragma GCC target ("+nothing+ls64")
+
+typedef __arm_data512_t data512_t;
+
+__extension__ extern __inline data512_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_ld64b (const void *addr)
+{
+  __builtin_aarch64_simd_xi __o;
+  data512_t __temp = { };
+  __o = __builtin_aarch64_ld64b (addr);
+  __builtin_memcpy (&__temp, &__o, sizeof (__o));
+  return __temp;
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64b (void *addr, data512_t value)
+{
+  __builtin_aarch64_simd_xi __o;
+  __builtin_memcpy (&__o, &value, sizeof (__o));
+  __builtin_aarch64_st64b (addr, __o);
+}
+
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64bv (void *addr, data512_t value)
+{
+  uint64_t __ret;
+  __builtin_aarch64_simd_xi __o;
+  __builtin_memcpy (&__o, &value, sizeof (__o));
+  __ret = __builtin_aarch64_st64bv (addr, __o);
+  return __ret;
+}
+
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64bv0 (void *addr, data512_t value)
+{
+  uint64_t __ret;
+  __builtin_aarch64_simd_xi __o;
+  __builtin_memcpy (&__o, &value, sizeof (__o));
+  __ret = __builtin_aarch64_st64bv0 (addr, __o);
+  return __ret;
+}
+
+#pragma GCC pop_options
+#endif
+
 #pragma GCC push_options
 #pragma GCC target ("+nothing+rng")
 __extension__ extern __inline int
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index b9514dafb86a280bee3d3f84845e0743cd18a34d..6dce71fd27e5dfbd08746509bc6fdeeade69a4a4 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -1122,6 +1122,7 @@ (define_attr "type"
   coproc,\
   tme,\
   memtag,\
+  ls64,\
   mve_move,\
   mve_store,\
   mve_load"
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
new file mode 100644
index 0000000000000000000000000000000000000000..60738a5b8d6b35837a7b286d16416a0eb289e34e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
@@ -0,0 +1,130 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+/* Inline assembly for LS64 instructions.  */
+
+#include <arm_acle.h>
+
+void
+ls64_load(data512_t *output, const void *addr)
+{
+    __asm__ volatile ("ld64b %0, [%1]"
+                      : "=r" (*output)
+                      : "r" (addr)
+                      : "memory");
+}
+
+/* { dg-final { scan-assembler-times {ld64b x[0-9]+, \[x[0-9]+\]} 1 } } */
+
+/* LD64B <Xt>, [<Xn>] - make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {ld64b x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {ld64b x9, \[x9\]} } } */
+
+void
+ls64_store(const data512_t *input, void *addr)
+{
+    __asm__ volatile ("st64b %1, [%0]"
+                      : /* No outputs.  */
+                      : "r" (addr), "r" (*input)
+                      : "memory");
+}
+
+/* { dg-final { scan-assembler-times {st64b x[0-9]+, \[x[0-9]+\]} 1 } } */
+
+/* ST64B <Xt>, [<Xn>] - make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64b x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64b x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64b x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64b x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64b x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64b x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64b x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64b x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64b x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64b x9, \[x9\]} } } */
+
+uint64_t
+ls64_store_v(const data512_t *input, void *addr)
+{
+    uint64_t status;
+    __asm__ volatile ("st64bv %0, %2, [%1]"
+                      : "=r" (status), "=r" (addr)
+                      : "r" (*input)
+                      : "memory");
+    return status;
+}
+
+/* { dg-final { scan-assembler-times {st64bv x[0-9]+, x[0-9]+, \[x[0-9]+\]} 1 } } */
+
+/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
+/* { dg-final { scan-assembler-not {st64bv x0, x0,} } } */
+/* { dg-final { scan-assembler-not {st64bv x1, x1,} } } */
+/* { dg-final { scan-assembler-not {st64bv x2, x2,} } } */
+/* { dg-final { scan-assembler-not {st64bv x3, x3,} } } */
+/* { dg-final { scan-assembler-not {st64bv x4, x4,} } } */
+/* { dg-final { scan-assembler-not {st64bv x5, x5,} } } */
+/* { dg-final { scan-assembler-not {st64bv x6, x6,} } } */
+/* { dg-final { scan-assembler-not {st64bv x7, x7,} } } */
+/* { dg-final { scan-assembler-not {st64bv x8, x8,} } } */
+/* { dg-final { scan-assembler-not {st64bv x9, x9,} } } */
+
+/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x9, \[x9\]} } } */
+
+uint64_t
+ls64_store_v0(const data512_t *input, void *addr)
+{
+    uint64_t status;
+    __asm__ volatile ("st64bv0 %0, %2, [%1]"
+                      : "=r" (status), "=r" (addr)
+                      : "r" (*input)
+                      : "memory");
+    return status;
+}
+
+/* { dg-final { scan-assembler-times {st64bv0 x[0-9]+, x[0-9]+, \[x[0-9]+\]} 1 } } */
+
+/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
+/* { dg-final { scan-assembler-not {st64bv0 x0, x0,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x1, x1,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x2, x2,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x3, x3,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x4, x4,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x5, x5,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x6, x6,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x7, x7,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x8, x8,} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x9, x9,} } } */
+
+/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..1d39618b44367522d8a29b77f2a79e339ceb35b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(const void * addr) {
+    data512_t ret = __arm_ld64b (addr);   /* Should be optimized out.  */
+}
+
+/* { dg-final { scan-assembler-not {ld64b\tx[0-9]+, \[x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
new file mode 100644
index 0000000000000000000000000000000000000000..d8a3f6c51fb5f07137eea5b032d4fdca9b1aa93b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(const void * addr, data512_t *data) {
+  *data = __arm_ld64b (addr);
+}
+
+/* { dg-final { scan-assembler-times {ld64b\tx[0-9]+, \[x[0-9]+\]\n} 1 } } */
+
+/* LD64B <Xt>, [<Xn>] - make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {ld64b\tx0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {ld64b\tx9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
new file mode 100644
index 0000000000000000000000000000000000000000..757ff738ada8b706b68b517f20bec7b1ad05a5b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t value) {
+    __arm_st64b (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64b\tx[0-9]+, \[x[0-9]+\]\n} 1 } } */
+
+/* ST64B <Xt>, [<Xn>] - make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64b\tx0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64b\tx9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..3fb2520223c294dcd80548bad1d0c176a696c37b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t value) {
+    __arm_st64bv (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
+
+/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
+/* { dg-final { scan-assembler-not {st64bv\tx0, x0,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx1, x1,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx2, x2,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx3, x3,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx4, x4,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx5, x5,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx6, x6,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx7, x7,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx8, x8,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx9, x9,} } } */
+
+/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
new file mode 100644
index 0000000000000000000000000000000000000000..7bb40c3a9db643a5b2548e4e7e639224126e03f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+uint64_t
+func(void *addr, data512_t value) {
+    return  __arm_st64bv (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
+
+/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
+/* { dg-final { scan-assembler-not {st64bv\tx0, x0,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx1, x1,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx2, x2,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx3, x3,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx4, x4,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx5, x5,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx6, x6,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx7, x7,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx8, x8,} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx9, x9,} } } */
+
+/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd8fd5dcf41d3b0e9dab6be53cc099014cdcf1e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t value) {
+    __arm_st64bv0 (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv0\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
+
+/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
+/* { dg-final { scan-assembler-not {st64bv0\tx0, x0,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx1, x1,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx2, x2,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx3, x3,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx4, x4,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx5, x5,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx6, x6,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx7, x7,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx8, x8,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx9, x9,} } } */
+
+/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a8e9bf53ad0f03a382a950783949989182adcf9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+uint64_t
+func(void *addr, data512_t value) {
+    return __arm_st64bv0 (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv0\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
+
+/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
+/* { dg-final { scan-assembler-not {st64bv0\tx0, x0,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx1, x1,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx2, x2,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx3, x3,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx4, x4,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx5, x5,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx6, x6,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx7, x7,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx8, x8,} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx9, x9,} } } */
+
+/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x0, \[x0\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x1, \[x1\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x2, \[x2\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x3, \[x3\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x4, \[x4\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x5, \[x5\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x6, \[x6\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x7, \[x7\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x8, \[x8\]} } } */
+/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x9, \[x9\]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
index 7244359ccfb9cbcbbd8285b050113c004a6af2a6..2d76bfc23dfdcd78a74ec0e4845a3bd8d110b010 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
@@ -240,6 +240,20 @@
 #endif
 #pragma GCC pop_options
 
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.7-a")
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.7-a+ls64")
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+#pragma GCC pop_options
+
 #pragma GCC pop_options
 
 int

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
  2021-11-11 22:39 [PATCH][GCC] aarch64: Add LS64 extension and intrinsics Przemyslaw Wirkus
@ 2021-11-15 13:43 ` Richard Sandiford
  2021-12-13 13:48   ` Przemyslaw Wirkus
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Sandiford @ 2021-11-15 13:43 UTC (permalink / raw)
  To: Przemyslaw Wirkus
  Cc: gcc-patches, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> Hi,
>
> This patch is adding support for LS64 (Armv8.7-A Load/Store 64 Byte extension)
> which is part of Armv8.7-A architecture. Changes include missing plumbing for
> TARGET_LS64, LS64 data structure and intrinsics defined in ACLE [0]. Machine
> description of intrinsics is using new V8DI mode added in a separate patch.
> __ARM_FEATURE_LS64 is defined if the Armv8.7-A LS64 instructions for atomic
> 64-byte access to device memory are supported.
>
> New compiler internal type is added wrapping ACLE struct data512_t [0]:
>
> typedef struct {
>   uint64_t val[8];
> } __arm_data512_t;
>
> Please note that command line support for this feature was already added [1].
>
>   [0] https://github.com/ARM-software/acle/blob/main/main/acle.rst#load-store-64-byte-intrinsics
>   [1] commit e159c0aa10e50c292a534535c73f38d22b6129a8 (AArch64: Add command-line
>       support for Armv8.7-a)
>
> For below C code see example snippets of generated code:
>
> #include <arm_acle.h>
>
> void
> func(const void * addr, data512_t *data) {
>   *data = __arm_ld64b (addr);
> }
>
> func:
> 	ld64b	x8, [x0]
> 	stp	x8, x9, [x1]
> 	sub	sp, sp, #64
> 	stp	x10, x11, [x1, 16]
> 	stp	x12, x13, [x1, 32]
> 	stp	x14, x15, [x1, 48]
> 	add	sp, sp, 64
> 	ret
> ~~~
>
> #include <arm_acle.h>
>
> uint64_t
> func(void *addr, data512_t value) {
>     return  __arm_st64bv (addr, value);
> }
>
> func:
> 	ldp	x8, x9, [x1]
> 	ldp	x10, x11, [x1, 16]
> 	ldp	x12, x13, [x1, 32]
> 	ldp	x14, x15, [x1, 48]
> 	st64bv	x1, x8, [x0]
> 	mov	x0, x1
> 	ret
>
> ~~~
>
> uint64_t
> ls64_store_v0(const data512_t *input, void *addr)
> {
>     uint64_t status;
>     __asm__ volatile ("st64bv0 %0, %2, [%1]"
>                       : "=r" (status), "=r" (addr)
>                       : "r" (*input)
>                       : "memory");
>     return status;
> }
>
> ls64_store_v0:
> 	ldp	x8, x9, [x0]
> 	ldp	x10, x11, [x0, 16]
> 	ldp	x12, x13, [x0, 32]
> 	ldp	x14, x15, [x0, 48]
> 	st64bv0 x0, x8, [x1]
> 	ret
>
> Regtested on aarch64-elf cross and no issues.
>
> OK for master?
>
> gcc/ChangeLog:
>
> 2021-11-11  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>
>
> 	* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
> 	Define AARCH64_LS64_BUILTIN_LD64B, AARCH64_LS64_BUILTIN_ST64B,
> 	AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
> 	(aarch64_init_ls64_builtin_decl): Helper function.
> 	(aarch64_init_ls64_builtins): Helper function.
> 	(aarch64_init_ls64_builtins_types): Helper function.
> 	(aarch64_general_init_builtins): Init LS64 intrisics for
> 	TARGET_LS64.
> 	(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
> 	(aarch64_general_expand_builtin): Handle aarch64_expand_builtin_ls64.
> 	(ls64_builtins_data): New helper struct.
> 	(v8di_UP): New define.
> 	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
> 	__ARM_FEATURE_LS64.
> 	* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
> 	(AARCH64_ISA_V8_7): New define.
> 	(TARGET_LS64): New define.
> 	* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
> 	UNSPEC_ST64BV and UNSPEC_ST64BV0.
> 	(ld64b): New define_insn.
> 	(st64b): New define_insn.
> 	(st64bv): New define_insn.
> 	(st64bv0): New define_insn.
> 	* config/aarch64/arm_acle.h (target):
> 	(data512_t): New type derived from __arm_data512_t.
> 	(__arm_data512_t): New internal type.
> 	(__arm_ld64b): New intrinsic.
> 	(__arm_st64b): New intrinsic.
> 	(__arm_st64bv): New intrinsic.
> 	(__arm_st64bv0): New intrinsic.
> 	* config/arm/types.md: Add new type ls64.
>
> gcc/testsuite/ChangeLog:
>
> 2021-11-11  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>
>
> 	* gcc.target/aarch64/acle/ls64_asm.c: New test.
> 	* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
> 	* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
> 	* gcc.target/aarch64/acle/ls64_st64b.c: New test.
> 	* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
> 	* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
> 	* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
> 	* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
> 	* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
> 	for __ARM_FEATURE_LS64.
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
> index 5053bf0f8fd6638bf84a6df06c0987a0216b69e7..d4a82eec3b26bfd1cb976d0870d60ee7d10b689a 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -49,6 +49,7 @@
>  #include "gimple-fold.h"
>  
>  #define v8qi_UP  E_V8QImode
> +#define v8di_UP  E_V8DImode
>  #define v4hi_UP  E_V4HImode
>  #define v4hf_UP  E_V4HFmode
>  #define v2si_UP  E_V2SImode
> @@ -615,6 +616,11 @@ enum aarch64_builtins
>    AARCH64_MEMTAG_BUILTIN_SET_TAG,
>    AARCH64_MEMTAG_BUILTIN_GET_TAG,
>    AARCH64_MEMTAG_BUILTIN_END,
> +  /* LS64 builtins.  */
> +  AARCH64_LS64_BUILTIN_LD64B,
> +  AARCH64_LS64_BUILTIN_ST64B,
> +  AARCH64_LS64_BUILTIN_ST64BV,
> +  AARCH64_LS64_BUILTIN_ST64BV0,
>    AARCH64_BUILTIN_MAX
>  };
>  
> @@ -1579,6 +1585,71 @@ aarch64_init_memtag_builtins (void)
>  #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
>  }
>  
> +/* Add builtins for Load/store 64 Byte instructions.  */
> +
> +typedef struct
> +{
> +  const char *name;
> +  unsigned int code;
> +  tree type;
> +} ls64_builtins_data;
> +
> +static void
> +aarch64_init_ls64_builtin_decl (const char *name, tree type, unsigned int code)
> +{
> +  aarch64_builtin_decls[code] = aarch64_general_add_builtin (name, type, code);
> +}
> +
> +static GTY(()) tree ls64_arm_data_t = NULL_TREE;
> +
> +static void
> +aarch64_init_ls64_builtins_types (void)
> +{
> +  /* Synthesize the name of the user-visible vector tuple type.  */
> +  char *tuple_type_name = "__arm_data512_t";
> +  tree node_type = get_typenode_from_name (UINT64_TYPE);
> +  tree array_type = build_array_type_nelts (node_type, 8);
> +  SET_TYPE_MODE (array_type, V8DImode);
> +  unsigned int alignment = known_eq (GET_MODE_SIZE (E_V8DImode), 64) ? 64 : 0;

The alignment should always be 64 bits, so I think we should just hard-code
that in the gcc_asserts below.

> +
> +  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
> +  gcc_assert (TYPE_ALIGN (array_type) == alignment);
> +
> +  tree field = build_decl (input_location, FIELD_DECL,
> +                           get_identifier ("val"), array_type);
> +
> +  ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
> +                         tuple_type_name,
> +                         make_array_slice (&field, 1));
> +
> +  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));

I think the more important test here is that:

  TYPE_MODE (ls64_arm_data_t) == V8DImode

so we should probably test that first, before the line above.

> +  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == alignment);
> +}
> +
> +static void
> +aarch64_init_ls64_builtins (void)
> +{
> +  ls64_builtins_data data[4] = {
> +    {"__builtin_aarch64_ld64b", AARCH64_LS64_BUILTIN_LD64B,
> +     build_function_type_list (aarch64_simd_intXI_type_node,
> +                               const_ptr_type_node, NULL_TREE)},
> +    {"__builtin_aarch64_st64b", AARCH64_LS64_BUILTIN_ST64B,
> +     build_function_type_list (void_type_node, ptr_type_node,
> +                               aarch64_simd_intXI_type_node, NULL_TREE)},
> +    {"__builtin_aarch64_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
> +     build_function_type_list (uint64_type_node, ptr_type_node,
> +                               aarch64_simd_intXI_type_node, NULL_TREE)},
> +    {"__builtin_aarch64_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
> +     build_function_type_list (uint64_type_node, ptr_type_node,
> +                               aarch64_simd_intXI_type_node, NULL_TREE)},

Do these functions need to use aarch64_simd_intXI_type_node?  Now that
we're defining data512_t in the compiler itself, I think we could use
it directly in the function prototypes, which would simplify some of
the header file code (and hopefully improve codegen).

> +  };
> +
> +  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
> +    aarch64_init_ls64_builtin_decl (data[i].name, data[i].type, data[i].code);
> +
> +  aarch64_init_ls64_builtins_types ();
> +}
> +
>  /* Initialize fpsr fpcr getters and setters.  */
>  
>  static void
> @@ -1668,6 +1739,9 @@ aarch64_general_init_builtins (void)
>  
>    if (TARGET_MEMTAG)
>      aarch64_init_memtag_builtins ();
> +
> +  if (TARGET_LS64)
> +    aarch64_init_ls64_builtins ();
>  }
>  
>  /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
> @@ -2138,6 +2212,60 @@ aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
>      return target;
>  }
>  
> +/* Function to expand an expression EXP which calls one of the Load/Store
> +   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.
> +   If IGNORE is true the return value is ignored.  */
> +static rtx
> +aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target, int ignore)
> +{
> +  switch (fcode)
> +    {
> +    case AARCH64_LS64_BUILTIN_LD64B:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        op0 = force_reg (Pmode, op0);
> +        /* Do not emit instruction when return value is ignored.  */
> +        if (ignore)
> +          return target;
> +        target = convert_to_mode (V8DImode, target, true);

If we do use the real structure type in the prototypes then the
conversions in this function won't be needed.

> +        emit_insn (GEN_FCN (CODE_FOR_ld64b) (target, op0));
> +        break;
> +      }
> +    case AARCH64_LS64_BUILTIN_ST64B:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        op0 = force_reg (Pmode, op0);
> +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +        op1 = convert_to_mode (V8DImode, op1, true);
> +        emit_insn (GEN_FCN (CODE_FOR_st64b) (op0, op1));
> +        break;
> +      }
> +    case AARCH64_LS64_BUILTIN_ST64BV:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        op0 = force_reg (Pmode, op0);
> +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +        op1 = convert_to_mode (V8DImode, op1, true);
> +        target = ignore ? gen_reg_rtx(DImode) : force_reg (DImode, target);

force_reg doesn't look right here: target is a destination operand whereas
force_reg works on source operands.

It would probably be better to use the expand_insn interface instead.
See AARCH64_JSCVT for an example.

> +        emit_insn (GEN_FCN (CODE_FOR_st64bv) (target, op0, op1));
> +        break;
> +      }
> +    case AARCH64_LS64_BUILTIN_ST64BV0:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        op0 = force_reg (Pmode, op0);
> +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +        op1 = convert_to_mode (V8DImode, op1, true);
> +        target = ignore ? gen_reg_rtx(DImode) : force_reg (DImode, target);
> +        emit_insn (GEN_FCN (CODE_FOR_st64bv0) (target, op0, op1));
> +        break;
> +      }
> +    default :
> +      gcc_unreachable ();
> +    }
> +    return target;
> +}
> +
>  /* Expand a random number builtin EXP with code FCODE, putting the result
>     int TARGET.  If IGNORE is true the return value is ignored.  */
>  
> @@ -2396,6 +2524,12 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
>        || fcode == AARCH64_TME_BUILTIN_TCANCEL)
>      return aarch64_expand_builtin_tme (fcode, exp, target);
>  
> +  if (fcode == AARCH64_LS64_BUILTIN_LD64B
> +      || fcode == AARCH64_LS64_BUILTIN_ST64B
> +      || fcode == AARCH64_LS64_BUILTIN_ST64BV
> +      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
> +    return aarch64_expand_builtin_ls64 (fcode, exp, target, ignore);
> +
>    if (fcode >= AARCH64_MEMTAG_BUILTIN_START
>        && fcode <= AARCH64_MEMTAG_BUILTIN_END)
>      return aarch64_expand_builtin_memtag (fcode, exp, target);
> diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
> index d6653e474dec9bcddde2106f36ceb22f1d43375c..3af3e5c96daf674648dbc008b15ade0e303b66f8 100644
> --- a/gcc/config/aarch64/aarch64-c.c
> +++ b/gcc/config/aarch64/aarch64-c.c
> @@ -200,6 +200,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>  			"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
>    aarch64_def_or_undef (TARGET_BF16_FP,
>  			"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
> +  aarch64_def_or_undef (TARGET_LS64,
> +			"__ARM_FEATURE_LS64", pfile);
>  
>    /* Not for ACLE, but required to keep "float.h" correct if we switch
>       target between implementations that do or do not support ARMv8.2-A
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2792bb29adbbb5b3145b3f767615af8edbc30b08..426ad5ac77376f561c92d3e35b627939eb481773 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -310,6 +310,8 @@ extern unsigned aarch64_architecture_version;
>  #define AARCH64_ISA_V8_R	   (aarch64_isa_flags & AARCH64_FL_V8_R)
>  #define AARCH64_ISA_PAUTH	   (aarch64_isa_flags & AARCH64_FL_PAUTH)
>  #define AARCH64_ISA_V9		   (aarch64_isa_flags & AARCH64_FL_V9)
> +#define AARCH64_ISA_LS64	   (aarch64_isa_flags & AARCH64_FL_LS64)
> +#define AARCH64_ISA_V8_7	   (aarch64_isa_flags & AARCH64_FL_V8_7)
>  
>  /* Crypto is an optional extension to AdvSIMD.  */
>  #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
> @@ -401,6 +403,9 @@ extern unsigned aarch64_architecture_version;
>  /* PAUTH instructions are enabled through +pauth.  */
>  #define TARGET_PAUTH (AARCH64_ISA_PAUTH)
>  
> +/* LS64 instructions are enabled through +ls64.  */
> +#define TARGET_LS64 (AARCH64_ISA_LS64)
> +
>  /* Make sure this is always defined so we don't have to check for ifdefs
>     but rather use normal ifs.  */
>  #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 4035e061706793849c68ae09bcb2e4b9580ab7b6..bf4a23c8682767ae706ba3879938aed08f394cc2 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -187,6 +187,10 @@ (define_c_enum "unspec" [
>      UNSPEC_LD2_LANE
>      UNSPEC_LD3_LANE
>      UNSPEC_LD4_LANE
> +    UNSPEC_LD64B
> +    UNSPEC_ST64B
> +    UNSPEC_ST64BV
> +    UNSPEC_ST64BV0
>      UNSPEC_MB
>      UNSPEC_NOP
>      UNSPEC_PACIA1716
> @@ -7499,6 +7503,45 @@ (define_insn "stg"
>    [(set_attr "type" "memtag")]
>  )
>  
> +;; Load/Store 64-bit (LS64) instructions.
> +(define_insn "ld64b"
> +  [(set (match_operand:V8DI 0 "register_operand" "=r")
> +        (unspec:V8DI [(mem:V8DI (match_operand:DI 1 "register_operand" "r"))] UNSPEC_LD64B)
> +  )]
> +  "TARGET_LS64"
> +  "ld64b\\t%0, [%1]"
> +  [(set_attr "type" "ls64")]
> +)
> +
> +(define_insn "st64b"
> +  [(set (mem:V8DI (match_operand:DI 0 "register_operand" "=r"))
> +        (unspec:V8DI [(match_operand:V8DI 1 "register_operand" "r")] UNSPEC_ST64B)

The unspec probably needs to be unspec_volatile.  The danger as things
stand is that the second __arm_st64b in:

    __arm_st64b (addr, a);
    …
    __arm_st64b (addr, a);

could be optimised away if the … clearly doesn't involve a store to addr.

> +  )]
> +  "TARGET_LS64"
> +  "st64b\\t%1, [%0]"
> +  [(set_attr "type" "ls64")]
> +)
> +
> +(define_insn "st64bv"
> +  [(clobber (match_operand:DI 0 "register_operand" "=r"))

(clobber …) should only be used for values that don't matter.
Here I think we'll need a (set …) with an (unspec_volatile …) source.
I guess pedantically, it should be a separate unspec number from
UNSPEC_ST64BV, since the value being stored isn't the same as the
value being returned.

> +   (set (mem:V8DI (match_operand:DI 1 "register_operand" "=r"))
> +        (unspec:V8DI [(match_operand:V8DI 2 "register_operand" "r")] UNSPEC_ST64BV)
> +  )]
> +  "TARGET_LS64"
> +  "st64bv\\t%0, %2, [%1]"
> +  [(set_attr "type" "ls64")]
> +)
> +
> +(define_insn "st64bv0"
> +  [(clobber (match_operand:DI 0 "register_operand" "=r"))
> +   (set (mem:V8DI (match_operand:DI 1 "register_operand" "=r"))
> +        (unspec:V8DI [(match_operand:V8DI 2 "register_operand" "r")] UNSPEC_ST64BV0)
> +  )]
> +  "TARGET_LS64"
> +  "st64bv0\\t%0, %2, [%1]"
> +  [(set_attr "type" "ls64")]
> +)
> +
>  ;; AdvSIMD Stuff
>  (include "aarch64-simd.md")
>  
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 13f23632474b260122f30a3c566033664b0b5963..26d886949a34f77a65f55fbf3b4cc01884bfd883 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -214,6 +214,57 @@ __ttest (void)
>  #pragma GCC pop_options
>  #endif
>  
> +#ifdef __ARM_FEATURE_LS64
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+ls64")
> +
> +typedef __arm_data512_t data512_t;
> +
> +__extension__ extern __inline data512_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_ld64b (const void *addr)
> +{
> +  __builtin_aarch64_simd_xi __o;
> +  data512_t __temp = { };
> +  __o = __builtin_aarch64_ld64b (addr);
> +  __builtin_memcpy (&__temp, &__o, sizeof (__o));
> +  return __temp;
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_st64b (void *addr, data512_t value)
> +{
> +  __builtin_aarch64_simd_xi __o;
> +  __builtin_memcpy (&__o, &value, sizeof (__o));
> +  __builtin_aarch64_st64b (addr, __o);
> +}
> +
> +__extension__ extern __inline uint64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_st64bv (void *addr, data512_t value)
> +{
> +  uint64_t __ret;
> +  __builtin_aarch64_simd_xi __o;
> +  __builtin_memcpy (&__o, &value, sizeof (__o));
> +  __ret = __builtin_aarch64_st64bv (addr, __o);
> +  return __ret;
> +}
> +
> +__extension__ extern __inline uint64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_st64bv0 (void *addr, data512_t value)
> +{
> +  uint64_t __ret;
> +  __builtin_aarch64_simd_xi __o;
> +  __builtin_memcpy (&__o, &value, sizeof (__o));
> +  __ret = __builtin_aarch64_st64bv0 (addr, __o);
> +  return __ret;
> +}
> +
> +#pragma GCC pop_options
> +#endif
> +
>  #pragma GCC push_options
>  #pragma GCC target ("+nothing+rng")
>  __extension__ extern __inline int
> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
> index b9514dafb86a280bee3d3f84845e0743cd18a34d..6dce71fd27e5dfbd08746509bc6fdeeade69a4a4 100644
> --- a/gcc/config/arm/types.md
> +++ b/gcc/config/arm/types.md
> @@ -1122,6 +1122,7 @@ (define_attr "type"
>    coproc,\
>    tme,\
>    memtag,\
> +  ls64,\
>    mve_move,\
>    mve_store,\
>    mve_load"
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..60738a5b8d6b35837a7b286d16416a0eb289e34e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
> @@ -0,0 +1,130 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +/* Inline assembly for LS64 instructions.  */
> +
> +#include <arm_acle.h>
> +
> +void
> +ls64_load(data512_t *output, const void *addr)
> +{
> +    __asm__ volatile ("ld64b %0, [%1]"
> +                      : "=r" (*output)
> +                      : "r" (addr)
> +                      : "memory");
> +}
> +
> +/* { dg-final { scan-assembler-times {ld64b x[0-9]+, \[x[0-9]+\]} 1 } } */

Could you double-check whether this passes?  I'd have expected a tab
rather than a space to be needed after ld64b, like in the other tests.

> +
> +/* LD64B <Xt>, [<Xn>] - make sure Xt != Xn  */

As discussed off-list, we don't need this requirement.  (FWIW, %0 would
have needed to be an earlyclobber (=&r) if we had wanted to enforce it.)

> +/* { dg-final { scan-assembler-not {ld64b x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b x9, \[x9\]} } } */
> +
> +void
> +ls64_store(const data512_t *input, void *addr)
> +{
> +    __asm__ volatile ("st64b %1, [%0]"
> +                      : /* No outputs.  */
> +                      : "r" (addr), "r" (*input)
> +                      : "memory");
> +}
> +
> +/* { dg-final { scan-assembler-times {st64b x[0-9]+, \[x[0-9]+\]} 1 } } */
> +
> +/* ST64B <Xt>, [<Xn>] - make sure Xt != Xn  */

Same here.  Xt != Xn needs to be true in the sense that *input and
addr are different values that are live at the same time, but that's
not specific to this instruction and so doesn't seem worth testing.

> +/* { dg-final { scan-assembler-not {st64b x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64b x9, \[x9\]} } } */
> +
> +uint64_t
> +ls64_store_v(const data512_t *input, void *addr)
> +{
> +    uint64_t status;
> +    __asm__ volatile ("st64bv %0, %2, [%1]"
> +                      : "=r" (status), "=r" (addr)
> +                      : "r" (*input)
> +                      : "memory");

addr should be an input rather than an output.  Samme for the other
stores.

> +    return status;
> +}
> +
> +/* { dg-final { scan-assembler-times {st64bv x[0-9]+, x[0-9]+, \[x[0-9]+\]} 1 } } */
> +
> +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> +/* { dg-final { scan-assembler-not {st64bv x0, x0,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x1, x1,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x2, x2,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x3, x3,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x4, x4,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x5, x5,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x6, x6,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x7, x7,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x8, x8,} } } */
> +/* { dg-final { scan-assembler-not {st64bv x9, x9,} } } */
> +
> +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x9, \[x9\]} } } */
> +
> +uint64_t
> +ls64_store_v0(const data512_t *input, void *addr)
> +{
> +    uint64_t status;
> +    __asm__ volatile ("st64bv0 %0, %2, [%1]"
> +                      : "=r" (status), "=r" (addr)
> +                      : "r" (*input)
> +                      : "memory");
> +    return status;
> +}
> +
> +/* { dg-final { scan-assembler-times {st64bv0 x[0-9]+, x[0-9]+, \[x[0-9]+\]} 1 } } */
> +
> +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> +/* { dg-final { scan-assembler-not {st64bv0 x0, x0,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x1, x1,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x2, x2,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x3, x3,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x4, x4,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x5, x5,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x6, x6,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x7, x7,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x8, x8,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x9, x9,} } } */
> +
> +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..1d39618b44367522d8a29b77f2a79e339ceb35b5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +void
> +func(const void * addr) {
> +    data512_t ret = __arm_ld64b (addr);   /* Should be optimized out.  */
> +}
> +
> +/* { dg-final { scan-assembler-not {ld64b\tx[0-9]+, \[x[0-9]+\]\n} } } */

Probably more robust to drop everything after the tab, so that the test
doesn't accidentally pass due to a typo in the complex part of the regexp.

Thanks,
Richard

> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..d8a3f6c51fb5f07137eea5b032d4fdca9b1aa93b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +void
> +func(const void * addr, data512_t *data) {
> +  *data = __arm_ld64b (addr);
> +}
> +
> +/* { dg-final { scan-assembler-times {ld64b\tx[0-9]+, \[x[0-9]+\]\n} 1 } } */
> +
> +/* LD64B <Xt>, [<Xn>] - make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {ld64b\tx0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {ld64b\tx9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..757ff738ada8b706b68b517f20bec7b1ad05a5b3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +void
> +func(void *addr, data512_t value) {
> +    __arm_st64b (addr, value);
> +}
> +
> +/* { dg-final { scan-assembler-times {st64b\tx[0-9]+, \[x[0-9]+\]\n} 1 } } */
> +
> +/* ST64B <Xt>, [<Xn>] - make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64b\tx0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64b\tx9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..3fb2520223c294dcd80548bad1d0c176a696c37b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +void
> +func(void *addr, data512_t value) {
> +    __arm_st64bv (addr, value);
> +}
> +
> +/* { dg-final { scan-assembler-times {st64bv\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
> +
> +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> +/* { dg-final { scan-assembler-not {st64bv\tx0, x0,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx1, x1,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx2, x2,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx3, x3,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx4, x4,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx5, x5,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx6, x6,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx7, x7,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx8, x8,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx9, x9,} } } */
> +
> +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..7bb40c3a9db643a5b2548e4e7e639224126e03f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +uint64_t
> +func(void *addr, data512_t value) {
> +    return  __arm_st64bv (addr, value);
> +}
> +
> +/* { dg-final { scan-assembler-times {st64bv\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
> +
> +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> +/* { dg-final { scan-assembler-not {st64bv\tx0, x0,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx1, x1,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx2, x2,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx3, x3,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx4, x4,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx5, x5,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx6, x6,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx7, x7,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx8, x8,} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx9, x9,} } } */
> +
> +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..bd8fd5dcf41d3b0e9dab6be53cc099014cdcf1e2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +void
> +func(void *addr, data512_t value) {
> +    __arm_st64bv0 (addr, value);
> +}
> +
> +/* { dg-final { scan-assembler-times {st64bv0\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
> +
> +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> +/* { dg-final { scan-assembler-not {st64bv0\tx0, x0,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx1, x1,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx2, x2,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx3, x3,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx4, x4,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx5, x5,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx6, x6,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx7, x7,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx8, x8,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx9, x9,} } } */
> +
> +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..7a8e9bf53ad0f03a382a950783949989182adcf9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> +
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +
> +#include <arm_acle.h>
> +
> +uint64_t
> +func(void *addr, data512_t value) {
> +    return __arm_st64bv0 (addr, value);
> +}
> +
> +/* { dg-final { scan-assembler-times {st64bv0\tx[0-9]+, x[0-9]+, \[x[0-9]+\]\n} 1 } } */
> +
> +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> +/* { dg-final { scan-assembler-not {st64bv0\tx0, x0,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx1, x1,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx2, x2,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx3, x3,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx4, x4,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx5, x5,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx6, x6,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx7, x7,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx8, x8,} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx9, x9,} } } */
> +
> +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x0, \[x0\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x1, \[x1\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x2, \[x2\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x3, \[x3\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x4, \[x4\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x5, \[x5\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x6, \[x6\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x7, \[x7\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x8, \[x8\]} } } */
> +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x9, \[x9\]} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> index 7244359ccfb9cbcbbd8285b050113c004a6af2a6..2d76bfc23dfdcd78a74ec0e4845a3bd8d110b010 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> @@ -240,6 +240,20 @@
>  #endif
>  #pragma GCC pop_options
>  
> +#pragma GCC push_options
> +#pragma GCC target ("arch=armv8.7-a")
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +#pragma GCC pop_options
> +
> +#pragma GCC push_options
> +#pragma GCC target ("arch=armv8.7-a+ls64")
> +#ifndef __ARM_FEATURE_LS64
> +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> +#endif
> +#pragma GCC pop_options
> +
>  #pragma GCC pop_options
>  
>  int

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
  2021-11-15 13:43 ` Richard Sandiford
@ 2021-12-13 13:48   ` Przemyslaw Wirkus
  2021-12-14 11:58     ` Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: Przemyslaw Wirkus @ 2021-12-13 13:48 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

[-- Attachment #1: Type: text/plain, Size: 46016 bytes --]

Hello Richard,

I've updated my patch following all your comments. Thank you.

Boostrapped on aarch64-linux-gnu and all new ACLE tests pass.

OK to install?

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
	Define AARCH64_LS64_BUILTIN_LD64B, AARCH64_LS64_BUILTIN_ST64B,
	AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
	(aarch64_init_ls64_builtin_decl): Helper function.
	(aarch64_init_ls64_builtins): Helper function.
	(aarch64_init_ls64_builtins_types): Helper function.
	(aarch64_general_init_builtins): Init LS64 intrisics for
	TARGET_LS64.
	(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
	(aarch64_general_expand_builtin): Handle aarch64_expand_builtin_ls64.
	(ls64_builtins_data): New helper struct.
	(v8di_UP): New define.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
	__ARM_FEATURE_LS64.
	* config/aarch64/aarch64-simd.md (movv8di): New pattern.
	(aarch64_movv8di): New pattern.
	* config/aarch64/aarch64.c (aarch64_classify_address): New test for TI.
	* config/aarch64/aarch64-simd.md: Add new V8DI mov expand.
	* config/aarch64/aarch64.c (aarch64_classify_address): Enforce the
	TI range (7-bit signed scaled) for both ends of the range.
	* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
	(TARGET_LS64): New define.
	* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
	UNSPEC_ST64BV and UNSPEC_ST64BV0.
	(ld64b): New define_insn.
	(st64b): New define_insn.
	(st64bv): New define_insn.
	(st64bv0): New define_insn.
	* config/aarch64/arm_acle.h (target):
	(data512_t): New type derived from __arm_data512_t.
	(__arm_data512_t): New internal type.
	(__arm_ld64b): New intrinsic.
	(__arm_st64b): New intrinsic.
	(__arm_st64bv): New intrinsic.
	(__arm_st64bv0): New intrinsic.
	* config/arm/types.md: Add new type ls64.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/acle/ls64_asm.c: New test.
	* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
	* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
	* gcc.target/aarch64/acle/ls64_ld64b-3.c: New test.
	* gcc.target/aarch64/acle/ls64_st64b.c: New test.
	* gcc.target/aarch64/acle/ls64_ld_st_o0.c: New test.
	* gcc.target/aarch64/acle/ls64_st64b-2.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv-3.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
	* gcc.target/aarch64/acle/ls64_st64bv0-3.c: New test.
	* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
	for __ARM_FEATURE_LS64.

Kind regards, 
Przemyslaw Wirkus

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: 15 November 2021 13:43
> To: Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
> 
> Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> > Hi,
> >
> > This patch is adding support for LS64 (Armv8.7-A Load/Store 64 Byte
> > extension) which is part of Armv8.7-A architecture. Changes include
> > missing plumbing for TARGET_LS64, LS64 data structure and intrinsics
> > defined in ACLE [0]. Machine description of intrinsics is using new V8DI mode
> added in a separate patch.
> > __ARM_FEATURE_LS64 is defined if the Armv8.7-A LS64 instructions for
> > atomic 64-byte access to device memory are supported.
> >
> > New compiler internal type is added wrapping ACLE struct data512_t [0]:
> >
> > typedef struct {
> >   uint64_t val[8];
> > } __arm_data512_t;
> >
> > Please note that command line support for this feature was already added [1].
> >
> >   [0] https://github.com/ARM-software/acle/blob/main/main/acle.rst#load-
> store-64-byte-intrinsics
> >   [1] commit e159c0aa10e50c292a534535c73f38d22b6129a8 (AArch64: Add
> command-line
> >       support for Armv8.7-a)
> >
> > For below C code see example snippets of generated code:
> >
> > #include <arm_acle.h>
> >
> > void
> > func(const void * addr, data512_t *data) {
> >   *data = __arm_ld64b (addr);
> > }
> >
> > func:
> > 	ld64b	x8, [x0]
> > 	stp	x8, x9, [x1]
> > 	sub	sp, sp, #64
> > 	stp	x10, x11, [x1, 16]
> > 	stp	x12, x13, [x1, 32]
> > 	stp	x14, x15, [x1, 48]
> > 	add	sp, sp, 64
> > 	ret
> > ~~~
> >
> > #include <arm_acle.h>
> >
> > uint64_t
> > func(void *addr, data512_t value) {
> >     return  __arm_st64bv (addr, value); }
> >
> > func:
> > 	ldp	x8, x9, [x1]
> > 	ldp	x10, x11, [x1, 16]
> > 	ldp	x12, x13, [x1, 32]
> > 	ldp	x14, x15, [x1, 48]
> > 	st64bv	x1, x8, [x0]
> > 	mov	x0, x1
> > 	ret
> >
> > ~~~
> >
> > uint64_t
> > ls64_store_v0(const data512_t *input, void *addr) {
> >     uint64_t status;
> >     __asm__ volatile ("st64bv0 %0, %2, [%1]"
> >                       : "=r" (status), "=r" (addr)
> >                       : "r" (*input)
> >                       : "memory");
> >     return status;
> > }
> >
> > ls64_store_v0:
> > 	ldp	x8, x9, [x0]
> > 	ldp	x10, x11, [x0, 16]
> > 	ldp	x12, x13, [x0, 32]
> > 	ldp	x14, x15, [x0, 48]
> > 	st64bv0 x0, x8, [x1]
> > 	ret
> >
> > Regtested on aarch64-elf cross and no issues.
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >
> > 2021-11-11  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>
> >
> > 	* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
> > 	Define AARCH64_LS64_BUILTIN_LD64B,
> AARCH64_LS64_BUILTIN_ST64B,
> > 	AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
> > 	(aarch64_init_ls64_builtin_decl): Helper function.
> > 	(aarch64_init_ls64_builtins): Helper function.
> > 	(aarch64_init_ls64_builtins_types): Helper function.
> > 	(aarch64_general_init_builtins): Init LS64 intrisics for
> > 	TARGET_LS64.
> > 	(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
> > 	(aarch64_general_expand_builtin): Handle
> aarch64_expand_builtin_ls64.
> > 	(ls64_builtins_data): New helper struct.
> > 	(v8di_UP): New define.
> > 	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
> > 	__ARM_FEATURE_LS64.
> > 	* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
> > 	(AARCH64_ISA_V8_7): New define.
> > 	(TARGET_LS64): New define.
> > 	* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
> > 	UNSPEC_ST64BV and UNSPEC_ST64BV0.
> > 	(ld64b): New define_insn.
> > 	(st64b): New define_insn.
> > 	(st64bv): New define_insn.
> > 	(st64bv0): New define_insn.
> > 	* config/aarch64/arm_acle.h (target):
> > 	(data512_t): New type derived from __arm_data512_t.
> > 	(__arm_data512_t): New internal type.
> > 	(__arm_ld64b): New intrinsic.
> > 	(__arm_st64b): New intrinsic.
> > 	(__arm_st64bv): New intrinsic.
> > 	(__arm_st64bv0): New intrinsic.
> > 	* config/arm/types.md: Add new type ls64.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 2021-11-11  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>
> >
> > 	* gcc.target/aarch64/acle/ls64_asm.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_st64b.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
> > 	* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
> > 	* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
> > 	for __ARM_FEATURE_LS64.
> >
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c
> > b/gcc/config/aarch64/aarch64-builtins.c
> > index
> >
> 5053bf0f8fd6638bf84a6df06c0987a0216b69e7..d4a82eec3b26bfd1cb976d0870
> d6
> > 0ee7d10b689a 100644
> > --- a/gcc/config/aarch64/aarch64-builtins.c
> > +++ b/gcc/config/aarch64/aarch64-builtins.c
> > @@ -49,6 +49,7 @@
> >  #include "gimple-fold.h"
> >
> >  #define v8qi_UP  E_V8QImode
> > +#define v8di_UP  E_V8DImode
> >  #define v4hi_UP  E_V4HImode
> >  #define v4hf_UP  E_V4HFmode
> >  #define v2si_UP  E_V2SImode
> > @@ -615,6 +616,11 @@ enum aarch64_builtins
> >    AARCH64_MEMTAG_BUILTIN_SET_TAG,
> >    AARCH64_MEMTAG_BUILTIN_GET_TAG,
> >    AARCH64_MEMTAG_BUILTIN_END,
> > +  /* LS64 builtins.  */
> > +  AARCH64_LS64_BUILTIN_LD64B,
> > +  AARCH64_LS64_BUILTIN_ST64B,
> > +  AARCH64_LS64_BUILTIN_ST64BV,
> > +  AARCH64_LS64_BUILTIN_ST64BV0,
> >    AARCH64_BUILTIN_MAX
> >  };
> >
> > @@ -1579,6 +1585,71 @@ aarch64_init_memtag_builtins (void)  #undef
> > AARCH64_INIT_MEMTAG_BUILTINS_DECL  }
> >
> > +/* Add builtins for Load/store 64 Byte instructions.  */
> > +
> > +typedef struct
> > +{
> > +  const char *name;
> > +  unsigned int code;
> > +  tree type;
> > +} ls64_builtins_data;
> > +
> > +static void
> > +aarch64_init_ls64_builtin_decl (const char *name, tree type, unsigned
> > +int code) {
> > +  aarch64_builtin_decls[code] = aarch64_general_add_builtin (name,
> > +type, code); }
> > +
> > +static GTY(()) tree ls64_arm_data_t = NULL_TREE;
> > +
> > +static void
> > +aarch64_init_ls64_builtins_types (void) {
> > +  /* Synthesize the name of the user-visible vector tuple type.  */
> > +  char *tuple_type_name = "__arm_data512_t";
> > +  tree node_type = get_typenode_from_name (UINT64_TYPE);
> > +  tree array_type = build_array_type_nelts (node_type, 8);
> > +  SET_TYPE_MODE (array_type, V8DImode);
> > +  unsigned int alignment = known_eq (GET_MODE_SIZE (E_V8DImode), 64)
> > +? 64 : 0;
> 
> The alignment should always be 64 bits, so I think we should just hard-code
> that in the gcc_asserts below.
> 
> > +
> > +  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
> > + gcc_assert (TYPE_ALIGN (array_type) == alignment);
> > +
> > +  tree field = build_decl (input_location, FIELD_DECL,
> > +                           get_identifier ("val"), array_type);
> > +
> > +  ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
> > +                         tuple_type_name,
> > +                         make_array_slice (&field, 1));
> > +
> > +  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE
> > + (ls64_arm_data_t));
> 
> I think the more important test here is that:
> 
>   TYPE_MODE (ls64_arm_data_t) == V8DImode
> 
> so we should probably test that first, before the line above.
> 
> > +  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == alignment); }
> > +
> > +static void
> > +aarch64_init_ls64_builtins (void)
> > +{
> > +  ls64_builtins_data data[4] = {
> > +    {"__builtin_aarch64_ld64b", AARCH64_LS64_BUILTIN_LD64B,
> > +     build_function_type_list (aarch64_simd_intXI_type_node,
> > +                               const_ptr_type_node, NULL_TREE)},
> > +    {"__builtin_aarch64_st64b", AARCH64_LS64_BUILTIN_ST64B,
> > +     build_function_type_list (void_type_node, ptr_type_node,
> > +                               aarch64_simd_intXI_type_node, NULL_TREE)},
> > +    {"__builtin_aarch64_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
> > +     build_function_type_list (uint64_type_node, ptr_type_node,
> > +                               aarch64_simd_intXI_type_node, NULL_TREE)},
> > +    {"__builtin_aarch64_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
> > +     build_function_type_list (uint64_type_node, ptr_type_node,
> > +                               aarch64_simd_intXI_type_node,
> > +NULL_TREE)},
> 
> Do these functions need to use aarch64_simd_intXI_type_node?  Now that
> we're defining data512_t in the compiler itself, I think we could use it directly
> in the function prototypes, which would simplify some of the header file code
> (and hopefully improve codegen).
> 
> > +  };
> > +
> > +  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
> > +    aarch64_init_ls64_builtin_decl (data[i].name, data[i].type,
> > + data[i].code);
> > +
> > +  aarch64_init_ls64_builtins_types (); }
> > +
> >  /* Initialize fpsr fpcr getters and setters.  */
> >
> >  static void
> > @@ -1668,6 +1739,9 @@ aarch64_general_init_builtins (void)
> >
> >    if (TARGET_MEMTAG)
> >      aarch64_init_memtag_builtins ();
> > +
> > +  if (TARGET_LS64)
> > +    aarch64_init_ls64_builtins ();
> >  }
> >
> >  /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL
> > group.  */ @@ -2138,6 +2212,60 @@ aarch64_expand_builtin_tme (int fcode,
> tree exp, rtx target)
> >      return target;
> >  }
> >
> > +/* Function to expand an expression EXP which calls one of the Load/Store
> > +   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.
> > +   If IGNORE is true the return value is ignored.  */ static rtx
> > +aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target, int
> > +ignore) {
> > +  switch (fcode)
> > +    {
> > +    case AARCH64_LS64_BUILTIN_LD64B:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        op0 = force_reg (Pmode, op0);
> > +        /* Do not emit instruction when return value is ignored.  */
> > +        if (ignore)
> > +          return target;
> > +        target = convert_to_mode (V8DImode, target, true);
> 
> If we do use the real structure type in the prototypes then the conversions in
> this function won't be needed.
> 
> > +        emit_insn (GEN_FCN (CODE_FOR_ld64b) (target, op0));
> > +        break;
> > +      }
> > +    case AARCH64_LS64_BUILTIN_ST64B:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        op0 = force_reg (Pmode, op0);
> > +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +        op1 = convert_to_mode (V8DImode, op1, true);
> > +        emit_insn (GEN_FCN (CODE_FOR_st64b) (op0, op1));
> > +        break;
> > +      }
> > +    case AARCH64_LS64_BUILTIN_ST64BV:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        op0 = force_reg (Pmode, op0);
> > +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +        op1 = convert_to_mode (V8DImode, op1, true);
> > +        target = ignore ? gen_reg_rtx(DImode) : force_reg (DImode,
> > + target);
> 
> force_reg doesn't look right here: target is a destination operand whereas
> force_reg works on source operands.
> 
> It would probably be better to use the expand_insn interface instead.
> See AARCH64_JSCVT for an example.
> 
> > +        emit_insn (GEN_FCN (CODE_FOR_st64bv) (target, op0, op1));
> > +        break;
> > +      }
> > +    case AARCH64_LS64_BUILTIN_ST64BV0:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        op0 = force_reg (Pmode, op0);
> > +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +        op1 = convert_to_mode (V8DImode, op1, true);
> > +        target = ignore ? gen_reg_rtx(DImode) : force_reg (DImode, target);
> > +        emit_insn (GEN_FCN (CODE_FOR_st64bv0) (target, op0, op1));
> > +        break;
> > +      }
> > +    default :
> > +      gcc_unreachable ();
> > +    }
> > +    return target;
> > +}
> > +
> >  /* Expand a random number builtin EXP with code FCODE, putting the result
> >     int TARGET.  If IGNORE is true the return value is ignored.  */
> >
> > @@ -2396,6 +2524,12 @@ aarch64_general_expand_builtin (unsigned int
> fcode, tree exp, rtx target,
> >        || fcode == AARCH64_TME_BUILTIN_TCANCEL)
> >      return aarch64_expand_builtin_tme (fcode, exp, target);
> >
> > +  if (fcode == AARCH64_LS64_BUILTIN_LD64B
> > +      || fcode == AARCH64_LS64_BUILTIN_ST64B
> > +      || fcode == AARCH64_LS64_BUILTIN_ST64BV
> > +      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
> > +    return aarch64_expand_builtin_ls64 (fcode, exp, target, ignore);
> > +
> >    if (fcode >= AARCH64_MEMTAG_BUILTIN_START
> >        && fcode <= AARCH64_MEMTAG_BUILTIN_END)
> >      return aarch64_expand_builtin_memtag (fcode, exp, target); diff
> > --git a/gcc/config/aarch64/aarch64-c.c
> > b/gcc/config/aarch64/aarch64-c.c index
> >
> d6653e474dec9bcddde2106f36ceb22f1d43375c..3af3e5c96daf674648dbc008b
> 15a
> > de0e303b66f8 100644
> > --- a/gcc/config/aarch64/aarch64-c.c
> > +++ b/gcc/config/aarch64/aarch64-c.c
> > @@ -200,6 +200,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
> >  			"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
> >    aarch64_def_or_undef (TARGET_BF16_FP,
> >  			"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
> > +  aarch64_def_or_undef (TARGET_LS64,
> > +			"__ARM_FEATURE_LS64", pfile);
> >
> >    /* Not for ACLE, but required to keep "float.h" correct if we switch
> >       target between implementations that do or do not support
> > ARMv8.2-A diff --git a/gcc/config/aarch64/aarch64.h
> > b/gcc/config/aarch64/aarch64.h index
> >
> 2792bb29adbbb5b3145b3f767615af8edbc30b08..426ad5ac77376f561c92d3e35
> b62
> > 7939eb481773 100644
> > --- a/gcc/config/aarch64/aarch64.h
> > +++ b/gcc/config/aarch64/aarch64.h
> > @@ -310,6 +310,8 @@ extern unsigned aarch64_architecture_version;
> >  #define AARCH64_ISA_V8_R	   (aarch64_isa_flags & AARCH64_FL_V8_R)
> >  #define AARCH64_ISA_PAUTH	   (aarch64_isa_flags & AARCH64_FL_PAUTH)
> >  #define AARCH64_ISA_V9		   (aarch64_isa_flags &
> AARCH64_FL_V9)
> > +#define AARCH64_ISA_LS64	   (aarch64_isa_flags & AARCH64_FL_LS64)
> > +#define AARCH64_ISA_V8_7	   (aarch64_isa_flags & AARCH64_FL_V8_7)
> >
> >  /* Crypto is an optional extension to AdvSIMD.  */  #define
> > TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) @@ -401,6
> +403,9 @@
> > extern unsigned aarch64_architecture_version;
> >  /* PAUTH instructions are enabled through +pauth.  */  #define
> > TARGET_PAUTH (AARCH64_ISA_PAUTH)
> >
> > +/* LS64 instructions are enabled through +ls64.  */ #define
> > +TARGET_LS64 (AARCH64_ISA_LS64)
> > +
> >  /* Make sure this is always defined so we don't have to check for ifdefs
> >     but rather use normal ifs.  */
> >  #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT diff --git
> > a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
> >
> 4035e061706793849c68ae09bcb2e4b9580ab7b6..bf4a23c8682767ae706ba387
> 9938
> > aed08f394cc2 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -187,6 +187,10 @@ (define_c_enum "unspec" [
> >      UNSPEC_LD2_LANE
> >      UNSPEC_LD3_LANE
> >      UNSPEC_LD4_LANE
> > +    UNSPEC_LD64B
> > +    UNSPEC_ST64B
> > +    UNSPEC_ST64BV
> > +    UNSPEC_ST64BV0
> >      UNSPEC_MB
> >      UNSPEC_NOP
> >      UNSPEC_PACIA1716
> > @@ -7499,6 +7503,45 @@ (define_insn "stg"
> >    [(set_attr "type" "memtag")]
> >  )
> >
> > +;; Load/Store 64-bit (LS64) instructions.
> > +(define_insn "ld64b"
> > +  [(set (match_operand:V8DI 0 "register_operand" "=r")
> > +        (unspec:V8DI [(mem:V8DI (match_operand:DI 1
> > +"register_operand" "r"))] UNSPEC_LD64B)
> > +  )]
> > +  "TARGET_LS64"
> > +  "ld64b\\t%0, [%1]"
> > +  [(set_attr "type" "ls64")]
> > +)
> > +
> > +(define_insn "st64b"
> > +  [(set (mem:V8DI (match_operand:DI 0 "register_operand" "=r"))
> > +        (unspec:V8DI [(match_operand:V8DI 1 "register_operand" "r")]
> > +UNSPEC_ST64B)
> 
> The unspec probably needs to be unspec_volatile.  The danger as things stand
> is that the second __arm_st64b in:
> 
>     __arm_st64b (addr, a);
>     …
>     __arm_st64b (addr, a);
> 
> could be optimised away if the … clearly doesn't involve a store to addr.
> 
> > +  )]
> > +  "TARGET_LS64"
> > +  "st64b\\t%1, [%0]"
> > +  [(set_attr "type" "ls64")]
> > +)
> > +
> > +(define_insn "st64bv"
> > +  [(clobber (match_operand:DI 0 "register_operand" "=r"))
> 
> (clobber …) should only be used for values that don't matter.
> Here I think we'll need a (set …) with an (unspec_volatile …) source.
> I guess pedantically, it should be a separate unspec number from
> UNSPEC_ST64BV, since the value being stored isn't the same as the value being
> returned.
> 
> > +   (set (mem:V8DI (match_operand:DI 1 "register_operand" "=r"))
> > +        (unspec:V8DI [(match_operand:V8DI 2 "register_operand" "r")]
> > +UNSPEC_ST64BV)
> > +  )]
> > +  "TARGET_LS64"
> > +  "st64bv\\t%0, %2, [%1]"
> > +  [(set_attr "type" "ls64")]
> > +)
> > +
> > +(define_insn "st64bv0"
> > +  [(clobber (match_operand:DI 0 "register_operand" "=r"))
> > +   (set (mem:V8DI (match_operand:DI 1 "register_operand" "=r"))
> > +        (unspec:V8DI [(match_operand:V8DI 2 "register_operand" "r")]
> > +UNSPEC_ST64BV0)
> > +  )]
> > +  "TARGET_LS64"
> > +  "st64bv0\\t%0, %2, [%1]"
> > +  [(set_attr "type" "ls64")]
> > +)
> > +
> >  ;; AdvSIMD Stuff
> >  (include "aarch64-simd.md")
> >
> > diff --git a/gcc/config/aarch64/arm_acle.h
> > b/gcc/config/aarch64/arm_acle.h index
> >
> 13f23632474b260122f30a3c566033664b0b5963..26d886949a34f77a65f55fbf3b
> 4c
> > c01884bfd883 100644
> > --- a/gcc/config/aarch64/arm_acle.h
> > +++ b/gcc/config/aarch64/arm_acle.h
> > @@ -214,6 +214,57 @@ __ttest (void)
> >  #pragma GCC pop_options
> >  #endif
> >
> > +#ifdef __ARM_FEATURE_LS64
> > +#pragma GCC push_options
> > +#pragma GCC target ("+nothing+ls64")
> > +
> > +typedef __arm_data512_t data512_t;
> > +
> > +__extension__ extern __inline data512_t __attribute__
> > +((__always_inline__, __gnu_inline__, __artificial__)) __arm_ld64b
> > +(const void *addr) {
> > +  __builtin_aarch64_simd_xi __o;
> > +  data512_t __temp = { };
> > +  __o = __builtin_aarch64_ld64b (addr);
> > +  __builtin_memcpy (&__temp, &__o, sizeof (__o));
> > +  return __temp;
> > +}
> > +
> > +__extension__ extern __inline void
> > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > +__arm_st64b (void *addr, data512_t value) {
> > +  __builtin_aarch64_simd_xi __o;
> > +  __builtin_memcpy (&__o, &value, sizeof (__o));
> > +  __builtin_aarch64_st64b (addr, __o); }
> > +
> > +__extension__ extern __inline uint64_t __attribute__
> > +((__always_inline__, __gnu_inline__, __artificial__)) __arm_st64bv
> > +(void *addr, data512_t value) {
> > +  uint64_t __ret;
> > +  __builtin_aarch64_simd_xi __o;
> > +  __builtin_memcpy (&__o, &value, sizeof (__o));
> > +  __ret = __builtin_aarch64_st64bv (addr, __o);
> > +  return __ret;
> > +}
> > +
> > +__extension__ extern __inline uint64_t __attribute__
> > +((__always_inline__, __gnu_inline__, __artificial__))
> > +__arm_st64bv0 (void *addr, data512_t value) {
> > +  uint64_t __ret;
> > +  __builtin_aarch64_simd_xi __o;
> > +  __builtin_memcpy (&__o, &value, sizeof (__o));
> > +  __ret = __builtin_aarch64_st64bv0 (addr, __o);
> > +  return __ret;
> > +}
> > +
> > +#pragma GCC pop_options
> > +#endif
> > +
> >  #pragma GCC push_options
> >  #pragma GCC target ("+nothing+rng")
> >  __extension__ extern __inline int
> > diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index
> >
> b9514dafb86a280bee3d3f84845e0743cd18a34d..6dce71fd27e5dfbd08746509b
> c6f
> > deeade69a4a4 100644
> > --- a/gcc/config/arm/types.md
> > +++ b/gcc/config/arm/types.md
> > @@ -1122,6 +1122,7 @@ (define_attr "type"
> >    coproc,\
> >    tme,\
> >    memtag,\
> > +  ls64,\
> >    mve_move,\
> >    mve_store,\
> >    mve_load"
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..60738a5b8d6b35837a7b286
> d1641
> > 6a0eb289e34e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
> > @@ -0,0 +1,130 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +/* Inline assembly for LS64 instructions.  */
> > +
> > +#include <arm_acle.h>
> > +
> > +void
> > +ls64_load(data512_t *output, const void *addr) {
> > +    __asm__ volatile ("ld64b %0, [%1]"
> > +                      : "=r" (*output)
> > +                      : "r" (addr)
> > +                      : "memory");
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {ld64b x[0-9]+, \[x[0-9]+\]} 1 }
> > +} */
> 
> Could you double-check whether this passes?  I'd have expected a tab rather
> than a space to be needed after ld64b, like in the other tests.
> 
> > +
> > +/* LD64B <Xt>, [<Xn>] - make sure Xt != Xn  */
> 
> As discussed off-list, we don't need this requirement.  (FWIW, %0 would have
> needed to be an earlyclobber (=&r) if we had wanted to enforce it.)
> 
> > +/* { dg-final { scan-assembler-not {ld64b x0, \[x0\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x1, \[x1\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x2, \[x2\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x3, \[x3\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x4, \[x4\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x5, \[x5\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x6, \[x6\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x7, \[x7\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x8, \[x8\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b x9, \[x9\]} } } */
> > +
> > +void
> > +ls64_store(const data512_t *input, void *addr) {
> > +    __asm__ volatile ("st64b %1, [%0]"
> > +                      : /* No outputs.  */
> > +                      : "r" (addr), "r" (*input)
> > +                      : "memory");
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {st64b x[0-9]+, \[x[0-9]+\]} 1 }
> > +} */
> > +
> > +/* ST64B <Xt>, [<Xn>] - make sure Xt != Xn  */
> 
> Same here.  Xt != Xn needs to be true in the sense that *input and addr are
> different values that are live at the same time, but that's not specific to this
> instruction and so doesn't seem worth testing.
> 
> > +/* { dg-final { scan-assembler-not {st64b x0, \[x0\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x1, \[x1\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x2, \[x2\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x3, \[x3\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x4, \[x4\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x5, \[x5\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x6, \[x6\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x7, \[x7\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x8, \[x8\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b x9, \[x9\]} } } */
> > +
> > +uint64_t
> > +ls64_store_v(const data512_t *input, void *addr) {
> > +    uint64_t status;
> > +    __asm__ volatile ("st64bv %0, %2, [%1]"
> > +                      : "=r" (status), "=r" (addr)
> > +                      : "r" (*input)
> > +                      : "memory");
> 
> addr should be an input rather than an output.  Samme for the other stores.
> 
> > +    return status;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {st64bv x[0-9]+, x[0-9]+,
> > +\[x[0-9]+\]} 1 } } */
> > +
> > +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> > +/* { dg-final { scan-assembler-not {st64bv x0, x0,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x1, x1,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x2, x2,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x3, x3,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x4, x4,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x5, x5,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x6, x6,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x7, x7,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x8, x8,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv x9, x9,} } } */
> > +
> > +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x0, \[x0\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x1, \[x1\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x2, \[x2\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x3, \[x3\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x4, \[x4\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x5, \[x5\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x6, \[x6\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x7, \[x7\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x8, \[x8\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv x[0-9]+, x9, \[x9\]} } }
> > +*/
> > +
> > +uint64_t
> > +ls64_store_v0(const data512_t *input, void *addr) {
> > +    uint64_t status;
> > +    __asm__ volatile ("st64bv0 %0, %2, [%1]"
> > +                      : "=r" (status), "=r" (addr)
> > +                      : "r" (*input)
> > +                      : "memory");
> > +    return status;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {st64bv0 x[0-9]+, x[0-9]+,
> > +\[x[0-9]+\]} 1 } } */
> > +
> > +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> > +/* { dg-final { scan-assembler-not {st64bv0 x0, x0,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x1, x1,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x2, x2,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x3, x3,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x4, x4,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x5, x5,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x6, x6,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x7, x7,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x8, x8,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0 x9, x9,} } } */
> > +
> > +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x0, \[x0\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x1, \[x1\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x2, \[x2\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x3, \[x3\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x4, \[x4\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x5, \[x5\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x6, \[x6\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x7, \[x7\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x8, \[x8\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0 x[0-9]+, x9, \[x9\]} } }
> > +*/
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..1d39618b44367522d8a29b7
> 7f2a7
> > 9e339ceb35b5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +void
> > +func(const void * addr) {
> > +    data512_t ret = __arm_ld64b (addr);   /* Should be optimized out.  */
> > +}
> > +
> > +/* { dg-final { scan-assembler-not {ld64b\tx[0-9]+, \[x[0-9]+\]\n} }
> > +} */
> 
> Probably more robust to drop everything after the tab, so that the test doesn't
> accidentally pass due to a typo in the complex part of the regexp.
> 
> Thanks,
> Richard
> 
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..d8a3f6c51fb5f07137eea5b03
> 2d4
> > fdca9b1aa93b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
> > @@ -0,0 +1,27 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +void
> > +func(const void * addr, data512_t *data) {
> > +  *data = __arm_ld64b (addr);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {ld64b\tx[0-9]+, \[x[0-9]+\]\n}
> > +1 } } */
> > +
> > +/* LD64B <Xt>, [<Xn>] - make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {ld64b\tx0, \[x0\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx1, \[x1\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx2, \[x2\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx3, \[x3\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx4, \[x4\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx5, \[x5\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx6, \[x6\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx7, \[x7\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx8, \[x8\]} } } */
> > +/* { dg-final { scan-assembler-not {ld64b\tx9, \[x9\]} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..757ff738ada8b706b68b517f2
> 0be
> > c7b1ad05a5b3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
> > @@ -0,0 +1,27 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +void
> > +func(void *addr, data512_t value) {
> > +    __arm_st64b (addr, value);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {st64b\tx[0-9]+, \[x[0-9]+\]\n}
> > +1 } } */
> > +
> > +/* ST64B <Xt>, [<Xn>] - make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64b\tx0, \[x0\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx1, \[x1\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx2, \[x2\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx3, \[x3\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx4, \[x4\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx5, \[x5\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx6, \[x6\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx7, \[x7\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx8, \[x8\]} } } */
> > +/* { dg-final { scan-assembler-not {st64b\tx9, \[x9\]} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..3fb2520223c294dcd80548ba
> d1d0
> > c176a696c37b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +void
> > +func(void *addr, data512_t value) {
> > +    __arm_st64bv (addr, value);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {st64bv\tx[0-9]+, x[0-9]+,
> > +\[x[0-9]+\]\n} 1 } } */
> > +
> > +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> > +/* { dg-final { scan-assembler-not {st64bv\tx0, x0,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx1, x1,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx2, x2,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx3, x3,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx4, x4,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx5, x5,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx6, x6,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx7, x7,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx8, x8,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx9, x9,} } } */
> > +
> > +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x0, \[x0\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x1, \[x1\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x2, \[x2\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x3, \[x3\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x4, \[x4\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x5, \[x5\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x6, \[x6\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x7, \[x7\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x8, \[x8\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x9, \[x9\]} } }
> > +*/
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..7bb40c3a9db643a5b2548e4e
> 7e63
> > 9224126e03f7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +uint64_t
> > +func(void *addr, data512_t value) {
> > +    return  __arm_st64bv (addr, value); }
> > +
> > +/* { dg-final { scan-assembler-times {st64bv\tx[0-9]+, x[0-9]+,
> > +\[x[0-9]+\]\n} 1 } } */
> > +
> > +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> > +/* { dg-final { scan-assembler-not {st64bv\tx0, x0,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx1, x1,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx2, x2,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx3, x3,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx4, x4,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx5, x5,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx6, x6,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx7, x7,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx8, x8,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv\tx9, x9,} } } */
> > +
> > +/* ST64BV <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x0, \[x0\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x1, \[x1\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x2, \[x2\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x3, \[x3\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x4, \[x4\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x5, \[x5\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x6, \[x6\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x7, \[x7\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x8, \[x8\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv\tx[0-9]+, x9, \[x9\]} } }
> > +*/
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..bd8fd5dcf41d3b0e9dab6be5
> 3cc0
> > 99014cdcf1e2
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +void
> > +func(void *addr, data512_t value) {
> > +    __arm_st64bv0 (addr, value);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {st64bv0\tx[0-9]+, x[0-9]+,
> > +\[x[0-9]+\]\n} 1 } } */
> > +
> > +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx0, x0,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx1, x1,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx2, x2,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx3, x3,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx4, x4,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx5, x5,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx6, x6,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx7, x7,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx8, x8,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx9, x9,} } } */
> > +
> > +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x0, \[x0\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x1, \[x1\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x2, \[x2\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x3, \[x3\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x4, \[x4\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x5, \[x5\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x6, \[x6\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x7, \[x7\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x8, \[x8\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x9, \[x9\]} } }
> > +*/
> > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
> > b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..7a8e9bf53ad0f03a382a95078
> 394
> > 9989182adcf9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=armv8-a+ls64 -O2" } */
> > +
> > +#ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +
> > +#include <arm_acle.h>
> > +
> > +uint64_t
> > +func(void *addr, data512_t value) {
> > +    return __arm_st64bv0 (addr, value); }
> > +
> > +/* { dg-final { scan-assembler-times {st64bv0\tx[0-9]+, x[0-9]+,
> > +\[x[0-9]+\]\n} 1 } } */
> > +
> > +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xs != Xt  */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx0, x0,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx1, x1,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx2, x2,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx3, x3,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx4, x4,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx5, x5,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx6, x6,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx7, x7,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx8, x8,} } } */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx9, x9,} } } */
> > +
> > +/* ST64BV0 <Xs>, <Xt>, [<Xn>]  -  make sure Xt != Xn  */
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x0, \[x0\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x1, \[x1\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x2, \[x2\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x3, \[x3\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x4, \[x4\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x5, \[x5\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x6, \[x6\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x7, \[x7\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x8, \[x8\]} } }
> > +*/
> > +/* { dg-final { scan-assembler-not {st64bv0\tx[0-9]+, x9, \[x9\]} } }
> > +*/
> > diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> > b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> > index
> >
> 7244359ccfb9cbcbbd8285b050113c004a6af2a6..2d76bfc23dfdcd78a74ec0e484
> 5a
> > 3bd8d110b010 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
> > @@ -240,6 +240,20 @@
> >  #endif
> >  #pragma GCC pop_options
> >
> > +#pragma GCC push_options
> > +#pragma GCC target ("arch=armv8.7-a") #ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +#pragma GCC pop_options
> > +
> > +#pragma GCC push_options
> > +#pragma GCC target ("arch=armv8.7-a+ls64") #ifndef __ARM_FEATURE_LS64
> > +#error "__ARM_FEATURE_LS64 is not defined but should be!"
> > +#endif
> > +#pragma GCC pop_options
> > +
> >  #pragma GCC pop_options
> >
> >  int

[-- Attachment #2: rb14982-2.patch --]
[-- Type: application/octet-stream, Size: 26105 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 303e1e542823f01558c5afc3b1015df12737f06d..8cfbb96b3dbdc68ca55a9c205c55a1cee8705636 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -49,6 +49,7 @@
 #include "gimple-fold.h"
 
 #define v8qi_UP  E_V8QImode
+#define v8di_UP  E_V8DImode
 #define v4hi_UP  E_V4HImode
 #define v4hf_UP  E_V4HFmode
 #define v2si_UP  E_V2SImode
@@ -607,6 +608,11 @@ enum aarch64_builtins
   AARCH64_MEMTAG_BUILTIN_SET_TAG,
   AARCH64_MEMTAG_BUILTIN_GET_TAG,
   AARCH64_MEMTAG_BUILTIN_END,
+  /* LS64 builtins.  */
+  AARCH64_LS64_BUILTIN_LD64B,
+  AARCH64_LS64_BUILTIN_ST64B,
+  AARCH64_LS64_BUILTIN_ST64BV,
+  AARCH64_LS64_BUILTIN_ST64BV0,
   AARCH64_BUILTIN_MAX
 };
 
@@ -1571,6 +1577,70 @@ aarch64_init_memtag_builtins (void)
 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
 }
 
+/* Add builtins for Load/store 64 Byte instructions.  */
+
+typedef struct
+{
+  const char *name;
+  unsigned int code;
+  tree type;
+} ls64_builtins_data;
+
+static GTY(()) tree ls64_arm_data_t = NULL_TREE;
+
+static void
+aarch64_init_ls64_builtins_types (void)
+{
+  /* Synthesize:
+
+     typedef struct {
+       uint64_t val[8];
+     } __arm_data512_t;  */
+  const char *tuple_type_name = "__arm_data512_t";
+  tree node_type = get_typenode_from_name (UINT64_TYPE);
+  tree array_type = build_array_type_nelts (node_type, 8);
+  SET_TYPE_MODE (array_type, V8DImode);
+
+  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
+  gcc_assert (TYPE_ALIGN (array_type) == 64);
+
+  tree field = build_decl (input_location, FIELD_DECL,
+                           get_identifier ("val"), array_type);
+
+  ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
+                         tuple_type_name,
+                         make_array_slice (&field, 1));
+
+  gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
+  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
+  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
+}
+
+static void
+aarch64_init_ls64_builtins (void)
+{
+  aarch64_init_ls64_builtins_types ();
+
+  ls64_builtins_data data[4] = {
+    {"__builtin_aarch64_ld64b", AARCH64_LS64_BUILTIN_LD64B,
+     build_function_type_list (ls64_arm_data_t,
+                               const_ptr_type_node, NULL_TREE)},
+    {"__builtin_aarch64_st64b", AARCH64_LS64_BUILTIN_ST64B,
+     build_function_type_list (void_type_node, ptr_type_node,
+                               ls64_arm_data_t, NULL_TREE)},
+    {"__builtin_aarch64_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
+     build_function_type_list (uint64_type_node, ptr_type_node,
+                               ls64_arm_data_t, NULL_TREE)},
+    {"__builtin_aarch64_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
+     build_function_type_list (uint64_type_node, ptr_type_node,
+                               ls64_arm_data_t, NULL_TREE)},
+  };
+
+  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
+    aarch64_builtin_decls[data[i].code]
+      = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code);
+}
+
 /* Initialize fpsr fpcr getters and setters.  */
 
 static void
@@ -1660,6 +1730,9 @@ aarch64_general_init_builtins (void)
 
   if (TARGET_MEMTAG)
     aarch64_init_memtag_builtins ();
+
+  if (TARGET_LS64)
+    aarch64_init_ls64_builtins ();
 }
 
 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
@@ -2130,6 +2203,57 @@ aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
     return target;
 }
 
+/* Function to expand an expression EXP which calls one of the Load/Store
+   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.  */
+static rtx
+aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
+{
+  expand_operand ops[3];
+
+  switch (fcode)
+    {
+    case AARCH64_LS64_BUILTIN_LD64B:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        create_output_operand (&ops[0], target, V8DImode);
+        create_input_operand (&ops[1], op0, DImode);
+        expand_insn (CODE_FOR_ld64b, 2, ops);
+        return ops[0].value;
+      }
+    case AARCH64_LS64_BUILTIN_ST64B:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+        create_output_operand (&ops[0], op0, DImode);
+        create_input_operand (&ops[1], op1, V8DImode);
+        expand_insn (CODE_FOR_st64b, 2, ops);
+        return const0_rtx;
+      }
+    case AARCH64_LS64_BUILTIN_ST64BV:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+        create_output_operand (&ops[0], target, DImode);
+        create_input_operand (&ops[1], op0, DImode);
+        create_input_operand (&ops[2], op1, V8DImode);
+        expand_insn (CODE_FOR_st64bv, 3, ops);
+        return ops[0].value;
+      }
+    case AARCH64_LS64_BUILTIN_ST64BV0:
+      {
+        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+        create_output_operand (&ops[0], target, DImode);
+        create_input_operand (&ops[1], op0, DImode);
+        create_input_operand (&ops[2], op1, V8DImode);
+        expand_insn (CODE_FOR_st64bv0, 3, ops);
+        return ops[0].value;
+      }
+    }
+
+    gcc_unreachable ();
+}
+
 /* Expand a random number builtin EXP with code FCODE, putting the result
    int TARGET.  If IGNORE is true the return value is ignored.  */
 
@@ -2388,6 +2512,12 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
       || fcode == AARCH64_TME_BUILTIN_TCANCEL)
     return aarch64_expand_builtin_tme (fcode, exp, target);
 
+  if (fcode == AARCH64_LS64_BUILTIN_LD64B
+      || fcode == AARCH64_LS64_BUILTIN_ST64B
+      || fcode == AARCH64_LS64_BUILTIN_ST64BV
+      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
+    return aarch64_expand_builtin_ls64 (fcode, exp, target);
+
   if (fcode >= AARCH64_MEMTAG_BUILTIN_START
       && fcode <= AARCH64_MEMTAG_BUILTIN_END)
     return aarch64_expand_builtin_memtag (fcode, exp, target);
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index d6653e474dec9bcddde2106f36ceb22f1d43375c..3af3e5c96daf674648dbc008b15ade0e303b66f8 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -200,6 +200,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 			"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
   aarch64_def_or_undef (TARGET_BF16_FP,
 			"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
+  aarch64_def_or_undef (TARGET_LS64,
+			"__ARM_FEATURE_LS64", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
      target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 175a9f07e2597b4b6a8d19141b948a8bb796db16..9ebf795a624f0183e0333349d0db7a71ba2d17dd 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7123,6 +7123,15 @@ (define_expand "mov<mode>"
     }
 })
 
+(define_expand "movv8di"
+  [(set (match_operand:V8DI 0 "nonimmediate_operand")
+	(match_operand:V8DI 1 "general_operand"))]
+  "TARGET_SIMD"
+{
+  if (can_create_pseudo_p () && MEM_P (operands[0]))
+    operands[1] = force_reg (V8DImode, operands[1]);
+})
+
 (define_expand "aarch64_ld1x3<vstruct_elt>"
   [(match_operand:VSTRUCT_3QD 0 "register_operand")
    (match_operand:DI 1 "register_operand")]
@@ -7253,6 +7262,17 @@ (define_insn "*aarch64_mov<mode>"
    (set_attr "length" "<insn_count>,4,4")]
 )
 
+(define_insn "*aarch64_movv8di"
+  [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
+	(match_operand:V8DI 1 "general_operand" " r,r,m"))]
+  "!BYTES_BIG_ENDIAN
+   && (register_operand (operands[0], V8DImode)
+       || register_operand (operands[1], V8DImode))"
+  "#"
+  [(set_attr "type" "multiple,multiple,multiple")
+   (set_attr "length" "32,16,16")]
+)
+
 (define_insn "aarch64_be_ld1<mode>"
   [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
 	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
@@ -7496,6 +7516,34 @@ (define_split
     FAIL;
 })
 
+(define_split
+  [(set (match_operand:V8DI 0 "nonimmediate_operand")
+        (match_operand:V8DI 1 "general_operand"))]
+  "TARGET_SIMD && reload_completed"
+  [(const_int 0)]
+{
+  if (register_operand (operands[0], V8DImode)
+      && register_operand (operands[1], V8DImode))
+    {
+      aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
+      DONE;
+    }
+  else if ((register_operand (operands[0], V8DImode)
+            && memory_operand (operands[1], V8DImode))
+           || (memory_operand (operands[0], V8DImode)
+            && register_operand (operands[1], V8DImode)))
+    {
+      for (int offset = 0; offset < 64; offset += 16)
+        emit_move_insn (simplify_gen_subreg (TImode, operands[0],
+                                             V8DImode, offset),
+                        simplify_gen_subreg (TImode, operands[1],
+                                             V8DImode, offset));
+      DONE;
+    }
+  else
+    FAIL;
+})
+
 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
   [(match_operand:VSTRUCT_QD 0 "register_operand")
    (match_operand:DI 1 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2792bb29adbbb5b3145b3f767615af8edbc30b08..affd2ec8d2c68c54024979dbc8aaf1b72cf0d32c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -310,6 +310,7 @@ extern unsigned aarch64_architecture_version;
 #define AARCH64_ISA_V8_R	   (aarch64_isa_flags & AARCH64_FL_V8_R)
 #define AARCH64_ISA_PAUTH	   (aarch64_isa_flags & AARCH64_FL_PAUTH)
 #define AARCH64_ISA_V9		   (aarch64_isa_flags & AARCH64_FL_V9)
+#define AARCH64_ISA_LS64	   (aarch64_isa_flags & AARCH64_FL_LS64)
 
 /* Crypto is an optional extension to AdvSIMD.  */
 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
@@ -401,6 +402,9 @@ extern unsigned aarch64_architecture_version;
 /* PAUTH instructions are enabled through +pauth.  */
 #define TARGET_PAUTH (AARCH64_ISA_PAUTH)
 
+/* LS64 instructions are enabled through +ls64.  */
+#define TARGET_LS64 (AARCH64_ISA_LS64)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index be24b7320d28deed9a19a0451c96bd67d2fb3104..e0ceba68968a28a9fcf1ba6e3a3036783b0931b0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10013,8 +10013,12 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	     instruction memory accesses.  */
 	  if (mode == TImode || mode == TFmode)
 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		    && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
-			|| offset_12bit_unsigned_scaled_p (mode, offset)));
+	            && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
+	            || offset_12bit_unsigned_scaled_p (mode, offset)));
+
+	  if (mode == V8DImode)
+	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
+	            && aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48));
 
 	  /* A 7bit offset check because OImode will emit a ldp/stp
 	     instruction (only big endian will get here).
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5297b2d3f95744ac72e36814c6676cc97478d48b..4fd53156206b5d8a6b8d09bc4e4f01d8b3453f10 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -187,6 +187,12 @@ (define_c_enum "unspec" [
     UNSPEC_LD2_LANE
     UNSPEC_LD3_LANE
     UNSPEC_LD4_LANE
+    UNSPEC_LD64B
+    UNSPEC_ST64B
+    UNSPEC_ST64BV
+    UNSPEC_ST64BV_RET
+    UNSPEC_ST64BV0
+    UNSPEC_ST64BV0_RET
     UNSPEC_MB
     UNSPEC_NOP
     UNSPEC_PACIA1716
@@ -7499,6 +7505,52 @@ (define_insn "stg"
   [(set_attr "type" "memtag")]
 )
 
+;; Load/Store 64-bit (LS64) instructions.
+(define_insn "ld64b"
+  [(set (match_operand:V8DI 0 "register_operand" "=r")
+        (unspec_volatile:V8DI
+          [(mem:V8DI (match_operand:DI 1 "register_operand" "r"))]
+            UNSPEC_LD64B)
+  )]
+  "TARGET_LS64"
+  "ld64b\\t%0, [%1]"
+  [(set_attr "type" "ls64")]
+)
+
+(define_insn "st64b"
+  [(set (mem:V8DI (match_operand:DI 0 "register_operand" "=r"))
+        (unspec_volatile:V8DI [(match_operand:V8DI 1 "register_operand" "r")]
+            UNSPEC_ST64B)
+  )]
+  "TARGET_LS64"
+  "st64b\\t%1, [%0]"
+  [(set_attr "type" "ls64")]
+)
+
+(define_insn "st64bv"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (unspec_volatile:DI [(const_int 0)] UNSPEC_ST64BV_RET))
+   (set (mem:V8DI (match_operand:DI 1 "register_operand" "r"))
+        (unspec_volatile:V8DI [(match_operand:V8DI 2 "register_operand" "r")]
+            UNSPEC_ST64BV)
+  )]
+  "TARGET_LS64"
+  "st64bv\\t%0, %2, [%1]"
+  [(set_attr "type" "ls64")]
+)
+
+(define_insn "st64bv0"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (unspec_volatile:DI [(const_int 0)] UNSPEC_ST64BV0_RET))
+   (set (mem:V8DI (match_operand:DI 1 "register_operand" "r"))
+        (unspec_volatile:V8DI [(match_operand:V8DI 2 "register_operand" "r")]
+            UNSPEC_ST64BV0)
+  )]
+  "TARGET_LS64"
+  "st64bv0\\t%0, %2, [%1]"
+  [(set_attr "type" "ls64")]
+)
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 13f23632474b260122f30a3c566033664b0b5963..030e343490f14e3e7e394e63bb4ab6df13cda177 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -214,6 +214,43 @@ __ttest (void)
 #pragma GCC pop_options
 #endif
 
+#ifdef __ARM_FEATURE_LS64
+#pragma GCC push_options
+#pragma GCC target ("+nothing+ls64")
+
+typedef __arm_data512_t data512_t;
+
+__extension__ extern __inline data512_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_ld64b (const void *__addr)
+{
+  return __builtin_aarch64_ld64b (__addr);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64b (void *__addr, data512_t __value)
+{
+  __builtin_aarch64_st64b (__addr, __value);
+}
+
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64bv (void *__addr, data512_t __value)
+{
+  return __builtin_aarch64_st64bv (__addr, __value);
+}
+
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64bv0 (void *__addr, data512_t __value)
+{
+  return __builtin_aarch64_st64bv0 (__addr, __value);
+}
+
+#pragma GCC pop_options
+#endif
+
 #pragma GCC push_options
 #pragma GCC target ("+nothing+rng")
 __extension__ extern __inline int
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index b9514dafb86a280bee3d3f84845e0743cd18a34d..6dce71fd27e5dfbd08746509bc6fdeeade69a4a4 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -1122,6 +1122,7 @@ (define_attr "type"
   coproc,\
   tme,\
   memtag,\
+  ls64,\
   mve_move,\
   mve_store,\
   mve_load"
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
new file mode 100644
index 0000000000000000000000000000000000000000..ba9960c9a3c40bb980342b58c11e44455696a75a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_asm.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+/* Inline assembly for LS64 instructions.  */
+
+#include <arm_acle.h>
+
+void
+ls64_load (data512_t *output, const void *addr)
+{
+    __asm__ volatile ("ld64b %0, [%1]"
+                      : "=r" (*output)
+                      : "r" (addr)
+                      : "memory");
+}
+
+/* { dg-final { scan-assembler-times {ld64b } 1 } } */
+
+void
+ls64_store (const data512_t *input, void *addr)
+{
+    __asm__ volatile ("st64b %1, [%0]"
+                      : /* No outputs.  */
+                      : "r" (addr), "r" (*input)
+                      : "memory");
+}
+
+/* { dg-final { scan-assembler-times {st64b } 1 } } */
+
+uint64_t
+ls64_store_v (const data512_t *input, void *addr)
+{
+    uint64_t status;
+    __asm__ volatile ("st64bv %0, %2, [%1]"
+                      : "=r" (status)
+                      : "r" (addr), "r" (*input)
+                      : "memory");
+    return status;
+}
+
+/* { dg-final { scan-assembler-times {st64bv } 1 } } */
+
+uint64_t
+ls64_store_v0 (const data512_t *input, void *addr)
+{
+    uint64_t status;
+    __asm__ volatile ("st64bv0 %0, %2, [%1]"
+                      : "=r" (status)
+                      : "r" (addr), "r" (*input)
+                      : "memory");
+    return status;
+}
+
+/* { dg-final { scan-assembler-times {st64bv0 } 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..2a94657a31afada0eb60252bbd3c9f3d43b8c879
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func (const void * addr) {
+    data512_t ret = __arm_ld64b (addr);
+}
+
+/* { dg-final { scan-assembler-times {ld64b\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-3.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..155ea401f23870e023866e3a92f75da66f4fa9fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(const void * addr, data512_t *data) {
+  *data = __arm_ld64b (addr);
+}
+
+/* { dg-final { scan-assembler-times {ld64b\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
new file mode 100644
index 0000000000000000000000000000000000000000..e3fc1411221e9a58051fa2147f8a744733e6c233
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld64b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+data512_t
+func(const void * addr) {
+  return __arm_ld64b (addr);
+}
+
+/* { dg-final { scan-assembler-times {ld64b\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld_st_o0.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld_st_o0.c
new file mode 100644
index 0000000000000000000000000000000000000000..550d75c8e0b4f4168b609c6c16554470ea7fc8c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_ld_st_o0.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O0" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+/* Make sure no issues when compile with -O0.  */
+
+data512_t
+func1 (const void * addr) {
+  return __arm_ld64b (addr);
+}
+
+void
+func2 (void *addr, data512_t value) {
+    __arm_st64b (addr, value);
+}
+
+uint64_t
+func3 (void *addr, data512_t value) {
+    return  __arm_st64bv (addr, value);
+}
+
+uint64_t
+func4 (void *addr, data512_t value) {
+    return __arm_st64bv0 (addr, value);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..bfd737b86c6fcaa1ef57f86afcfec1bdbf4f7422
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t *value) {
+    __arm_st64b (addr, *value);
+}
+
+/* { dg-final { scan-assembler-times {st64b\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
new file mode 100644
index 0000000000000000000000000000000000000000..75b91803eefb5024ca1bb0f5c62dd1381aeb5fdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64b.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t value) {
+    __arm_st64b (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64b\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3ef83e57eb8d710425da48c48b9328d52ecc9bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t value) {
+    __arm_st64bv (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-3.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..370db7960ec907a722c9e676494a0ee46e0b7d25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t *value) {
+    __arm_st64bv (addr, *value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
new file mode 100644
index 0000000000000000000000000000000000000000..52ef9c4593109c2547ff5f0d86379b04b2f4cd40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+uint64_t
+func(void *addr, data512_t value) {
+    return  __arm_st64bv (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..c49fa56d94075aa252a3299c80eeb8adac41f699
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t value) {
+    __arm_st64bv0 (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv0\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-3.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..af6917c795f8b96b1090eca7f06bdf5caaf16ed4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+void
+func(void *addr, data512_t *value) {
+    __arm_st64bv0 (addr, *value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv0\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
new file mode 100644
index 0000000000000000000000000000000000000000..bce10ae3653b7f6cd366c94a2b8dfa71fdfaca70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_st64bv0.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+ls64 -O2" } */
+
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+
+#include <arm_acle.h>
+
+uint64_t
+func(void *addr, data512_t value) {
+    return __arm_st64bv0 (addr, value);
+}
+
+/* { dg-final { scan-assembler-times {st64bv0\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
index 7244359ccfb9cbcbbd8285b050113c004a6af2a6..2d76bfc23dfdcd78a74ec0e4845a3bd8d110b010 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c
@@ -240,6 +240,20 @@
 #endif
 #pragma GCC pop_options
 
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.7-a")
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.7-a+ls64")
+#ifndef __ARM_FEATURE_LS64
+#error "__ARM_FEATURE_LS64 is not defined but should be!"
+#endif
+#pragma GCC pop_options
+
 #pragma GCC pop_options
 
 int

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
  2021-12-13 13:48   ` Przemyslaw Wirkus
@ 2021-12-14 11:58     ` Richard Sandiford
  2021-12-14 14:57       ` Przemyslaw Wirkus
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Sandiford @ 2021-12-14 11:58 UTC (permalink / raw)
  To: Przemyslaw Wirkus
  Cc: gcc-patches, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> Hello Richard,
>
> I've updated my patch following all your comments. Thank you.
>
> Boostrapped on aarch64-linux-gnu and all new ACLE tests pass.
>
> OK to install?

Thanks.  OK with a couple of formatting nits:

> @@ -2130,6 +2203,57 @@ aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
>      return target;
>  }
>  
> +/* Function to expand an expression EXP which calls one of the Load/Store
> +   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.  */
> +static rtx
> +aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
> +{
> +  expand_operand ops[3];
> +
> +  switch (fcode)
> +    {
> +    case AARCH64_LS64_BUILTIN_LD64B:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        create_output_operand (&ops[0], target, V8DImode);
> +        create_input_operand (&ops[1], op0, DImode);
> +        expand_insn (CODE_FOR_ld64b, 2, ops);
> +        return ops[0].value;
> +      }
> +    case AARCH64_LS64_BUILTIN_ST64B:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +        create_output_operand (&ops[0], op0, DImode);
> +        create_input_operand (&ops[1], op1, V8DImode);
> +        expand_insn (CODE_FOR_st64b, 2, ops);
> +        return const0_rtx;
> +      }
> +    case AARCH64_LS64_BUILTIN_ST64BV:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +        create_output_operand (&ops[0], target, DImode);
> +        create_input_operand (&ops[1], op0, DImode);
> +        create_input_operand (&ops[2], op1, V8DImode);
> +        expand_insn (CODE_FOR_st64bv, 3, ops);
> +        return ops[0].value;
> +      }
> +    case AARCH64_LS64_BUILTIN_ST64BV0:
> +      {
> +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +        create_output_operand (&ops[0], target, DImode);
> +        create_input_operand (&ops[1], op0, DImode);
> +        create_input_operand (&ops[2], op1, V8DImode);
> +        expand_insn (CODE_FOR_st64bv0, 3, ops);
> +        return ops[0].value;
> +      }
> +    }
> +
> +    gcc_unreachable ();

This line should be indented by 2 spaces rather than 4.

> +}
> +
>  /* Expand a random number builtin EXP with code FCODE, putting the result
>     int TARGET.  If IGNORE is true the return value is ignored.  */
>  
> […]
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index be24b7320d28deed9a19a0451c96bd67d2fb3104..e0ceba68968a28a9fcf1ba6e3a3036783b0931b0 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -10013,8 +10013,12 @@ aarch64_classify_address (struct aarch64_address_info *info,
>  	     instruction memory accesses.  */
>  	  if (mode == TImode || mode == TFmode)
>  	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> -		    && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
> -			|| offset_12bit_unsigned_scaled_p (mode, offset)));
> +	            && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
> +	            || offset_12bit_unsigned_scaled_p (mode, offset)));

The original formatting was correct here.

> +
> +	  if (mode == V8DImode)
> +	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> +	            && aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48));
>  
>  	  /* A 7bit offset check because OImode will emit a ldp/stp
>  	     instruction (only big endian will get here).

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
  2021-12-14 11:58     ` Richard Sandiford
@ 2021-12-14 14:57       ` Przemyslaw Wirkus
  0 siblings, 0 replies; 5+ messages in thread
From: Przemyslaw Wirkus @ 2021-12-14 14:57 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov

> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: 14 December 2021 11:58
> To: Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
> 
> Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> > Hello Richard,
> >
> > I've updated my patch following all your comments. Thank you.
> >
> > Boostrapped on aarch64-linux-gnu and all new ACLE tests pass.
> >
> > OK to install?
> 
> Thanks.  OK with a couple of formatting nits:

Updated and committed:

commit fdcddba8f29ea3878851b8b4cd37d0fd3476d3bf

Thank you!

> > @@ -2130,6 +2203,57 @@ aarch64_expand_builtin_tme (int fcode, tree
> exp, rtx target)
> >      return target;
> >  }
> >
> > +/* Function to expand an expression EXP which calls one of the
> Load/Store
> > +   64 Byte extension (LS64) builtins FCODE with the result going to
> > +TARGET.  */ static rtx
> > +aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target) {
> > +  expand_operand ops[3];
> > +
> > +  switch (fcode)
> > +    {
> > +    case AARCH64_LS64_BUILTIN_LD64B:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        create_output_operand (&ops[0], target, V8DImode);
> > +        create_input_operand (&ops[1], op0, DImode);
> > +        expand_insn (CODE_FOR_ld64b, 2, ops);
> > +        return ops[0].value;
> > +      }
> > +    case AARCH64_LS64_BUILTIN_ST64B:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +        create_output_operand (&ops[0], op0, DImode);
> > +        create_input_operand (&ops[1], op1, V8DImode);
> > +        expand_insn (CODE_FOR_st64b, 2, ops);
> > +        return const0_rtx;
> > +      }
> > +    case AARCH64_LS64_BUILTIN_ST64BV:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +        create_output_operand (&ops[0], target, DImode);
> > +        create_input_operand (&ops[1], op0, DImode);
> > +        create_input_operand (&ops[2], op1, V8DImode);
> > +        expand_insn (CODE_FOR_st64bv, 3, ops);
> > +        return ops[0].value;
> > +      }
> > +    case AARCH64_LS64_BUILTIN_ST64BV0:
> > +      {
> > +        rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +        rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +        create_output_operand (&ops[0], target, DImode);
> > +        create_input_operand (&ops[1], op0, DImode);
> > +        create_input_operand (&ops[2], op1, V8DImode);
> > +        expand_insn (CODE_FOR_st64bv0, 3, ops);
> > +        return ops[0].value;
> > +      }
> > +    }
> > +
> > +    gcc_unreachable ();
> 
> This line should be indented by 2 spaces rather than 4.
> 
> > +}
> > +
> >  /* Expand a random number builtin EXP with code FCODE, putting the
> result
> >     int TARGET.  If IGNORE is true the return value is ignored.  */
> >
> > […]
> > diff --git a/gcc/config/aarch64/aarch64.c
> > b/gcc/config/aarch64/aarch64.c index
> >
> be24b7320d28deed9a19a0451c96bd67d2fb3104..e0ceba68968a28a9fcf1ba6
> e3a30
> > 36783b0931b0 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -10013,8 +10013,12 @@ aarch64_classify_address (struct
> aarch64_address_info *info,
> >  	     instruction memory accesses.  */
> >  	  if (mode == TImode || mode == TFmode)
> >  	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> > -		    && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
> > -			|| offset_12bit_unsigned_scaled_p (mode, offset)));
> > +	            && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
> > +	            || offset_12bit_unsigned_scaled_p (mode, offset)));
> 
> The original formatting was correct here.
> 
> > +
> > +	  if (mode == V8DImode)
> > +	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> > +	            && aarch64_offset_7bit_signed_scaled_p (DImode, offset +
> > +48));
> >
> >  	  /* A 7bit offset check because OImode will emit a ldp/stp
> >  	     instruction (only big endian will get here).

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-12-14 14:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-11 22:39 [PATCH][GCC] aarch64: Add LS64 extension and intrinsics Przemyslaw Wirkus
2021-11-15 13:43 ` Richard Sandiford
2021-12-13 13:48   ` Przemyslaw Wirkus
2021-12-14 11:58     ` Richard Sandiford
2021-12-14 14:57       ` Przemyslaw Wirkus

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).