public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] Tweak prototypes of __atomic_fetch_*_capability
@ 2022-05-06 14:43 Matthew Malcomson
  0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-06 14:43 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ca03be2e923586ea336b5b78d8b58bd5dd825aa9

commit ca03be2e923586ea336b5b78d8b58bd5dd825aa9
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Mon Apr 25 18:51:44 2022 +0100

    Tweak prototypes of __atomic_fetch_*_capability
    
    At the function prototype level, the capability forms of the
    following functions treated “val” as a capability:
    
    - __atomic_fetch_<operation>(ptr, val, memorder)
    - __atomic_<operation>_fetch(ptr, val, memorder)
    - __sync_fetch_and_<operation>(ptr, val)
    - __sync_<operation>_and_fetch(ptr, val)
    
    However, these arguments are used to add to, subtract from,
    or otherwise do arithmetic on the capability data.  The type
    of the argument for __atomic_fetch_add should therefore be
    the type associated with “*ptr + val”, etc.
    
    The same argument applies to non-capability pointer types,
    and I think it's a bug that GCC (for all targets) accepts:
    
      int *foo(int **ptr, int *val) {
        return __atomic_fetch_add(ptr, val, __ATOMIC_ACQUIRE);
      }
    
    without warning.  Clang in contrast says:
    
      warning: incompatible pointer to integer conversion passing 'int *' to parameter of type 'long' [-Wint-conversion]
    
    But I think this becomes more important with capabilities,
    since if we view the functions as operations on two capabilities,
    we introduce ambiguity about provenance.  It also affects the
    type signature and ABI of any out-of-line implementations.
    
    This patch therefore makes the “val” arguments above have
    non-capability type, matching clang's behaviour.  The code
    generated for the functions already (correctly) dropped the
    capability from “val”, so this is purely a change to the prototype
    rather than a change to the underlying behaviour.
    
    At the same time, the patch optimises the implementation of the add
    functions so that they can use ADD Cn, Cn, Xn rather than a separate
    ADD + SCVALUE.  (In particular, the atomic operations are split
    after the combine pass has run, so we can't rely on combine to do
    the optimisation.)

Diff:
---
 gcc/builtin-types.def                              |   7 +
 gcc/builtins.c                                     | 283 +++++++------
 gcc/config/aarch64/aarch64.c                       | 110 ++---
 gcc/config/aarch64/atomics.md                      |  57 +--
 gcc/fortran/types.def                              |   7 +
 gcc/optabs.c                                       |  40 +-
 gcc/sync-builtins.def                              | 156 ++++---
 .../gcc.target/aarch64/morello/invalid-atomic-1.c  |  19 +
 .../gcc.target/aarch64/morello/invalid-sync-1.c    |  16 +
 .../morello/normal-base-atomic-operation-1.c       | 461 ++++++++++++++++++++
 .../morello/normal-base-atomic-operation-2.c       | 461 ++++++++++++++++++++
 .../morello/normal-base-atomic-operation-3.c       | 461 ++++++++++++++++++++
 .../morello/normal-base-atomic-operation-4.c       | 463 +++++++++++++++++++++
 .../morello/normal-base-atomic-operation-5.c       | 461 ++++++++++++++++++++
 .../morello/normal-base-atomic-operation-6.c       | 461 ++++++++++++++++++++
 .../aarch64/morello/normal-base-sync-operation-1.c | 462 ++++++++++++++++++++
 16 files changed, 3644 insertions(+), 281 deletions(-)

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index 2aff3e6a4c9..c2c472aa773 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -63,6 +63,9 @@ DEF_PRIMITIVE_TYPE (BT_INT, integer_type_node)
 DEF_PRIMITIVE_TYPE (BT_ICAP, intcap_type_node
 			     ? intcap_type_node
 			     : error_mark_node)
+DEF_PRIMITIVE_TYPE (BT_ICAPOFF, intcap_type_node
+				? TREE_TYPE (intcap_type_node)
+				: error_mark_node)
 DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node)
 DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node)
 DEF_PRIMITIVE_TYPE (BT_ULONG, long_unsigned_type_node)
@@ -341,6 +344,8 @@ DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
 		       BT_VOLATILE_PTR, BT_DTYPE)
 FOR_ALL_SYNC_N(DEF_SYNC)
 #undef DEF_SYNC
+DEF_FUNCTION_TYPE_2 (BT_FN_ICAP_VPTR_ICAPOFF, BT_ICAP, \
+		     BT_VOLATILE_PTR, BT_ICAPOFF)
 
 #define DEF_SYNC(DTYPE, BT_DTYPE) \
   DEF_FUNCTION_TYPE_2 (BT_FN_##DTYPE##_CONST_VPTR_INT, BT_DTYPE, \
@@ -540,6 +545,8 @@ FOR_ALL_SYNC_N (DEF_SYNC)
 		       BT_VOLATILE_PTR, BT_DTYPE, BT_INT)
 FOR_ALL_SYNC_N (DEF_SYNC)
 #undef DEF_SYNC
+DEF_FUNCTION_TYPE_3 (BT_FN_ICAP_VPTR_ICAPOFF_INT, BT_ICAP, \
+		     BT_VOLATILE_PTR, BT_ICAPOFF, BT_INT)
 
 #define DEF_SYNC(DTYPE, BT_DTYPE) \
   DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_##DTYPE##_INT, BT_VOID, \
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 0f6a277f8ae..3bfc350b2bc 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6776,12 +6776,13 @@ expand_expr_force_mode (tree exp, machine_mode mode)
    fetch_and_xxx form.  */
 
 static rtx
-expand_builtin_sync_operation (machine_mode mode, tree exp,
+expand_builtin_sync_operation (scalar_addr_mode mode, tree exp,
 			       enum rtx_code code, bool after,
 			       rtx target)
 {
   rtx val, mem;
   location_t loc = EXPR_LOCATION (exp);
+  auto op_mode = offset_mode (mode);
 
   if (code == NOT && warn_sync_nand)
     {
@@ -6817,7 +6818,7 @@ expand_builtin_sync_operation (machine_mode mode, tree exp,
 
   /* Expand the operands.  */
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
-  val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
+  val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), op_mode);
 
   return expand_atomic_fetch_op (target, mem, val, code, MEMMODEL_SYNC_SEQ_CST,
 				 after);
@@ -7229,7 +7230,7 @@ expand_builtin_atomic_store (machine_mode mode, tree exp)
    resolved to an instruction sequence.  */
 
 static rtx
-expand_builtin_atomic_fetch_op (machine_mode mode, tree exp, rtx target,
+expand_builtin_atomic_fetch_op (scalar_addr_mode mode, tree exp, rtx target,
 				enum rtx_code code, bool fetch_after,
 				bool ignore, enum built_in_function ext_call)
 {
@@ -7238,11 +7239,13 @@ expand_builtin_atomic_fetch_op (machine_mode mode, tree exp, rtx target,
   tree fndecl;
   tree addr;
 
+  auto op_mode = offset_mode (mode);
+
   model = get_memmodel (CALL_EXPR_ARG (exp, 2));
 
   /* Expand the operands.  */
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
-  val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
+  val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), op_mode);
 
   /* Only try generating instructions if inlining is turned on.  */
   if (flag_inline_atomics)
@@ -8639,88 +8642,120 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
       break;
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_FETCH_AND_ADD):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_ADD_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, PLUS, false, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_ADD_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, PLUS,
+						false, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_FETCH_AND_SUB):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_SUB_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, MINUS, false, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_SUB_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, MINUS,
+						false, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_FETCH_AND_OR):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_OR_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, IOR, false, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_OR_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, IOR,
+						false, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_FETCH_AND_AND):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_AND_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, AND, false, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_AND_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, AND,
+						false, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_FETCH_AND_XOR):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_XOR_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, XOR, false, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_XOR_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, XOR,
+						false, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_FETCH_AND_NAND):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_NAND_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, NOT, false, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_FETCH_AND_NAND_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, NOT,
+						false, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_ADD_AND_FETCH):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_ADD_AND_FETCH_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, PLUS, true, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_ADD_AND_FETCH_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, PLUS,
+						true, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_SUB_AND_FETCH):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_SUB_AND_FETCH_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, MINUS, true, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_SUB_AND_FETCH_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, MINUS,
+						true, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_OR_AND_FETCH):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_OR_AND_FETCH_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, IOR, true, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_OR_AND_FETCH_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, IOR, true, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_AND_AND_FETCH):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_AND_AND_FETCH_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, AND, true, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_AND_AND_FETCH_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, AND, true, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_XOR_AND_FETCH):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_XOR_AND_FETCH_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, XOR, true, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_XOR_AND_FETCH_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, XOR, true, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_NAND_AND_FETCH):
-      mode = builtin_sync_mode (BUILT_IN_SYNC_NAND_AND_FETCH_N, fcode);
-      target = expand_builtin_sync_operation (mode, exp, NOT, true, target);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_SYNC_NAND_AND_FETCH_N, fcode);
+	target = expand_builtin_sync_operation (smode, exp, NOT, true, target);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP):
       if (mode == VOIDmode)
@@ -8801,12 +8836,12 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_ADD_FETCH):
       {
-	mode = builtin_sync_mode (BUILT_IN_ATOMIC_ADD_FETCH_N, fcode);
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_ADD_FETCH_N, fcode);
 	enum built_in_function lib;
 	lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_ADD_1 + 
 				       (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1));
-	target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, true,
-						 ignore, lib);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, PLUS,
+						 true, ignore, lib);
 	if (target)
 	  return target;
 	break;
@@ -8814,11 +8849,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_SUB_FETCH):
       {
 	enum built_in_function lib;
-	mode = builtin_sync_mode (BUILT_IN_ATOMIC_SUB_FETCH_N, fcode);
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_SUB_FETCH_N, fcode);
 	lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SUB_1 + 
 				       (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1));
-	target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, true,
-						 ignore, lib);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, MINUS,
+						 true, ignore, lib);
 	if (target)
 	  return target;
 	break;
@@ -8826,11 +8861,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_AND_FETCH):
       {
 	enum built_in_function lib;
-	mode = builtin_sync_mode (BUILT_IN_ATOMIC_AND_FETCH_N, fcode);
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_AND_FETCH_N, fcode);
 	lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_AND_1 + 
 				       (fcode - BUILT_IN_ATOMIC_AND_FETCH_1));
-	target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, true,
-						 ignore, lib);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, AND,
+						 true, ignore, lib);
 	if (target)
 	  return target;
 	break;
@@ -8838,11 +8873,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_NAND_FETCH):
       {
 	enum built_in_function lib;
-	mode = builtin_sync_mode (BUILT_IN_ATOMIC_NAND_FETCH_N, fcode);
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_NAND_FETCH_N, fcode);
 	lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_NAND_1 + 
 				       (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1));
-	target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, true,
-						 ignore, lib);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, NOT,
+						 true, ignore, lib);
 	if (target)
 	  return target;
 	break;
@@ -8850,11 +8885,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_XOR_FETCH):
       {
 	enum built_in_function lib;
-	mode = builtin_sync_mode (BUILT_IN_ATOMIC_XOR_FETCH_N, fcode);
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_XOR_FETCH_N, fcode);
 	lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_XOR_1 + 
 				       (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1));
-	target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, true,
-						 ignore, lib);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, XOR,
+						 true, ignore, lib);
 	if (target)
 	  return target;
 	break;
@@ -8862,62 +8897,74 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_OR_FETCH):
       {
 	enum built_in_function lib;
-	mode = builtin_sync_mode (BUILT_IN_ATOMIC_OR_FETCH_N, fcode);
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_OR_FETCH_N, fcode);
 	lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_OR_1 + 
 				       (fcode - BUILT_IN_ATOMIC_OR_FETCH_1));
-	target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, true,
-						 ignore, lib);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, IOR,
+						 true, ignore, lib);
 	if (target)
 	  return target;
 	break;
       }
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_FETCH_ADD):
-      mode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_ADD_N, fcode);
-      target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, false,
-					       ignore, BUILT_IN_NONE);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_ADD_N, fcode);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, PLUS,
+						 false, ignore, BUILT_IN_NONE);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_FETCH_SUB):
-      mode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_SUB_N, fcode);
-      target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, false,
-					       ignore, BUILT_IN_NONE);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_SUB_N, fcode);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, MINUS,
+						 false, ignore, BUILT_IN_NONE);
+	if (target)
+	  return target;
+	break;
+      }
 
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_FETCH_AND):
-      mode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_AND_N, fcode);
-      target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, false,
-					       ignore, BUILT_IN_NONE);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_AND_N, fcode);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, AND,
+						 false, ignore, BUILT_IN_NONE);
+	if (target)
+	  return target;
+	break;
+      }
   
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_FETCH_NAND):
-      mode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_NAND_N, fcode);
-      target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, false,
-					       ignore, BUILT_IN_NONE);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_NAND_N, fcode);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, NOT,
+						 false, ignore, BUILT_IN_NONE);
+	if (target)
+	  return target;
+	break;
+      }
  
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_FETCH_XOR):
-      mode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_XOR_N, fcode);
-      target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, false,
-					       ignore, BUILT_IN_NONE);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_XOR_N, fcode);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, XOR,
+						 false, ignore, BUILT_IN_NONE);
+	if (target)
+	  return target;
+	break;
+      }
  
     CASE_SYNC_BUILTIN_ALL_N (BUILT_IN_ATOMIC_FETCH_OR):
-      mode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_OR_N, fcode);
-      target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, false,
-					       ignore, BUILT_IN_NONE);
-      if (target)
-	return target;
-      break;
+      {
+	auto smode = builtin_sync_mode (BUILT_IN_ATOMIC_FETCH_OR_N, fcode);
+	target = expand_builtin_atomic_fetch_op (smode, exp, target, IOR,
+						 false, ignore, BUILT_IN_NONE);
+	if (target)
+	  return target;
+	break;
+      }
 
     case BUILT_IN_ATOMIC_TEST_AND_SET:
       return expand_builtin_atomic_test_and_set (exp, target);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c143e379d84..4afa004ec1c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21035,7 +21035,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   /* Split after prolog/epilog to avoid interactions with shrinkwrapping.  */
   gcc_assert (epilogue_completed);
 
-  machine_mode mode = GET_MODE (mem);
+  auto mode = as_a<scalar_addr_mode> (GET_MODE (mem));
   machine_mode arith_mode, subreg_mode;
   switch (mode)
     {
@@ -21067,74 +21067,74 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
     old_out = gen_lowpart (subreg_mode, old_out);
   else
     old_out = new_out;
-  value = simplify_gen_subreg (subreg_mode, value, mode, 0);
 
   /* The initial load can be relaxed for a __sync operation since a final
      barrier will be emitted to stop code hoisting.  */
- if (is_sync)
+  if (is_sync)
     aarch64_emit_load_exclusive (mode, old_out, mem,
 				 GEN_INT (MEMMODEL_RELAXED));
   else
     aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
 
-  switch (code)
+  if (code == SET)
+    new_out = value;
+  else
     {
-    case SET:
-	new_out = value;
-      break;
-
-    case NOT:
-      value = drop_capability (value);
-      if (CAPABILITY_MODE_P (mode))
-	{
-	  /* For Morello we require a temporary DImode register so that we can
-	  then REPLACE_ADDRESS_VALUE the OP result into new_out.  */
-	  x = gen_rtx_AND (arith_mode, drop_capability (old_out), value);
-	  emit_insn (gen_rtx_SET (tmp_reg, x));
-	  x = gen_rtx_NOT (arith_mode, tmp_reg);
-	  emit_insn (gen_rtx_SET (tmp_reg, x));
-	  emit_insn (gen_replace_address_value_cadi (new_out, old_out,
-						     tmp_reg));
-	}
-      else
+      value = simplify_gen_subreg (arith_mode, value, offset_mode (mode), 0);
+      switch (code)
 	{
-	  x = gen_rtx_AND (arith_mode, old_out, value);
-	  emit_insn (gen_rtx_SET (new_out, x));
-	  x = gen_rtx_NOT (arith_mode, new_out);
-	  emit_insn (gen_rtx_SET (new_out, x));
-	}
-      break;
+	case NOT:
+	  if (CAPABILITY_MODE_P (mode))
+	    {
+	      /* For Morello we require a temporary DImode register so that we can
+	      then REPLACE_ADDRESS_VALUE the OP result into new_out.  */
+	      x = gen_rtx_AND (arith_mode, drop_capability (old_out), value);
+	      emit_insn (gen_rtx_SET (tmp_reg, x));
+	      x = gen_rtx_NOT (arith_mode, tmp_reg);
+	      emit_insn (gen_rtx_SET (tmp_reg, x));
+	      emit_insn (gen_replace_address_value_cadi (new_out, old_out,
+							 tmp_reg));
+	    }
+	  else
+	    {
+	      x = gen_rtx_AND (arith_mode, old_out, value);
+	      emit_insn (gen_rtx_SET (new_out, x));
+	      x = gen_rtx_NOT (arith_mode, new_out);
+	      emit_insn (gen_rtx_SET (new_out, x));
+	    }
+	  break;
 
-    case MINUS:
-      value = drop_capability (value);
-      if (CONST_INT_P (value))
-	{
-	  value = GEN_INT (-INTVAL (value));
-	  code = PLUS;
-	}
-      /* Fall through.  */
-    default:
-      value = drop_capability (value);
-      if (CAPABILITY_MODE_P (mode))
-	{
-	  /* For Morello we require a temporary DImode register so that we can
-	  then REPLACE_ADDRESS_VALUE the OP result into new_out.  */
-	  x = gen_rtx_fmt_ee (code, arith_mode, drop_capability (old_out),
-			      value);
-	  emit_insn (gen_rtx_SET (tmp_reg, x));
-	  emit_insn (gen_replace_address_value_cadi (new_out, old_out,
-						     tmp_reg));
-	}
-      else
-	{
-	  x = gen_rtx_fmt_ee (code, arith_mode, old_out, value);
-	  emit_insn (gen_rtx_SET (new_out, x));
+	case MINUS:
+	  if (CONST_INT_P (value))
+	    {
+	      value = GEN_INT (-INTVAL (value));
+	      code = PLUS;
+	    }
+	  /* Fall through.  */
+	default:
+	  if (CAPABILITY_MODE_P (mode) && code == PLUS)
+	    emit_insn (gen_pointer_plus_cadi (new_out, old_out, value));
+	  else if (CAPABILITY_MODE_P (mode))
+	    {
+	      /* For Morello we require a temporary DImode register so that we can
+	      then REPLACE_ADDRESS_VALUE the OP result into new_out.  */
+	      x = gen_rtx_fmt_ee (code, arith_mode, drop_capability (old_out),
+				  value);
+	      emit_insn (gen_rtx_SET (tmp_reg, x));
+	      emit_insn (gen_replace_address_value_cadi (new_out, old_out,
+							 tmp_reg));
+	    }
+	  else
+	    {
+	      x = gen_rtx_fmt_ee (code, arith_mode, old_out, value);
+	      emit_insn (gen_rtx_SET (new_out, x));
+	    }
+	    break;
 	}
-	break;
+      new_out = gen_lowpart (mode, new_out);
     }
 
-  aarch64_emit_store_exclusive (mode, cond, mem,
-				gen_lowpart (mode, new_out), model_rtx);
+  aarch64_emit_store_exclusive (mode, cond, mem, new_out, model_rtx);
 
   if (aarch64_track_speculation)
     {
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index f3a220f806c..79e48cd9f64 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -344,7 +344,7 @@
 (define_expand "atomic_<atomic_optab>cadi"
  [(match_operand:CADI 0 "aarch64_sync_memory_operand")
   (atomic_op:CADI
-   (match_operand:CADI 1 "<atomic_op_operand>")
+   (match_operand:DI 1 "<atomic_op_operand>")
    (match_operand:SI 2 "const_int_operand"))]
   ""
   {
@@ -355,14 +355,12 @@
       {
 	gcc_assert (TARGET_CAPABILITY_FAKE);
 	const atomic_ool_names *names;
-	rtx temp = drop_capability (operands[1]);
 
 	switch (<CODE>)
 	  {
 	  case MINUS:
-	    temp = expand_simple_unop (DImode, NEG, temp, NULL_RTX, 1);
-	    operands[1] = expand_replace_address_value (CADImode, operands[1],
-							temp, NULL_RTX);
+	    operands[1] = expand_simple_unop (DImode, NEG, operands[1],
+					      NULL_RTX, 1);
 	    /* fallthru */
 	  case PLUS:
 	    names = &aarch64_ool_ldadd_names;
@@ -374,9 +372,8 @@
 	    names = &aarch64_ool_ldeor_names;
 	    break;
 	  case AND:
-	    temp = expand_simple_unop (DImode, NOT, temp, NULL_RTX, 1);
-	    operands[1] = expand_replace_address_value (CADImode, operands[1],
-							temp, NULL_RTX);
+	    operands[1] = expand_simple_unop (DImode, NOT, operands[1],
+					      NULL_RTX, 1);
 	    names = &aarch64_ool_ldclr_names;
 	    break;
 	  default:
@@ -384,7 +381,7 @@
 	  }
 	rtx func = aarch64_atomic_ool_func (CADImode, operands[2], names);
 	emit_library_call_value (func, NULL_RTX, LCT_NORMAL, CADImode,
-				 operands[1], CADImode,
+				 operands[1], DImode,
 				 XEXP (operands[0], 0), Pmode);
         DONE;
       }
@@ -420,7 +417,7 @@
 (define_insn_and_split "aarch64_atomic_<atomic_capoptab>cadi"
 [(replace_address_value:CADI (match_operand:CADI 0 "aarch64_sync_memory_operand" "+Q")
    (unspec_volatile:DI
-    [(match_operand:CADI 1 "<atomic_capop_operand>" "r<atomic_capopconst_operand>")
+    [(match_operand:DI 1 "<atomic_capop_operand>" "r<atomic_capopconst_operand>")
      (match_operand:SI 2 "const_int_operand")]
     ATOMIC_CAPOP))
   (clobber (reg:CC CC_REGNUM))
@@ -499,7 +496,7 @@
   [(replace_address_value:CADI (match_operand:CADI 0 "aarch64_sync_memory_operand" "+Q")
     (unspec_volatile:DI
       [(match_dup 0)
-       (match_operand:CADI 1 "aarch64_logical_operand" "rL")
+       (match_operand:DI 1 "aarch64_logical_operand" "rL")
        (match_operand:SI 2 "const_int_operand")]		;; model
       UNSPECV_ATOMIC_CAPABILITY_NAND))
    (clobber (reg:CC CC_REGNUM))
@@ -604,7 +601,7 @@
  [(match_operand:CADI 0 "register_operand")
   (match_operand:CADI 1 "aarch64_sync_memory_operand")
   (atomic_op:CADI
-   (match_operand:CADI 2 "<atomic_op_operand>")
+   (match_operand:DI 2 "<atomic_op_operand>")
    (match_operand:SI 3 "const_int_operand"))]
  ""
 {
@@ -615,14 +612,11 @@
     {
       gcc_assert (TARGET_CAPABILITY_FAKE);
       const atomic_ool_names *names;
-      rtx tmp = gen_reg_rtx (DImode);
       switch (<CODE>)
 	{
 	case MINUS:
-	  tmp = expand_simple_unop (DImode, NEG, drop_capability (operands[2]),
-				    NULL_RTX, 1);
-	  operands[2] = expand_replace_address_value (CADImode, operands[2],
-						      tmp, NULL_RTX);
+	  operands[2] = expand_simple_unop (DImode, NEG, operands[2],
+					    NULL_RTX, 1);
 	  /* fallthru */
 	case PLUS:
 	  names = &aarch64_ool_ldadd_names;
@@ -634,10 +628,8 @@
 	  names = &aarch64_ool_ldeor_names;
 	  break;
 	case AND:
-	  tmp = expand_simple_unop (DImode, NOT, drop_capability (operands[2]),
-				    NULL_RTX, 1);
-	  operands[2] = expand_replace_address_value (CADImode, operands[2],
-						      tmp, NULL_RTX);
+	  operands[2] = expand_simple_unop (DImode, NOT, operands[2],
+					    NULL_RTX, 1);
 	  names = &aarch64_ool_ldclr_names;
 	  break;
 	default:
@@ -645,7 +637,7 @@
 	}
       rtx func = aarch64_atomic_ool_func (CADImode, operands[3], names);
       rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL,
-					  CADImode, operands[2], CADImode,
+					  CADImode, operands[2], DImode,
 					  XEXP (operands[1], 0), Pmode);
       emit_move_insn (operands[0], rval);
       DONE;
@@ -687,7 +679,7 @@
     (match_dup 1)
     (unspec_volatile:DI
       [(match_dup 1)
-       (match_operand:CADI 2 "<atomic_capop_operand>" "r<atomic_capopconst_operand>")
+       (match_operand:DI 2 "<atomic_capop_operand>" "r<atomic_capopconst_operand>")
        (match_operand:SI 3 "const_int_operand")]		;; model
       ATOMIC_CAPOP))
    (clobber (reg:CC CC_REGNUM))
@@ -759,7 +751,7 @@
    (replace_address_value:CADI (match_dup 1)
      (unspec_volatile:DI
        [(match_dup 1)
-	(match_operand: CADI 2 "aarch64_logical_operand" "rL")
+	(match_operand:DI 2 "aarch64_logical_operand" "rL")
 	(match_operand:SI 3 "const_int_operand")]		;; model
       UNSPECV_ATOMIC_CAPABILITY_NAND))
    (clobber (reg:CC CC_REGNUM))
@@ -784,7 +776,7 @@
  [(match_operand:ALLIC 0 "register_operand")
   (atomic_op:ALLIC
    (match_operand:ALLIC 1 "aarch64_sync_memory_operand")
-   (match_operand:ALLIC 2 "<atomic_op_operand>"))
+   (match_operand:<PTR_OFF> 2 "<atomic_op_operand>"))
   (match_operand:SI 3 "const_int_operand")]
  ""
 {
@@ -794,7 +786,7 @@
        && !(TARGET_MORELLO && CAPABILITY_MODE_P (<MODE>mode)))
     {
       rtx tmp = gen_reg_rtx (<MODE>mode);
-      operands[2] = force_reg (<MODE>mode, operands[2]);
+      operands[2] = force_reg (<PTR_OFF>mode, operands[2]);
       emit_insn (gen_atomic_fetch_<atomic_optab><mode>
                  (tmp, operands[1], operands[2], operands[3]));
 
@@ -803,20 +795,17 @@
 	  switch (<CODE>)
 	    {
 	    case PLUS:
-	      tmp = expand_pointer_plus (<MODE>mode, tmp,
-					 drop_capability (operands[2]),
+	      tmp = expand_pointer_plus (<MODE>mode, tmp, operands[2],
 					 operands[0], 1, OPTAB_WIDEN);
 	      break;
 	    case MINUS:
-	      tmp = expand_pointer_minus (<MODE>mode, tmp,
-					  drop_capability (operands[2]),
+	      tmp = expand_pointer_minus (<MODE>mode, tmp, operands[2],
 					  operands[0], 1, OPTAB_WIDEN);
 	      break;
 	    default:
 	      tmp = expand_simple_binop (noncapability_mode (<MODE>mode),
 					 <CODE>, drop_capability (tmp),
-					 drop_capability (operands[2]),
-					 tmp, 1, OPTAB_WIDEN);
+					 operands[2], tmp, 1, OPTAB_WIDEN);
 	      tmp = expand_replace_address_value (as_a <scalar_addr_mode>
 						    (<MODE>mode),
 						  operands[0], tmp,
@@ -865,7 +854,7 @@
   [(replace_address_value:CADI (match_operand:CADI 0 "register_operand" "=&r")
     (unspec_volatile:DI
       [(match_operand:CADI 1 "aarch64_sync_memory_operand" "+Q")
-      (match_operand:CADI 2 "<atomic_capop_operand>" "r<atomic_capopconst_operand>")]
+      (match_operand:DI 2 "<atomic_capop_operand>" "r<atomic_capopconst_operand>")]
       ATOMIC_CAPOP))
    (set (match_dup 1)
     (unspec_volatile:CADI
@@ -915,7 +904,7 @@
   [(replace_address_value:CADI (match_operand:CADI 0 "register_operand" "=&r")
     (unspec_volatile:DI
 	[(match_operand:CADI 1 "aarch64_sync_memory_operand" "+Q")
-	 (match_operand:CADI 2 "aarch64_logical_operand" "rL")]
+	 (match_operand:DI 2 "aarch64_logical_operand" "rL")]
 	UNSPECV_ATOMIC_CAPABILITY_NAND))
    (set (match_dup 1)
     (unspec_volatile:CADI
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index a770291eb73..820badbf914 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -52,6 +52,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node)
 DEF_PRIMITIVE_TYPE (BT_ICAP, intcap_type_node
 			     ? intcap_type_node
 			     : error_mark_node)
+DEF_PRIMITIVE_TYPE (BT_ICAPOFF, intcap_type_node
+				? TREE_TYPE (intcap_type_node)
+				: error_mark_node)
 DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node)
 DEF_PRIMITIVE_TYPE (BT_ULONGLONG, long_long_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
@@ -107,6 +110,8 @@ DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
 		       BT_VOLATILE_PTR, BT_DTYPE)
 FOR_ALL_SYNC_N(DEF_SYNC)
 #undef DEF_SYNC
+DEF_FUNCTION_TYPE_2 (BT_FN_ICAP_VPTR_ICAPOFF, BT_ICAP, \
+		     BT_VOLATILE_PTR, BT_ICAPOFF)
 
 #define DEF_SYNC(DTYPE, BT_DTYPE) \
   DEF_FUNCTION_TYPE_2 (BT_FN_##DTYPE##_CONST_VPTR_INT, BT_DTYPE, \
@@ -145,6 +150,8 @@ FOR_ALL_SYNC_N (DEF_SYNC)
 		       BT_VOLATILE_PTR, BT_DTYPE, BT_INT)
 FOR_ALL_SYNC_N (DEF_SYNC)
 #undef DEF_SYNC
+DEF_FUNCTION_TYPE_3 (BT_FN_ICAP_VPTR_ICAPOFF_INT, BT_ICAP, \
+		     BT_VOLATILE_PTR, BT_ICAPOFF, BT_INT)
 
 #define DEF_SYNC(DTYPE, BT_DTYPE) \
   DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_##DTYPE##_INT, BT_VOID, \
diff --git a/gcc/optabs.c b/gcc/optabs.c
index b85f77c853d..e7fd67a2833 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6839,7 +6839,8 @@ maybe_optimize_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
 {
   /* If the value is prefetched, or not used, it may be possible to replace
      the sequence with a native exchange operation.  */
-  if (!after || target == const0_rtx)
+  if ((!after || target == const0_rtx)
+      && !CAPABILITY_MODE_P (GET_MODE (mem)))
     {
       /* fetch_and (&x, 0, m) can be replaced with exchange (&x, 0, m).  */
       if (code == AND && val == const0_rtx)
@@ -6874,7 +6875,8 @@ static rtx
 maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
 	       rtx val, bool use_memmodel, enum memmodel model, bool after)
 {
-  machine_mode mode = GET_MODE (mem);
+  auto mode = as_a<scalar_addr_mode> (GET_MODE (mem));
+  auto op_mode = offset_mode (mode);
   class expand_operand ops[4];
   enum insn_code icode;
   int op_counter = 0;
@@ -6918,7 +6920,7 @@ maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
 
   create_fixed_operand (&ops[op_counter++], mem);
   /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
-  create_convert_operand_to (&ops[op_counter++], val, mode, true);
+  create_convert_operand_to (&ops[op_counter++], val, op_mode, true);
 
   if (maybe_expand_insn (icode, num_ops, ops))
     return (target == const0_rtx ? const0_rtx : ops[0].value);
@@ -6944,7 +6946,8 @@ expand_atomic_fetch_op_no_fallback (rtx target, rtx mem, rtx val,
 				    enum rtx_code code, enum memmodel model,
 				    bool after)
 {
-  machine_mode mode = GET_MODE (mem);
+  auto mode = as_a<scalar_addr_mode> (GET_MODE (mem));
+  auto op_mode = offset_mode (mode);
   struct atomic_op_functions optab;
   rtx result;
   bool unused_result = (target == const0_rtx);
@@ -7002,15 +7005,30 @@ expand_atomic_fetch_op_no_fallback (rtx target, rtx mem, rtx val,
 	     Fetch_before == after REVERSE_OP val.  */
 	  if (!after)
 	    code = optab.reverse_code;
-	  if (code == NOT)
+
+	  if (CAPABILITY_MODE_P (mode) && code == PLUS)
+	    result = expand_pointer_plus (mode, result, val, target,
+					  true, OPTAB_LIB_WIDEN);
+	  else
 	    {
-	      result = expand_simple_binop (mode, AND, result, val, NULL_RTX,
-					    true, OPTAB_LIB_WIDEN);
-	      result = expand_simple_unop (mode, NOT, result, target, true);
+	      rtx initial = result;
+	      rtx subtarget = mode == op_mode ? target : NULL_RTX;
+	      if (code == NOT)
+		{
+		  result = expand_simple_binop (op_mode, AND, result, val,
+						NULL_RTX, true,
+						OPTAB_LIB_WIDEN);
+		  result = expand_simple_unop (op_mode, NOT, result,
+					       subtarget, true);
+		}
+	      else
+		result = expand_simple_binop (op_mode, code, result, val,
+					      subtarget, true,
+					      OPTAB_LIB_WIDEN);
+	      if (mode != op_mode)
+		result = expand_replace_address_value (mode, initial,
+						       result, target);
 	    }
-	  else
-	    result = expand_simple_binop (mode, code, result, val, target,
-					  true, OPTAB_LIB_WIDEN);
 	  return result;
 	}
     }
diff --git a/gcc/sync-builtins.def b/gcc/sync-builtins.def
index d23b344e291..483530f1316 100644
--- a/gcc/sync-builtins.def
+++ b/gcc/sync-builtins.def
@@ -32,25 +32,28 @@ along with GCC; see the file COPYING3.  If not see
    - I4
    - I8
    - I16
-   - ICAP (associated with intcap_t, so includes a tag bit).  */
+   - ICAP (associated with intcap_t, so includes a tag bit)
+
+   DOFF is the non-capability form of DSIZE, so that DOFF is ICAPOFF
+   when DSIZE is ICAP.  */
 
 /* Generate DSIZE-specific functions for ENUM and NAME by invoking:
 
-     DEF (ENUM', NAME', DSIZE)
+     DEF (ENUM', NAME', DSIZE, DOFF)
 
    for each available non-capability DSIZE.  The order of these definitions is
    important, since some code adds to or subtracts from the enum value.  */
 #define FOR_NONCAP_SYNC_N(ENUM, NAME, DEF) \
-  DEF (ENUM##_1, NAME "_1", I1) \
-  DEF (ENUM##_2, NAME "_2", I2) \
-  DEF (ENUM##_4, NAME "_4", I4) \
-  DEF (ENUM##_8, NAME "_8", I8) \
-  DEF (ENUM##_16, NAME "_16", I16)
+  DEF (ENUM##_1, NAME "_1", I1, I1) \
+  DEF (ENUM##_2, NAME "_2", I2, I2) \
+  DEF (ENUM##_4, NAME "_4", I4, I4) \
+  DEF (ENUM##_8, NAME "_8", I8, I8) \
+  DEF (ENUM##_16, NAME "_16", I16, I16)
 
 /* As above, but include ICAP.  */
 #define FOR_ALL_SYNC_N(ENUM, NAME, DEF) \
   FOR_NONCAP_SYNC_N (ENUM, NAME, DEF) \
-  DEF (ENUM##_CAPABILITY, NAME "_capability", ICAP)
+  DEF (ENUM##_CAPABILITY, NAME "_capability", ICAP, ICAPOFF)
 
 /* Case statements for everything defined by FOR_NONCAP_SYNC_N.  */
 #define CASE_SYNC_BUILTIN_NONCAP_N(ENUM) \
@@ -100,7 +103,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* DSIZE NAME (volatile void *ptr, DSIZE val).  */
 
-#define DEF_SYNC_BUILTIN_RMW_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_RMW_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_##DSIZE##_VPTR_##DSIZE, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -111,7 +114,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* DSIZE NAME (volatile void *ptr, DSIZE val, int memorder).  */
 
-#define DEF_SYNC_BUILTIN_RMW_ORDER_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_RMW_ORDER_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_##DSIZE##_VPTR_##DSIZE##_INT, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -120,9 +123,31 @@ along with GCC; see the file COPYING3.  If not see
 		    ATTR_NOTHROWCALL_LEAF_LIST) \
   FOR_ALL_SYNC_N (ENUM, NAME, DEF_SYNC_BUILTIN_RMW_ORDER_N)
 
+/* DSIZE NAME (volatile void *ptr, DOFF val).  */
+
+#define DEF_SYNC_BUILTIN_RMW_OFF_N(ENUM, NAME, DSIZE, DOFF) \
+  DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_##DSIZE##_VPTR_##DOFF, \
+		    ATTR_NOTHROWCALL_LEAF_LIST)
+
+#define DEF_SYNC_BUILTIN_RMW_OFF_ALL_N(ENUM, NAME) \
+  DEF_SYNC_BUILTIN (ENUM##_N, NAME, BT_FN_VOID_VAR, \
+		    ATTR_NOTHROWCALL_LEAF_LIST) \
+  FOR_ALL_SYNC_N (ENUM, NAME, DEF_SYNC_BUILTIN_RMW_OFF_N)
+
+/* DSIZE NAME (volatile void *ptr, DOFF val, int memorder).  */
+
+#define DEF_SYNC_BUILTIN_RMW_OFF_ORDER_N(ENUM, NAME, DSIZE, DOFF) \
+  DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_##DSIZE##_VPTR_##DOFF##_INT, \
+		    ATTR_NOTHROWCALL_LEAF_LIST)
+
+#define DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N(ENUM, NAME) \
+  DEF_SYNC_BUILTIN (ENUM##_N, NAME, BT_FN_VOID_VAR, \
+		    ATTR_NOTHROWCALL_LEAF_LIST) \
+  FOR_ALL_SYNC_N (ENUM, NAME, DEF_SYNC_BUILTIN_RMW_OFF_ORDER_N)
+
 /* DSIZE NAME (const volatile void *ptr, int memorder).  */
 
-#define DEF_SYNC_BUILTIN_LOAD_ORDER_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_LOAD_ORDER_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_##DSIZE##_CONST_VPTR_INT, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -133,7 +158,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* void NAME (volatile void *ptr, DSIZE val, int memorder).  */
 
-#define DEF_SYNC_BUILTIN_STORE_ORDER_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_STORE_ORDER_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_VOID_VPTR_##DSIZE##_INT, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -144,7 +169,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* void NAME (volatile void *ptr).  */
 
-#define DEF_SYNC_BUILTIN_RELEASE_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_RELEASE_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_VOID_VPTR, ATTR_NOTHROWCALL_LEAF_LIST)
 
 #define DEF_SYNC_BUILTIN_RELEASE_NONCAP_N(ENUM, NAME) \
@@ -154,7 +179,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* bool NAME (volatile void *ptr, DSIZE oldval, DSIZE newval).  */
 
-#define DEF_SYNC_BUILTIN_BOOL_CMP_SWAP_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_BOOL_CMP_SWAP_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_BOOL_VPTR_##DSIZE##_##DSIZE, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -165,7 +190,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* DSIZE NAME (volatile void *ptr, DSIZE oldval, DSIZE newval).  */
 
-#define DEF_SYNC_BUILTIN_VAL_CMP_SWAP_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_VAL_CMP_SWAP_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_##DSIZE##_VPTR_##DSIZE##_##DSIZE, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -177,7 +202,7 @@ along with GCC; see the file COPYING3.  If not see
 /* bool NAME (volatile void *ptr, void *expected, DSIZE desired, bool weak,
 	      int success_memorder, int failure_memorder).  */
 
-#define DEF_SYNC_BUILTIN_CMP_XCHG_ORDER_N(ENUM, NAME, DSIZE) \
+#define DEF_SYNC_BUILTIN_CMP_XCHG_ORDER_N(ENUM, NAME, DSIZE, DOFF) \
   DEF_SYNC_BUILTIN (ENUM, NAME, BT_FN_BOOL_VPTR_PTR_##DSIZE##_BOOL_INT_INT, \
 		    ATTR_NOTHROWCALL_LEAF_LIST)
 
@@ -190,40 +215,41 @@ along with GCC; see the file COPYING3.  If not see
    is supposed to be using.  It's overloaded, and is resolved to one of the
    "_1" through "_16" versions, plus some extra casts.  */
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_FETCH_AND_ADD,
-			    "__sync_fetch_and_add")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_FETCH_AND_ADD,
+				"__sync_fetch_and_add")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_FETCH_AND_SUB,
-			    "__sync_fetch_and_sub")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_FETCH_AND_SUB,
+				"__sync_fetch_and_sub")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_FETCH_AND_OR,
-			    "__sync_fetch_and_or")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_FETCH_AND_OR,
+				"__sync_fetch_and_or")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_FETCH_AND_AND,
-			    "__sync_fetch_and_and")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_FETCH_AND_AND,
+				"__sync_fetch_and_and")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_FETCH_AND_XOR,
-			    "__sync_fetch_and_xor")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_FETCH_AND_XOR,
+				"__sync_fetch_and_xor")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_FETCH_AND_NAND,
-			    "__sync_fetch_and_nand")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_FETCH_AND_NAND,
+				"__sync_fetch_and_nand")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_ADD_AND_FETCH,
-			    "__sync_add_and_fetch")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_ADD_AND_FETCH,
+				"__sync_add_and_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_SUB_AND_FETCH,
-			    "__sync_sub_and_fetch")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_SUB_AND_FETCH,
+				"__sync_sub_and_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_OR_AND_FETCH, "__sync_or_and_fetch")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_OR_AND_FETCH,
+				"__sync_or_and_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_AND_AND_FETCH,
-			    "__sync_and_and_fetch")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_AND_AND_FETCH,
+				"__sync_and_and_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_XOR_AND_FETCH,
-			    "__sync_xor_and_fetch")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_XOR_AND_FETCH,
+				"__sync_xor_and_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ALL_N (BUILT_IN_SYNC_NAND_AND_FETCH,
-			    "__sync_nand_and_fetch")
+DEF_SYNC_BUILTIN_RMW_OFF_ALL_N (BUILT_IN_SYNC_NAND_AND_FETCH,
+				"__sync_nand_and_fetch")
 
 DEF_SYNC_BUILTIN_BOOL_CMP_SWAP_ALL_N (BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP,
 				      "__sync_bool_compare_and_swap")
@@ -273,41 +299,41 @@ DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE,
 DEF_SYNC_BUILTIN_STORE_ORDER_ALL_N (BUILT_IN_ATOMIC_STORE,
 				    "__atomic_store", "_n")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_ADD_FETCH,
-				  "__atomic_add_fetch", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_ADD_FETCH,
+				      "__atomic_add_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_SUB_FETCH,
-				  "__atomic_sub_fetch", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_SUB_FETCH,
+				      "__atomic_sub_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_AND_FETCH,
-				  "__atomic_and_fetch", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_AND_FETCH,
+				      "__atomic_and_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_NAND_FETCH,
-				  "__atomic_nand_fetch", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_NAND_FETCH,
+				      "__atomic_nand_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_XOR_FETCH,
-				  "__atomic_xor_fetch", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_XOR_FETCH,
+				      "__atomic_xor_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_OR_FETCH,
-				  "__atomic_or_fetch", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_OR_FETCH,
+				      "__atomic_or_fetch")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_ADD,
-				  "__atomic_fetch_add", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_ADD,
+				      "__atomic_fetch_add")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_SUB,
-				  "__atomic_fetch_sub", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_SUB,
+				      "__atomic_fetch_sub")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_AND,
-				  "__atomic_fetch_and", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_AND,
+				      "__atomic_fetch_and")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_NAND,
-				  "__atomic_fetch_nand", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_NAND,
+				      "__atomic_fetch_nand")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_XOR,
-				  "__atomic_fetch_xor", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_XOR,
+				      "__atomic_fetch_xor")
 
-DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_OR,
-				  "__atomic_fetch_or", "")
+DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N (BUILT_IN_ATOMIC_FETCH_OR,
+				      "__atomic_fetch_or")
 
 DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE,
 		  "__atomic_always_lock_free",
@@ -345,6 +371,10 @@ DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FERAISEEXCEPT,
 #undef DEF_SYNC_BUILTIN_STORE_ORDER_N
 #undef DEF_SYNC_BUILTIN_LOAD_ORDER_ALL_N
 #undef DEF_SYNC_BUILTIN_LOAD_ORDER_N
+#undef DEF_SYNC_BUILTIN_RMW_OFF_ORDER_ALL_N
+#undef DEF_SYNC_BUILTIN_RMW_OFF_ORDER_N
+#undef DEF_SYNC_BUILTIN_RMW_OFF_ALL_N
+#undef DEF_SYNC_BUILTIN_RMW_OFF_N
 #undef DEF_SYNC_BUILTIN_RMW_ORDER_ALL_N
 #undef DEF_SYNC_BUILTIN_RMW_ORDER_N
 #undef DEF_SYNC_BUILTIN_RMW_ALL_N
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/invalid-atomic-1.c b/gcc/testsuite/gcc.target/aarch64/morello/invalid-atomic-1.c
new file mode 100644
index 00000000000..cf92a8e4921
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/invalid-atomic-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+#include <stdatomic.h>
+
+void atomic_check_valid_1(int *__capability *intptrptr, int *intptr)
+{
+  intptr = __atomic_fetch_add(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_fetch_sub(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_add_fetch(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_sub_fetch(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_fetch_and(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_fetch_or(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_fetch_xor(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_fetch_nand(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_and_fetch(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_or_fetch(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_xor_fetch(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __atomic_nand_fetch(intptrptr, intptr, memory_order_relaxed); /* { dg-warning "integer from pointer without a cast" } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/invalid-sync-1.c b/gcc/testsuite/gcc.target/aarch64/morello/invalid-sync-1.c
new file mode 100644
index 00000000000..d9acbe5fe0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/invalid-sync-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+void atomic_check_valid_1(int *__capability *intptrptr, int *intptr) {
+  intptr = __sync_fetch_and_add(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_fetch_and_sub(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_add_and_fetch(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_sub_and_fetch(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_fetch_and_and(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_fetch_and_or(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_fetch_and_xor(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_fetch_and_nand(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_and_and_fetch(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_or_and_fetch(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_xor_and_fetch(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+  intptr = __sync_nand_and_fetch(intptrptr, intptr); /* { dg-warning "integer from pointer without a cast" } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-1.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-1.c
new file mode 100644
index 00000000000..2b42a5ae917
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-1.c
@@ -0,0 +1,461 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)					\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_fetch_##OPERATION ((TYPE *) ptr, val,		\
+				       __ATOMIC_RELAXED);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_##OPERATION##_fetch ((TYPE *) ptr, val,		\
+					 __ATOMIC_RELAXED);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldxrb	.*
+**	...
+**	stxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeorb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldxrb	.*
+**	...
+**	stxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeorb	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldxrh	.*
+**	...
+**	stxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldseth	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeorh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldxrh	.*
+**	...
+**	stxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldseth	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeorh	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldadd	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldadd	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclr	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldxr	w[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldset	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeor	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldadd	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldadd	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclr	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldxr	w[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldset	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeor	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldadd	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldadd	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclr	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldxr	x[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldset	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeor	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldadd	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldadd	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclr	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldxr	x[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldset	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeor	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldxr	.*
+**	and	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldxr	.*
+**	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the rest since they currently include a redundant final
+   operation.  */
+TEST_SIZE (intcap)
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-2.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-2.c
new file mode 100644
index 00000000000..5312e915c0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-2.c
@@ -0,0 +1,461 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)					\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_fetch_##OPERATION ((TYPE *) ptr, val,		\
+				       __ATOMIC_CONSUME);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_##OPERATION##_fetch ((TYPE *) ptr, val,		\
+					 __ATOMIC_CONSUME);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclrab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldaxrb	.*
+**	...
+**	stxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeorab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclrab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldaxrb	.*
+**	...
+**	stxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeorab	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclrah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldaxrh	.*
+**	...
+**	stxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldsetah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeorah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclrah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldaxrh	.*
+**	...
+**	stxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldsetah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeorah	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclra	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldseta	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeora	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclra	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldseta	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeora	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclra	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldseta	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeora	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclra	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldseta	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeora	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldaxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldaxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldaxr	.*
+**	and	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldaxr	.*
+**	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldaxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldaxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the rest since they currently include a redundant final
+   operation.  */
+TEST_SIZE (intcap)
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-3.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-3.c
new file mode 100644
index 00000000000..96005e807b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-3.c
@@ -0,0 +1,461 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)					\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_fetch_##OPERATION ((TYPE *) ptr, val,		\
+				       __ATOMIC_ACQUIRE);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_##OPERATION##_fetch ((TYPE *) ptr, val,		\
+					 __ATOMIC_ACQUIRE);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclrab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldaxrb	.*
+**	...
+**	stxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeorab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclrab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldaxrb	.*
+**	...
+**	stxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetab	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeorab	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclrah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldaxrh	.*
+**	...
+**	stxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldsetah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeorah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclrah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldaxrh	.*
+**	...
+**	stxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldsetah	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeorah	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclra	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldseta	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeora	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldadda	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclra	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldseta	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeora	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclra	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldseta	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeora	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldadda	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclra	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldseta	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeora	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldaxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldaxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldaxr	.*
+**	and	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldaxr	.*
+**	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldaxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldaxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the intcap OPERATION_and_fetch functions, since they
+   currently include a redundant final operation.  */
+TEST_SIZE (intcap)
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-4.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-4.c
new file mode 100644
index 00000000000..6f872881b7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-4.c
@@ -0,0 +1,463 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)					\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_fetch_##OPERATION ((TYPE *) ptr, val,		\
+				       __ATOMIC_RELEASE);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_##OPERATION##_fetch ((TYPE *) ptr, val,		\
+					 __ATOMIC_RELEASE);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclrlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeorlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclrlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetlb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeorlb	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclrlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldsetlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeorlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclrlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldsetlh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeorlh	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldaddl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldaddl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclrl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldsetl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeorl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldaddl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldaddl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclrl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldsetl	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeorl	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldaddl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldaddl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclrl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldsetl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeorl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldaddl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldaddl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclrl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldsetl	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeorl	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stlxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldxr	.*
+**	and	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldxr	.*
+**	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the intcap OPERATION_and_fetch functions, since they
+   currently include a redundant final operation.  */
+TEST_SIZE (intcap)
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-5.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-5.c
new file mode 100644
index 00000000000..aa54e09062e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-5.c
@@ -0,0 +1,461 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)					\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_fetch_##OPERATION ((TYPE *) ptr, val,		\
+				       __ATOMIC_ACQ_REL);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_##OPERATION##_fetch ((TYPE *) ptr, val,		\
+					 __ATOMIC_ACQ_REL);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldaxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeoralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldaxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeoralb	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldaxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldsetalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeoralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldaxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldsetalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeoralh	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldsetal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeoral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldsetal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeoral	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldsetal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeoral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldsetal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeoral	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldaxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stlxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldaxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldaxr	.*
+**	and	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldaxr	.*
+**	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldaxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldaxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the intcap OPERATION_and_fetch functions, since they
+   currently include a redundant final operation.  */
+TEST_SIZE (intcap)
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-6.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-6.c
new file mode 100644
index 00000000000..da76285c128
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-atomic-operation-6.c
@@ -0,0 +1,461 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)					\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_fetch_##OPERATION ((TYPE *) ptr, val,		\
+				       __ATOMIC_SEQ_CST);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __atomic_##OPERATION##_fetch ((TYPE *) ptr, val,		\
+					 __ATOMIC_SEQ_CST);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldaxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeoralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldaxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeoralb	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldaxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldsetalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeoralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldaxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldsetalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeoralh	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldsetal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeoral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldaxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldsetal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeoral	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldsetal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeoral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldaxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldsetal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeoral	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldaxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stlxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldaxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldaxr	.*
+**	and	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldaxr	.*
+**	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldaxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldaxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the intcap OPERATION_and_fetch functions, since they
+   currently include a redundant final operation.  */
+TEST_SIZE (intcap)
+
+/* { dg-final { scan-assembler-not {\tdmb\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-sync-operation-1.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-sync-operation-1.c
new file mode 100644
index 00000000000..1c93ec89b19
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-sync-operation-1.c
@@ -0,0 +1,462 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-foptimize-sibling-calls -save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mfake-capability" } { "" } }  */
+
+#include <stdint.h>
+
+typedef __uint128_t uint128;
+typedef __intcap intcap;
+
+#define TEST_OPERATION(TYPE, OPERATION)				\
+  TYPE									\
+  test_##TYPE##_fetch_and_##OPERATION (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __sync_fetch_and_##OPERATION ((TYPE *) ptr, val);		\
+  }									\
+									\
+  TYPE									\
+  test_##TYPE##_##OPERATION##_and_fetch (TYPE *__capability ptr, TYPE val) \
+  {									\
+    return __sync_##OPERATION##_and_fetch ((TYPE *) ptr, val);		\
+  }
+
+#define TEST_SIZE(TYPE)				\
+  TEST_OPERATION (TYPE, add)			\
+  TEST_OPERATION (TYPE, sub)			\
+  TEST_OPERATION (TYPE, and)			\
+  TEST_OPERATION (TYPE, nand)			\
+  TEST_OPERATION (TYPE, or)			\
+  TEST_OPERATION (TYPE, xor)
+
+/*
+** test_uint8_t_fetch_and_add:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_sub:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_and:
+**	...
+**	ldclralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_nand:
+**	...
+**	ldxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_or:
+**	...
+**	ldsetalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_fetch_and_xor:
+**	...
+**	ldeoralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_add_and_fetch:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_sub_and_fetch:
+**	...
+**	ldaddalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_and_and_fetch:
+**	...
+**	ldclralb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_nand_and_fetch:
+**	...
+**	ldxrb	.*
+**	...
+**	stlxrb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_or_and_fetch:
+**	...
+**	ldsetalb	.*
+**	...
+*/
+
+/*
+** test_uint8_t_xor_and_fetch:
+**	...
+**	ldeoralb	.*
+**	...
+*/
+TEST_SIZE (uint8_t)
+
+/*
+** test_uint16_t_fetch_and_add:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_sub:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_and:
+**	...
+**	ldclralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_nand:
+**	...
+**	ldxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_or:
+**	...
+**	ldsetalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_fetch_and_xor:
+**	...
+**	ldeoralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_add_and_fetch:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_sub_and_fetch:
+**	...
+**	ldaddalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_and_and_fetch:
+**	...
+**	ldclralh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_nand_and_fetch:
+**	...
+**	ldxrh	.*
+**	...
+**	stlxrh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_or_and_fetch:
+**	...
+**	ldsetalh	.*
+**	...
+*/
+
+/*
+** test_uint16_t_xor_and_fetch:
+**	...
+**	ldeoralh	.*
+**	...
+*/
+TEST_SIZE (uint16_t)
+
+/*
+** test_uint32_t_fetch_and_add:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_sub:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_and:
+**	...
+**	ldclral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_nand:
+**	...
+**	ldxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_or:
+**	...
+**	ldsetal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_fetch_and_xor:
+**	...
+**	ldeoral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_add_and_fetch:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_sub_and_fetch:
+**	...
+**	ldaddal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_and_and_fetch:
+**	...
+**	ldclral	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_nand_and_fetch:
+**	...
+**	ldxr	w[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_or_and_fetch:
+**	...
+**	ldsetal	w[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint32_t_xor_and_fetch:
+**	...
+**	ldeoral	w[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint32_t)
+
+/*
+** test_uint64_t_fetch_and_add:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_sub:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_and:
+**	...
+**	ldclral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_nand:
+**	...
+**	ldxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_or:
+**	...
+**	ldsetal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_fetch_and_xor:
+**	...
+**	ldeoral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_add_and_fetch:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_sub_and_fetch:
+**	...
+**	ldaddal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_and_and_fetch:
+**	...
+**	ldclral	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_nand_and_fetch:
+**	...
+**	ldxr	x[0-9]+, .*
+**	...
+**	stlxr	w[0-9]+, x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_or_and_fetch:
+**	...
+**	ldsetal	x[0-9]+, .*
+**	...
+*/
+
+/*
+** test_uint64_t_xor_and_fetch:
+**	...
+**	ldeoral	x[0-9]+, .*
+**	...
+*/
+TEST_SIZE (uint64_t)
+
+/* Must compile, but don't match the result.  */
+TEST_SIZE (uint128)
+
+/*
+** test_intcap_fetch_and_add:
+**	...
+**	ldxr	(c[0-9]+), \[([xc][0-9]+)\]
+**	add	(c[0-9]+), \1, x[0-9]+
+**	stlxr	(w[0-9]+), \3, \[\2\]
+**	cbnz	\4, .*
+**	dmb	ish
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_sub:
+**	...
+**	ldxr	.*
+**	sub	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	dmb	ish
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_and:
+**	...
+**	ldxr	.*
+**	and	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	dmb	ish
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_nand:
+**	...
+**	ldxr	.*
+**	.*
+**	stlxr	.*
+**	cbnz	.*
+**	dmb	ish
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_or:
+**	...
+**	ldxr	.*
+**	orr	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	dmb	ish
+**	...
+*/
+
+/*
+** test_intcap_fetch_and_xor:
+**	...
+**	ldxr	.*
+**	eor	.*
+**	scvalue	.*
+**	stlxr	.*
+**	cbnz	.*
+**	...
+*/
+
+/* Don't match the intcap OPERATION_and_fetch functions, since they
+   currently include a redundant final operation.  */
+TEST_SIZE (intcap)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-06 14:43 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-06 14:43 [gcc(refs/vendors/ARM/heads/morello)] Tweak prototypes of __atomic_fetch_*_capability Matthew Malcomson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).