public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] expand: Fix underaligned capability loads/stores
@ 2022-08-24 13:29 Alex Coplan
  0 siblings, 0 replies; only message in thread
From: Alex Coplan @ 2022-08-24 13:29 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:232742941eac38182864eb7e79e25d24adb8d0b3

commit 232742941eac38182864eb7e79e25d24adb8d0b3
Author: Alex Coplan <alex.coplan@arm.com>
Date:   Thu Aug 18 15:26:47 2022 +0100

    expand: Fix underaligned capability loads/stores
    
    This patch fixes expansion of underaligned capability loads/stores.
    Prior to this patch, expand would use {store,extract}_bit_field to
    handle underaligned capability loads/stores. This led to capabilities
    being copied with pairs of x-register loads/stores, which of course
    invalidated those capabilities.
    
    With this patch, we use emit_block_move to handle these instead, which
    will typically result in a call to memcpy at runtime, which can dispatch
    based on the runtime alignment to use capability loads/stores in the
    case the pointer(s) are well-aligned.
    
    This fixes wrong code bugs seen with std::vector<bool> in the libstdc++
    testsuite.
    
    For example, given the following code on purecap Morello:
    
    typedef unsigned __intcap __attribute__((aligned(1))) T;
    unsigned __intcap ld(T *p) { return *p; }
    void st(T *p, unsigned __intcap c) { *p = c; }
    
    prior to the patch, we used to get the following (wrong) code at -O2:
    
    ld:
            ldp     x1, x0, [c0]
            sub     csp, csp, #16
            stp     x1, x0, [csp]
            ldr     c0, [csp]
            add     csp, csp, 16
            ret
    
    st:
            sub     csp, csp, #16
            str     c1, [csp]
            ldr     x1, [csp]
            str     x1, [c0]
            ldr     x1, [csp, 8]
            str     x1, [c0, 8]
            add     csp, csp, 16
            ret
    
    and we now get the following at -O2:
    
    ld:
            stp     c29, c30, [csp, -48]!
            mov     c1, c0
            mov     x2, 16
            mov     c29, csp
            add     c0, csp, 48
            sub     c0, c0, #16
            bl      memcpy
            ldr     c0, [csp, 32]
            ldp     c29, c30, [csp], 48
            ret
    
    st:
            stp     c29, c30, [csp, -48]!
            mov     c4, c1
            mov     x2, 16
            mov     c29, csp
            add     c1, csp, 48
            sub     c1, c1, #16
            str     c4, [csp, 32]
            bl      memcpy
            ldp     c29, c30, [csp], 48
            ret
    
    gcc/ChangeLog:
    
            * expr.c (expand_assignment): Use emit_block_move to handle
            misaligned capability stores.
            (expand_misaligned_mem_ref): Likewise for misaligned capability
            loads.
            * function.c (assign_parm_setup_reg): Add assert that we don't
            need to handle misaligned capability moves here.
    
    gcc/testsuite/ChangeLog:
    
            * g++.target/aarch64/morello/iter-cap-copy.C: New test.
            * gcc.target/aarch64/morello/misaligned-cap-moves.c: New test.

Diff:
---
 gcc/expr.c                                         | 62 ++++++++++++++++++++--
 gcc/function.c                                     |  8 +++
 .../g++.target/aarch64/morello/iter-cap-copy.C     | 31 +++++++++++
 .../aarch64/morello/misaligned-cap-moves.c         | 15 ++++++
 4 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index cbb2a29d038..ae41993aec8 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -5292,13 +5292,17 @@ expand_assignment (tree to, tree from, bool nontemporal)
 	   != CODE_FOR_nothing)
 	  || targetm.slow_unaligned_access (mode, align)))
     {
-      rtx reg, mem;
+      rtx reg = NULL_RTX, from_rtx, mem;
 
-      reg = expand_expr (from, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-      reg = force_not_mem (reg);
+      from_rtx = expand_expr (from, NULL_RTX, VOIDmode, EXPAND_NORMAL);
       mem = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE);
-      if (TREE_CODE (to) == MEM_REF && REF_REVERSE_STORAGE_ORDER (to))
-	reg = flip_storage_order (mode, reg);
+
+      if (icode != CODE_FOR_nothing || !CAPABILITY_MODE_P (mode))
+	{
+	  reg = force_not_mem (from_rtx);
+	  if (TREE_CODE (to) == MEM_REF && REF_REVERSE_STORAGE_ORDER (to))
+	    reg = flip_storage_order (mode, reg);
+	}
 
       if (icode != CODE_FOR_nothing)
 	{
@@ -5310,6 +5314,22 @@ expand_assignment (tree to, tree from, bool nontemporal)
 	     would silently be omitted.  */
 	  expand_insn (icode, 2, ops);
 	}
+      else if (CAPABILITY_MODE_P (mode))
+	{
+	  push_temp_slots ();
+	  if (!MEM_P (from_rtx))
+	    {
+	      gcc_assert (TREE_CODE (to) != MEM_REF
+			  || !REF_REVERSE_STORAGE_ORDER (to));
+	      rtx temp_slot = assign_stack_temp (mode, GET_MODE_SIZE (mode));
+	      emit_move_insn (temp_slot, from_rtx);
+	      from_rtx = temp_slot;
+	    }
+	  emit_block_move (mem, from_rtx,
+			   gen_int_mode (GET_MODE_SIZE (mode), POmode),
+			   BLOCK_OP_NORMAL);
+	  pop_temp_slots ();
+	}
       else
 	store_bit_field (mem, GET_MODE_BITSIZE (mode), 0, 0, 0, mode, reg,
 			 false);
@@ -8791,6 +8811,38 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
       expand_insn (icode, 2, ops);
       temp = ops[0].value;
     }
+  else if (CAPABILITY_MODE_P (mode))
+    {
+      push_temp_slots ();
+
+      gcc_assert (MEM_P (temp));
+
+      rtx move_dst;
+      if (target
+	  && MEM_P (target)
+	  && MEM_ALIGN (target) >= GET_MODE_ALIGNMENT (mode))
+	move_dst = target;
+      else
+	move_dst = assign_stack_temp (mode, GET_MODE_SIZE (mode));
+
+      emit_block_move (move_dst, temp,
+		       gen_int_mode (GET_MODE_SIZE (mode), POmode),
+		       BLOCK_OP_NORMAL);
+
+      if (target == move_dst || (target && REG_P (target)))
+	{
+	  temp = target;
+	  if (alt_rtl)
+	    *alt_rtl = target;
+	}
+      else
+	temp = gen_reg_rtx (mode);
+
+      if (REG_P (temp))
+	emit_move_insn (temp, move_dst);
+
+      pop_temp_slots ();
+    }
   else if (targetm.slow_unaligned_access (mode, align))
     temp = extract_bit_field (temp, GET_MODE_BITSIZE (mode),
 			      0, 0, 0, unsignedp, target,
diff --git a/gcc/function.c b/gcc/function.c
index f7d5a0a7b91..2fc80ec6642 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3321,6 +3321,14 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
 	       || targetm.slow_unaligned_access (promoted_nominal_mode,
 						 MEM_ALIGN (data->entry_parm))))
     {
+      /* It is believed this code path is unreachable with
+	 promoted_nominal_mode a capability mode.  If it
+	 becomes reachable, then we will need to use
+	 emit_block_move to handle that case, similar to
+	 other cases handling underaligned capability
+	 moves in expr.c.  */
+      gcc_assert (!CAPABILITY_MODE_P (promoted_nominal_mode));
+
       if (icode != CODE_FOR_nothing)
 	emit_insn (GEN_FCN (icode) (parmreg, validated_mem));
       else
diff --git a/gcc/testsuite/g++.target/aarch64/morello/iter-cap-copy.C b/gcc/testsuite/g++.target/aarch64/morello/iter-cap-copy.C
new file mode 100644
index 00000000000..dbb21c6f645
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/morello/iter-cap-copy.C
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* This was getting miscompiled since the C++ FE emitted a
+   block copy for *p = t, then SRA split out an underaligned capability
+   store for this, and expand split that store into pairs of x-register
+   loads/stores, which invalidated the capability.
+
+   Instead, we now use memcpy when expanding the underaligned capability
+   store.  */
+
+struct iter {
+  unsigned long *p;
+  int y;
+  iter() {}
+};
+
+__attribute__((noipa))
+void f(iter *p, unsigned long *q)
+{
+  iter t;
+  t.p = q;
+  *p = t;
+}
+
+int main()
+{
+  iter i;
+  unsigned long x = 0xdeadbeefUL;
+  f(&i, &x);
+  if (*i.p != 0xdeadbeefUL)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/misaligned-cap-moves.c b/gcc/testsuite/gcc.target/aarch64/morello/misaligned-cap-moves.c
new file mode 100644
index 00000000000..b49051bc630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/misaligned-cap-moves.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+typedef unsigned __intcap __attribute__((aligned(1))) T;
+
+__attribute__((noipa))
+void copy_cap(T *p) { p[0] = p[1]; }
+
+int main(void)
+{
+  int x = 42;
+  unsigned __intcap arr[2] = { 0, (unsigned __intcap)&x };
+  copy_cap (arr);
+  int *p = (int *)arr[0];
+  if (*p != 42)
+    __builtin_abort ();
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-08-24 13:29 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-24 13:29 [gcc(refs/vendors/ARM/heads/morello)] expand: Fix underaligned capability loads/stores Alex Coplan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).