[gcc(refs/vendors/ARM/heads/morello)] expand: Fix underaligned capability loads/stores

From: Alex Coplan @ 2022-08-24 13:29 UTC
To: gcc-cvs
https://gcc.gnu.org/g:232742941eac38182864eb7e79e25d24adb8d0b3
commit 232742941eac38182864eb7e79e25d24adb8d0b3
Author: Alex Coplan <alex.coplan@arm.com>
Date: Thu Aug 18 15:26:47 2022 +0100
expand: Fix underaligned capability loads/stores
This patch fixes expansion of underaligned capability loads/stores.
Prior to this patch, expand would use {store,extract}_bit_field to
handle underaligned capability loads/stores. This led to capabilities
being copied with pairs of x-register loads/stores, which of course
invalidated those capabilities.
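
For background, here is a minimal standalone sketch (not part of the patch) of
why word-wise copies break capabilities. It assumes a purecap CHERI/Morello
target, and that __builtin_cheri_tag_get is the tag-query builtin provided by
CHERI toolchains:

  /* Illustration only: copying a capability's bytes with 8-byte
     integer stores loses the out-of-band validity tag.  */
  int *copy_via_words (int **src)
  {
    int *dst;
    unsigned long *s = (unsigned long *) src;
    unsigned long *d = (unsigned long *) &dst;
    d[0] = s[0];  /* x-register-sized stores copy only the data bits;  */
    d[1] = s[1];  /* the hardware clears the tag of the copy.          */
    /* __builtin_cheri_tag_get (dst) now returns 0, so dereferencing
       dst faults, even though its bit pattern matches *src.  */
    return dst;
  }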
With this patch, we use emit_block_move to handle these instead, which
typically results in a call to memcpy at runtime; memcpy can then dispatch
on the runtime alignment of its arguments and use capability loads/stores
when the pointers turn out to be suitably aligned.
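
As a rough illustration of that dispatch (a hypothetical sketch, not the
actual library code), a capability-aware memcpy can check the runtime
alignment of both pointers and use capability-sized copies whenever possible:

  /* Hypothetical sketch of a tag-preserving memcpy on purecap Morello,
     where sizeof (void *) is 16.  */
  void *cap_memcpy (void *dst, const void *src, unsigned long n)
  {
    char *d = dst;
    const char *s = src;
    /* If both addresses are capability-aligned, copy 16-byte chunks
       with capability loads/stores, which preserve tags.  */
    if ((((unsigned long) d | (unsigned long) s) % sizeof (void *)) == 0)
      for (; n >= sizeof (void *); n -= sizeof (void *))
        {
          *(void **) d = *(void * const *) s;
          d += sizeof (void *);
          s += sizeof (void *);
        }
    /* Otherwise (and for any tail) fall back to byte copies; a
       capability stored at an underaligned address cannot carry a
       valid tag in the first place, so nothing is lost.  */
    while (n--)
      *d++ = *s++;
    return dst;
  }

Note that sizeof (void *) is 16 on purecap Morello, which is why the
expansions below pass 16 (mov x2, 16) as the size argument to memcpy.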
This fixes wrong code bugs seen with std::vector<bool> in the libstdc++
testsuite.
For example, given the following code on purecap Morello:
typedef unsigned __intcap __attribute__((aligned(1))) T;
unsigned __intcap ld(T *p) { return *p; }
void st(T *p, unsigned __intcap c) { *p = c; }
prior to the patch, we used to get the following (wrong) code at -O2:
ld:
        ldp     x1, x0, [c0]
        sub     csp, csp, #16
        stp     x1, x0, [csp]
        ldr     c0, [csp]
        add     csp, csp, 16
        ret
st:
        sub     csp, csp, #16
        str     c1, [csp]
        ldr     x1, [csp]
        str     x1, [c0]
        ldr     x1, [csp, 8]
        str     x1, [c0, 8]
        add     csp, csp, 16
        ret
and we now get the following at -O2:
ld:
        stp     c29, c30, [csp, -48]!
        mov     c1, c0
        mov     x2, 16
        mov     c29, csp
        add     c0, csp, 48
        sub     c0, c0, #16
        bl      memcpy
        ldr     c0, [csp, 32]
        ldp     c29, c30, [csp], 48
        ret
st:
        stp     c29, c30, [csp, -48]!
        mov     c4, c1
        mov     x2, 16
        mov     c29, csp
        add     c1, csp, 48
        sub     c1, c1, #16
        str     c4, [csp, 32]
        bl      memcpy
        ldp     c29, c30, [csp], 48
        ret
gcc/ChangeLog:

        * expr.c (expand_assignment): Use emit_block_move to handle
        misaligned capability stores.
        (expand_misaligned_mem_ref): Likewise for misaligned capability
        loads.
        * function.c (assign_parm_setup_reg): Add assert that we don't
        need to handle misaligned capability moves here.

gcc/testsuite/ChangeLog:

        * g++.target/aarch64/morello/iter-cap-copy.C: New test.
        * gcc.target/aarch64/morello/misaligned-cap-moves.c: New test.
Diff:
---
gcc/expr.c | 62 ++++++++++++++++++++--
gcc/function.c | 8 +++
.../g++.target/aarch64/morello/iter-cap-copy.C | 31 +++++++++++
.../aarch64/morello/misaligned-cap-moves.c | 15 ++++++
4 files changed, 111 insertions(+), 5 deletions(-)
diff --git a/gcc/expr.c b/gcc/expr.c
index cbb2a29d038..ae41993aec8 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -5292,13 +5292,17 @@ expand_assignment (tree to, tree from, bool nontemporal)
            != CODE_FOR_nothing)
           || targetm.slow_unaligned_access (mode, align)))
     {
-      rtx reg, mem;
+      rtx reg = NULL_RTX, from_rtx, mem;
 
-      reg = expand_expr (from, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-      reg = force_not_mem (reg);
+      from_rtx = expand_expr (from, NULL_RTX, VOIDmode, EXPAND_NORMAL);
       mem = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE);
-      if (TREE_CODE (to) == MEM_REF && REF_REVERSE_STORAGE_ORDER (to))
-        reg = flip_storage_order (mode, reg);
+
+      if (icode != CODE_FOR_nothing || !CAPABILITY_MODE_P (mode))
+        {
+          reg = force_not_mem (from_rtx);
+          if (TREE_CODE (to) == MEM_REF && REF_REVERSE_STORAGE_ORDER (to))
+            reg = flip_storage_order (mode, reg);
+        }
 
       if (icode != CODE_FOR_nothing)
         {
@@ -5310,6 +5314,22 @@ expand_assignment (tree to, tree from, bool nontemporal)
              would silently be omitted.  */
           expand_insn (icode, 2, ops);
         }
+      else if (CAPABILITY_MODE_P (mode))
+        {
+          push_temp_slots ();
+          if (!MEM_P (from_rtx))
+            {
+              gcc_assert (TREE_CODE (to) != MEM_REF
+                          || !REF_REVERSE_STORAGE_ORDER (to));
+              rtx temp_slot = assign_stack_temp (mode, GET_MODE_SIZE (mode));
+              emit_move_insn (temp_slot, from_rtx);
+              from_rtx = temp_slot;
+            }
+          emit_block_move (mem, from_rtx,
+                           gen_int_mode (GET_MODE_SIZE (mode), POmode),
+                           BLOCK_OP_NORMAL);
+          pop_temp_slots ();
+        }
       else
         store_bit_field (mem, GET_MODE_BITSIZE (mode), 0, 0, 0, mode, reg,
                          false);
@@ -8791,6 +8811,38 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
       expand_insn (icode, 2, ops);
       temp = ops[0].value;
     }
+  else if (CAPABILITY_MODE_P (mode))
+    {
+      push_temp_slots ();
+
+      gcc_assert (MEM_P (temp));
+
+      rtx move_dst;
+      if (target
+          && MEM_P (target)
+          && MEM_ALIGN (target) >= GET_MODE_ALIGNMENT (mode))
+        move_dst = target;
+      else
+        move_dst = assign_stack_temp (mode, GET_MODE_SIZE (mode));
+
+      emit_block_move (move_dst, temp,
+                       gen_int_mode (GET_MODE_SIZE (mode), POmode),
+                       BLOCK_OP_NORMAL);
+
+      if (target == move_dst || (target && REG_P (target)))
+        {
+          temp = target;
+          if (alt_rtl)
+            *alt_rtl = target;
+        }
+      else
+        temp = gen_reg_rtx (mode);
+
+      if (REG_P (temp))
+        emit_move_insn (temp, move_dst);
+
+      pop_temp_slots ();
+    }
   else if (targetm.slow_unaligned_access (mode, align))
     temp = extract_bit_field (temp, GET_MODE_BITSIZE (mode),
                               0, 0, 0, unsignedp, target,
diff --git a/gcc/function.c b/gcc/function.c
index f7d5a0a7b91..2fc80ec6642 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3321,6 +3321,14 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
           || targetm.slow_unaligned_access (promoted_nominal_mode,
                                             MEM_ALIGN (data->entry_parm))))
     {
+      /* It is believed this code path is unreachable with
+         promoted_nominal_mode a capability mode.  If it
+         becomes reachable, then we will need to use
+         emit_block_move to handle that case, similar to
+         other cases handling underaligned capability
+         moves in expr.c.  */
+      gcc_assert (!CAPABILITY_MODE_P (promoted_nominal_mode));
+
       if (icode != CODE_FOR_nothing)
         emit_insn (GEN_FCN (icode) (parmreg, validated_mem));
       else
diff --git a/gcc/testsuite/g++.target/aarch64/morello/iter-cap-copy.C b/gcc/testsuite/g++.target/aarch64/morello/iter-cap-copy.C
new file mode 100644
index 00000000000..dbb21c6f645
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/morello/iter-cap-copy.C
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* This was getting miscompiled since the C++ FE emitted a
+   block copy for *p = t, then SRA split out an underaligned capability
+   store for this, and expand split that store into pairs of x-register
+   loads/stores, which invalidated the capability.
+
+   Instead, we now use memcpy when expanding the underaligned capability
+   store.  */
+
+struct iter {
+  unsigned long *p;
+  int y;
+  iter() {}
+};
+
+__attribute__((noipa))
+void f(iter *p, unsigned long *q)
+{
+  iter t;
+  t.p = q;
+  *p = t;
+}
+
+int main()
+{
+  iter i;
+  unsigned long x = 0xdeadbeefUL;
+  f(&i, &x);
+  if (*i.p != 0xdeadbeefUL)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/misaligned-cap-moves.c b/gcc/testsuite/gcc.target/aarch64/morello/misaligned-cap-moves.c
new file mode 100644
index 00000000000..b49051bc630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/misaligned-cap-moves.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+typedef unsigned __intcap __attribute__((aligned(1))) T;
+
+__attribute__((noipa))
+void copy_cap(T *p) { p[0] = p[1]; }
+
+int main(void)
+{
+  int x = 42;
+  unsigned __intcap arr[2] = { 0, (unsigned __intcap)&x };
+  copy_cap (arr);
+  int *p = (int *)arr[0];
+  if (*p != 42)
+    __builtin_abort ();
+}