public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-11247] aarch64: Avoid out-of-range shrink-wrapped saves [PR111677]
@ 2024-02-20 10:14 Alex Coplan
0 siblings, 0 replies; only message in thread
From: Alex Coplan @ 2024-02-20 10:14 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:0b308e2f27b087049fde9fbc64e432de47868a90
commit r11-11247-g0b308e2f27b087049fde9fbc64e432de47868a90
Author: Alex Coplan <alex.coplan@arm.com>
Date: Tue Jan 30 09:39:59 2024 +0000
aarch64: Avoid out-of-range shrink-wrapped saves [PR111677]
The PR shows us ICEing due to an unrecognizable TFmode save emitted by
aarch64_process_components. The problem is that for T{I,F,D}mode we
conservatively require mems to be in range for x-register ldp/stp. That
is because (at least for TImode) it can be allocated to both GPRs and
FPRs, and in the GPR case that is an x-reg ldp/stp, and the FPR case is
a q-register load/store.
As Richard pointed out in the PR, aarch64_get_separate_components
already checks that the offsets are suitable for a single load, so we
just need to choose a mode in aarch64_reg_save_mode that gives the full
q-register range. In this patch, we choose V16QImode as an alternative
16-byte "bag-of-bits" mode that doesn't have the artificial range
restrictions imposed on T{I,F,D}mode.
Unlike for GCC 14 we need additional handling in the load/store pair
code as various cases are not expecting to see V16QImode (particularly
the writeback patterns, but also aarch64_gen_load_pair).
gcc/ChangeLog:
PR target/111677
* config/aarch64/aarch64.c (aarch64_reg_save_mode): Use
V16QImode for the full 16-byte FPR saves in the vector PCS case.
(aarch64_gen_storewb_pair): Handle V16QImode.
(aarch64_gen_loadwb_pair): Likewise.
(aarch64_gen_load_pair): Likewise.
* config/aarch64/aarch64.md (loadwb_pair<TX:mode>_<P:mode>):
Rename to ...
(loadwb_pair<TX_V16QI:mode>_<P:mode>): ... this, extending to
V16QImode.
(storewb_pair<TX:mode>_<P:mode>): Rename to ...
(storewb_pair<TX_V16QI:mode>_<P:mode>): ... this, extending to
V16QImode.
* config/aarch64/iterators.md (TX_V16QI): New.
gcc/testsuite/ChangeLog:
PR target/111677
* gcc.target/aarch64/torture/pr111677.c: New test.
(cherry picked from commit fddce05d67f34174be0f306e1015d3868bbe7c31)
Diff:
---
gcc/config/aarch64/aarch64.c | 13 +++++++-
gcc/config/aarch64/aarch64.md | 35 +++++++++++-----------
gcc/config/aarch64/iterators.md | 3 ++
.../gcc.target/aarch64/torture/pr111677.c | 28 +++++++++++++++++
4 files changed, 61 insertions(+), 18 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3ccfd3c30fc7..9bbbc5043af2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3466,7 +3466,7 @@ aarch64_reg_save_mode (unsigned int regno)
case ARM_PCS_SIMD:
/* The vector PCS saves the low 128 bits (which is the full
register on non-SVE targets). */
- return TFmode;
+ return V16QImode;
case ARM_PCS_SVE:
/* Use vectors of DImode for registers that need frame
@@ -7830,6 +7830,10 @@ aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
return gen_storewb_pairtf_di (base, base, reg, reg2,
GEN_INT (-adjustment),
GEN_INT (UNITS_PER_VREG - adjustment));
+ case E_V16QImode:
+ return gen_storewb_pairv16qi_di (base, base, reg, reg2,
+ GEN_INT (-adjustment),
+ GEN_INT (UNITS_PER_VREG - adjustment));
default:
gcc_unreachable ();
}
@@ -7875,6 +7879,10 @@ aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
case E_TFmode:
return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
GEN_INT (UNITS_PER_VREG));
+ case E_V16QImode:
+ return gen_loadwb_pairv16qi_di (base, base, reg, reg2,
+ GEN_INT (adjustment),
+ GEN_INT (UNITS_PER_VREG));
default:
gcc_unreachable ();
}
@@ -7958,6 +7966,9 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
case E_V4SImode:
return gen_load_pairv4siv4si (reg1, mem1, reg2, mem2);
+ case E_V16QImode:
+ return gen_load_pairv16qiv16qi (reg1, mem1, reg2, mem2);
+
default:
gcc_unreachable ();
}
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1cca0f5c79f4..002112d5eead 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1737,17 +1737,18 @@
[(set_attr "type" "neon_load1_2reg")]
)
-(define_insn "loadwb_pair<TX:mode>_<P:mode>"
+(define_insn "loadwb_pair<TX_V16QI:mode>_<P:mode>"
[(parallel
[(set (match_operand:P 0 "register_operand" "=k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (match_operand:TX 2 "register_operand" "=w")
- (mem:TX (match_dup 1)))
- (set (match_operand:TX 3 "register_operand" "=w")
- (mem:TX (plus:P (match_dup 1)
+ (plus:P (match_operand:P 1 "register_operand" "0")
+ (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
+ (set (match_operand:TX_V16QI 2 "register_operand" "=w")
+ (mem:TX_V16QI (match_dup 1)))
+ (set (match_operand:TX_V16QI 3 "register_operand" "=w")
+ (mem:TX_V16QI (plus:P (match_dup 1)
(match_operand:P 5 "const_int_operand" "n"))))])]
- "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
+ "TARGET_SIMD
+ && known_eq (INTVAL (operands[5]), GET_MODE_SIZE (<TX_V16QI:MODE>mode))"
"ldp\\t%q2, %q3, [%1], %4"
[(set_attr "type" "neon_ldp_q")]
)
@@ -1786,20 +1787,20 @@
[(set_attr "type" "neon_store1_2reg<q>")]
)
-(define_insn "storewb_pair<TX:mode>_<P:mode>"
+(define_insn "storewb_pair<TX_V16QI:mode>_<P:mode>"
[(parallel
[(set (match_operand:P 0 "register_operand" "=&k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (mem:TX (plus:P (match_dup 0)
+ (plus:P (match_operand:P 1 "register_operand" "0")
+ (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
+ (set (mem:TX_V16QI (plus:P (match_dup 0)
(match_dup 4)))
- (match_operand:TX 2 "register_operand" "w"))
- (set (mem:TX (plus:P (match_dup 0)
+ (match_operand:TX_V16QI 2 "register_operand" "w"))
+ (set (mem:TX_V16QI (plus:P (match_dup 0)
(match_operand:P 5 "const_int_operand" "n")))
- (match_operand:TX 3 "register_operand" "w"))])]
+ (match_operand:TX_V16QI 3 "register_operand" "w"))])]
"TARGET_SIMD
- && INTVAL (operands[5])
- == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
+ && known_eq (INTVAL (operands[5]),
+ INTVAL (operands[4]) + GET_MODE_SIZE (<TX_V16QI:MODE>mode))"
"stp\\t%q2, %q3, [%0, %4]!"
[(set_attr "type" "neon_stp_q")]
)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c9ec6265e7c4..ad37741aff8f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -289,6 +289,9 @@
(define_mode_iterator TX [TI TF])
+;; TX plus V16QImode.
+(define_mode_iterator TX_V16QI [TI TF V16QI])
+
;; Advanced SIMD opaque structure modes.
(define_mode_iterator VSTRUCT [OI CI XI])
diff --git a/gcc/testsuite/gcc.target/aarch64/torture/pr111677.c b/gcc/testsuite/gcc.target/aarch64/torture/pr111677.c
new file mode 100644
index 000000000000..6bb640c42c03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/torture/pr111677.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target fopenmp } */
+/* { dg-options "-ffast-math -fstack-protector-strong -fopenmp" } */
+typedef struct {
+ long size_z;
+ int width;
+} dt_bilateral_t;
+typedef float dt_aligned_pixel_t[4];
+#pragma omp declare simd
+void dt_bilateral_splat(dt_bilateral_t *b) {
+ float *buf;
+ long offsets[8];
+ for (; b;) {
+ int firstrow;
+ for (int j = firstrow; j; j++)
+ for (int i; i < b->width; i++) {
+ dt_aligned_pixel_t contrib;
+ for (int k = 0; k < 4; k++)
+ buf[offsets[k]] += contrib[k];
+ }
+ float *dest;
+ for (int j = (long)b; j; j++) {
+ float *src = (float *)b->size_z;
+ for (int i = 0; i < (long)b; i++)
+ dest[i] += src[i];
+ }
+ }
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-02-20 10:14 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-20 10:14 [gcc r11-11247] aarch64: Avoid out-of-range shrink-wrapped saves [PR111677] Alex Coplan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).