* [PATCH 08/11] aarch64: Generalize writeback ldp/stp patterns
@ 2023-11-16 18:09 Alex Coplan
2023-11-21 15:03 ` Richard Sandiford
0 siblings, 1 reply; 2+ messages in thread
From: Alex Coplan @ 2023-11-16 18:09 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Sandiford, Kyrylo Tkachov
[-- Attachment #1: Type: text/plain, Size: 2478 bytes --]
Thus far the writeback forms of ldp/stp have been exclusively used in
prologue and epilogue code for saving/restoring of registers to/from the
stack.
As such, forms of ldp/stp that weren't needed for prologue/epilogue code
weren't supported by the aarch64 backend. This patch generalizes the
load/store pair writeback patterns to allow:
- Base registers other than the stack pointer.
- Modes that weren't previously supported.
- Combinations of distinct modes provided they have the same size.
- Pre/post variants that weren't previously needed in prologue/epilogue
code.
We make quite some effort to avoid a combinatorial explosion in the
number of patterns generated (and those in the source) by making
extensive use of special predicates.
An updated version of the upcoming ldp/stp pass can generate the
writeback forms, so this patch is motivated by that.
This patch doesn't add zero-extending or sign-extending forms of the
writeback patterns; that is left for future work.
Bootstrapped/regtested as a series on aarch64-linux-gnu, OK for trunk?
gcc/ChangeLog:
* config/aarch64/aarch64-protos.h (aarch64_ldpstp_operand_mode_p): Declare.
* config/aarch64/aarch64.cc (aarch64_gen_storewb_pair): Build RTL
directly instead of invoking named pattern.
(aarch64_gen_loadwb_pair): Likewise.
(aarch64_ldpstp_operand_mode_p): New.
* config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Replace with
...
(*loadwb_post_pair_<ldst_sz>): ... this. Generalize as described
in cover letter.
(loadwb_pair<GPF:mode>_<P:mode>): Delete (superseded by the
above).
(*loadwb_post_pair_16): New.
(*loadwb_pre_pair_<ldst_sz>): New.
(loadwb_pair<TX:mode>_<P:mode>): Delete.
(*loadwb_pre_pair_16): New.
(storewb_pair<GPI:mode>_<P:mode>): Replace with ...
(*storewb_pre_pair_<ldst_sz>): ... this. Generalize as
described in cover letter.
(*storewb_pre_pair_16): New.
(storewb_pair<GPF:mode>_<P:mode>): Delete.
(*storewb_post_pair_<ldst_sz>): New.
(storewb_pair<TX:mode>_<P:mode>): Delete.
(*storewb_post_pair_16): New.
* config/aarch64/predicates.md (aarch64_mem_pair_operator): New.
(pmode_plus_operator): New.
(aarch64_ldp_reg_operand): New.
(aarch64_stp_reg_operand): New.
---
gcc/config/aarch64/aarch64-protos.h | 1 +
gcc/config/aarch64/aarch64.cc | 60 +++---
gcc/config/aarch64/aarch64.md | 284 ++++++++++++++++++++--------
gcc/config/aarch64/predicates.md | 38 ++++
4 files changed, 271 insertions(+), 112 deletions(-)
[-- Attachment #2: 0008-aarch64-Generalize-writeback-ldp-stp-patterns.patch --]
[-- Type: text/x-patch, Size: 18801 bytes --]
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 36d6c688bc8..e463fd5c817 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1023,6 +1023,7 @@ bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode);
void aarch64_swap_ldrstr_operands (rtx *, bool);
+bool aarch64_ldpstp_operand_mode_p (machine_mode);
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
tree, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4820fac67a1..ccf081d2a16 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8977,23 +8977,15 @@ static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
{
- switch (mode)
- {
- case E_DImode:
- return gen_storewb_pairdi_di (base, base, reg, reg2,
- GEN_INT (-adjustment),
- GEN_INT (UNITS_PER_WORD - adjustment));
- case E_DFmode:
- return gen_storewb_pairdf_di (base, base, reg, reg2,
- GEN_INT (-adjustment),
- GEN_INT (UNITS_PER_WORD - adjustment));
- case E_TFmode:
- return gen_storewb_pairtf_di (base, base, reg, reg2,
- GEN_INT (-adjustment),
- GEN_INT (UNITS_PER_VREG - adjustment));
- default:
- gcc_unreachable ();
- }
+ rtx new_base = plus_constant (Pmode, base, -adjustment);
+ rtx mem = gen_frame_mem (mode, new_base);
+ rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode));
+
+ return gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3,
+ gen_rtx_SET (base, new_base),
+ gen_rtx_SET (mem, reg),
+ gen_rtx_SET (mem2, reg2)));
}
/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
@@ -9025,20 +9017,15 @@ static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
{
- switch (mode)
- {
- case E_DImode:
- return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
- GEN_INT (UNITS_PER_WORD));
- case E_DFmode:
- return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
- GEN_INT (UNITS_PER_WORD));
- case E_TFmode:
- return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
- GEN_INT (UNITS_PER_VREG));
- default:
- gcc_unreachable ();
- }
+ rtx mem = gen_frame_mem (mode, base);
+ rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode));
+ rtx new_base = plus_constant (Pmode, base, adjustment);
+
+ return gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3,
+ gen_rtx_SET (base, new_base),
+ gen_rtx_SET (reg, mem),
+ gen_rtx_SET (reg2, mem2)));
}
/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
@@ -26688,6 +26675,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
return false;
}
+bool
+aarch64_ldpstp_operand_mode_p (machine_mode mode)
+{
+ if (!targetm.hard_regno_mode_ok (V0_REGNUM, mode)
+ || hard_regno_nregs (V0_REGNUM, mode) > 1)
+ return false;
+
+ const auto size = GET_MODE_SIZE (mode);
+ return known_eq (size, 4) || known_eq (size, 8) || known_eq (size, 16);
+}
+
/* Return true if MEM1 and MEM2 can be combined into a single access
of mode MODE, with the combined access having the same address as MEM1. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7be1de38b1c..c92a51690c5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1831,102 +1831,224 @@ (define_insn "store_pair_dw_<TX:mode><TX2:mode>"
(set_attr "fp" "yes")]
)
+;; Writeback load/store pair patterns.
+;;
+;; Note that modes in the patterns [SI DI TI] are used only as a proxy for their
+;; size; aarch64_ldp_reg_operand and aarch64_mem_pair_operator are special
+;; predicates which accept a wide range of operand modes, with the requirement
+;; that the contextual (pattern) mode is of the same size as the operand mode.
+
;; Load pair with post-index writeback. This is primarily used in function
;; epilogues.
-(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
+(define_insn "*loadwb_post_pair_<ldst_sz>"
[(parallel
- [(set (match_operand:P 0 "register_operand" "=k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (match_operand:GPI 2 "register_operand" "=r")
- (mem:GPI (match_dup 1)))
- (set (match_operand:GPI 3 "register_operand" "=r")
- (mem:GPI (plus:P (match_dup 1)
- (match_operand:P 5 "const_int_operand" "n"))))])]
- "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
- "ldp\\t%<GPI:w>2, %<GPI:w>3, [%1], %4"
- [(set_attr "type" "load_<GPI:ldpstp_sz>")]
-)
-
-(define_insn "loadwb_pair<GPF:mode>_<P:mode>"
+ [(set (match_operand 0 "pmode_register_operand")
+ (match_operator 7 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 4 "const_int_operand")]))
+ (set (match_operand:GPI 2 "aarch64_ldp_reg_operand")
+ (match_operator 5 "memory_operand" [(match_dup 1)]))
+ (set (match_operand:GPI 3 "aarch64_ldp_reg_operand")
+ (match_operator 6 "memory_operand" [
+ (match_operator 8 "pmode_plus_operator" [
+ (match_dup 1)
+ (const_int <ldst_sz>)])]))])]
+ "aarch64_mem_pair_offset (operands[4], <MODE>mode)
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ {@ [cons: =0, 1, =2, =3; attrs: type]
+ [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%1], %4
+ [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%1], %4
+ }
+)
+
+;; q-register variant of the above
+(define_insn "*loadwb_post_pair_16"
+ [(parallel
+ [(set (match_operand 0 "pmode_register_operand" "=rk")
+ (match_operator 7 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand" "0")
+ (match_operand 4 "const_int_operand")]))
+ (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w")
+ (match_operator 5 "memory_operand" [(match_dup 1)]))
+ (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w")
+ (match_operator 6 "memory_operand"
+ [(match_operator 8 "pmode_plus_operator" [
+ (match_dup 1)
+ (const_int 16)])]))])]
+ "TARGET_FLOAT
+ && aarch64_mem_pair_offset (operands[4], TImode)
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ "ldp\t%q2, %q3, [%1], %4"
+ [(set_attr "type" "neon_ldp_q")]
+)
+
+;; Load pair with pre-index writeback.
+(define_insn "*loadwb_pre_pair_<ldst_sz>"
[(parallel
- [(set (match_operand:P 0 "register_operand" "=k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (match_operand:GPF 2 "register_operand" "=w")
- (mem:GPF (match_dup 1)))
- (set (match_operand:GPF 3 "register_operand" "=w")
- (mem:GPF (plus:P (match_dup 1)
- (match_operand:P 5 "const_int_operand" "n"))))])]
- "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)"
- "ldp\\t%<GPF:w>2, %<GPF:w>3, [%1], %4"
- [(set_attr "type" "neon_load1_2reg")]
-)
-
-(define_insn "loadwb_pair<TX:mode>_<P:mode>"
+ [(set (match_operand 0 "pmode_register_operand")
+ (match_operator 8 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 4 "const_int_operand")]))
+ (set (match_operand:GPI 2 "aarch64_ldp_reg_operand")
+ (match_operator 6 "memory_operand" [
+ (match_operator 10 "pmode_plus_operator" [
+ (match_dup 1)
+ (match_dup 4)
+ ])]))
+ (set (match_operand:GPI 3 "aarch64_ldp_reg_operand")
+ (match_operator 7 "memory_operand" [
+ (match_operator 9 "pmode_plus_operator" [
+ (match_dup 1)
+ (match_operand 5 "const_int_operand")
+ ])]))])]
+ "aarch64_mem_pair_offset (operands[4], <MODE>mode)
+ && known_eq (INTVAL (operands[5]),
+ INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ {@ [cons: =&0, 1, =2, =3; attrs: type ]
+ [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]!
+ [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]!
+ }
+)
+
+;; q-register variant of the above
+(define_insn "*loadwb_pre_pair_16"
[(parallel
- [(set (match_operand:P 0 "register_operand" "=k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (match_operand:TX 2 "register_operand" "=w")
- (mem:TX (match_dup 1)))
- (set (match_operand:TX 3 "register_operand" "=w")
- (mem:TX (plus:P (match_dup 1)
- (match_operand:P 5 "const_int_operand" "n"))))])]
- "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
- "ldp\\t%q2, %q3, [%1], %4"
+ [(set (match_operand 0 "pmode_register_operand" "=&rk")
+ (match_operator 8 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand" "0")
+ (match_operand 4 "const_int_operand")]))
+ (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w")
+ (match_operator 6 "memory_operand" [
+ (match_operator 10 "pmode_plus_operator" [
+ (match_dup 1)
+ (match_dup 4)
+ ])]))
+ (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w")
+ (match_operator 7 "memory_operand" [
+ (match_operator 9 "pmode_plus_operator" [
+ (match_dup 1)
+ (match_operand 5 "const_int_operand")
+ ])]))])]
+ "TARGET_FLOAT
+ && aarch64_mem_pair_offset (operands[4], TImode)
+ && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16)
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ "ldp\t%q2, %q3, [%0, %4]!"
[(set_attr "type" "neon_ldp_q")]
)
;; Store pair with pre-index writeback. This is primarily used in function
;; prologues.
-(define_insn "storewb_pair<GPI:mode>_<P:mode>"
+(define_insn "*storewb_pre_pair_<ldst_sz>"
[(parallel
- [(set (match_operand:P 0 "register_operand" "=&k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (mem:GPI (plus:P (match_dup 0)
- (match_dup 4)))
- (match_operand:GPI 2 "register_operand" "r"))
- (set (mem:GPI (plus:P (match_dup 0)
- (match_operand:P 5 "const_int_operand" "n")))
- (match_operand:GPI 3 "register_operand" "r"))])]
- "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
- "stp\\t%<GPI:w>2, %<GPI:w>3, [%0, %4]!"
- [(set_attr "type" "store_<GPI:ldpstp_sz>")]
+ [(set (match_operand 0 "pmode_register_operand")
+ (match_operator 6 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 4 "const_int_operand")
+ ]))
+ (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [
+ (match_operator 8 "pmode_plus_operator" [
+ (match_dup 0)
+ (match_dup 4)
+ ])])
+ (match_operand:GPI 2 "aarch64_stp_reg_operand"))
+ (set (match_operator:GPI 9 "aarch64_mem_pair_operator" [
+ (match_operator 10 "pmode_plus_operator" [
+ (match_dup 0)
+ (match_operand 5 "const_int_operand")
+ ])])
+ (match_operand:GPI 3 "aarch64_stp_reg_operand"))])]
+ "aarch64_mem_pair_offset (operands[4], <MODE>mode)
+ && known_eq (INTVAL (operands[5]),
+ INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ {@ [cons: =&0, 1, 2, 3; attrs: type ]
+ [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]!
+ [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]!
+ }
+)
+
+;; q-register variant of the above.
+(define_insn "*storewb_pre_pair_16"
+ [(parallel
+ [(set (match_operand 0 "pmode_register_operand" "=&rk")
+ (match_operator 6 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand" "0")
+ (match_operand 4 "const_int_operand")
+ ]))
+ (set (match_operator:TI 7 "aarch64_mem_pair_operator" [
+ (match_operator 8 "pmode_plus_operator" [
+ (match_dup 0)
+ (match_dup 4)
+ ])])
+ (match_operand:TI 2 "aarch64_ldp_reg_operand" "w"))
+ (set (match_operator:TI 9 "aarch64_mem_pair_operator" [
+ (match_operator 10 "pmode_plus_operator" [
+ (match_dup 0)
+ (match_operand 5 "const_int_operand")
+ ])])
+ (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])]
+ "TARGET_FLOAT
+ && aarch64_mem_pair_offset (operands[4], TImode)
+ && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16)
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ "stp\\t%q2, %q3, [%0, %4]!"
+ [(set_attr "type" "neon_stp_q")]
)
-(define_insn "storewb_pair<GPF:mode>_<P:mode>"
+;; Store pair with post-index writeback.
+(define_insn "*storewb_post_pair_<ldst_sz>"
[(parallel
- [(set (match_operand:P 0 "register_operand" "=&k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (mem:GPF (plus:P (match_dup 0)
- (match_dup 4)))
- (match_operand:GPF 2 "register_operand" "w"))
- (set (mem:GPF (plus:P (match_dup 0)
- (match_operand:P 5 "const_int_operand" "n")))
- (match_operand:GPF 3 "register_operand" "w"))])]
- "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)"
- "stp\\t%<GPF:w>2, %<GPF:w>3, [%0, %4]!"
- [(set_attr "type" "neon_store1_2reg<q>")]
-)
-
-(define_insn "storewb_pair<TX:mode>_<P:mode>"
+ [(set (match_operand 0 "pmode_register_operand")
+ (match_operator 5 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 4 "const_int_operand")
+ ]))
+ (set (match_operator:GPI 6 "aarch64_mem_pair_operator" [(match_dup 1)])
+ (match_operand 2 "aarch64_stp_reg_operand"))
+ (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [
+ (match_operator 8 "pmode_plus_operator" [
+ (match_dup 0)
+ (const_int <ldst_sz>)
+ ])])
+ (match_operand 3 "aarch64_stp_reg_operand"))])]
+ "aarch64_mem_pair_offset (operands[4], <MODE>mode)
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ {@ [cons: =0, 1, 2, 3; attrs: type ]
+ [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0], %4
+ [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0], %4
+ }
+)
+
+;; q-register variant of the above.
+(define_insn "*storewb_post_pair_16"
[(parallel
- [(set (match_operand:P 0 "register_operand" "=&k")
- (plus:P (match_operand:P 1 "register_operand" "0")
- (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (mem:TX (plus:P (match_dup 0)
- (match_dup 4)))
- (match_operand:TX 2 "register_operand" "w"))
- (set (mem:TX (plus:P (match_dup 0)
- (match_operand:P 5 "const_int_operand" "n")))
- (match_operand:TX 3 "register_operand" "w"))])]
- "TARGET_SIMD
- && INTVAL (operands[5])
- == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
- "stp\\t%q2, %q3, [%0, %4]!"
+ [(set (match_operand 0 "pmode_register_operand" "=rk")
+ (match_operator 5 "pmode_plus_operator" [
+ (match_operand 1 "pmode_register_operand" "0")
+ (match_operand 4 "const_int_operand")
+ ]))
+ (set (match_operator:TI 6 "aarch64_mem_pair_operator" [(match_dup 1)])
+ (match_operand:TI 2 "aarch64_ldp_reg_operand" "w"))
+ (set (match_operator:TI 7 "aarch64_mem_pair_operator" [
+ (match_operator 8 "pmode_plus_operator" [
+ (match_dup 0)
+ (const_int 16)
+ ])])
+ (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])]
+ "TARGET_FLOAT
+ && aarch64_mem_pair_offset (operands[4], TImode)
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ "stp\t%q2, %q3, [%0], %4"
[(set_attr "type" "neon_stp_q")]
)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index a73724a7fc0..b647e5af7c6 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -257,11 +257,49 @@ (define_predicate "aarch64_mem_pair_offset"
(and (match_code "const_int")
(match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))")))
+(define_special_predicate "aarch64_mem_pair_operator"
+ (and
+ (match_code "mem")
+ (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))")
+ (ior
+ (match_test "mode == VOIDmode")
+ (match_test "known_eq (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (GET_MODE (op)))"))))
+
(define_predicate "aarch64_mem_pair_operand"
(and (match_code "mem")
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), false,
ADDR_QUERY_LDP_STP)")))
+(define_predicate "pmode_plus_operator"
+ (and (match_code "plus")
+ (match_test "GET_MODE (op) == Pmode")))
+
+(define_special_predicate "aarch64_ldp_reg_operand"
+ (and
+ (match_code "reg,subreg")
+ (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))")
+ (ior
+ (match_test "mode == VOIDmode")
+ (match_test "known_eq (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (GET_MODE (op)))"))))
+
+(define_special_predicate "aarch64_stp_reg_operand"
+ (ior (match_operand 0 "aarch64_ldp_reg_operand")
+ (and (ior
+ (and (match_code "const_int,const,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))
+ (and (match_code "const_double")
+ (match_test "aarch64_float_const_zero_rtx_p (op)")))
+ (ior
+ (match_test "GET_MODE (op) == VOIDmode")
+ (and
+ (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))")
+ (ior
+ (match_test "mode == VOIDmode")
+ (match_test "known_eq (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (GET_MODE (op)))")))))))
+
;; Used for storing two 64-bit values in an AdvSIMD register using an STP
;; as a 128-bit vec_concat.
(define_predicate "aarch64_mem_pair_lanes_operand"
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH 08/11] aarch64: Generalize writeback ldp/stp patterns
2023-11-16 18:09 [PATCH 08/11] aarch64: Generalize writeback ldp/stp patterns Alex Coplan
@ 2023-11-21 15:03 ` Richard Sandiford
0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2023-11-21 15:03 UTC (permalink / raw)
To: Alex Coplan; +Cc: gcc-patches, Kyrylo Tkachov
Alex Coplan <alex.coplan@arm.com> writes:
> Thus far the writeback forms of ldp/stp have been exclusively used in
> prologue and epilogue code for saving/restoring of registers to/from the
> stack.
>
> As such, forms of ldp/stp that weren't needed for prologue/epilogue code
> weren't supported by the aarch64 backend. This patch generalizes the
> load/store pair writeback patterns to allow:
>
> - Base registers other than the stack pointer.
> - Modes that weren't previously supported.
> - Combinations of distinct modes provided they have the same size.
> - Pre/post variants that weren't previously needed in prologue/epilogue
> code.
>
> We make quite some effort to avoid a combinatorial explosion in the
> number of patterns generated (and those in the source) by making
> extensive use of special predicates.
>
> An updated version of the upcoming ldp/stp pass can generate the
> writeback forms, so this patch is motivated by that.
>
> This patch doesn't add zero-extending or sign-extending forms of the
> writeback patterns; that is left for future work.
>
> Bootstrapped/regtested as a series on aarch64-linux-gnu, OK for trunk?
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-protos.h (aarch64_ldpstp_operand_mode_p): Declare.
> * config/aarch64/aarch64.cc (aarch64_gen_storewb_pair): Build RTL
> directly instead of invoking named pattern.
> (aarch64_gen_loadwb_pair): Likewise.
> (aarch64_ldpstp_operand_mode_p): New.
> * config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Replace with
> ...
> (*loadwb_post_pair_<ldst_sz>): ... this. Generalize as described
> in cover letter.
> (loadwb_pair<GPF:mode>_<P:mode>): Delete (superseded by the
> above).
> (*loadwb_post_pair_16): New.
> (*loadwb_pre_pair_<ldst_sz>): New.
> (loadwb_pair<TX:mode>_<P:mode>): Delete.
> (*loadwb_pre_pair_16): New.
> (storewb_pair<GPI:mode>_<P:mode>): Replace with ...
> (*storewb_pre_pair_<ldst_sz>): ... this. Generalize as
> described in cover letter.
> (*storewb_pre_pair_16): New.
> (storewb_pair<GPF:mode>_<P:mode>): Delete.
> (*storewb_post_pair_<ldst_sz>): New.
> (storewb_pair<TX:mode>_<P:mode>): Delete.
> (*storewb_post_pair_16): New.
> * config/aarch64/predicates.md (aarch64_mem_pair_operator): New.
> (pmode_plus_operator): New.
> (aarch64_ldp_reg_operand): New.
> (aarch64_stp_reg_operand): New.
> ---
> gcc/config/aarch64/aarch64-protos.h | 1 +
> gcc/config/aarch64/aarch64.cc | 60 +++---
> gcc/config/aarch64/aarch64.md | 284 ++++++++++++++++++++--------
> gcc/config/aarch64/predicates.md | 38 ++++
> 4 files changed, 271 insertions(+), 112 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 36d6c688bc8..e463fd5c817 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1023,6 +1023,7 @@ bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
> bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
> bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode);
> void aarch64_swap_ldrstr_operands (rtx *, bool);
> +bool aarch64_ldpstp_operand_mode_p (machine_mode);
>
> extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
> tree, HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 4820fac67a1..ccf081d2a16 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -8977,23 +8977,15 @@ static rtx
> aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
> HOST_WIDE_INT adjustment)
> {
> - switch (mode)
> - {
> - case E_DImode:
> - return gen_storewb_pairdi_di (base, base, reg, reg2,
> - GEN_INT (-adjustment),
> - GEN_INT (UNITS_PER_WORD - adjustment));
> - case E_DFmode:
> - return gen_storewb_pairdf_di (base, base, reg, reg2,
> - GEN_INT (-adjustment),
> - GEN_INT (UNITS_PER_WORD - adjustment));
> - case E_TFmode:
> - return gen_storewb_pairtf_di (base, base, reg, reg2,
> - GEN_INT (-adjustment),
> - GEN_INT (UNITS_PER_VREG - adjustment));
> - default:
> - gcc_unreachable ();
> - }
> + rtx new_base = plus_constant (Pmode, base, -adjustment);
> + rtx mem = gen_frame_mem (mode, new_base);
> + rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode));
> +
> + return gen_rtx_PARALLEL (VOIDmode,
> + gen_rtvec (3,
> + gen_rtx_SET (base, new_base),
> + gen_rtx_SET (mem, reg),
> + gen_rtx_SET (mem2, reg2)));
> }
>
> /* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
> @@ -9025,20 +9017,15 @@ static rtx
> aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
> HOST_WIDE_INT adjustment)
> {
> - switch (mode)
> - {
> - case E_DImode:
> - return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
> - GEN_INT (UNITS_PER_WORD));
> - case E_DFmode:
> - return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
> - GEN_INT (UNITS_PER_WORD));
> - case E_TFmode:
> - return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
> - GEN_INT (UNITS_PER_VREG));
> - default:
> - gcc_unreachable ();
> - }
> + rtx mem = gen_frame_mem (mode, base);
> + rtx mem2 = adjust_address_nv (mem, mode, GET_MODE_SIZE (mode));
> + rtx new_base = plus_constant (Pmode, base, adjustment);
> +
> + return gen_rtx_PARALLEL (VOIDmode,
> + gen_rtvec (3,
> + gen_rtx_SET (base, new_base),
> + gen_rtx_SET (reg, mem),
> + gen_rtx_SET (reg2, mem2)));
> }
>
> /* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
> @@ -26688,6 +26675,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
> return false;
> }
>
> +bool
> +aarch64_ldpstp_operand_mode_p (machine_mode mode)
The function should have a comment. I realise its purpose is fairly obvious
from the name, but still.
At least one thing the comment should clarify is that the mode is for one
register, rather than for the pair as a whole.
> +{
> + if (!targetm.hard_regno_mode_ok (V0_REGNUM, mode)
> + || hard_regno_nregs (V0_REGNUM, mode) > 1)
> + return false;
> +
> + const auto size = GET_MODE_SIZE (mode);
> + return known_eq (size, 4) || known_eq (size, 8) || known_eq (size, 16);
> +}
> +
> /* Return true if MEM1 and MEM2 can be combined into a single access
> of mode MODE, with the combined access having the same address as MEM1. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 7be1de38b1c..c92a51690c5 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1831,102 +1831,224 @@ (define_insn "store_pair_dw_<TX:mode><TX2:mode>"
> (set_attr "fp" "yes")]
> )
>
> +;; Writeback load/store pair patterns.
> +;;
> +;; Note that modes in the patterns [SI DI TI] are used only as a proxy for their
> +;; size; aarch64_ldp_reg_operand and aarch64_mem_pair_operator are special
> +;; predicates which accept a wide range of operand modes, with the requirement
> +;; that the contextual (pattern) mode is of the same size as the operand mode.
> +
> ;; Load pair with post-index writeback. This is primarily used in function
> ;; epilogues.
> -(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
> +(define_insn "*loadwb_post_pair_<ldst_sz>"
> [(parallel
Pre-existing, but the outer parallel is redundant. define_insn patterns are
implicitly parallel; only define_expand patterns are not.
> - [(set (match_operand:P 0 "register_operand" "=k")
> - (plus:P (match_operand:P 1 "register_operand" "0")
> - (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
> - (set (match_operand:GPI 2 "register_operand" "=r")
> - (mem:GPI (match_dup 1)))
> - (set (match_operand:GPI 3 "register_operand" "=r")
> - (mem:GPI (plus:P (match_dup 1)
> - (match_operand:P 5 "const_int_operand" "n"))))])]
> - "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> - "ldp\\t%<GPI:w>2, %<GPI:w>3, [%1], %4"
> - [(set_attr "type" "load_<GPI:ldpstp_sz>")]
> -)
> -
> -(define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> + [(set (match_operand 0 "pmode_register_operand")
> + (match_operator 7 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand")
> + (match_operand 4 "const_int_operand")]))
> + (set (match_operand:GPI 2 "aarch64_ldp_reg_operand")
> + (match_operator 5 "memory_operand" [(match_dup 1)]))
> + (set (match_operand:GPI 3 "aarch64_ldp_reg_operand")
> + (match_operator 6 "memory_operand" [
> + (match_operator 8 "pmode_plus_operator" [
> + (match_dup 1)
> + (const_int <ldst_sz>)])]))])]
> + "aarch64_mem_pair_offset (operands[4], <MODE>mode)
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
In principle, the last two conditions shouldn't be needed, since the
requirement already follows from generic RTL rules. The same applies to the
other load patterns.
> + {@ [cons: =0, 1, =2, =3; attrs: type]
> + [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%1], %4
> + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%1], %4
I think we should just use spaces for indentation after the "[".
Only the leading whitespace "needs" to be a tab.
That would make diffs easier to read.
> + }
> +)
> +
> +;; q-register variant of the above
> +(define_insn "*loadwb_post_pair_16"
> + [(parallel
> + [(set (match_operand 0 "pmode_register_operand" "=rk")
> + (match_operator 7 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand" "0")
> + (match_operand 4 "const_int_operand")]))
> + (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w")
> + (match_operator 5 "memory_operand" [(match_dup 1)]))
> + (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w")
> + (match_operator 6 "memory_operand"
> + [(match_operator 8 "pmode_plus_operator" [
> + (match_dup 1)
> + (const_int 16)])]))])]
> + "TARGET_FLOAT
> + && aarch64_mem_pair_offset (operands[4], TImode)
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + "ldp\t%q2, %q3, [%1], %4"
> + [(set_attr "type" "neon_ldp_q")]
> +)
> +
> +;; Load pair with pre-index writeback.
> +(define_insn "*loadwb_pre_pair_<ldst_sz>"
> [(parallel
> - [(set (match_operand:P 0 "register_operand" "=k")
> - (plus:P (match_operand:P 1 "register_operand" "0")
> - (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
> - (set (match_operand:GPF 2 "register_operand" "=w")
> - (mem:GPF (match_dup 1)))
> - (set (match_operand:GPF 3 "register_operand" "=w")
> - (mem:GPF (plus:P (match_dup 1)
> - (match_operand:P 5 "const_int_operand" "n"))))])]
> - "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)"
> - "ldp\\t%<GPF:w>2, %<GPF:w>3, [%1], %4"
> - [(set_attr "type" "neon_load1_2reg")]
> -)
> -
> -(define_insn "loadwb_pair<TX:mode>_<P:mode>"
> + [(set (match_operand 0 "pmode_register_operand")
> + (match_operator 8 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand")
> + (match_operand 4 "const_int_operand")]))
> + (set (match_operand:GPI 2 "aarch64_ldp_reg_operand")
> + (match_operator 6 "memory_operand" [
> + (match_operator 10 "pmode_plus_operator" [
> + (match_dup 1)
> + (match_dup 4)
> + ])]))
> + (set (match_operand:GPI 3 "aarch64_ldp_reg_operand")
> + (match_operator 7 "memory_operand" [
> + (match_operator 9 "pmode_plus_operator" [
> + (match_dup 1)
> + (match_operand 5 "const_int_operand")
> + ])]))])]
Very minor, but the operand numbering of the match_operators looks a bit
inconsistent. I think it would be more natural to use 9 for the middle arm
and 10 for the final one. The same applies to the later patterns.
OK with those changes, thanks.
Richard
> + "aarch64_mem_pair_offset (operands[4], <MODE>mode)
> + && known_eq (INTVAL (operands[5]),
> + INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + {@ [cons: =&0, 1, =2, =3; attrs: type ]
> + [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]!
> + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]!
> + }
> +)
> +
> +;; q-register variant of the above
> +(define_insn "*loadwb_pre_pair_16"
> [(parallel
> - [(set (match_operand:P 0 "register_operand" "=k")
> - (plus:P (match_operand:P 1 "register_operand" "0")
> - (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
> - (set (match_operand:TX 2 "register_operand" "=w")
> - (mem:TX (match_dup 1)))
> - (set (match_operand:TX 3 "register_operand" "=w")
> - (mem:TX (plus:P (match_dup 1)
> - (match_operand:P 5 "const_int_operand" "n"))))])]
> - "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
> - "ldp\\t%q2, %q3, [%1], %4"
> + [(set (match_operand 0 "pmode_register_operand" "=&rk")
> + (match_operator 8 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand" "0")
> + (match_operand 4 "const_int_operand")]))
> + (set (match_operand:TI 2 "aarch64_ldp_reg_operand" "=w")
> + (match_operator 6 "memory_operand" [
> + (match_operator 10 "pmode_plus_operator" [
> + (match_dup 1)
> + (match_dup 4)
> + ])]))
> + (set (match_operand:TI 3 "aarch64_ldp_reg_operand" "=w")
> + (match_operator 7 "memory_operand" [
> + (match_operator 9 "pmode_plus_operator" [
> + (match_dup 1)
> + (match_operand 5 "const_int_operand")
> + ])]))])]
> + "TARGET_FLOAT
> + && aarch64_mem_pair_offset (operands[4], TImode)
> + && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16)
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + "ldp\t%q2, %q3, [%0, %4]!"
> [(set_attr "type" "neon_ldp_q")]
> )
>
> ;; Store pair with pre-index writeback. This is primarily used in function
> ;; prologues.
> -(define_insn "storewb_pair<GPI:mode>_<P:mode>"
> +(define_insn "*storewb_pre_pair_<ldst_sz>"
> [(parallel
> - [(set (match_operand:P 0 "register_operand" "=&k")
> - (plus:P (match_operand:P 1 "register_operand" "0")
> - (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
> - (set (mem:GPI (plus:P (match_dup 0)
> - (match_dup 4)))
> - (match_operand:GPI 2 "register_operand" "r"))
> - (set (mem:GPI (plus:P (match_dup 0)
> - (match_operand:P 5 "const_int_operand" "n")))
> - (match_operand:GPI 3 "register_operand" "r"))])]
> - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> - "stp\\t%<GPI:w>2, %<GPI:w>3, [%0, %4]!"
> - [(set_attr "type" "store_<GPI:ldpstp_sz>")]
> + [(set (match_operand 0 "pmode_register_operand")
> + (match_operator 6 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand")
> + (match_operand 4 "const_int_operand")
> + ]))
> + (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [
> + (match_operator 8 "pmode_plus_operator" [
> + (match_dup 0)
> + (match_dup 4)
> + ])])
> + (match_operand:GPI 2 "aarch64_stp_reg_operand"))
> + (set (match_operator:GPI 9 "aarch64_mem_pair_operator" [
> + (match_operator 10 "pmode_plus_operator" [
> + (match_dup 0)
> + (match_operand 5 "const_int_operand")
> + ])])
> + (match_operand:GPI 3 "aarch64_stp_reg_operand"))])]
> + "aarch64_mem_pair_offset (operands[4], <MODE>mode)
> + && known_eq (INTVAL (operands[5]),
> + INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + {@ [cons: =&0, 1, 2, 3; attrs: type ]
> + [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]!
> + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]!
> + }
> +)
> +
> +;; q-register variant of the above.
> +(define_insn "*storewb_pre_pair_16"
> + [(parallel
> + [(set (match_operand 0 "pmode_register_operand" "=&rk")
> + (match_operator 6 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand" "0")
> + (match_operand 4 "const_int_operand")
> + ]))
> + (set (match_operator:TI 7 "aarch64_mem_pair_operator" [
> + (match_operator 8 "pmode_plus_operator" [
> + (match_dup 0)
> + (match_dup 4)
> + ])])
> + (match_operand:TI 2 "aarch64_ldp_reg_operand" "w"))
> + (set (match_operator:TI 9 "aarch64_mem_pair_operator" [
> + (match_operator 10 "pmode_plus_operator" [
> + (match_dup 0)
> + (match_operand 5 "const_int_operand")
> + ])])
> + (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])]
> + "TARGET_FLOAT
> + && aarch64_mem_pair_offset (operands[4], TImode)
> + && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + 16)
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + "stp\\t%q2, %q3, [%0, %4]!"
> + [(set_attr "type" "neon_stp_q")]
> )
>
> -(define_insn "storewb_pair<GPF:mode>_<P:mode>"
> +;; Store pair with post-index writeback.
> +(define_insn "*storewb_post_pair_<ldst_sz>"
> [(parallel
> - [(set (match_operand:P 0 "register_operand" "=&k")
> - (plus:P (match_operand:P 1 "register_operand" "0")
> - (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
> - (set (mem:GPF (plus:P (match_dup 0)
> - (match_dup 4)))
> - (match_operand:GPF 2 "register_operand" "w"))
> - (set (mem:GPF (plus:P (match_dup 0)
> - (match_operand:P 5 "const_int_operand" "n")))
> - (match_operand:GPF 3 "register_operand" "w"))])]
> - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)"
> - "stp\\t%<GPF:w>2, %<GPF:w>3, [%0, %4]!"
> - [(set_attr "type" "neon_store1_2reg<q>")]
> -)
> -
> -(define_insn "storewb_pair<TX:mode>_<P:mode>"
> + [(set (match_operand 0 "pmode_register_operand")
> + (match_operator 5 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand")
> + (match_operand 4 "const_int_operand")
> + ]))
> + (set (match_operator:GPI 6 "aarch64_mem_pair_operator" [(match_dup 1)])
> + (match_operand 2 "aarch64_stp_reg_operand"))
> + (set (match_operator:GPI 7 "aarch64_mem_pair_operator" [
> + (match_operator 8 "pmode_plus_operator" [
> + (match_dup 0)
> + (const_int <ldst_sz>)
> + ])])
> + (match_operand 3 "aarch64_stp_reg_operand"))])]
> + "aarch64_mem_pair_offset (operands[4], <MODE>mode)
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + {@ [cons: =0, 1, 2, 3; attrs: type ]
> + [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0], %4
> + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0], %4
> + }
> +)
> +
> +;; q-register variant of the above.
> +(define_insn "*storewb_post_pair_16"
> [(parallel
> - [(set (match_operand:P 0 "register_operand" "=&k")
> - (plus:P (match_operand:P 1 "register_operand" "0")
> - (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
> - (set (mem:TX (plus:P (match_dup 0)
> - (match_dup 4)))
> - (match_operand:TX 2 "register_operand" "w"))
> - (set (mem:TX (plus:P (match_dup 0)
> - (match_operand:P 5 "const_int_operand" "n")))
> - (match_operand:TX 3 "register_operand" "w"))])]
> - "TARGET_SIMD
> - && INTVAL (operands[5])
> - == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
> - "stp\\t%q2, %q3, [%0, %4]!"
> + [(set (match_operand 0 "pmode_register_operand" "=rk")
> + (match_operator 5 "pmode_plus_operator" [
> + (match_operand 1 "pmode_register_operand" "0")
> + (match_operand 4 "const_int_operand")
> + ]))
> + (set (match_operator:TI 6 "aarch64_mem_pair_operator" [(match_dup 1)])
> + (match_operand:TI 2 "aarch64_ldp_reg_operand" "w"))
> + (set (match_operator:TI 7 "aarch64_mem_pair_operator" [
> + (match_operator 8 "pmode_plus_operator" [
> + (match_dup 0)
> + (const_int 16)
> + ])])
> + (match_operand:TI 3 "aarch64_ldp_reg_operand" "w"))])]
> + "TARGET_FLOAT
> + && aarch64_mem_pair_offset (operands[4], TImode)
> + && !reg_overlap_mentioned_p (operands[0], operands[2])
> + && !reg_overlap_mentioned_p (operands[0], operands[3])"
> + "stp\t%q2, %q3, [%0], %4"
> [(set_attr "type" "neon_stp_q")]
> )
>
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index a73724a7fc0..b647e5af7c6 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -257,11 +257,49 @@ (define_predicate "aarch64_mem_pair_offset"
> (and (match_code "const_int")
> (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))")))
>
> +(define_special_predicate "aarch64_mem_pair_operator"
> + (and
> + (match_code "mem")
> + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))")
> + (ior
> + (match_test "mode == VOIDmode")
> + (match_test "known_eq (GET_MODE_SIZE (mode),
> + GET_MODE_SIZE (GET_MODE (op)))"))))
> +
> (define_predicate "aarch64_mem_pair_operand"
> (and (match_code "mem")
> (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), false,
> ADDR_QUERY_LDP_STP)")))
>
> +(define_predicate "pmode_plus_operator"
> + (and (match_code "plus")
> + (match_test "GET_MODE (op) == Pmode")))
> +
> +(define_special_predicate "aarch64_ldp_reg_operand"
> + (and
> + (match_code "reg,subreg")
> + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))")
> + (ior
> + (match_test "mode == VOIDmode")
> + (match_test "known_eq (GET_MODE_SIZE (mode),
> + GET_MODE_SIZE (GET_MODE (op)))"))))
> +
> +(define_special_predicate "aarch64_stp_reg_operand"
> + (ior (match_operand 0 "aarch64_ldp_reg_operand")
> + (and (ior
> + (and (match_code "const_int,const,const_vector")
> + (match_test "op == CONST0_RTX (GET_MODE (op))"))
> + (and (match_code "const_double")
> + (match_test "aarch64_float_const_zero_rtx_p (op)")))
> + (ior
> + (match_test "GET_MODE (op) == VOIDmode")
> + (and
> + (match_test "aarch64_ldpstp_operand_mode_p (GET_MODE (op))")
> + (ior
> + (match_test "mode == VOIDmode")
> + (match_test "known_eq (GET_MODE_SIZE (mode),
> + GET_MODE_SIZE (GET_MODE (op)))")))))))
> +
> ;; Used for storing two 64-bit values in an AdvSIMD register using an STP
> ;; as a 128-bit vec_concat.
> (define_predicate "aarch64_mem_pair_lanes_operand"
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-11-21 15:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-16 18:09 [PATCH 08/11] aarch64: Generalize writeback ldp/stp patterns Alex Coplan
2023-11-21 15:03 ` Richard Sandiford
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).