From: Kevin Lee <kevinl@rivosinc.com>
To: gcc-patches@gcc.gnu.org
Cc: gnu-toolchain@rivosinc.com
Subject: [PATCH] RISC-V modified add3 for large stack frame optimization [PR105733]
Date: Mon, 19 Sep 2022 09:07:14 -0700 [thread overview]
Message-ID: <CALkvSf8f0-cAyKR6K4GfW3L+DQ6ghexE=kjR-JkeokUeBQ3mpw@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 9208 bytes --]
Hello GCC,
Started from Jim Wilson's patch in
https://github.com/riscv-admin/riscv-code-speed-optimization/blob/main/projects/gcc-optimizations.adoc
for the large stack frame optimization problem, this augmented patch
generates less instructions for cases such as
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105733.
Original:
foo:
li t0,-4096
addi t0,t0,2016
li a4,4096
add sp,sp,t0
li a5,-4096
addi a4,a4,-2032
add a4,a4,a5
addi a5,sp,16
add a5,a4,a5
add a0,a5,a0
li t0,4096
sd a5,8(sp)
sb zero,2032(a0)
addi t0,t0,-2016
add sp,sp,t0
jr ra
After Patch:
foo:
li t0,-4096
addi t0,t0,2032
add sp,sp,t0
addi a5,sp,-2032
add a0,a5,a0
li t0,4096
sb zero,2032(a0)
addi t0,t0,-2032
add sp,sp,t0
jr ra
========= Summary of gcc testsuite =========
| # of unexpected case / # of unique unexpected
case
| gcc | g++ | gfortran |
rv64gc/ lp64d/ medlow | 4 / 4 | 13 / 4 | 0 / 0 |
No additional failures were created from the testsuite.
gcc/ChangeLog:
Jim Wilson <jim.wilson.gcc@gmail.com>
Michael Collison <collison@rivosinc.com>
Kevin Lee <kevinl@rivosinc.com>
* config/riscv/predicates.md (const_lui_operand): New predicate.
(add_operand): Ditto.
(reg_or_const_int_operand): Ditto.
* config/riscv/riscv-protos.h (riscv_eliminable_reg): New
function.
* config/riscv/riscv.cc (riscv_eliminable_reg): New Function.
(riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
gen_rtx_fmt_ee instead of gen_add3_insn.
(riscv_adjust_libcall_cfi_epilogue): ditto.
* config/riscv/riscv.md (addsi3): Remove.
(adddi3): ditto.
(add<mode>3): New instruction for large stack frame optimization.
(add<mode>3_internal): ditto
(add<mode>3_internal2): New instruction for insns generated in
the prologue and epilogue pass.
---
gcc/config/riscv/predicates.md | 13 +++++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv.cc | 20 ++++++--
gcc/config/riscv/riscv.md | 84 ++++++++++++++++++++++++++++-----
4 files changed, 101 insertions(+), 17 deletions(-)
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 862e72b0983..b98bb5a9768 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -35,6 +35,14 @@ (define_predicate "sfb_alu_operand"
(ior (match_operand 0 "arith_operand")
(match_operand 0 "lui_operand")))
+(define_predicate "const_lui_operand"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0xFFF) == 0 && INTVAL (op) != 0")))
+
+(define_predicate "add_operand"
+ (ior (match_operand 0 "arith_operand")
+ (match_operand 0 "const_lui_operand")))
+
(define_predicate "const_csr_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 31)")))
@@ -59,6 +67,11 @@ (define_predicate "reg_or_0_operand"
(ior (match_operand 0 "const_0_operand")
(match_operand 0 "register_operand")))
+;; For use in adds, when adding to an eliminable register.
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_code "const_int")
+ (match_operand 0 "register_operand")))
+
;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
(define_predicate "branch_on_bit_operand"
(and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv-protos.h
b/gcc/config/riscv/riscv-protos.h
index 649c5c977e1..8f0aa8114be 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -63,6 +63,7 @@ extern void riscv_expand_conditional_move (rtx, rtx, rtx,
rtx_code, rtx, rtx);
extern rtx riscv_legitimize_call_address (rtx);
extern void riscv_set_return_address (rtx, rtx);
extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_eliminable_reg (rtx);
extern rtx riscv_return_addr (int, rtx);
extern poly_int64 riscv_initial_elimination_offset (int, int);
extern void riscv_expand_prologue (void);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 675d92c0961..b5577a4f366 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4320,6 +4320,16 @@ riscv_initial_elimination_offset (int from, int to)
return src - dest;
}
+/* Return true if X is a register that will be eliminated later on. */
+bool
+riscv_eliminable_reg (rtx x)
+{
+ return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
+ || REGNO (x) == ARG_POINTER_REGNUM
+ || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
+ && REGNO (x) <= LAST_VIRTUAL_REGISTER));
+}
+
/* Implement RETURN_ADDR_RTX. We do not support moving back to a
previous frame. */
@@ -4521,8 +4531,9 @@ riscv_adjust_libcall_cfi_prologue ()
}
/* Debug info for adjust sp. */
- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (-saved_size));
+ adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+ stack_pointer_rtx, GEN_INT (saved_size)));
dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
dwarf);
return dwarf;
@@ -4624,8 +4635,9 @@ riscv_adjust_libcall_cfi_epilogue ()
int saved_size = cfun->machine->frame.save_libcall_adjustment;
/* Debug info for adjust sp. */
- adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (saved_size));
+ adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx),
+ stack_pointer_rtx, GEN_INT (saved_size)));
dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
dwarf);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 014206fb8bd..0285ac67b2a 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -438,23 +438,80 @@ (define_insn "add<mode>3"
[(set_attr "type" "fadd")
(set_attr "mode" "<UNITMODE>")])
-(define_insn "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand" " r,I")))]
+(define_expand "add<mode>3"
+ [(parallel
+ [(set (match_operand:GPR 0 "register_operand" "")
+ (plus:GPR (match_operand:GPR 1 "register_operand" "")
+ (match_operand:GPR 2 "add_operand" "")))
+ (clobber (match_scratch:GPR 3 ""))])]
""
- "add%i2%~\t%0,%1,%2"
+{
+ if (riscv_eliminable_reg (operands[1]))
+ {
+ if (splittable_const_int_operand (operands[2], <MODE>mode))
+ {
+ /* The idea here is that we emit
+ add op0, op1, %hi(op2)
+ addi op0, op0, %lo(op2)
+ Then when op1, the eliminable reg, gets replaced with sp+offset,
+ we can simplify the constants. */
+ HOST_WIDE_INT high_part = CONST_HIGH_PART (INTVAL (operands[2]));
+ emit_insn (gen_add<mode>3_internal (operands[0], operands[1],
+ GEN_INT (high_part)));
+ operands[1] = operands[0];
+ operands[2] = GEN_INT (INTVAL (operands[2]) - high_part);
+ }
+ else if (! const_arith_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
+})
+
+(define_insn_and_split "add<mode>3_internal"
+ [(set (match_operand:GPR 0 "register_operand" "=r,r,&r,!&r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
+ (match_operand:GPR 2 "add_operand" " r,I,L,L")))
+ (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
+ ""
+{
+ if ((which_alternative == 2) || (which_alternative == 3))
+ return "#";
+
+ if (TARGET_64BIT && <MODE>mode == SImode)
+ return "add%i2w\t%0,%1,%2";
+ else
+ return "add%i2\t%0,%1,%2";
+}
+ "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
+ [(const_int 0)]
+{
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ {
+ emit_insn (gen_mov<mode> (operands[0], operands[2]));
+ emit_insn (gen_add<mode>3_internal (operands[0], operands[0],
operands[1]));
+ }
+ else
+ {
+ emit_insn (gen_mov<mode> (operands[3], operands[2]));
+ emit_insn (gen_add<mode>3_internal (operands[0], operands[0],
operands[3]));
+ }
+ DONE;
+}
[(set_attr "type" "arith")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "adddi3"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (plus:DI (match_operand:DI 1 "register_operand" " r,r")
- (match_operand:DI 2 "arith_operand" " r,I")))]
- "TARGET_64BIT"
- "add%i2\t%0,%1,%2"
+(define_insn "add<mode>3_internal2"
+ [(set (match_operand:GPR 0 "register_operand" "=r,r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r")
+ (match_operand:GPR 2 "arith_operand" " r,I")))]
+ ""
+ {
+ if (TARGET_64BIT && <MODE>mode == SImode)
+ return "add%i2w\t%0,%1,%2";
+ else
+ return "add%i2\t%0,%1,%2";
+ }
[(set_attr "type" "arith")
- (set_attr "mode" "DI")])
+ (set_attr "mode" "<MODE>")])
(define_expand "addv<mode>4"
[(set (match_operand:GPR 0 "register_operand" "=r,r")
@@ -500,6 +557,7 @@ (define_expand "addv<mode>4"
DONE;
})
+
(define_expand "uaddv<mode>4"
[(set (match_operand:GPR 0 "register_operand" "=r,r")
(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
--
2.25.1
next reply other threads:[~2022-09-19 16:07 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-19 16:07 Kevin Lee [this message]
2022-09-19 22:16 ` Kito Cheng
2022-09-21 0:04 ` Kevin Lee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CALkvSf8f0-cAyKR6K4GfW3L+DQ6ghexE=kjR-JkeokUeBQ3mpw@mail.gmail.com' \
--to=kevinl@rivosinc.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=gnu-toolchain@rivosinc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).