Could you provide some data including code size and performance? add is frequently used patten, so we should more careful when changing that. Kevin Lee 於 2022年9月19日 週一,18:07寫道: > Hello GCC, > Started from Jim Wilson's patch in > > https://github.com/riscv-admin/riscv-code-speed-optimization/blob/main/projects/gcc-optimizations.adoc > for the large stack frame optimization problem, this augmented patch > generates less instructions for cases such as > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105733. > Original: > foo: > li t0,-4096 > addi t0,t0,2016 > li a4,4096 > add sp,sp,t0 > li a5,-4096 > addi a4,a4,-2032 > add a4,a4,a5 > addi a5,sp,16 > add a5,a4,a5 > add a0,a5,a0 > li t0,4096 > sd a5,8(sp) > sb zero,2032(a0) > addi t0,t0,-2016 > add sp,sp,t0 > jr ra > After Patch: > foo: > li t0,-4096 > addi t0,t0,2032 > add sp,sp,t0 > addi a5,sp,-2032 > add a0,a5,a0 > li t0,4096 > sb zero,2032(a0) > addi t0,t0,-2032 > add sp,sp,t0 > jr ra > > ========= Summary of gcc testsuite ========= > | # of unexpected case / # of unique unexpected > case > | gcc | g++ | gfortran | > rv64gc/ lp64d/ medlow | 4 / 4 | 13 / 4 | 0 / 0 | > No additional failures were created from the testsuite. > > gcc/ChangeLog: > Jim Wilson > Michael Collison > Kevin Lee > > * config/riscv/predicates.md (const_lui_operand): New predicate. > (add_operand): Ditto. > (reg_or_const_int_operand): Ditto. > * config/riscv/riscv-protos.h (riscv_eliminable_reg): New > function. > * config/riscv/riscv.cc (riscv_eliminable_reg): New Function. > (riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and > gen_rtx_fmt_ee instead of gen_add3_insn. > (riscv_adjust_libcall_cfi_epilogue): ditto. > * config/riscv/riscv.md (addsi3): Remove. > (adddi3): ditto. > (add3): New instruction for large stack frame optimization. > (add3_internal): ditto > (add3_internal2): New instruction for insns generated in > the prologue and epilogue pass. > --- > gcc/config/riscv/predicates.md | 13 +++++ > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/riscv.cc | 20 ++++++-- > gcc/config/riscv/riscv.md | 84 ++++++++++++++++++++++++++++----- > 4 files changed, 101 insertions(+), 17 deletions(-) > > diff --git a/gcc/config/riscv/predicates.md > b/gcc/config/riscv/predicates.md > index 862e72b0983..b98bb5a9768 100644 > --- a/gcc/config/riscv/predicates.md > +++ b/gcc/config/riscv/predicates.md > @@ -35,6 +35,14 @@ (define_predicate "sfb_alu_operand" > (ior (match_operand 0 "arith_operand") > (match_operand 0 "lui_operand"))) > > +(define_predicate "const_lui_operand" > + (and (match_code "const_int") > + (match_test "(INTVAL (op) & 0xFFF) == 0 && INTVAL (op) != 0"))) > + > +(define_predicate "add_operand" > + (ior (match_operand 0 "arith_operand") > + (match_operand 0 "const_lui_operand"))) > + > (define_predicate "const_csr_operand" > (and (match_code "const_int") > (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) > @@ -59,6 +67,11 @@ (define_predicate "reg_or_0_operand" > (ior (match_operand 0 "const_0_operand") > (match_operand 0 "register_operand"))) > > +;; For use in adds, when adding to an eliminable register. > +(define_predicate "reg_or_const_int_operand" > + (ior (match_code "const_int") > + (match_operand 0 "register_operand"))) > + > ;; Only use branch-on-bit sequences when the mask is not an ANDI > immediate. > (define_predicate "branch_on_bit_operand" > (and (match_code "const_int") > diff --git a/gcc/config/riscv/riscv-protos.h > b/gcc/config/riscv/riscv-protos.h > index 649c5c977e1..8f0aa8114be 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -63,6 +63,7 @@ extern void riscv_expand_conditional_move (rtx, rtx, rtx, > rtx_code, rtx, rtx); > extern rtx riscv_legitimize_call_address (rtx); > extern void riscv_set_return_address (rtx, rtx); > extern bool riscv_expand_block_move (rtx, rtx, rtx); > +extern bool riscv_eliminable_reg (rtx); > extern rtx riscv_return_addr (int, rtx); > extern poly_int64 riscv_initial_elimination_offset (int, int); > extern void riscv_expand_prologue (void); > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 675d92c0961..b5577a4f366 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -4320,6 +4320,16 @@ riscv_initial_elimination_offset (int from, int to) > return src - dest; > } > > +/* Return true if X is a register that will be eliminated later on. */ > +bool > +riscv_eliminable_reg (rtx x) > +{ > + return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM > + || REGNO (x) == ARG_POINTER_REGNUM > + || (REGNO (x) >= FIRST_VIRTUAL_REGISTER > + && REGNO (x) <= LAST_VIRTUAL_REGISTER)); > +} > + > /* Implement RETURN_ADDR_RTX. We do not support moving back to a > previous frame. */ > > @@ -4521,8 +4531,9 @@ riscv_adjust_libcall_cfi_prologue () > } > > /* Debug info for adjust sp. */ > - adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, > - stack_pointer_rtx, GEN_INT (-saved_size)); > + adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx, > + gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx), > + stack_pointer_rtx, GEN_INT (saved_size))); > dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, > dwarf); > return dwarf; > @@ -4624,8 +4635,9 @@ riscv_adjust_libcall_cfi_epilogue () > int saved_size = cfun->machine->frame.save_libcall_adjustment; > > /* Debug info for adjust sp. */ > - adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, > - stack_pointer_rtx, GEN_INT (saved_size)); > + adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx, > + gen_rtx_fmt_ee (PLUS, GET_MODE (stack_pointer_rtx), > + stack_pointer_rtx, GEN_INT (saved_size))); > dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, > dwarf); > > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > index 014206fb8bd..0285ac67b2a 100644 > --- a/gcc/config/riscv/riscv.md > +++ b/gcc/config/riscv/riscv.md > @@ -438,23 +438,80 @@ (define_insn "add3" > [(set_attr "type" "fadd") > (set_attr "mode" "")]) > > -(define_insn "addsi3" > - [(set (match_operand:SI 0 "register_operand" "=r,r") > - (plus:SI (match_operand:SI 1 "register_operand" " r,r") > - (match_operand:SI 2 "arith_operand" " r,I")))] > +(define_expand "add3" > + [(parallel > + [(set (match_operand:GPR 0 "register_operand" "") > + (plus:GPR (match_operand:GPR 1 "register_operand" "") > + (match_operand:GPR 2 "add_operand" ""))) > + (clobber (match_scratch:GPR 3 ""))])] > "" > - "add%i2%~\t%0,%1,%2" > +{ > + if (riscv_eliminable_reg (operands[1])) > + { > + if (splittable_const_int_operand (operands[2], mode)) > + { > + /* The idea here is that we emit > + add op0, op1, %hi(op2) > + addi op0, op0, %lo(op2) > + Then when op1, the eliminable reg, gets replaced with sp+offset, > + we can simplify the constants. */ > + HOST_WIDE_INT high_part = CONST_HIGH_PART (INTVAL (operands[2])); > + emit_insn (gen_add3_internal (operands[0], operands[1], > + GEN_INT (high_part))); > + operands[1] = operands[0]; > + operands[2] = GEN_INT (INTVAL (operands[2]) - high_part); > + } > + else if (! const_arith_operand (operands[2], mode)) > + operands[2] = force_reg (mode, operands[2]); > + } > +}) > + > +(define_insn_and_split "add3_internal" > + [(set (match_operand:GPR 0 "register_operand" "=r,r,&r,!&r") > + (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0") > + (match_operand:GPR 2 "add_operand" " r,I,L,L"))) > + (clobber (match_scratch:GPR 3 "=X,X,X,&r"))] > + "" > +{ > + if ((which_alternative == 2) || (which_alternative == 3)) > + return "#"; > + > + if (TARGET_64BIT && mode == SImode) > + return "add%i2w\t%0,%1,%2"; > + else > + return "add%i2\t%0,%1,%2"; > +} > + "&& reload_completed && const_lui_operand (operands[2], mode)" > + [(const_int 0)] > +{ > + if (REGNO (operands[0]) != REGNO (operands[1])) > + { > + emit_insn (gen_mov (operands[0], operands[2])); > + emit_insn (gen_add3_internal (operands[0], operands[0], > operands[1])); > + } > + else > + { > + emit_insn (gen_mov (operands[3], operands[2])); > + emit_insn (gen_add3_internal (operands[0], operands[0], > operands[3])); > + } > + DONE; > +} > [(set_attr "type" "arith") > - (set_attr "mode" "SI")]) > + (set_attr "mode" "")]) > > -(define_insn "adddi3" > - [(set (match_operand:DI 0 "register_operand" "=r,r") > - (plus:DI (match_operand:DI 1 "register_operand" " r,r") > - (match_operand:DI 2 "arith_operand" " r,I")))] > - "TARGET_64BIT" > - "add%i2\t%0,%1,%2" > +(define_insn "add3_internal2" > + [(set (match_operand:GPR 0 "register_operand" "=r,r") > + (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r") > + (match_operand:GPR 2 "arith_operand" " r,I")))] > + "" > + { > + if (TARGET_64BIT && mode == SImode) > + return "add%i2w\t%0,%1,%2"; > + else > + return "add%i2\t%0,%1,%2"; > + } > [(set_attr "type" "arith") > - (set_attr "mode" "DI")]) > + (set_attr "mode" "")]) > > (define_expand "addv4" > [(set (match_operand:GPR 0 "register_operand" "=r,r") > @@ -500,6 +557,7 @@ (define_expand "addv4" > DONE; > }) > > + > (define_expand "uaddv4" > [(set (match_operand:GPR 0 "register_operand" "=r,r") > (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") > -- > 2.25.1 >