diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 64d5993..49aae52 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -201,6 +201,7 @@ extern void thumb_reload_in_hi (rtx *);
 extern void thumb_set_return_address (rtx, rtx);
 extern const char *thumb1_output_casesi (rtx *);
 extern const char *thumb2_output_casesi (rtx *);
+extern bool bad_reg_pair_for_thumb_ldrd_strd (rtx, rtx);
 #endif
 
 /* Defined in pe.c.  */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d671281..6d008c5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15847,6 +15847,161 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
   REG_NOTES (par) = dwarf;
 }
 
+/* Return true if SRC1 and SRC2 cannot be used as the register pair of a
+   Thumb-2 LDRD/STRD: either operand is not a REG, either is PC or SP, or
+   both are the same register.  */
+bool
+bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
+{
+  return (GET_CODE (src1) != REG
+          || GET_CODE (src2) != REG
+          || (REGNO (src1) == PC_REGNUM)
+          || (REGNO (src1) == SP_REGNUM)
+          || (REGNO (src1) == REGNO (src2))
+          || (REGNO (src2) == PC_REGNUM)
+          || (REGNO (src2) == SP_REGNUM));
+}
+
+/* Emit a pop of the registers in SAVED_REGS_MASK using Thumb-2 LDRD.
+   Registers are loaded from the stack two at a time, each pair as a
+   two-element PARALLEL intended to match the LDRD patterns; a register
+   left over after pairing is loaded with LDR.  REALLY_RETURN is true when
+   the pop must also return; PC may be in SAVED_REGS_MASK only in that
+   case, and PC is never loaded by LDRD.  */
+static void
+thumb2_emit_ldrd_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg, tmp1;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+  gcc_assert (really_return || ((saved_regs_mask & (1 << PC_REGNUM)) == 0));
+
+  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
+     to be popped.  So, if num_regs is even, now it will become odd,
+     and we can generate pop with PC.  If num_regs is odd, it will be
+     even now, and ldr with return can be generated for PC.  */
+  if (really_return && (saved_regs_mask & (1 << PC_REGNUM)))
+    num_regs--;
+
+  /* Var j iterates over all the registers to gather all the registers in
+     saved_regs_mask.  Var i gives index of saved registers in stack frame.
+     A PARALLEL RTX of register-pair is created here, so that pattern for
+     LDRD can be matched.  As PC is always last register to be popped, and
+     we have already decremented num_regs if PC, we don't have to worry
+     about PC in this loop.  */
+  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
+    if (saved_regs_mask & (1 << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+
+        /* Create RTX for memory load.  */
+        reg = gen_rtx_REG (SImode, j);
+        tmp = gen_rtx_SET (SImode,
+                           reg,
+                           gen_frame_mem (SImode,
+                               plus_constant (stack_pointer_rtx, 4 * i)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+
+        if (i % 2 == 0)
+          {
+            /* When saved-register index (i) is even, the RTX to be emitted is
+               yet to be created.  Hence create it first.  The LDRD pattern we
+               are generating is :
+               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
+                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
+               where target registers need not be consecutive.  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+            dwarf = NULL_RTX;
+          }
+
+        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
+           added as 0th element and if i is odd, reg_i is added as 1st element
+           of LDRD pattern shown above.  */
+        XVECEXP (par, 0, (i % 2)) = tmp;
+        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        if ((i % 2) == 1)
+          {
+            /* When saved-register index (i) is odd, RTXs for both the registers
+               to be loaded are generated in above given LDRD pattern, and the
+               pattern can be emitted now.  */
+            par = emit_insn (par);
+            REG_NOTES (par) = dwarf;
+          }
+
+        i++;
+      }
+
+  /* If the number of registers pushed is odd AND really_return is false OR
+     number of registers are even AND really_return is true, last register is
+     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
+     then LDR with post increment.  */
+
+  /* Increment the stack pointer past the I 4-byte registers that were
+     loaded by the LDRD patterns above.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * i));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  emit_insn (tmp);
+
+  dwarf = NULL_RTX;
+
+  if (((num_regs % 2) == 1 && !really_return)
+      || ((num_regs % 2) == 0 && really_return))
+    {
+      /* Scan for the single register to be popped.  Skip until the saved
+         register is found; the assert stops the scan from shifting past
+         the last core register if no saved register remains.  */
+      for (; (saved_regs_mask & (1 << j)) == 0; j++)
+        gcc_assert (j < LAST_ARM_REGNUM);
+
+      /* Gen LDR with post increment here.  */
+      tmp1 = gen_rtx_MEM (SImode,
+                          gen_rtx_POST_INC (SImode,
+                                            stack_pointer_rtx));
+      set_mem_alias_set (tmp1, get_frame_alias_set ());
+
+      reg = gen_rtx_REG (SImode, j);
+      tmp = gen_rtx_SET (SImode, reg, tmp1);
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      if (really_return)
+        {
+          /* If really_return, j must be PC_REGNUM.  */
+          gcc_assert (j == PC_REGNUM);
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = tmp;
+          par = emit_jump_insn (par);
+        }
+      else
+        {
+          par = emit_insn (tmp);
+        }
+
+      REG_NOTES (par) = dwarf;
+    }
+  else if ((num_regs % 2) == 1 && really_return)
+    {
+      /* There are 2 registers to be popped.  So, generate the pattern
+         pop_multiple_with_stack_update_and_return to pop in PC.  */
+      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)),
+                              really_return);
+    }
+
+  return;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -22615,7 +22770,13 @@ arm_expand_epilogue (bool really_return)
         }
       else
         {
-          arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+          if (!current_tune->prefer_ldrd_strd
+              || optimize_function_for_size_p (cfun)
+              || TARGET_ARM)
+            arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+          else
+            /* Generate LDRD pattern instead of POP pattern.  */
+            thumb2_emit_ldrd_pop (saved_regs_mask, return_in_pc);
         }
 
       if (return_in_pc == true)
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 7d0269a..e33eff2 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -207,6 +207,12 @@
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
 
+(define_constraint "Pz"
+  "@internal In Thumb-2 state a multiple of 4 in the range -1020 to 1020"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1020
+                    && ival % 4 == 0")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 5db4a32..21d2815 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -21,6 +21,36 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+;; Load two registers from the consecutive words at [%1] and [%1, #4] with
+;; a single Thumb-2 LDRD.  The destinations need not be consecutive.
+(define_insn "*thumb2_ldrd_base"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mem:SI (match_operand:SI 1 "s_register_operand" "rk")))
+   (set (match_operand:SI 2 "register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 1)
+                         (const_int 4))))]
+  "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[2])))"
+  "ldrd%?\t%0, %2, [%1]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+;; As above, but with a constant offset: the words at [%1, %2] and
+;; [%1, %2 + 4] are loaded.  Operand 4 must equal operand 2 plus 4.
+(define_insn "*thumb2_ldrd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (match_operand:SI 2 "ldrd_immediate_operand" "Pz"))))
+   (set (match_operand:SI 3 "register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 1)
+                         (match_operand:SI 4 "const_int_operand" ""))))]
+  "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+     && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4]))
+     && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[3])))"
+  "ldrd%?\t%0, %3, [%1, %2]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
 (define_insn "*ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
     [(set (match_operand:SI 1 "arm_hard_register_operand" "")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 85a112e..881c0b0 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -209,6 +209,14 @@
             (match_test "(GET_CODE (op) != CONST_INT
                           || (INTVAL (op) < 4096 && INTVAL (op) > -4096))"))))
 
+;; True for a constant that is a valid Thumb-2 LDRD immediate offset: a
+;; multiple of 4 in the range -1020 to 1020.  Keep in sync with the "Pz"
+;; constraint.
+(define_predicate "ldrd_immediate_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "(INTVAL (op) >= -1020 && INTVAL (op) <= 1020
+                     && INTVAL (op) % 4 == 0)")))
+
 ;; True for operators that can be combined with a shift in ARM state.
 (define_special_predicate "shiftable_operator"
   (and (match_code "plus,minus,ior,xor,and")