diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c6b8f71..06a67b5 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -202,6 +202,7 @@ extern void thumb_reload_in_hi (rtx *);
 extern void thumb_set_return_address (rtx, rtx);
 extern const char *thumb1_output_casesi (rtx *);
 extern const char *thumb2_output_casesi (rtx *);
+extern bool bad_reg_pair_for_thumb_ldrd_strd (rtx, rtx);
 #endif
 
 /* Defined in pe.c.  */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d709375..3eba510 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15410,6 +15410,155 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
   par = emit_insn (par);
   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
 }
+
+/* Return true if SRC1 and SRC2 cannot be used as the register pair of a
+   Thumb-2 LDRD or STRD: either operand is not a register, either is SP
+   or PC, or both name the same register.  */
+bool
+bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
+{
+  return (!REG_P (src1)
+          || !REG_P (src2)
+          || (REGNO (src1) == PC_REGNUM)
+          || (REGNO (src1) == SP_REGNUM)
+          || (REGNO (src1) == REGNO (src2))
+          || (REGNO (src2) == PC_REGNUM)
+          || (REGNO (src2) == SP_REGNUM));
+}
+
+/* Emit an epilogue sequence that restores the registers in SAVED_REGS_MASK
+   from the stack with LDRD instead of POP.  Registers are taken in
+   increasing register-number order and loaded in pairs from consecutive
+   words at SP; SP is then advanced past the words loaded.  Whatever is
+   left (one register, PC, or one register plus PC) is restored with a
+   post-increment LDR or with arm_emit_multi_reg_pop.  PC may be in the
+   mask only if REALLY_RETURN is true.  Returns REALLY_RETURN unchanged.  */
+static bool
+thumb2_emit_ldrd_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg, tmp1;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+  gcc_assert (really_return || ((saved_regs_mask & (1 << PC_REGNUM)) == 0));
+
+  if (really_return && (saved_regs_mask & (1 << PC_REGNUM)))
+    /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
+       to be popped.  So, if num_regs is even, now it will become odd,
+       and we can generate pop with PC.  If num_regs is odd, it will be
+       even now, and ldr with return can be generated for PC.  */
+    num_regs--;
+
+  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
+    /* Var j iterates over all the registers to gather all the registers in
+       saved_regs_mask.  Var i gives index of saved registers in stack frame.
+       A PARALLEL RTX of register-pair is created here, so that pattern for
+       LDRD can be matched.  As PC is always last register to be popped, and
+       we have already decremented num_regs if PC, we don't have to worry
+       about PC in this loop.  */
+    if (saved_regs_mask & (1 << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+
+        /* Create RTX for memory load.  A separate RTX is created for the
+           DWARF note as they are not shareable.  */
+        reg = gen_rtx_REG (SImode, j);
+        tmp = gen_rtx_SET (SImode, reg,
+                           gen_frame_mem (SImode,
+                             plus_constant (stack_pointer_rtx, 4 * i)));
+        tmp1 = gen_rtx_SET (SImode, reg,
+                            gen_frame_mem (SImode,
+                              plus_constant (stack_pointer_rtx, 4 * i)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        RTX_FRAME_RELATED_P (tmp1) = 1;
+
+        if (i % 2 == 0)
+          {
+            /* When saved-register index (i) is even, the RTX to be emitted is
+               yet to be created.  Hence create it first.  The LDRD pattern we
+               are generating is :
+               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
+                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
+               where target registers need not be consecutive.  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+            /* We need to maintain a sequence for DWARF info too.  */
+            dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
+          }
+
+        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
+           added as 0th element and if i is odd, reg_i is added as 1st element
+           of LDRD pattern shown above.  */
+        XVECEXP (par, 0, (i % 2)) = tmp;
+        XVECEXP (dwarf, 0, (i % 2)) = tmp1;
+
+        if ((i % 2) == 1)
+          {
+            /* When saved-register index (i) is odd, RTXs for both the registers
+               to be loaded are generated in above given LDRD pattern, and the
+               pattern can be emitted now.  */
+            par = emit_insn (par);
+            add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+          }
+
+        i++;
+      }
+
+  /* Advance SP past the I words loaded by LDRD above.  Nothing was loaded
+     when I is zero, so there is no adjustment to emit in that case.  */
+  if (i > 0)
+    {
+      tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+                         plus_constant (stack_pointer_rtx, 4 * i));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      emit_insn (tmp);
+    }
+
+  if (((num_regs % 2) == 1 && !really_return)
+      || ((num_regs % 2) == 0 && really_return))
+    {
+      /* One register is left (PC when returning); pop it with a post-increment
+         LDR.  J is already past every register loaded above, so scan upwards.  */
+      while (j <= LAST_ARM_REGNUM && (saved_regs_mask & (1 << j)) == 0)
+        j++;
+      gcc_assert (j <= LAST_ARM_REGNUM);
+
+      tmp1 = gen_rtx_MEM (SImode,
+                          gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+      set_mem_alias_set (tmp1, get_frame_alias_set ());
+      reg = gen_rtx_REG (SImode, j);
+      tmp = gen_rtx_SET (SImode, reg, tmp1);
+      RTX_FRAME_RELATED_P (tmp) = 1;
+
+      if (really_return)
+        {
+          /* If really_return, j must be PC_REGNUM.  */
+          gcc_assert (j == PC_REGNUM);
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = tmp;
+          emit_jump_insn (par);
+        }
+      else
+        emit_insn (tmp);
+    }
+  else if ((num_regs % 2) == 1 && really_return)
+    {
+      /* There are 2 registers to be popped.  So, generate the pattern
+         pop_multiple_with_stack_update_and_return to pop in PC.  */
+      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)),
+                              really_return);
+    }
+
+  return really_return;
+}
 
 /* Calculate the size of the return value that is passed in registers.
*/
 static unsigned
@@ -22236,7 +22385,14 @@ thumb2_expand_epilogue (bool is_sibling)
           really_return = true;
         }
 
-      arm_emit_multi_reg_pop (saved_regs_mask, really_return);
+      /* Use POP unless the tuning prefers LDRD/STRD and we are not
+         optimizing for size; otherwise restore the registers with LDRD.  */
+      if (!current_tune->prefer_ldrd_strd || optimize_size)
+        arm_emit_multi_reg_pop (saved_regs_mask, really_return);
+      else
+        really_return = thumb2_emit_ldrd_pop (saved_regs_mask,
+                                              really_return);
+
       if (really_return == true)
         return;
     }
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index d8ce982..3c55699 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -207,6 +207,12 @@
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
 
+(define_constraint "Pz"
+  "@internal In Thumb-2 state a multiple of 4 in the range -1020 to 1020"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1020
+                    && ival % 4 == 0")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 5db4a32..21d2815 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -21,6 +21,35 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+;; Thumb-2 LDRD loading two registers from [base] and [base, #4].
+(define_insn "*thumb2_ldrd_base"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mem:SI (match_operand:SI 1 "s_register_operand" "rk")))
+   (set (match_operand:SI 2 "register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 1)
+                         (const_int 4))))]
+  "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[2])))"
+  "ldrd%?\t%0, %2, [%1]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+;; Thumb-2 LDRD loading two registers from [base, #imm] and
+;; [base, #imm + 4]; the insn condition checks operand 4 is operand 2 + 4.
+(define_insn "*thumb2_ldrd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (match_operand:SI 2 "ldrd_immediate_operand" "Pz"))))
+   (set (match_operand:SI 3 "register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 1)
+                         (match_operand:SI 4 "const_int_operand" ""))))]
+  "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+     && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4]))
+     && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[3])))"
+  "ldrd%?\t%0, %3, [%1, %2]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
 (define_insn "*ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
     [(set (match_operand:SI 1 "arm_hard_register_operand" "")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 79e65fe..e074425 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -203,6 +203,13 @@
             (match_test "(GET_CODE (op) != CONST_INT
                           || (INTVAL (op) < 4096 && INTVAL (op) > -4096))"))))
 
+;; True for a constant usable as the immediate offset of the Thumb-2 LDRD
+;; patterns: a multiple of 4 in the range -1020 to 1020, the same set of
+;; values that constraint "Pz" accepts.
+(define_predicate "ldrd_immediate_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "(INTVAL (op) >= -1020 && INTVAL (op) <= 1020
+                     && (INTVAL (op) % 4) == 0)")))
+
 ;; True for operators that can be combined with a shift in ARM state.
 (define_special_predicate "shiftable_operator"
   (and (match_code "plus,minus,ior,xor,and")