diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 06a67b5..d5287ad 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -162,6 +162,7 @@ extern const char *arm_output_memory_barrier (rtx *);
 extern const char *arm_output_sync_insn (rtx, rtx *);
 extern unsigned int arm_sync_loop_insns (rtx , rtx *);
 extern int arm_attr_length_push_multi(rtx, rtx);
+extern bool bad_reg_pair_for_arm_ldrd_strd (rtx, rtx);
 
 #if defined TREE_CODE
 extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index fd8c31d..08fa0d5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -93,6 +93,7 @@ static bool arm_assemble_integer (rtx, unsigned int, int);
 static void arm_print_operand (FILE *, rtx, int);
 static void arm_print_operand_address (FILE *, rtx);
 static bool arm_print_operand_punct_valid_p (unsigned char code);
+static rtx emit_multi_reg_push (unsigned long);
 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
 static arm_cc get_arm_condition_code (rtx);
 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
@@ -15095,6 +15096,126 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
     }
 }
 
+/* Emit the ARM-mode prologue stores for the registers in SAVED_REGS_MASK,
+   using STRD where possible.  SAVED_REGS_MASK has bit N set for each register
+   N to save; it must be non-empty and must not contain SP or PC.
+
+   STRD in ARM mode needs an even/odd pair of consecutive registers.  Walking
+   from the highest register down, this function accumulates registers until
+   the first such pair is found.  It then generates a multi-reg PUSH for the
+   accumulated registers, followed by STRD with write-back for the pair.  This
+   is repeated until all the registers are stored on the stack.  Registers
+   left over at the end are stored with a final multi-reg PUSH.  */
+static void
+arm_emit_strd_push (unsigned long saved_regs_mask)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par, dwarf, insn;
+  rtx tmp, tmp1;
+  unsigned long regs_to_be_pushed_mask = 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1UL << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* Var j iterates over all registers, highest first, to gather the registers
+     in saved_regs_mask.  Var i counts the registers handled so far, which
+     ends the loop once every saved register has been seen.
+     regs_to_be_pushed_mask accumulates the registers to be stored by the
+     next multi-reg PUSH.  */
+  for (i = 0, j = LAST_ARM_REGNUM; i < num_regs; j--)
+    if (saved_regs_mask & (1UL << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+        gcc_assert (j != PC_REGNUM);
+        i++;
+
+        if ((j % 2) == 1
+            && (saved_regs_mask & (1UL << (j - 1)))
+            && regs_to_be_pushed_mask)
+          {
+            /* Register j is the odd half of a pair that the next matching
+               iteration stores with STRD.  Emit PUSH for the accumulated
+               registers, reset regs_to_be_pushed_mask, and leave j out of
+               it.  */
+            insn = emit_multi_reg_push (regs_to_be_pushed_mask);
+            regs_to_be_pushed_mask = 0;
+            RTX_FRAME_RELATED_P (insn) = 1;
+            continue;
+          }
+
+        regs_to_be_pushed_mask |= (1UL << j);
+
+        if ((j % 2) == 0 && (saved_regs_mask & (1UL << (j + 1))))
+          {
+            /* Registers j and j + 1 form a pair, and any other accumulated
+               registers have already been pushed, so emit STRD.  PAR is the
+               insn pattern.  A PARALLEL computes all the values it uses
+               before performing any side effect, so its stores are addressed
+               from SP as it was before the decrement: SP - 8 and SP - 4.
+               DWARF is the frame-related note; it keeps the decrement
+               followed by stores at SP and SP + 4.  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+            dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
+
+            tmp = gen_rtx_SET (VOIDmode,
+                               stack_pointer_rtx,
+                               plus_constant (stack_pointer_rtx, -8));
+            tmp1 = gen_rtx_SET (VOIDmode,
+                                stack_pointer_rtx,
+                                plus_constant (stack_pointer_rtx, -8));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            RTX_FRAME_RELATED_P (tmp1) = 1;
+            XVECEXP (par, 0, 0) = tmp;
+            XVECEXP (dwarf, 0, 0) = tmp1;
+
+            tmp = gen_rtx_SET (VOIDmode,
+                               gen_frame_mem (SImode,
+                                              plus_constant (stack_pointer_rtx,
+                                                             -8)),
+                               gen_rtx_REG (SImode, j));
+            tmp1 = gen_rtx_SET (VOIDmode,
+                                gen_frame_mem (SImode, stack_pointer_rtx),
+                                gen_rtx_REG (SImode, j));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            RTX_FRAME_RELATED_P (tmp1) = 1;
+            XVECEXP (par, 0, 1) = tmp;
+            XVECEXP (dwarf, 0, 1) = tmp1;
+
+            tmp = gen_rtx_SET (VOIDmode,
+                               gen_frame_mem (SImode,
+                                              plus_constant (stack_pointer_rtx,
+                                                             -4)),
+                               gen_rtx_REG (SImode, j + 1));
+            tmp1 = gen_rtx_SET (VOIDmode,
+                                gen_frame_mem (SImode,
+                                               plus_constant (stack_pointer_rtx,
+                                                              4)),
+                                gen_rtx_REG (SImode, j + 1));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            RTX_FRAME_RELATED_P (tmp1) = 1;
+            XVECEXP (par, 0, 2) = tmp;
+            XVECEXP (dwarf, 0, 2) = tmp1;
+
+            insn = emit_insn (par);
+            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+            RTX_FRAME_RELATED_P (insn) = 1;
+            regs_to_be_pushed_mask = 0;
+          }
+      }
+
+  /* Check if any accumulated registers are yet to be pushed, and generate
+     multi-reg PUSH for them.  */
+  if (regs_to_be_pushed_mask)
+    {
+      insn = emit_multi_reg_push (regs_to_be_pushed_mask);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+}
+
 /* Generate and emit a pattern that will be recognized as STRD pattern.  If even
    number of registers are being pushed, multiple STRD patterns are created for
    all register pairs.  If odd number of registers are pushed, first register is
@@ -15529,6 +15650,21 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
   par = emit_insn (par);
   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
 }
+
+/* Return true if SRC1 and SRC2 cannot be used as the register pair of an
+   ARM-mode LDRD/STRD: both must be REGs, SRC1 must be even-numbered and SRC2
+   must be the next register, and SRC2 must be neither PC nor SP.  */
+bool
+bad_reg_pair_for_arm_ldrd_strd (rtx src1, rtx src2)
+{
+  return (GET_CODE (src1) != REG
+          || GET_CODE (src2) != REG
+          || ((REGNO (src1) + 1) != REGNO (src2))
+          || ((REGNO (src1) % 2) != 0)
+          || (REGNO (src2) == PC_REGNUM)
+          || (REGNO (src2) == SP_REGNUM));
+}
+
 bool
 bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
 {
@@ -15958,7 +16094,8 @@ arm_get_frame_offsets (void)
              use 32-bit push/pop instructions.  */
           if (! any_sibcall_uses_r3 ()
               && arm_size_return_regs () <= 12
-              && (offsets->saved_regs_mask & (1 << 3)) == 0)
+              && (offsets->saved_regs_mask & (1 << 3)) == 0
+              && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
             {
               reg = 3;
             }
@@ -16427,9 +16564,12 @@ arm_expand_prologue (void)
             }
         }
 
-      if (TARGET_THUMB2 && current_tune->prefer_ldrd_strd && !optimize_size)
+      if (current_tune->prefer_ldrd_strd && !optimize_size)
         {
-          thumb2_emit_strd_push (live_regs_mask);
+          if (TARGET_THUMB2)
+            thumb2_emit_strd_push (live_regs_mask);
+          else
+            arm_emit_strd_push (live_regs_mask);
         }
       else
         {
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index e3dcd4f..3c729bb 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -73,6 +73,49 @@
   [(set_attr "type" "store2")
    (set_attr "predicable" "yes")])
 
+;; STRD with base write-back, as emitted for the ARM-mode prologue.  The side
+;; effects of a parallel all use the value operand 0 had before the insn, so
+;; the two stores are at offsets -8 and -4 from the old base.
+(define_insn "*arm_strd_base"
+  [(set (match_operand:SI 0 "arm_hard_register_operand" "+rk")
+        (plus:SI (match_dup 0)
+                 (const_int -8)))
+   (set (mem:SI (plus:SI (match_dup 0)
+                         (const_int -8)))
+        (match_operand:SI 1 "arm_hard_register_operand" "r"))
+   (set (mem:SI (plus:SI (match_dup 0)
+                         (const_int -4)))
+        (match_operand:SI 2 "arm_hard_register_operand" "r"))]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_arm_ldrd_strd (operands[1], operands[2]))
+     && (REGNO (operands[1]) != REGNO (operands[0]))
+     && (REGNO (operands[2]) != REGNO (operands[0])))"
+  "str%(d%)\t%1, %2, [%0, #-8]!"
+  [(set_attr "type" "store2")
+   (set_attr "predicable" "yes")])
+
+;; Rewrite the three-set STRD push above as a single DImode store through a
+;; pre-decremented base.
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SI 0 "arm_hard_register_operand" "")
+          (plus:SI (match_dup 0)
+                   (const_int -8)))
+     (set (mem:SI (plus:SI (match_dup 0)
+                           (const_int -8)))
+          (match_operand:SI 1 "arm_hard_register_operand" ""))
+     (set (mem:SI (plus:SI (match_dup 0)
+                           (const_int -4)))
+          (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_arm_ldrd_strd (operands[1], operands[2]))
+     && (REGNO (operands[1]) != REGNO (operands[0]))
+     && (REGNO (operands[2]) != REGNO (operands[0])))"
+  [(set (mem:DI (pre_dec:SI (match_dup 0)))
+        (match_dup 1))]
+  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));"
+)
+
 (define_insn "*ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
     [(set (match_operand:SI 1 "arm_hard_register_operand" "")