diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 986262b..434c154 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13346,85 +13346,167 @@ aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
   return;
 }
 
-/* Given OPERANDS of consecutive load/store, check if we can merge
-   them into ldp/stp.  LOAD is true if they are load instructions.
-   MODE is the mode of memory operands.  */
+/* Extract into REG and MEM the register and memory operands of an
+   LDP/STP operation from OPERANDS.  The number of reg/mem operand
+   pairs to extract is OPS, and whether we are looking at a load or
+   a store is given by LOAD.  */
 
-bool
-aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
-				enum machine_mode mode)
+static void
+aarch64_extract_ldpstp_operands (unsigned int ops, bool load,
+				 rtx *operands, rtx *reg, rtx *mem)
 {
-  HOST_WIDE_INT offval_1, offval_2, msize;
-  enum reg_class rclass_1, rclass_2;
-  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
+  for (unsigned int i = 0; i < ops; i++)
+    {
+      unsigned int twoi = i * 2;
+      reg[i] = operands[load ? twoi : (twoi + 1)];
+      mem[i] = operands[load ? (twoi + 1) : twoi];
+      /* Sanity check.  */
+      gcc_assert (MEM_P (mem[i]));
 
-  if (load)
+      if (load)
+	gcc_assert (REG_P (reg[i]));
+    }
+}
+
+/* Return TRUE if each RTX in REG (which has size COUNT) is of the
+   same register class.  For the purposes of this function, anything
+   which would not satisfy REG_P (i.e. a const_int 0 or a const_double
+   0.0) is considered to be in GENERAL_REGS.  */
+
+static bool
+aarch64_ldpstp_ops_same_reg_class_p (unsigned int count, rtx *reg)
+{
+  /* Check if the registers are of same class.  */
+  reg_class rclass = (REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0])))
+		      ? FP_REGS
+		      : GENERAL_REGS;
+
+  for (unsigned int i = 1; i < count; i++)
     {
-      mem_1 = operands[1];
-      mem_2 = operands[3];
-      reg_1 = operands[0];
-      reg_2 = operands[2];
-      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
-      if (REGNO (reg_1) == REGNO (reg_2))
+      reg_class rc = (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
+		      ? FP_REGS
+		      : GENERAL_REGS;
+      if (rclass != rc)
 	return false;
     }
-  else
+
+  return true;
+}
+
+/* REG contains the set of registers, sized by COUNT, which are written
+   by a sequence of (base + offset) loads that share the base register
+   BASE and have offsets OFFSETS.  Return TRUE if any of the registers
+   in REG clobbers BASE.  */
+
+static bool
+aarch64_ldpstp_load_regs_clobber_base_p (unsigned int count,
+					 rtx *reg, rtx base,
+					 HOST_WIDE_INT *offsets)
+{
+  for (unsigned int i = 0; i < count - 1; i++)
+    if (reg_mentioned_p (reg[i], base))
+      return true;
+
+  /* In increasing order, the last load can clobber the address.  */
+  return (offsets[0] > offsets[1]
+	  && reg_mentioned_p (reg[count - 1], base));
+}
+
+/* Return true if OFFSETS, which has size COUNT, is an ascending or
+   descending sequence, separated by MSIZE.  */
+
+static bool
+aarch64_ldpstp_offsets_consecutive_p (unsigned int count,
+				      HOST_WIDE_INT *offsets,
+				      HOST_WIDE_INT msize)
+{
+  bool ascending = true, descending = true;
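+  /* E.g. with COUNT == 4 and MSIZE == 8, the offsets {0, 8, 16, 24}
+     form an ascending sequence and {24, 16, 8, 0} a descending one;
+     any other spacing or ordering fails both tests.  */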
+  for (unsigned int i = 0; i < count; i++)
     {
-      mem_1 = operands[0];
-      mem_2 = operands[2];
-      reg_1 = operands[1];
-      reg_2 = operands[3];
+      ascending &= (offsets[0] == offsets[i] - (msize * i));
+      descending &= (offsets[0] == offsets[i] + msize * i);
     }
 
-  /* The mems cannot be volatile.  */
-  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
-    return false;
+  return ascending || descending;
+}
 
-  /* Check if the addresses are in the form of [base+offset].  */
-  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
-  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
-  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
-    return false;
-  /* Check if the bases are same.  */
-  if (!rtx_equal_p (base_1, base_2))
-    return false;
+/* Helper function for aarch64_operands_ok_for_ldpstp and
+   aarch64_operands_adjust_ok_for_ldpstp.  OPERANDS are the
+   consecutive load/store operands which we hope to merge.  LOAD
+   is true if these are LOAD instructions.  MODE is the mode of the
+   memory operations.  ADJUST is true if we are in the four-insn
+   case handled by aarch64_operands_adjust_ok_for_ldpstp.  */
 
-  offval_1 = INTVAL (offset_1);
-  offval_2 = INTVAL (offset_2);
-  msize = GET_MODE_SIZE (mode);
-  /* Check if the offsets are consecutive.  */
-  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
-    return false;
+static bool
+aarch64_operands_ok_for_ldpstp_1 (rtx *operands, bool load,
+				  enum machine_mode mode, bool adjust)
+{
+  const unsigned int count = adjust ? 4 : 2;
+  /* Avoid alloca calls and just size as large as they need to be
+     for the largest case we can handle.  */
+  const unsigned int max_ops = 4;
+  rtx mem[max_ops], reg[max_ops], base[max_ops], offset[max_ops];
+  HOST_WIDE_INT offval[max_ops];
+  unsigned int i = 0;
+  HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
+
+  aarch64_extract_ldpstp_operands (count, load, operands, reg, mem);
 
-  /* Check if the addresses are clobbered by load.  */
   if (load)
     {
-      if (reg_mentioned_p (reg_1, mem_1))
-	return false;
+      for (i = 0; i < count; i += 2)
+	if (REGNO (reg[i]) == REGNO (reg[i + 1]))
+	  return false;
+    }
+
+  /* For the adjust case, skip if memory operand is by itself valid
+     for ldp/stp.  */
+  if (adjust
+      && (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode)))
+    return false;
 
-      /* In increasing order, the last load can clobber the address.  */
-      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
+  /* The mems cannot be volatile.  */
+  for (i = 0; i < count; i++)
+    if (MEM_VOLATILE_P (mem[i]))
       return false;
+
+  /* Check if the addresses are in the form of [base+offset].  */
+  for (i = 0; i < count; i++)
+    {
+      extract_base_offset_in_addr (mem[i], &base[i], &offset[i]);
+      if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
+	return false;
     }
 
-  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
-    rclass_1 = FP_REGS;
-  else
-    rclass_1 = GENERAL_REGS;
+  /* Check if the bases are same.  */
+  for (i = 1; i < count; i++)
+    if (!rtx_equal_p (base[0], base[i]))
+      return false;
 
-  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
-    rclass_2 = FP_REGS;
-  else
-    rclass_2 = GENERAL_REGS;
+  for (i = 0; i < count; i++)
+    offval[i] = INTVAL (offset[i]);
 
-  /* Check if the registers are of same class.  */
-  if (rclass_1 != rclass_2)
+  if (!aarch64_ldpstp_offsets_consecutive_p (count, offval, msize))
     return false;
 
-  return true;
+  /* Check if the addresses are clobbered by load.  */
+  if (load && aarch64_ldpstp_load_regs_clobber_base_p (count, reg,
+						       base[0], offval))
+    return false;
+
+  return aarch64_ldpstp_ops_same_reg_class_p (count, reg);
+}
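+
+/* As a worked example of the checks above: for a load pair, OPERANDS
+   is laid out as {reg_0, mem_0, reg_1, mem_1}, so with MODE == DImode
+   the sequence
+
+     ldr x0, [x2]
+     ldr x1, [x2, 8]
+
+   satisfies every test in aarch64_operands_ok_for_ldpstp_1: distinct
+   destination registers, one shared base, consecutive offsets, no
+   clobber of x2, and a single register class.  */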
+
+/* Given OPERANDS of consecutive load/store, check if we can merge
+   them into ldp/stp.  LOAD is true if they are load instructions.
+   MODE is the mode of memory operands.  */
+
+bool
+aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
+				enum machine_mode mode)
+{
+  return aarch64_operands_ok_for_ldpstp_1 (operands, load, mode, false);
 }
 
 /* Given OPERANDS of consecutive load/store, check if we can merge
@@ -13446,124 +13528,13 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
 
      stp w1, w1, [scratch, 0x8]
 
    The peephole patterns detecting this opportunity should guarantee
-   the scratch register is avaliable.  */
+   the scratch register is available.  */
 
 bool
 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
 				       enum machine_mode mode)
 {
-  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
-  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
-  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
-  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
-
-  if (load)
-    {
-      reg_1 = operands[0];
-      mem_1 = operands[1];
-      reg_2 = operands[2];
-      mem_2 = operands[3];
-      reg_3 = operands[4];
-      mem_3 = operands[5];
-      reg_4 = operands[6];
-      mem_4 = operands[7];
-      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
-		  && REG_P (reg_3) && REG_P (reg_4));
-      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
-	return false;
-    }
-  else
-    {
-      mem_1 = operands[0];
-      reg_1 = operands[1];
-      mem_2 = operands[2];
-      reg_2 = operands[3];
-      mem_3 = operands[4];
-      reg_3 = operands[5];
-      mem_4 = operands[6];
-      reg_4 = operands[7];
-    }
-  /* Skip if memory operand is by itslef valid for ldp/stp.  */
-  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
-    return false;
-
-  /* The mems cannot be volatile.  */
-  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
-      || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
-    return false;
-
-  /* Check if the addresses are in the form of [base+offset].  */
-  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
-  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
-  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
-  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
-  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
-    return false;
-
-  /* Check if the bases are same.  */
-  if (!rtx_equal_p (base_1, base_2)
-      || !rtx_equal_p (base_2, base_3)
-      || !rtx_equal_p (base_3, base_4))
-    return false;
-
-  offval_1 = INTVAL (offset_1);
-  offval_2 = INTVAL (offset_2);
-  offval_3 = INTVAL (offset_3);
-  offval_4 = INTVAL (offset_4);
-  msize = GET_MODE_SIZE (mode);
-  /* Check if the offsets are consecutive.  */
-  if ((offval_1 != (offval_2 + msize)
-       || offval_1 != (offval_3 + msize * 2)
-       || offval_1 != (offval_4 + msize * 3))
-      && (offval_4 != (offval_3 + msize)
-	  || offval_4 != (offval_2 + msize * 2)
-	  || offval_4 != (offval_1 + msize * 3)))
-    return false;
-
-  /* Check if the addresses are clobbered by load.  */
-  if (load)
-    {
-      if (reg_mentioned_p (reg_1, mem_1)
-	  || reg_mentioned_p (reg_2, mem_2)
-	  || reg_mentioned_p (reg_3, mem_3))
-	return false;
-
-      /* In increasing order, the last load can clobber the address.  */
-      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
-	return false;
-    }
-
-  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
-    rclass_1 = FP_REGS;
-  else
-    rclass_1 = GENERAL_REGS;
-
-  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
-    rclass_2 = FP_REGS;
-  else
-    rclass_2 = GENERAL_REGS;
-
-  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
-    rclass_3 = FP_REGS;
-  else
-    rclass_3 = GENERAL_REGS;
-
-  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
-    rclass_4 = FP_REGS;
-  else
-    rclass_4 = GENERAL_REGS;
-
-  /* Check if the registers are of same class.  */
-  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
-    return false;
-
-  return true;
+  return aarch64_operands_ok_for_ldpstp_1 (operands, load, mode, true);
 }
 
 /* Given OPERANDS of consecutive load/store, this function pairs them