Index: ChangeLog =================================================================== --- ChangeLog (revision 266384) +++ ChangeLog (working copy) @@ -1,3 +1,10 @@ +2018-11-22 Vladimir Makarov + + PR rtl-optimization/87718 + * ira-costs.c: Remove trailing whitespace. + (record_operand_costs): Add a special treatment for moves + involving a hard register. + 2018-11-22 Uros Bizjak * config/i386/i386.c (ix86_avx_emit_vzeroupper): Remove. Index: ira-costs.c =================================================================== --- ira-costs.c (revision 266155) +++ ira-costs.c (working copy) @@ -1257,7 +1257,7 @@ record_address_regs (machine_mode mode, add_cost = (move_in_cost[i][rclass] * scale) / 2; if (INT_MAX - add_cost < pp_costs[k]) pp_costs[k] = INT_MAX; - else + else pp_costs[k] += add_cost; } } @@ -1283,10 +1283,100 @@ record_operand_costs (rtx_insn *insn, en { const char *constraints[MAX_RECOG_OPERANDS]; machine_mode modes[MAX_RECOG_OPERANDS]; - rtx ops[MAX_RECOG_OPERANDS]; rtx set; int i; + if ((set = single_set (insn)) != NULL_RTX + /* In rare cases the single set insn might have fewer than 2 operands + as the source can be a fixed special reg. 
*/ + && recog_data.n_operands > 1 + && recog_data.operand[0] == SET_DEST (set) + && recog_data.operand[1] == SET_SRC (set)) + { + int regno, other_regno; + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + + if (GET_CODE (dest) == SUBREG + && known_eq (GET_MODE_SIZE (GET_MODE (dest)), + GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))) + dest = SUBREG_REG (dest); + if (GET_CODE (src) == SUBREG + && known_eq (GET_MODE_SIZE (GET_MODE (src)), + GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))) + src = SUBREG_REG (src); + if (REG_P (src) && REG_P (dest) + && (((regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER + && (other_regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER) + || ((regno = REGNO (dest)) >= FIRST_PSEUDO_REGISTER + && (other_regno = REGNO (src)) < FIRST_PSEUDO_REGISTER))) + { + machine_mode mode = GET_MODE (SET_SRC (set)); + cost_classes_t cost_classes_ptr = regno_cost_classes[regno]; + enum reg_class *cost_classes = cost_classes_ptr->classes; + reg_class_t rclass, hard_reg_class, pref_class; + int cost, k; + bool dead_p = find_regno_note (insn, REG_DEAD, REGNO (src)); + + hard_reg_class = REGNO_REG_CLASS (other_regno); + i = regno == (int) REGNO (src) ? 1 : 0; + for (k = cost_classes_ptr->num - 1; k >= 0; k--) + { + rclass = cost_classes[k]; + cost = ((i == 0 + ? ira_register_move_cost[mode][hard_reg_class][rclass] + : ira_register_move_cost[mode][rclass][hard_reg_class]) + * frequency); + op_costs[i]->cost[k] = cost; + /* If we have assigned a class to this allocno in our + first pass, add a cost to this alternative + corresponding to what we would add if this allocno + were not in the appropriate class. */ + if (pref) + { + if ((pref_class = pref[COST_INDEX (regno)]) == NO_REGS) + op_costs[i]->cost[k] + += ((i == 0 ? ira_memory_move_cost[mode][rclass][0] : 0) + + (i == 1 ? 
ira_memory_move_cost[mode][rclass][1] : 0) + * frequency); + else if (ira_reg_class_intersect[pref_class][rclass] + == NO_REGS) + op_costs[i]->cost[k] + += (ira_register_move_cost[mode][pref_class][rclass] + * frequency); + } + /* If this insn is a single set copying operand 1 to + operand 0 and one operand is an allocno with the + other a hard reg or an allocno that prefers a hard + register that is in its own register class then we + may want to adjust the cost of that register class to + -1. + + Avoid the adjustment if the source does not die to + avoid stressing the register allocator by preferencing + two colliding registers into a single class. */ + if (dead_p + && TEST_HARD_REG_BIT (reg_class_contents[rclass], other_regno) + && (reg_class_size[(int) rclass] + == (ira_reg_class_max_nregs + [(int) rclass][(int) GET_MODE(src)]))) + { + if (reg_class_size[rclass] == 1) + op_costs[i]->cost[k] = -frequency; + else if (in_hard_reg_set_p (reg_class_contents[rclass], + GET_MODE(src), other_regno)) + op_costs[i]->cost[k] = -frequency; + } + } + op_costs[i]->mem_cost + = ira_memory_move_cost[mode][hard_reg_class][i] * frequency; + if (pref && (pref_class = pref[COST_INDEX (regno)]) != NO_REGS) + op_costs[i]->mem_cost + += ira_memory_move_cost[mode][pref_class][i] * frequency; + return; + } + } + for (i = 0; i < recog_data.n_operands; i++) { constraints[i] = recog_data.constraints[i]; @@ -1302,7 +1392,6 @@ record_operand_costs (rtx_insn *insn, en { memcpy (op_costs[i], init_cost, struct_costs_size); - ops[i] = recog_data.operand[i]; if (GET_CODE (recog_data.operand[i]) == SUBREG) recog_data.operand[i] = SUBREG_REG (recog_data.operand[i]); @@ -1318,7 +1407,7 @@ record_operand_costs (rtx_insn *insn, en recog_data.operand[i], 0, ADDRESS, SCRATCH, frequency * 2); } - + /* Check for commutative in a separate loop so everything will have been initialized. We must do this even if one operand is a constant--see addsi3 in m68k.md. 
*/ @@ -1328,8 +1417,8 @@ record_operand_costs (rtx_insn *insn, en const char *xconstraints[MAX_RECOG_OPERANDS]; int j; - /* Handle commutative operands by swapping the constraints. - We assume the modes are the same. */ + /* Handle commutative operands by swapping the + constraints. We assume the modes are the same. */ for (j = 0; j < recog_data.n_operands; j++) xconstraints[j] = constraints[j]; @@ -1342,69 +1431,6 @@ record_operand_costs (rtx_insn *insn, en record_reg_classes (recog_data.n_alternatives, recog_data.n_operands, recog_data.operand, modes, constraints, insn, pref); - - /* If this insn is a single set copying operand 1 to operand 0 and - one operand is an allocno with the other a hard reg or an allocno - that prefers a hard register that is in its own register class - then we may want to adjust the cost of that register class to -1. - - Avoid the adjustment if the source does not die to avoid - stressing of register allocator by preferencing two colliding - registers into single class. - - Also avoid the adjustment if a copy between hard registers of the - class is expensive (ten times the cost of a default copy is - considered arbitrarily expensive). This avoids losing when the - preferred class is very expensive as the source of a copy - instruction. */ - if ((set = single_set (insn)) != NULL_RTX - /* In rare cases the single set insn might have less 2 operands - as the source can be a fixed special reg. 
*/ - && recog_data.n_operands > 1 - && ops[0] == SET_DEST (set) && ops[1] == SET_SRC (set)) - { - int regno, other_regno; - rtx dest = SET_DEST (set); - rtx src = SET_SRC (set); - - if (GET_CODE (dest) == SUBREG - && known_eq (GET_MODE_SIZE (GET_MODE (dest)), - GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))) - dest = SUBREG_REG (dest); - if (GET_CODE (src) == SUBREG - && known_eq (GET_MODE_SIZE (GET_MODE (src)), - GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))) - src = SUBREG_REG (src); - if (REG_P (src) && REG_P (dest) - && find_regno_note (insn, REG_DEAD, REGNO (src)) - && (((regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER - && (other_regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER) - || ((regno = REGNO (dest)) >= FIRST_PSEUDO_REGISTER - && (other_regno = REGNO (src)) < FIRST_PSEUDO_REGISTER))) - { - machine_mode mode = GET_MODE (src); - cost_classes_t cost_classes_ptr = regno_cost_classes[regno]; - enum reg_class *cost_classes = cost_classes_ptr->classes; - reg_class_t rclass; - int k; - - i = regno == (int) REGNO (src) ? 1 : 0; - for (k = cost_classes_ptr->num - 1; k >= 0; k--) - { - rclass = cost_classes[k]; - if (TEST_HARD_REG_BIT (reg_class_contents[rclass], other_regno) - && (reg_class_size[(int) rclass] - == ira_reg_class_max_nregs [(int) rclass][(int) mode])) - { - if (reg_class_size[rclass] == 1) - op_costs[i]->cost[k] = -frequency; - else if (in_hard_reg_set_p (reg_class_contents[rclass], - mode, other_regno)) - op_costs[i]->cost[k] = -frequency; - } - } - } - } } @@ -1457,7 +1483,7 @@ scan_one_insn (rtx_insn *insn) /* If this insn loads a parameter from its stack slot, then it represents a savings, rather than a cost, if the parameter is - stored in memory. Record this fact. + stored in memory. Record this fact. 
Similarly if we're loading other constants from memory (constant pool, TOC references, small data areas, etc) and this is the only @@ -1468,7 +1494,7 @@ scan_one_insn (rtx_insn *insn) mem_cost might result in it being loaded using the specialized instruction into a register, then stored into stack and loaded again from the stack. See PR52208. - + Don't do this if SET_SRC (set) has side effect. See PR56124. */ if (set != 0 && REG_P (SET_DEST (set)) && MEM_P (SET_SRC (set)) && (note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) != NULL_RTX @@ -1766,7 +1792,7 @@ find_costs_and_classes (FILE *dump_file) a = ALLOCNO_NEXT_REGNO_ALLOCNO (a)) { int *a_costs, *p_costs; - + a_num = ALLOCNO_NUM (a); if ((flag_ira_region == IRA_REGION_ALL || flag_ira_region == IRA_REGION_MIXED) @@ -1936,7 +1962,7 @@ find_costs_and_classes (FILE *dump_file) int a_num = ALLOCNO_NUM (a); int *total_a_costs = COSTS (total_allocno_costs, a_num)->cost; int *a_costs = COSTS (costs, a_num)->cost; - + if (aclass == NO_REGS) best = NO_REGS; else @@ -1998,7 +2024,7 @@ find_costs_and_classes (FILE *dump_file) } } } - + if (internal_flag_ira_verbose > 4 && dump_file) { if (allocno_p) @@ -2081,7 +2107,7 @@ process_bb_node_for_hard_reg_moves (ira_ int cost; enum reg_class hard_reg_class; machine_mode mode; - + mode = ALLOCNO_MODE (a); hard_reg_class = REGNO_REG_CLASS (hard_regno); ira_init_register_move_cost_if_necessary (mode); Index: testsuite/ChangeLog =================================================================== --- testsuite/ChangeLog (revision 266384) +++ testsuite/ChangeLog (working copy) @@ -1,3 +1,9 @@ +2018-11-22 Vladimir Makarov + + PR rtl-optimization/87718 + * gcc.target/i386/pr82361-1.c: Check only the first operand of + moves. + 2018-11-22 Thomas Preud'homme * gcc.target/arm/pr85434.c: New test. 
Index: testsuite/gcc.target/i386/pr82361-1.c =================================================================== --- testsuite/gcc.target/i386/pr82361-1.c (revision 266155) +++ testsuite/gcc.target/i386/pr82361-1.c (working copy) @@ -6,7 +6,7 @@ /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ /* FIXME: We are still not able to optimize the modulo in f1/f2, only manage one. */ -/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */ +/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */ void f1 (unsigned int a, unsigned int b)