2007-07-04  Paolo Bonzini

        * function.c (match_asm_constraints_1, rest_of_match_asm_constraints,
        pass_match_asm_constraints): New.
        * passes.c (init_optimization_passes): Add new pass.
        * stmt.c (expand_asm_operands): Set cfun->has_asm_statement.
        * function.h (struct function): Add has_asm_statement bit.
        (current_function_has_asm_statement): New.
        * tree-pass.h (pass_match_asm_constraints): New.

Index: function.c
===================================================================
--- function.c (revision 126190)
+++ function.c (working copy)
@@ -5504,6 +5504,178 @@ struct tree_opt_pass pass_thread_prologu
   TODO_ggc_collect,                     /* todo_flags_finish */
   'w'                                   /* letter */
 };
+
+
+/* This mini-pass fixes fall-out from SSA in asm statements that have
+   in-out constraints.  Say you start with
+
+     orig = inout;
+     asm ("": "+mr" (inout));
+     use (orig);
+
+   which is transformed very early to use explicit output and match operands:
+
+     orig = inout;
+     asm ("": "=mr" (inout) : "0" (inout));
+     use (orig);
+
+   Or, after SSA and copyprop,
+
+     asm ("": "=mr" (inout_2) : "0" (inout_1));
+     use (inout_1);
+
+   Clearly inout_2 and inout_1 can't be coalesced easily anymore, as
+   they represent two separate values, so they will get different pseudo
+   registers during expansion.  Then, since the two operands need to match
+   per the constraints, but use different pseudo registers, reload can
+   only register a reload for these operands.  But reloads can only be
+   satisfied by hardregs, not by memory, so we need a register for this
+   reload, just because we are presented with non-matching operands.
+   So, even though we allow memory for this operand, no memory can be
+   used for it, just because the two operands don't match.  This can
+   cause reload failures on register-starved targets.
+
+   So it's a symptom of reload not being able to use memory for reloads
+   or, alternatively it's also a symptom of both operands not coming into
+   reload as matching (in which case the pseudo could go to memory just
+   fine, as the alternative allows it, and no reload would be necessary).
+   We fix the latter problem here, by transforming
+
+     asm ("": "=mr" (inout_2) : "0" (inout_1));
+
+   back to
+
+     inout_2 = inout_1;
+     asm ("": "=mr" (inout_2) : "0" (inout_2));  */
+
+/* Subroutine of rest_of_match_asm_constraints, handling one asm
+   statement INSN.  P_SETS points to the first of NOUTPUTS elements of
+   INSN's pattern; P_SETS[0] is a SET whose source is the ASM_OPERANDS.
+   For each input whose constraint starts with the number of an output
+   operand, and whose rtx differs from that output, emit a copy of the
+   input into the output before INSN and make the asm read the output
+   instead.  */
+
+static void
+match_asm_constraints_1 (rtx insn, rtx *p_sets, int noutputs)
+{
+  int i, j;
+  bool changed = false;
+  rtx op = SET_SRC (p_sets[0]);
+  int ninputs = ASM_OPERANDS_INPUT_LENGTH (op);
+  rtvec inputs = ASM_OPERANDS_INPUT_VEC (op);
+
+  for (i = 0; i < ninputs; i++)
+    {
+      rtx input, output, insns;
+      const char *constraint = ASM_OPERANDS_INPUT_CONSTRAINT (op, i);
+      char *end;
+      int match;
+
+      /* Only inputs whose constraint starts with an operand number
+         are tied to an output.  */
+      match = strtoul (constraint, &end, 10);
+      if (end == constraint)
+        continue;
+
+      gcc_assert (match < noutputs);
+      output = SET_DEST (p_sets[match]);
+      input = RTVEC_ELT (inputs, i);
+
+      /* Only do the transformation for register outputs: the copy
+         emitted below is an extra store, which must not be invented
+         for anything else (e.g. a MEM).  Nothing needs doing either
+         when the operands already match or when their modes differ.  */
+      if (!REG_P (output)
+          || rtx_equal_p (output, input)
+          || (GET_MODE (input) != VOIDmode
+              && GET_MODE (input) != GET_MODE (output)))
+        continue;
+
+      /* The copy overwrites OUTPUT ahead of the asm, so give up if
+         any input still needs the value OUTPUT holds at that point.  */
+      for (j = 0; j < ninputs; j++)
+        if (reg_overlap_mentioned_p (output, RTVEC_ELT (inputs, j)))
+          break;
+      if (j != ninputs)
+        continue;
+
+      start_sequence ();
+      emit_move_insn (copy_rtx (output), input);
+      RTVEC_ELT (inputs, i) = copy_rtx (output);
+      insns = get_insns ();
+      end_sequence ();
+
+      emit_insn_before (insns, insn);
+      changed = true;
+    }
+
+  /* The operand vector was edited in place; have DF re-scan INSN.  */
+  if (changed)
+    df_insn_rescan (insn);
+}
+
+/* Execute function of the pass: look at every insn of the current
+   function and hand each asm statement (a SET of an ASM_OPERANDS, alone
+   or as the first element of a PARALLEL) to match_asm_constraints_1.
+   Returns 0 without scanning when the function contains no asm
+   statement, TODO_df_finish otherwise.  */
+
+static unsigned
+rest_of_match_asm_constraints (void)
+{
+  basic_block bb;
+  rtx insn, pat, *p_sets;
+  int noutputs;
+
+  if (!cfun->has_asm_statement)
+    return 0;
+
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+  FOR_EACH_BB (bb)
+    {
+      FOR_BB_INSNS (bb, insn)
+        {
+          if (!INSN_P (insn))
+            continue;
+
+          /* Consider a lone SET, or a PARALLEL, in which case only
+             its first element is tested below.  */
+          pat = PATTERN (insn);
+          if (GET_CODE (pat) == PARALLEL)
+            p_sets = &XVECEXP (pat, 0, 0), noutputs = XVECLEN (pat, 0);
+          else if (GET_CODE (pat) == SET)
+            p_sets = &PATTERN (insn), noutputs = 1;
+          else
+            continue;
+
+          if (GET_CODE (*p_sets) == SET
+              && GET_CODE (SET_SRC (*p_sets)) == ASM_OPERANDS)
+            match_asm_constraints_1 (insn, p_sets, noutputs);
+        }
+    }
+
+  return TODO_df_finish;
+}
+
+/* Descriptor for the "asmcons" pass.  It has no gate function; its
+   execute hook is rest_of_match_asm_constraints.  */
+struct tree_opt_pass pass_match_asm_constraints =
+{
+  "asmcons",                            /* name */
+  NULL,                                 /* gate */
+  rest_of_match_asm_constraints,        /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  0,                                    /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func,                       /* todo_flags_finish */
+  0                                     /* letter */
+};
 
 
 #include "gt-function.h"
Index: passes.c
===================================================================
--- passes.c (revision 126190)
+++ passes.c (working copy)
@@ -740,6 +740,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_stack_ptr_mod);
   NEXT_PASS (pass_mode_switching);
   NEXT_PASS (pass_see);
+  NEXT_PASS (pass_match_asm_constraints);
   NEXT_PASS (pass_sms);
   NEXT_PASS (pass_sched);
   NEXT_PASS (pass_subregs_of_mode_init);
Index: stmt.c
===================================================================
--- stmt.c (revision 126190)
+++ stmt.c (working copy)
@@ -1078,6 +1078,7 @@ expand_asm_operands (tree string, tree o
     if (real_output_rtx[i])
       emit_move_insn (real_output_rtx[i], output_rtx[i]);
 
+  cfun->has_asm_statement = 1;
   free_temp_slots ();
 }
 
Index: function.h
===================================================================
--- function.h (revision 126190)
+++ function.h (working copy)
@@ -414,6 +414,9 @@ struct function GTY(())
   /* Nonzero if function being compiled has nonlocal gotos to parent
      function.  */
   unsigned int has_nonlocal_goto : 1;
+
+  /* Nonzero if function being compiled has an asm statement.  */
+  unsigned int has_asm_statement : 1;
 
   /* Nonzero if the current function is a thunk, i.e., a lightweight
      function implemented by the output_mi_thunk hook) that just
@@ -517,6 +520,7 @@ extern int trampolines_created;
 #define current_function_has_nonlocal_label (cfun->has_nonlocal_label)
 #define current_function_calls_unwind_init (cfun->calls_unwind_init)
 #define current_function_has_nonlocal_goto (cfun->has_nonlocal_goto)
+#define current_function_has_asm_statement (cfun->has_asm_statement)
 
 #define return_label (cfun->x_return_label)
 #define naked_return_label (cfun->x_naked_return_label)
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 126190)
+++ tree-pass.h (working copy)
@@ -390,6 +390,7 @@ extern struct tree_opt_pass pass_initial
 extern struct tree_opt_pass pass_combine;
 extern struct tree_opt_pass pass_if_after_combine;
 extern struct tree_opt_pass pass_partition_blocks;
+extern struct tree_opt_pass pass_match_asm_constraints;
 extern struct tree_opt_pass pass_regmove;
 extern struct tree_opt_pass pass_split_all_insns;
 extern struct tree_opt_pass pass_lower_subreg2;