Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 172493)
+++ config/avr/avr.md	(working copy)
@@ -1519,22 +1519,22 @@ (define_mode_attr rotsmode [(DI "QI") (S
 (define_expand "rotl<mode>3"
   [(parallel [(set (match_operand:HIDI 0 "register_operand" "")
-                   (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
-                                (match_operand:VOID 2 "const_int_operand" "")))
-              (clobber (match_dup 3))])]
+               (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
+                            (match_operand:VOID 2 "const_int_operand" "")))
+              (clobber (match_dup 3))])]
   ""
-  "
-{
-  if (CONST_INT_P (operands[2]) && 0 == (INTVAL (operands[2]) % 8)) {
-  if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
-    operands[3] = gen_reg_rtx (<rotsmode>mode);
-  else
-    operands[3] = gen_reg_rtx (QImode);
-  }
-  else
-    FAIL;
-}")
+  {
+    if (CONST_INT_P (operands[2])
+        && 0 == INTVAL (operands[2]) % 8)
+      {
+        if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
+          operands[3] = gen_rtx_SCRATCH (<rotsmode>mode);
+        else
+          operands[3] = gen_rtx_SCRATCH (QImode);
+      }
+    else
+      FAIL;
+  })
 
 ;; Overlapping non-HImode registers often (but not always) need a scratch.
 
@@ -1545,35 +1545,49 @@ (define_expand "rotl<mode>3"
 ; Split word aligned rotates using scratch that is mode dependent.
 (define_insn_and_split "*rotw<mode>"
-  [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
-       (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
-                    (match_operand 2 "immediate_operand" "n,n,n")))
-   (clobber (match_operand:<rotsmode> 3 "register_operand" "=<rotx>"))]
-  "(CONST_INT_P (operands[2]) &&
-    (0 == (INTVAL (operands[2]) % 16) && AVR_HAVE_MOVW))"
+  [(set (match_operand:HIDI 0 "register_operand" "=r,r,&r")
+       (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+                    (match_operand 2 "const_int_operand" "n,n,n")))
+   (clobber (match_scratch:<rotsmode> 3 "=<rotx>"))]
+  "AVR_HAVE_MOVW
+   && 0 == INTVAL (operands[2]) % 16"
   "#"
   "&& (reload_completed || <MODE>mode == DImode)"
   [(const_int 0)]
-  "avr_rotate_bytes (operands);
-   DONE;"
-  )
+  {
+    /* Split happens in .split1: Cook up a pseudo.  */
+    if (SCRATCH == GET_CODE (operands[3])
+        && !reload_completed)
+      {
+        operands[3] = gen_reg_rtx (GET_MODE (operands[3]));
+      }
+    avr_rotate_bytes (operands);
+    DONE;
+  })
 
 ; Split byte aligned rotates using scratch that is always QI mode.
(define_insn_and_split "*rotb" - [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r") - (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r") - (match_operand 2 "immediate_operand" "n,n,n"))) - (clobber (match_operand:QI 3 "register_operand" "=" ))] - "(CONST_INT_P (operands[2]) && - (8 == (INTVAL (operands[2]) % 16) - || (!AVR_HAVE_MOVW && 0 == (INTVAL (operands[2]) % 16))))" + [(set (match_operand:HIDI 0 "register_operand" "=r,r,&r") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r") + (match_operand 2 "const_int_operand" "n,n,n"))) + (clobber (match_scratch:QI 3 "=" ))] + "8 == INTVAL (operands[2]) % 16 + || (!AVR_HAVE_MOVW + && 0 == INTVAL (operands[2]) % 16)" "#" "&& (reload_completed || mode == DImode)" [(const_int 0)] - "avr_rotate_bytes (operands); - DONE;" -) + { + /* Split happens in .split1: Cook up a pseudo */ + if (SCRATCH == GET_CODE (operands[3]) + && !reload_completed) + { + operands[3] = gen_reg_rtx (QImode); + } + avr_rotate_bytes (operands); + DONE; + }) ;;<< << << << << << << << << << << << << << << << << << << << << << << << << << Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (Revision 172493) +++ config/avr/avr.c (Arbeitskopie) @@ -4481,141 +4481,147 @@ lshrsi3_out (rtx insn, rtx operands[], i bool avr_rotate_bytes (rtx operands[]) { - int i, j; - enum machine_mode mode = GET_MODE (operands[0]); - bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]); - bool same_reg = rtx_equal_p (operands[0], operands[1]); - int num = INTVAL (operands[2]); - rtx scratch = operands[3]; - /* Work out if byte or word move is needed. Odd byte rotates need QImode. - Word move if no scratch is needed, otherwise use size of scratch. */ - enum machine_mode move_mode = QImode; - int move_size, offset, size; - - if (num & 0xf) - move_mode = QImode; - else if ((mode == SImode && !same_reg) || !overlapped) - move_mode = HImode; - else - move_mode = GET_MODE (scratch); - - /* Force DI rotate to use QI moves since other DI moves are currently split - into QI moves so forward propagation works better. */ - if (mode == DImode) - move_mode = QImode; - /* Make scratch smaller if needed. */ - if (GET_MODE (scratch) == HImode && move_mode == QImode) - scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0); - - move_size = GET_MODE_SIZE (move_mode); - /* Number of bytes/words to rotate. */ - offset = (num >> 3) / move_size; - /* Number of moves needed. */ - size = GET_MODE_SIZE (mode) / move_size; - /* Himode byte swap is special case to avoid a scratch register. */ - if (mode == HImode && same_reg) - { - /* HImode byte swap, using xor. This is as quick as using scratch. */ - rtx src, dst; - src = simplify_gen_subreg (move_mode, operands[1], mode, 0); - dst = simplify_gen_subreg (move_mode, operands[0], mode, 1); - if (!rtx_equal_p (dst, src)) - { - emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); - emit_move_insn (src, gen_rtx_XOR (QImode, src, dst)); - emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); - } - } - else - { + int i, j; + enum machine_mode mode = GET_MODE (operands[0]); + bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]); + bool same_reg = rtx_equal_p (operands[0], operands[1]); + int num = INTVAL (operands[2]); + rtx scratch = operands[3]; + /* Work out if byte or word move is needed. Odd byte rotates need QImode. + Word move if no scratch is needed, otherwise use size of scratch. 
+     Word move if no scratch is needed, otherwise use size of scratch.  */
+  enum machine_mode move_mode = QImode;
+  int move_size, offset, size;
+
+  if (num & 0xf)
+    move_mode = QImode;
+  else if ((mode == SImode && !same_reg) || !overlapped)
+    move_mode = HImode;
+  else
+    move_mode = GET_MODE (scratch);
+
+  /* Force DI rotate to use QI moves since other DI moves are currently split
+     into QI moves so forward propagation works better.  */
+  if (mode == DImode)
+    move_mode = QImode;
+  /* Make scratch smaller if needed.  */
+  if (GET_MODE (scratch) == HImode
+      && move_mode == QImode
+      && REG_P (scratch))
+    {
+      scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
+    }
+
+  move_size = GET_MODE_SIZE (move_mode);
+  /* Number of bytes/words to rotate.  */
+  offset = (num >> 3) / move_size;
+  /* Number of moves needed.  */
+  size = GET_MODE_SIZE (mode) / move_size;
+  /* HImode byte swap is a special case to avoid a scratch register.  */
+  if (mode == HImode && same_reg)
+    {
+      /* HImode byte swap, using xor.  This is as quick as using scratch.  */
+      rtx src, dst;
+      src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
+      dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
+      if (!rtx_equal_p (dst, src))
+        {
+          emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+          emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
+          emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+        }
+    }
+  else
+    {
 #define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode) */
-       /* Create linked list of moves to determine move order.  */
-       struct {
-         rtx src, dst;
-         int links;
-       } move[MAX_SIZE + 8];
-       int blocked, moves;
-
-       gcc_assert (size <= MAX_SIZE);
-       /* Generate list of subreg moves.  */
-       for (i = 0; i < size; i++)
-         {
-           int from = i;
-           int to = (from + offset) % size;
-           move[i].src = simplify_gen_subreg (move_mode, operands[1],
-                                              mode, from * move_size);
-           move[i].dst = simplify_gen_subreg (move_mode, operands[0],
-                                              mode, to * move_size);
-           move[i].links = -1;
-         }
-       /* Mark dependence where a dst of one move is the src of another move.
-          The first move is a conflict as it must wait until second is
-          performed.  We ignore moves to self - we catch this later.  */
-       if (overlapped)
-         for (i = 0; i < size; i++)
-           if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
-             for (j = 0; j < size; j++)
-               if (j != i && rtx_equal_p (move[j].src, move[i].dst))
-                 {
-                   /* The dst of move i is the src of move j.  */
-                   move[i].links = j;
-                   break;
-                 }
-
-       blocked = -1;
-       moves = 0;
-       /* Go through move list and perform non-conflicting moves.  As each
-          non-overlapping move is made, it may remove other conflicts
-          so the process is repeated until no conflicts remain.  */
-       do
-         {
-           blocked = -1;
-           moves = 0;
-           /* Emit move where dst is not also a src or we have used that
-              src already.  */
-           for (i = 0; i < size; i++)
-             if (move[i].src != NULL_RTX)
-               {
-                 if (move[i].links == -1
-                     || move[move[i].links].src == NULL_RTX)
-                   {
-                     moves++;
-                     /* Ignore NOP moves to self.  */
-                     if (!rtx_equal_p (move[i].dst, move[i].src))
-                       emit_move_insn (move[i].dst, move[i].src);
-
-                     /* Remove conflict from list.  */
-                     move[i].src = NULL_RTX;
-                   }
-                 else
-                   blocked = i;
-               }
-
-           /* Check for deadlock.  This is when no moves occurred and we have
-              at least one blocked move.  */
-           if (moves == 0 && blocked != -1)
-             {
-               /* Need to use scratch register to break deadlock.
-                  Add move to put dst of blocked move into scratch.
-                  When this move occurs, it will break chain deadlock.
-                  The scratch register is substituted for real move.  */
-
-               move[size].src = move[blocked].dst;
-               move[size].dst = scratch;
-               /* Scratch move is never blocked.  */
-               move[size].links = -1;
-               /* Make sure we have valid link.  */
-               gcc_assert (move[blocked].links != -1);
-               /* Replace src of blocking move with scratch reg.  */
-               move[move[blocked].links].src = scratch;
-               /* Make dependent on scratch move occuring.  */
-               move[blocked].links = size;
-               size=size+1;
-             }
-         }
-       while (blocked != -1);
-     }
-   return true;
+      /* Create linked list of moves to determine move order.  */
+      struct {
+        rtx src, dst;
+        int links;
+      } move[MAX_SIZE + 8];
+      int blocked, moves;
+
+      gcc_assert (size <= MAX_SIZE);
+      /* Generate list of subreg moves.  */
+      for (i = 0; i < size; i++)
+        {
+          int from = i;
+          int to = (from + offset) % size;
+          move[i].src = simplify_gen_subreg (move_mode, operands[1],
+                                             mode, from * move_size);
+          move[i].dst = simplify_gen_subreg (move_mode, operands[0],
+                                             mode, to * move_size);
+          move[i].links = -1;
+        }
+      /* Mark dependence where a dst of one move is the src of another move.
+         The first move is a conflict as it must wait until second is
+         performed.  We ignore moves to self - we catch this later.  */
+      if (overlapped)
+        for (i = 0; i < size; i++)
+          if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
+            for (j = 0; j < size; j++)
+              if (j != i && rtx_equal_p (move[j].src, move[i].dst))
+                {
+                  /* The dst of move i is the src of move j.  */
+                  move[i].links = j;
+                  break;
+                }
+
+      blocked = -1;
+      moves = 0;
+      /* Go through move list and perform non-conflicting moves.  As each
+         non-overlapping move is made, it may remove other conflicts
+         so the process is repeated until no conflicts remain.  */
+      do
+        {
+          blocked = -1;
+          moves = 0;
+          /* Emit move where dst is not also a src or we have used that
+             src already.  */
+          for (i = 0; i < size; i++)
+            if (move[i].src != NULL_RTX)
+              {
+                if (move[i].links == -1
+                    || move[move[i].links].src == NULL_RTX)
+                  {
+                    moves++;
+                    /* Ignore NOP moves to self.  */
+                    if (!rtx_equal_p (move[i].dst, move[i].src))
+                      emit_move_insn (move[i].dst, move[i].src);
+
+                    /* Remove conflict from list.  */
+                    move[i].src = NULL_RTX;
+                  }
+                else
+                  blocked = i;
+              }
+
+          /* Check for deadlock.  This is when no moves occurred and we have
+             at least one blocked move.  */
+          if (moves == 0 && blocked != -1)
+            {
+              /* Need to use scratch register to break deadlock.
+                 Add move to put dst of blocked move into scratch.
+                 When this move occurs, it will break chain deadlock.
+                 The scratch register is substituted for real move.  */
+
+              gcc_assert (REG_P (scratch));
+
+              move[size].src = move[blocked].dst;
+              move[size].dst = scratch;
+              /* Scratch move is never blocked.  */
+              move[size].links = -1;
+              /* Make sure we have valid link.  */
+              gcc_assert (move[blocked].links != -1);
+              /* Replace src of blocking move with scratch reg.  */
+              move[move[blocked].links].src = scratch;
+              /* Make dependent on scratch move occurring.  */
+              move[blocked].links = size;
+              size=size+1;
+            }
+        }
+      while (blocked != -1);
+    }
+  return true;
 }
 
 /* Modifies the length assigned to instruction INSN
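
For reference, here is a self-contained sketch (not part of the patch) of the
move-ordering idea used by avr_rotate_bytes above: a move may fire only when no
pending move still reads its destination, and when only cycles remain, the
blocked value is parked in the scratch, just as the deadlock code does.  Plain
bytes stand in for QImode hard registers; SCRATCH_SLOT, reg[] and move[] are
illustrative names, not GCC API.  The scenario assumed is the *rotb path: an
SImode rotate left by 16 on a !AVR_HAVE_MOVW device with fully overlapping
input and output, which decomposes into two 2-cycles of byte moves.

#include <stdio.h>

#define SIZE 4            /* four QImode moves for an SImode rotate */
#define SCRATCH_SLOT SIZE /* extra slot modelling the QI scratch reg */

int
main (void)
{
  /* reg[0..3] model an SImode value 0x44332211 (reg[0] = low byte).  */
  unsigned char reg[SIZE + 1] = { 0x11, 0x22, 0x33, 0x44, 0 };
  struct { int src, dst, pending; } move[SIZE];
  int i, j;

  /* Rotate left by 16 bits: byte i lands in byte (i + 2) % 4, so every
     destination is also some pending move's source (two 2-cycles).  */
  for (i = 0; i < SIZE; i++)
    {
      move[i].src = i;
      move[i].dst = (i + 2) % SIZE;
      move[i].pending = 1;
    }

  for (;;)
    {
      int moves = 0, blocked = -1;

      for (i = 0; i < SIZE; i++)
        {
          int safe = 1;

          if (!move[i].pending)
            continue;
          /* Fire a move only if no pending move still reads its dst.  */
          for (j = 0; j < SIZE; j++)
            if (j != i && move[j].pending && move[j].src == move[i].dst)
              safe = 0;
          if (safe)
            {
              reg[move[i].dst] = reg[move[i].src];
              move[i].pending = 0;
              moves++;
            }
          else
            blocked = i;
        }

      if (blocked == -1)
        break;                  /* no pending moves left */

      if (moves == 0)
        {
          /* Deadlock: park the blocked move's dst in the scratch and
             redirect its reader there, breaking the cycle.  */
          for (j = 0; j < SIZE; j++)
            if (move[j].pending && move[j].src == move[blocked].dst)
              move[j].src = SCRATCH_SLOT;
          reg[SCRATCH_SLOT] = reg[move[blocked].dst];
        }
    }

  /* Prints 22114433, i.e. 0x44332211 rotated left by 16.  */
  printf ("%02x%02x%02x%02x\n", reg[3], reg[2], reg[1], reg[0]);
  return 0;
}

The do-while in the patched function is the same loop; it merely precomputes
the reader of each destination in move[].links instead of rescanning, and
appends the scratch move to the list rather than reusing a slot.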