diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 5c1343f0df8..dfc949a8c0f 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -58,6 +58,7 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse);
 extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
 extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
 extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
+extern const char *avr_out_insv (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
 extern const char *avr_out_plus_set_ZN (rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 5e0217de36f..b4d082315b5 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9066,6 +9066,159 @@ avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen)
 }
 
 
+/* Output instructions for  XOP[0] = (XOP[1] <CODE> XOP[2]) & XOP[3]  where
+   * XOP[0] and XOP[1] have the same mode, which is one of: QI, HI, PSI, SI.
+   * XOP[3] is an exact power of 2.
+   * XOP[2] and XOP[3] are const_int.
+   * <CODE> is any of: ASHIFT, LSHIFTRT, ASHIFTRT.
+   * The result depends on XOP[1].
+   Returns "".
+   PLEN != 0: Set *PLEN to the code length in words.  Don't output anything.
+   PLEN == 0: Output instructions.  */
+
+const char*
+avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
+{
+  machine_mode mode = GET_MODE (xop[0]);
+  int n_bytes = GET_MODE_SIZE (mode);
+  rtx xsrc = SET_SRC (single_set (insn));
+
+  // Any of ASHIFT, LSHIFTRT, ASHIFTRT.
+  enum rtx_code code = GET_CODE (XEXP (xsrc, 0));
+  int shift = code == ASHIFT ? INTVAL (xop[2]) : -INTVAL (xop[2]);
+
+  // Determines the position of the output bit.
+  unsigned mask = GET_MODE_MASK (mode) & INTVAL (xop[3]);
+
+  // Position of the output / input bit, respectively.
+  int obit = exact_log2 (mask);
+  int ibit = obit - shift;
+
+  gcc_assert (IN_RANGE (obit, 0, GET_MODE_BITSIZE (mode) - 1));
+  gcc_assert (IN_RANGE (ibit, 0, GET_MODE_BITSIZE (mode) - 1));
+
+  // In the remainder, use the sub-bytes that hold the bits.
+  rtx op[4] =
+    {
+      // Output
+      simplify_gen_subreg (QImode, xop[0], mode, obit / 8),
+      GEN_INT (obit & 7),
+      // Input
+      simplify_gen_subreg (QImode, xop[1], mode, ibit / 8),
+      GEN_INT (ibit & 7)
+    };
+  obit &= 7;
+  ibit &= 7;
+
+  // The length of the default sequence at the end of this function.
+  // We only emit anything other than the default when we find a sequence
+  // that is strictly shorter than the default sequence.
+  const int len0 = 2 + n_bytes - (n_bytes == 4 && AVR_HAVE_MOVW);
+
+  // Finding something shorter than the default sequence implies that there
+  // must be at most 2 instructions that deal with the bytes containing the
+  // relevant bits.  In addition, we need  N_BYTES - 1  instructions to clear
+  // the remaining result bytes.
+
+  const int n_clr = n_bytes - 1;
+  bool clr_p = false;
+  bool andi_p = false;
+
+  if (plen)
+    *plen = 0;
+
+  if (REGNO (op[0]) == REGNO (op[2])
+      // Output reg allows ANDI.
+      && test_hard_reg_class (LD_REGS, op[0]))
+    {
+      if (1 + n_clr < len0
+          // Same byte and bit: A single ANDI will do.
+          && obit == ibit)
+        {
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // |obit - ibit| = 4:  SWAP + ANDI will do.
+               && (obit == ibit + 4 || obit == ibit - 4))
+        {
+          avr_asm_len ("swap %0", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // LSL + ANDI will do.
+               && obit == ibit + 1)
+        {
+          avr_asm_len ("lsl %0", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // LSR + ANDI will do.
+               && obit == ibit - 1)
+        {
+          avr_asm_len ("lsr %0", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+    }
+
+  if (REGNO (op[0]) != REGNO (op[2])
+      && obit == ibit)
+    {
+      if (2 + n_clr < len0
+          // Same bit but different byte: MOV + ANDI will do.
+          && test_hard_reg_class (LD_REGS, op[0]))
+        {
+          avr_asm_len ("mov %0,%2", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // Same bit but different byte:  We can use ANDI + MOV,
+               // but only if the input byte is LD_REGS and unused after.
+               && test_hard_reg_class (LD_REGS, op[2])
+               && reg_unused_after (insn, op[2]))
+        {
+          avr_asm_len ("andi %2,1<<%3" CR_TAB
+                       "mov %0,%2", op, plen, 2);
+          clr_p = true;
+        }
+    }
+
+  // Output remaining instructions of the shorter sequence.
+
+  if (andi_p)
+    avr_asm_len ("andi %0,1<<%1", op, plen, 1);
+
+  if (clr_p)
+    {
+      for (int b = 0; b < n_bytes; ++b)
+        {
+          rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+          if (REGNO (byte) != REGNO (op[0]))
+            avr_asm_len ("clr %0", &byte, plen, 1);
+        }
+
+      // CLR_P means we found a shorter sequence, so we are done now.
+      return "";
+    }
+
+  // No shorter sequence found, just emit the  BST, CLR*, BLD  sequence.
+
+  avr_asm_len ("bst %2,%3", op, plen, -1);
+
+  if (n_bytes == 4 && AVR_HAVE_MOVW)
+    avr_asm_len ("clr %A0" CR_TAB
+                 "clr %B0" CR_TAB
+                 "movw %C0,%A0", xop, plen, 3);
+  else
+    for (int b = 0; b < n_bytes; ++b)
+      {
+        rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+        avr_asm_len ("clr %0", &byte, plen, 1);
+      }
+
+  return avr_asm_len ("bld %0,%1", op, plen, 1);
+}
+
+
 /* Output instructions to extract a bit to 8-bit register XOP[0].
    The input XOP[1] is a register or an 8-bit MEM in the lower I/O range.
    XOP[2] is the const_int bit position.  Return "".
@@ -9994,6 +10147,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
     case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break;
 
     case ADJUST_LEN_EXTR_NOT: avr_out_extr_not (insn, op, &len); break;
     case ADJUST_LEN_EXTR: avr_out_extr (insn, op, &len); break;
+    case ADJUST_LEN_INSV: avr_out_insv (insn, op, &len); break;
 
     case ADJUST_LEN_PLUS: avr_out_plus (insn, op, &len); break;
     case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break;
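
As a worked example of the length bookkeeping above (illustrative only, not part of the patch; it assumes an SImode value living in R22..R25, as for a 32-bit return value): for  (a << 8) & (1ul << 13)  we get shift = 8, obit = 13, ibit = 5.  The payload bit sits in output byte 1 and input byte 0, both at bit position 5, so after the subreg step obit == ibit while the bytes differ, and the MOV + ANDI path is a candidate.  With MOVW the default sequence already costs len0 = 2 + 4 - 1 = 5 words, so the 5-word alternative is not strictly shorter and the default BST / CLR* / BLD sequence is kept; without MOVW, len0 = 6 and the shorter sequence wins:

    mov  r23,r22    ; move input byte 0 to output byte 1
    andi r23,1<<5   ; isolate the payload bit
    clr  r22        ; clear the three remaining result bytes
    clr  r24
    clr  r25
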
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 83dd15040b0..73bddec2b33 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -163,7 +163,7 @@ (define_attr "adjust_len"
    ashlhi, ashrhi, lshrhi,
    ashlsi, ashrsi, lshrsi,
    ashlpsi, ashrpsi, lshrpsi,
-   insert_bits, insv_notbit,
+   insert_bits, insv_notbit, insv,
    add_set_ZN, cmp_uext, cmp_sext,
    no"
   (const_string "no"))
@@ -9818,6 +9818,12 @@ (define_insn_and_split "*extzv.io.lsr7"
                           (const_int 1)
                           (const_int 7)))])
 
+;; This insn serves as a combine bridge because insn combine will only
+;; combine so many (at most 3) insns.  It's not actually an open-coded
+;; bit insertion but just a part of one.  It may occur in other contexts
+;; than INSV though, and in such a case the code may be worse than without
+;; this pattern.  We still have to emit code for it in that case because
+;; we cannot roll back.
 (define_insn_and_split "*insv.any_shift.<mode>_split"
   [(set (match_operand:QISI 0 "register_operand"                           "=r")
         (and:QISI (any_shift:QISI (match_operand:QISI 1 "register_operand"  "r")
@@ -9840,27 +9846,9 @@ (define_insn "*insv.any_shift.<mode>"
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
-    int shift = <CODE> == ASHIFT ? INTVAL (operands[2]) : -INTVAL (operands[2]);
-    int mask = GET_MODE_MASK (<MODE>mode) & INTVAL (operands[3]);
-    // Position of the output / input bit, respectively.
-    int obit = exact_log2 (mask);
-    int ibit = obit - shift;
-    gcc_assert (IN_RANGE (obit, 0, <MSB>));
-    gcc_assert (IN_RANGE (ibit, 0, <MSB>));
-    operands[3] = GEN_INT (obit);
-    operands[2] = GEN_INT (ibit);
-
-    if (<SIZE> == 1) return "bst %T1%T2\;clr %0\;"                 "bld %T0%T3";
-    if (<SIZE> == 2) return "bst %T1%T2\;clr %A0\;clr %B0\;"       "bld %T0%T3";
-    if (<SIZE> == 3) return "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;bld %T0%T3";
-    return AVR_HAVE_MOVW
-      ? "bst %T1%T2\;clr %A0\;clr %B0\;movw %C0,%A0\;" "bld %T0%T3"
-      : "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;clr %D0\;bld %T0%T3";
+    return avr_out_insv (insn, operands, nullptr);
   }
-  [(set (attr "length")
-        (minus (symbol_ref "2 + <SIZE>")
-               ; One less if we can use a MOVW to clear.
-               (symbol_ref "<SIZE> == 4 && AVR_HAVE_MOVW")))])
+  [(set_attr "adjust_len" "insv")])
 
 
 (define_insn_and_split "*extzv.hi2"
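
To illustrate the bridge (a hypothetical example, not from the testsuite): a full open-coded bit insertion such as

    unsigned long
    insert_bit13 (unsigned long dst, unsigned long a)
    {
      /* The  (a << 8) & (1ul << 13)  subexpression is the shape that
         *insv.any_shift.<mode> matches.  */
      return (dst & ~(1ul << 13)) | ((a << 8) & (1ul << 13));
    }

consists of more insns than combine is willing to merge in one step; recognizing the shift-and-mask part as an insn of its own gives combine a stepping stone towards the complete insertion.
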
diff --git a/gcc/testsuite/gcc.target/avr/torture/insv-anyshift.c b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift.c
new file mode 100644
index 00000000000..2f94e1787b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift.c
@@ -0,0 +1,81 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+/* Testing inlined and completely folded versions of functions
+   against their non-inlined, non-folded counterparts.  */
+
+#define MK_FUN1(OBIT, LSR)                                          \
+  static __inline__ __attribute__((__always_inline__))              \
+  uint32_t fun1_lsr_##OBIT##_##LSR##_ai (int x, uint32_t a)         \
+  {                                                                 \
+    (void) x;                                                       \
+    return (a >> LSR) & (1ul << OBIT);                              \
+  }                                                                 \
+                                                                    \
+  __attribute__((__noinline__,__noclone__))                         \
+  uint32_t fun1_lsr_##OBIT##_##LSR##_ni (int x, uint32_t a)         \
+  {                                                                 \
+    return fun1_lsr_##OBIT##_##LSR##_ai (x, a);                     \
+  }                                                                 \
+                                                                    \
+  void test_fun1_lsr_##OBIT##_##LSR (void)                          \
+  {                                                                 \
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR))       \
+        != fun1_lsr_##OBIT##_##LSR##_ai (0, 1ul << (OBIT + LSR)))   \
+      __builtin_abort();                                            \
+                                                                    \
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR))       \
+        != fun1_lsr_##OBIT##_##LSR##_ai (0, -1ul))                  \
+      __builtin_abort();                                            \
+  }
+
+
+#define MK_FUN2(OBIT, LSL)                                          \
+  static __inline__ __attribute__((__always_inline__))              \
+  uint32_t fun2_lsl_##OBIT##_##LSL##_ai (int x, uint32_t a)         \
+  {                                                                 \
+    (void) x;                                                       \
+    return (a << LSL) & (1ul << OBIT);                              \
+  }                                                                 \
+                                                                    \
+  __attribute__((__noinline__,__noclone__))                         \
+  uint32_t fun2_lsl_##OBIT##_##LSL##_ni (int x, uint32_t a)         \
+  {                                                                 \
+    return fun2_lsl_##OBIT##_##LSL##_ai (x, a);                     \
+  }                                                                 \
+                                                                    \
+  void test_fun2_lsl_##OBIT##_##LSL (void)                          \
+  {                                                                 \
+    if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL))       \
+        != fun2_lsl_##OBIT##_##LSL##_ai (0, 1ul << (OBIT - LSL)))   \
+      __builtin_abort();                                            \
+                                                                    \
+    if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL))       \
+        != fun2_lsl_##OBIT##_##LSL##_ai (0, -1ul))                  \
+      __builtin_abort();                                            \
+  }
+
+
+MK_FUN1 (13, 15)
+MK_FUN1 (13, 16)
+MK_FUN1 (13, 17)
+MK_FUN1 (13, 12)
+
+MK_FUN2 (12, 8)
+MK_FUN2 (13, 8)
+MK_FUN2 (16, 8)
+
+int main (void)
+{
+  test_fun1_lsr_13_15 ();
+  test_fun1_lsr_13_16 ();
+  test_fun1_lsr_13_17 ();
+  test_fun1_lsr_13_12 ();
+
+  test_fun2_lsl_12_8 ();
+  test_fun2_lsl_13_8 ();
+  test_fun2_lsl_16_8 ();
+
+  return 0;
+}
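
For reference, MK_FUN1 (13, 15) expands to roughly the following sketch (abbreviated; attributes shortened, uint32_t being the typedef from the test file):

    static inline uint32_t fun1_lsr_13_15_ai (int x, uint32_t a)
    {
      return (a >> 15) & (1ul << 13);   /* folded at compile time for constant a */
    }

    __attribute__((noinline)) uint32_t fun1_lsr_13_15_ni (int x, uint32_t a)
    {
      return fun1_lsr_13_15_ai (x, a);  /* runs the code emitted via avr_out_insv */
    }

The dummy int argument presumably shifts `a` away from the return registers so that input and output bytes differ.  test_fun1_lsr_13_15 then compares the compile-time-folded result against the run-time result and aborts on any mismatch.
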