diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 5c1343f0df8..dfc949a8c0f 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -58,6 +58,7 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse);
 extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
 extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
 extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
+extern const char *avr_out_insv (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
 extern const char *avr_out_plus_set_ZN (rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 5e0217de36f..b4d082315b5 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9066,6 +9066,159 @@ avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen)
 }
 
 
+/* Output instructions for  XOP[0] = (XOP[1] <CODE> XOP[2]) & XOP[3]  where
+   * XOP[0] and XOP[1] have the same mode, which is one of: QI, HI, PSI, SI.
+   * XOP[3] is an exact power of 2.
+   * XOP[2] and XOP[3] are const_int.
+   * <CODE> is any of: ASHIFT, LSHIFTRT, ASHIFTRT.
+   * The result depends on XOP[1].
+   Returns "".
+   PLEN != 0: Set *PLEN to the code length in words.  Don't output anything.
+   PLEN == 0: Output instructions.  */
+
+const char*
+avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
+{
+  machine_mode mode = GET_MODE (xop[0]);
+  int n_bytes = GET_MODE_SIZE (mode);
+  rtx xsrc = SET_SRC (single_set (insn));
+
+  // Any of ASHIFT, LSHIFTRT, ASHIFTRT.
+  enum rtx_code code = GET_CODE (XEXP (xsrc, 0));
+  int shift = code == ASHIFT ? INTVAL (xop[2]) : -INTVAL (xop[2]);
+
+  // Determines the position of the output bit.
+  unsigned mask = GET_MODE_MASK (mode) & INTVAL (xop[3]);
+
+  // Position of the output / input bit, respectively.
+  int obit = exact_log2 (mask);
+  int ibit = obit - shift;
+
+  gcc_assert (IN_RANGE (obit, 0, GET_MODE_BITSIZE (mode) - 1));
+  gcc_assert (IN_RANGE (ibit, 0, GET_MODE_BITSIZE (mode) - 1));
+
+  // In the remainder, use the sub-bytes that hold the bits.
+  rtx op[4] =
+    {
+      // Output
+      simplify_gen_subreg (QImode, xop[0], mode, obit / 8),
+      GEN_INT (obit & 7),
+      // Input
+      simplify_gen_subreg (QImode, xop[1], mode, ibit / 8),
+      GEN_INT (ibit & 7)
+    };
+  obit &= 7;
+  ibit &= 7;
+
+  // The length of the default sequence at the end of this function.
+  // We only emit anything other than the default when we find a sequence
+  // that is strictly shorter than the default sequence.
+  const int len0 = 2 + n_bytes - (n_bytes == 4 && AVR_HAVE_MOVW);
+
+  // Finding something shorter than the default sequence implies that there
+  // must be at most 2 instructions that deal with the bytes containing the
+  // relevant bits.  In addition, we need  N_BYTES - 1  instructions to clear
+  // the remaining result bytes.
+
+  const int n_clr = n_bytes - 1;
+  bool clr_p = false;
+  bool andi_p = false;
+
+  if (plen)
+    *plen = 0;
+
+  if (REGNO (op[0]) == REGNO (op[2])
+      // Output reg allows ANDI.
+      && test_hard_reg_class (LD_REGS, op[0]))
+    {
+      if (1 + n_clr < len0
+          // Same byte and bit: A single ANDI will do.
+          && obit == ibit)
+        {
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // |obit - ibit| = 4:  SWAP + ANDI will do.
+               && (obit == ibit + 4 || obit == ibit - 4))
+        {
+          avr_asm_len ("swap %0", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // LSL + ANDI will do.
+               && obit == ibit + 1)
+        {
+          avr_asm_len ("lsl %0", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // LSR + ANDI will do.
+               && obit == ibit - 1)
+        {
+          avr_asm_len ("lsr %0", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+    }
+
+  if (REGNO (op[0]) != REGNO (op[2])
+      && obit == ibit)
+    {
+      if (2 + n_clr < len0
+          // Same bit but different byte: MOV + ANDI will do.
+          && test_hard_reg_class (LD_REGS, op[0]))
+        {
+          avr_asm_len ("mov %0,%2", op, plen, 1);
+          clr_p = andi_p = true;
+        }
+      else if (2 + n_clr < len0
+               // Same bit but different byte:  We can use ANDI + MOV,
+               // but only if the input byte is LD_REGS and unused after.
+               && test_hard_reg_class (LD_REGS, op[2])
+               && reg_unused_after (insn, op[2]))
+        {
+          avr_asm_len ("andi %2,1<<%3" CR_TAB
+                       "mov %0,%2", op, plen, 2);
+          clr_p = true;
+        }
+    }
+
+  // Output remaining instructions of the shorter sequence.
+
+  if (andi_p)
+    avr_asm_len ("andi %0,1<<%1", op, plen, 1);
+
+  if (clr_p)
+    {
+      for (int b = 0; b < n_bytes; ++b)
+        {
+          rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+          if (REGNO (byte) != REGNO (op[0]))
+            avr_asm_len ("clr %0", &byte, plen, 1);
+        }
+
+      // CLR_P means we found a shorter sequence, so we are done now.
+      return "";
+    }
+
+  // No shorter sequence found, just emit the  BST, CLR*, BLD  sequence.
+
+  avr_asm_len ("bst %2,%3", op, plen, -1);
+
+  if (n_bytes == 4 && AVR_HAVE_MOVW)
+    avr_asm_len ("clr %A0" CR_TAB
+                 "clr %B0" CR_TAB
+                 "movw %C0,%A0", xop, plen, 3);
+  else
+    for (int b = 0; b < n_bytes; ++b)
+      {
+        rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+        avr_asm_len ("clr %0", &byte, plen, 1);
+      }
+
+  return avr_asm_len ("bld %0,%1", op, plen, 1);
+}
+
+
 /* Output instructions to extract a bit to 8-bit register XOP[0].
    The input XOP[1] is a register or an 8-bit MEM in the lower I/O range.
    XOP[2] is the const_int bit position.  Return "".
@@ -9994,6 +10147,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
     case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break;
 
     case ADJUST_LEN_EXTR_NOT: avr_out_extr_not (insn, op, &len); break;
     case ADJUST_LEN_EXTR: avr_out_extr (insn, op, &len); break;
+    case ADJUST_LEN_INSV: avr_out_insv (insn, op, &len); break;
 
     case ADJUST_LEN_PLUS: avr_out_plus (insn, op, &len); break;
     case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break;
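
As a worked example of the length bookkeeping above (illustrative only, not part of the patch; it assumes an SImode value living in R22..R25, as for a 32-bit return value): for  (a << 8) & (1ul << 13)  we get shift = 8, obit = 13, ibit = 5.  The payload bit sits in output byte 1 and input byte 0, both at bit position 5, so after the subreg step obit == ibit while the bytes differ, and the MOV + ANDI path is a candidate.  With MOVW the default sequence already costs len0 = 2 + 4 - 1 = 5 words, so the 5-word alternative is not strictly shorter and the default BST / CLR* / BLD sequence is kept; without MOVW, len0 = 6 and the shorter sequence wins:

    mov  r23,r22    ; move input byte 0 to output byte 1
    andi r23,1<<5   ; isolate the payload bit
    clr  r22        ; clear the three remaining result bytes
    clr  r24
    clr  r25
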
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 83dd15040b0..73bddec2b33 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -163,7 +163,7 @@ (define_attr "adjust_len"
    ashlhi, ashrhi, lshrhi,
    ashlsi, ashrsi, lshrsi,
    ashlpsi, ashrpsi, lshrpsi,
-   insert_bits, insv_notbit,
+   insert_bits, insv_notbit, insv,
    add_set_ZN, cmp_uext, cmp_sext,
    no"
   (const_string "no"))
@@ -9818,6 +9818,12 @@ (define_insn_and_split "*extzv.io.lsr7"
                           (const_int 1)
                           (const_int 7)))])
 
+;; This insn serves as a combine bridge because insn combine will only
+;; combine so many (at most 3) insns.  It's not actually an open-coded
+;; bit insertion but just a part of one.  It may occur in other contexts
+;; than INSV though, and in such a case the code may be worse than without
+;; this pattern.  We still have to emit code for it in that case because
+;; we cannot roll back.
 (define_insn_and_split "*insv.any_shift.<mode>_split"
   [(set (match_operand:QISI 0 "register_operand"                           "=r")
         (and:QISI (any_shift:QISI (match_operand:QISI 1 "register_operand"  "r")
@@ -9840,27 +9846,9 @@ (define_insn "*insv.any_shift.<mode>"
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
-    int shift = <CODE> == ASHIFT ? INTVAL (operands[2]) : -INTVAL (operands[2]);
-    int mask = GET_MODE_MASK (<MODE>mode) & INTVAL (operands[3]);
-    // Position of the output / input bit, respectively.
-    int obit = exact_log2 (mask);
-    int ibit = obit - shift;
-    gcc_assert (IN_RANGE (obit, 0, <MSB>));
-    gcc_assert (IN_RANGE (ibit, 0, <MSB>));
-    operands[3] = GEN_INT (obit);
-    operands[2] = GEN_INT (ibit);
-
-    if (<SIZE> == 1) return "bst %T1%T2\;clr %0\;"                 "bld %T0%T3";
-    if (<SIZE> == 2) return "bst %T1%T2\;clr %A0\;clr %B0\;"       "bld %T0%T3";
-    if (<SIZE> == 3) return "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;bld %T0%T3";
-    return AVR_HAVE_MOVW
-      ? "bst %T1%T2\;clr %A0\;clr %B0\;movw %C0,%A0\;" "bld %T0%T3"
-      : "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;clr %D0\;bld %T0%T3";
+    return avr_out_insv (insn, operands, nullptr);
   }
-  [(set (attr "length")
-        (minus (symbol_ref "2 + <SIZE>")
-               ; One less if we can use a MOVW to clear.
-               (symbol_ref "<SIZE> == 4 && AVR_HAVE_MOVW")))])
+  [(set_attr "adjust_len" "insv")])
 
 
 (define_insn_and_split "*extzv.hi2"
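
To illustrate the bridge (a hypothetical example, not from the testsuite): a full open-coded bit insertion such as

    unsigned long
    insert_bit13 (unsigned long dst, unsigned long a)
    {
      /* The  (a << 8) & (1ul << 13)  subexpression is the shape that
         *insv.any_shift.<mode> matches.  */
      return (dst & ~(1ul << 13)) | ((a << 8) & (1ul << 13));
    }

consists of more insns than combine is willing to merge in one step; recognizing the shift-and-mask part as an insn of its own gives combine a stepping stone towards the complete insertion.
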
diff --git a/gcc/testsuite/gcc.target/avr/torture/insv-anyshift.c b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift.c
new file mode 100644
index 00000000000..2f94e1787b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/insv-anyshift.c
@@ -0,0 +1,81 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+/* Testing inlined and completely folded versions of functions
+   against their non-inlined, non-folded counterparts.  */
+
+#define MK_FUN1(OBIT, LSR)                                          \
+  static __inline__ __attribute__((__always_inline__))              \
+  uint32_t fun1_lsr_##OBIT##_##LSR##_ai (int x, uint32_t a)         \
+  {                                                                 \
+    (void) x;                                                       \
+    return (a >> LSR) & (1ul << OBIT);                              \
+  }                                                                 \
+                                                                    \
+  __attribute__((__noinline__,__noclone__))                         \
+  uint32_t fun1_lsr_##OBIT##_##LSR##_ni (int x, uint32_t a)         \
+  {                                                                 \
+    return fun1_lsr_##OBIT##_##LSR##_ai (x, a);                     \
+  }                                                                 \
+                                                                    \
+  void test_fun1_lsr_##OBIT##_##LSR (void)                          \
+  {                                                                 \
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR))       \
+        != fun1_lsr_##OBIT##_##LSR##_ai (0, 1ul << (OBIT + LSR)))   \
+      __builtin_abort();                                            \
+                                                                    \
+    if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR))       \
+        != fun1_lsr_##OBIT##_##LSR##_ai (0, -1ul))                  \
+      __builtin_abort();                                            \
+  }
+
+
+#define MK_FUN2(OBIT, LSL)                                          \
+  static __inline__ __attribute__((__always_inline__))              \
+  uint32_t fun2_lsl_##OBIT##_##LSL##_ai (int x, uint32_t a)         \
+  {                                                                 \
+    (void) x;                                                       \
+    return (a << LSL) & (1ul << OBIT);                              \
+  }                                                                 \
+                                                                    \
+  __attribute__((__noinline__,__noclone__))                         \
+  uint32_t fun2_lsl_##OBIT##_##LSL##_ni (int x, uint32_t a)         \
+  {                                                                 \
+    return fun2_lsl_##OBIT##_##LSL##_ai (x, a);                     \
+  }                                                                 \
+                                                                    \
+  void test_fun2_lsl_##OBIT##_##LSL (void)                          \
+  {                                                                 \
+    if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL))       \
+        != fun2_lsl_##OBIT##_##LSL##_ai (0, 1ul << (OBIT - LSL)))   \
+      __builtin_abort();                                            \
+                                                                    \
+    if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL))       \
+        != fun2_lsl_##OBIT##_##LSL##_ai (0, -1ul))                  \
+      __builtin_abort();                                            \
+  }
+
+
+MK_FUN1 (13, 15)
+MK_FUN1 (13, 16)
+MK_FUN1 (13, 17)
+MK_FUN1 (13, 12)
+
+MK_FUN2 (12, 8)
+MK_FUN2 (13, 8)
+MK_FUN2 (16, 8)
+
+int main (void)
+{
+  test_fun1_lsr_13_15 ();
+  test_fun1_lsr_13_16 ();
+  test_fun1_lsr_13_17 ();
+  test_fun1_lsr_13_12 ();
+
+  test_fun2_lsl_12_8 ();
+  test_fun2_lsl_13_8 ();
+  test_fun2_lsl_16_8 ();
+
+  return 0;
+}
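
For reference, MK_FUN1 (13, 15) expands to roughly the following sketch (abbreviated; attributes shortened, uint32_t being the typedef from the test file):

    static inline uint32_t fun1_lsr_13_15_ai (int x, uint32_t a)
    {
      return (a >> 15) & (1ul << 13);   /* folded at compile time for constant a */
    }

    __attribute__((noinline)) uint32_t fun1_lsr_13_15_ni (int x, uint32_t a)
    {
      return fun1_lsr_13_15_ai (x, a);  /* runs the code emitted via avr_out_insv */
    }

The dummy int argument presumably shifts `a` away from the return registers so that input and output bytes differ.  test_fun1_lsr_13_15 then compares the compile-time-folded result against the run-time result and aborts on any mismatch.
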