public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
       [not found] <4e83484c.03c7640a.2591.10bdSMTPIN_ADDED@mx.google.com>
@ 2011-10-21 12:52 ` Ramana Radhakrishnan
  2011-11-07  9:49   ` Sameera Deshpande
  0 siblings, 1 reply; 15+ messages in thread
From: Ramana Radhakrishnan @ 2011-10-21 12:52 UTC (permalink / raw)
  To: Sameera Deshpande; +Cc: gcc-patches, nickc, Richard Earnshaw, paul

Hi Sameera,

The comment about REG_FRAME_RELATED_EXPR vs REG_CFA_RESTORE from one
of your later patches
applies here as well.

>diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>index 3162b30..f86a3e6 100644
>--- a/gcc/config/arm/arm.c
>+++ b/gcc/config/arm/arm.c
>@@ -8754,6 +8754,140 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
> #undef CHECK
> }
>
>+/* Return true if OP is a valid load multiple operation for MODE mode.
>+   CONSECUTIVE is true if the registers in the operation must form
>+   a consecutive sequence in the register bank.  STACK_ONLY is true
>+   if the base register must be the stack pointer.  RETURN_PC is true
>+   if value is to be loaded in PC.  */
>+bool
>+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
>+                           bool stack_only, bool return_pc)
>+{

<...> snip

>+
>+  /* If DFMode, we must be asking for consecutive,
>+     since FLDMDD can only do consecutive regs.  */

s/DFMode/DFmode
s/FLDMDD/fldmdd (vldm.f64)

Why are you differentiating on stack_only ? Does it really matter ?


>+  gcc_assert ((mode != DFmode) || consecutive);
>+
>+  /* Set up the increments and the regs per val based on the mode.  */
>+  reg_increment = mode == DFmode ? 8 : 4;

Can't you just get the reg_increment based on GET_MODE_SIZE (mode) ?

>+  regs_per_val = mode == DFmode ? 2 : 1;
>+  offset_adj = return_pc ? 1 : 0;
>+
>+  if (count <= 1
>+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
>+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
>+    return false;
>+
>+  /* Check to see if this might be a write-back.  */
>+  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
>+    {
>+      i++;
>+      base = 1;
>+      update = true;
>+
>+      /* Now check it more carefully.  */
>+      if (!REG_P (SET_DEST (elt))
>+          || !REG_P (XEXP (SET_SRC (elt), 0))
>+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
>+          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
>+              ((count - 1 - offset_adj) * reg_increment))
>+        return false;

A comment here explaining that you are checking for the
increment amount being sane would be good.


>+
>+      /* Check the nature of the base_register being written to.  */
>+      if (stack_only && (REGNO (SET_DEST (elt)) != SP_REGNUM))
>+        return false;
>+    }
>+
>+  i = i + offset_adj;
>+  base = base + offset_adj;
>+  /* Perform a quick check so we don't blow up below.  */
>+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
>+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
>+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
>+    return false;
>+
>+  /* If only one reg being loaded, success depends on the type:
>+     FLDMDD can do just one reg, LDM must do at least two.  */

Hmmm isn't this true of only LDM's in Thumb state ? Though it could be argued
that this patch is only T2 epilogues.

>+  if (count <= i)
>+    return mode == DFmode ? true : false;

Again a comment here would be useful.

>+
>+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
>+  dest_regno = first_dest_regno;
>+
>+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
>+
>+  if (GET_CODE (src_addr) == PLUS)
>+    {
>+      if (!CONST_INT_P (XEXP (src_addr, 1)))
>+	return false;

Watch out for the indentation of the return.

<...snip>
>+)
>+
>+(define_insn "*floating_point_pop_multiple_with_stack_update"

s/floating_point/vfp

>+  [(match_parallel 0 "load_multiple_operation_stack_fp"
>+    [(set (match_operand:SI 1 "s_register_operand" "=k")
>+          (plus:SI (match_operand:SI 2 "s_register_operand" "1")
>+                   (match_operand:SI 3 "const_int_operand" "I")))
>+     (set (match_operand:DF 4 "arm_hard_register_operand" "")
>+          (mem:DF (match_dup 2)))])]
>+  "TARGET_THUMB2"

&& TARGET_HARD_FLOAT && TARGET_VFP

>+  "*
>+  {
>+    int num_regs = XVECLEN (operands[0], 0);
>+    static const struct { const char *const name; } table[]
>+                  = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},
>+                      {\"d4\"}, {\"d5\"}, {\"d6\"}, {\"d7\"},
>+                      {\"d8\"}, {\"d9\"}, {\"d10\"}, {\"d11\"},
>+                      {\"d12\"}, {\"d13\"}, {\"d14\"}, {\"d15\"},
>+                      {\"d16\"}, {\"d17\"}, {\"d18\"}, {\"d19\"},
>+                      {\"d20\"}, {\"d21\"}, {\"d22\"}, {\"d23\"},
>+                      {\"d24\"}, {\"d25\"}, {\"d26\"}, {\"d27\"},
>+                      {\"d28\"}, {\"d29\"}, {\"d30\"}, {\"d31\"} };


>+    int i;
>+    char pattern[100];
>+    strcpy (pattern, \"fldmfdd\\t\");
>+    strcat (pattern,
>+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
>+    strcat (pattern, \"!, {\");
>+    strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))
>+                           - FIRST_VFP_REGNUM) / 2].name);

Can't you reuse names from arm.h and avoid the table here ?


>+    for (i = 2; i < num_regs; i++)
>+      {
>+        strcat (pattern, \", %|\");
>+        strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))
>+                               - FIRST_VFP_REGNUM) / 2].name);
>+      }

Can't you use fldmfdd {reg_lo-reg_hi} instead of enumerating all the
registers here.

>+    strcat (pattern, \"}\");
>+    output_asm_insn (pattern, operands);
>+    return \"\";
>+  }
>+  "
>+  [(set_attr "type" "load4")]




Ramana

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-10-21 12:52 ` [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL Ramana Radhakrishnan
@ 2011-11-07  9:49   ` Sameera Deshpande
  2011-11-07 10:07     ` Paul Brook
  0 siblings, 1 reply; 15+ messages in thread
From: Sameera Deshpande @ 2011-11-07  9:49 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, nickc, Richard Earnshaw, paul

[-- Attachment #1: Type: text/plain, Size: 3059 bytes --]

Hi Ramana,

Please find attached reworked patch. The patch is tested with check-gcc,
check-gdb and bootstrap with no regression.

On Fri, 2011-10-21 at 13:43 +0100, Ramana Radhakrishnan wrote: 
> Why are you differentiating on stack_only ? Does it really matter ?
> 
The patterns pop_multi* generate pop instruction, hence I wanted to be
sure that base register is stack.

I can remove stack_only option by
1. Modifying pattern to match SP as base-register explicitly or
2. Generate ldm%(ia%) instruction for non-SP base registers.

I chose second option.

> Hmmm isn't this true of only LDM's in Thumb state ? Though it could be argued
> that this patch is only T2 epilogues.
> 
Yes, its true. But for single register we want to match LDR pattern and
not any of ldm* or pop_multi* pattern. So, I am barring LDM for single
register here.

> >+    strcpy (pattern, \"fldmfdd\\t\");
> >+    strcat (pattern,
> >+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
> >+    strcat (pattern, \"!, {\");
> >+    strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))
> >+                           - FIRST_VFP_REGNUM) / 2].name);
> 
> Can't you reuse names from arm.h and avoid the table here ?
> 
The array REGISTER_NAMES in aout.h use S0, S2, ...  names for double
registers. Is there any way to use OVERLAPPING_REGISTER_NAMES? If that
can be done, I can eliminate the table here.

Updated ChangeLog entry:

2011-09-28  Ian Bolton         <ian.bolton@arm.com>
            Sameera Deshpande  <sameera.deshpande@arm.com>
           
       * config/arm/arm-protos.h (load_multiple_operation_p): New
declaration.
         (thumb2_expand_epilogue): Likewise.
         (thumb2_output_return): Likewise
         (thumb2_expand_return): Likewise.
         (thumb_unexpanded_epilogue): Rename to... 
         (thumb1_unexpanded_epilogue): ...this 
       * config/arm/arm.c (load_multiple_operation_p): New function. 
         (thumb2_emit_multi_reg_pop): Likewise.
         (thumb2_emit_vfp_multi_reg_pop): Likewise.
         (thumb2_expand_return): Likewise. 
         (thumb2_expand_epilogue): Likewise. 
         (thumb2_output_return): Likewise
         (thumb_unexpanded_epilogue): Rename to...
         ( thumb1_unexpanded_epilogue): ...this
       * config/arm/arm.md (pop_multiple_with_stack_update): New
pattern. 
         (pop_multiple_with_stack_update_and_return): Likewise.
         (thumb2_ldr_with_return): Likewise.
         (vfp_point_pop_multiple_with_stack_update): Likewise.
         (return): Update condition and code for pattern.
         (arm_return): Likewise.
         (epilogue_insns): Likewise.
       * config/arm/predicates.md (load_multiple_operation): Update
predicate.
         (load_multiple_operation_return): New predicate. 
         (load_multiple_operation_fp): Likewise.
       * config/arm/thumb2.md (thumb2_return): Remove.
         (thumb2_rtl_epilogue_return): New pattern.


- Thanks and regards,
  Sameera D.

[-- Attachment #2: thumb2_rtl_epilogue_complete-4Nov.patch --]
[-- Type: text/x-patch, Size: 31196 bytes --]

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..2c38883 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,6 +65,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
 					   int *);
@@ -176,10 +177,13 @@ extern int arm_float_words_big_endian (void);
 
 /* Thumb functions.  */
 extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
 extern void thumb1_expand_prologue (void);
 extern void thumb1_expand_epilogue (void);
 extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
 #ifdef TREE_CODE
 extern int is_called_in_ARM_mode (tree);
 #endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e07c8c3..ec87892 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8906,6 +8906,137 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
 #undef CHECK
 }
 
+/* Return true if OP is a valid load multiple operation for MODE mode.
+   CONSECUTIVE is true if the registers in the operation must form
+   a consecutive sequence in the register bank.  STACK_ONLY is true
+   if the base register must be the stack pointer.  RETURN_PC is true
+   if value is to be loaded in PC.  */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
+                           bool return_pc)
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  unsigned dest_regno, first_dest_regno;
+  rtx src_addr;
+  HOST_WIDE_INT i = 1, base = 0;
+  HOST_WIDE_INT offset = 0;
+  rtx elt;
+  bool addr_reg_loaded = false;
+  bool update = false;
+  int reg_increment, regs_per_val;
+  int offset_adj;
+
+  /* If DFmode, we must be asking for consecutive,
+     since fldmdd can only do consecutive regs.  */
+  gcc_assert ((mode != DFmode) || consecutive);
+
+  /* Set up the increments and the regs per val based on the mode.  */
+  reg_increment = GET_MODE_SIZE (mode);
+  regs_per_val = mode == DFmode ? 2 : 1;
+  offset_adj = return_pc ? 1 : 0;
+
+  if (count <= 1
+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+    return false;
+
+  /* Check to see if this might be a write-back.  */
+  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
+    {
+      i++;
+      base = 1;
+      update = true;
+
+      /* The offset adjustment should be same as number of registers being
+         popped * size of single register.  */
+      if (!REG_P (SET_DEST (elt))
+          || !REG_P (XEXP (SET_SRC (elt), 0))
+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
+              ((count - 1 - offset_adj) * reg_increment))
+        return false;
+    }
+
+  i = i + offset_adj;
+  base = base + offset_adj;
+  /* Perform a quick check so we don't blow up below.  */
+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+    return false;
+
+  /* If only one reg being loaded, success depends on the type:
+     FLDMDD can do just one reg, LDM must do at least two.  */
+  if (count <= i)
+    return mode == DFmode ? true : false;
+
+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+  dest_regno = first_dest_regno;
+
+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+  if (GET_CODE (src_addr) == PLUS)
+    {
+      if (!CONST_INT_P (XEXP (src_addr, 1)))
+        return false;
+      offset = INTVAL (XEXP (src_addr, 1));
+      src_addr = XEXP (src_addr, 0);
+    }
+
+  if (!REG_P (src_addr))
+    return false;
+
+  /* The pattern we are trying to match here is:
+     [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (src_addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * <reg_increment>))))
+     ]
+     Where,
+     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+     3.  If consecutive is TRUE, then for kth register being loaded,
+         REGNO (R_dk) = REGNO (R_d0) + k.  */
+  for (; i < count; i++)
+    {
+      elt = XVECEXP (op, 0, i);
+
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_DEST (elt))
+          || GET_MODE (SET_DEST (elt)) != mode
+          || (consecutive
+              && (REGNO (SET_DEST (elt))
+                  != (unsigned int) (first_dest_regno + regs_per_val * (i - base))))
+          || REGNO (SET_DEST (elt)) <= dest_regno
+          || !MEM_P (SET_SRC (elt))
+          || GET_MODE (SET_SRC (elt)) != mode
+          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+               || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) !=
+                           (offset + (i - base) * reg_increment))
+              && (!REG_P (XEXP (SET_SRC (elt), 0))
+		  || offset + (i - base) * reg_increment != 0)))
+        return false;
+
+      dest_regno = REGNO (SET_DEST (elt));
+      if (dest_regno == REGNO (src_addr))
+        addr_reg_loaded = true;
+    }
+
+  if (update && addr_reg_loaded)
+    return false;
+
+  /* For Thumb-1, address register is always modified - either by write-back
+     or by explicit load.  If the pattern does not describe an update, it must
+     be because the address register is in the list of loaded registers.  */
+  if (TARGET_THUMB1)
+    return update || addr_reg_loaded;
+
+  return true;
+}
+
 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
    float elements), and a modified constant (whatever should be output for a
@@ -16092,6 +16223,137 @@ emit_multi_reg_push (unsigned long mask)
   return par;
 }
 
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.
+
+   There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
+   but currently the pattern that matches this in the MD file is only enabled
+   for Thumb-2.  */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+  int offset_adj = really_return ? 1 : 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+  if (really_return)
+    {
+      tmp = ret_rtx;
+      XVECEXP (par, 0, 0) = tmp;
+    }
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, offset_adj) = tmp;
+
+  /* Now restore every reg, which may include PC.  */
+  for (j = 0, i = 0; j < num_regs; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        reg = gen_rtx_REG (SImode, i);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (stack_pointer_rtx, 4 * j)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+        /* We need to maintain a sequence for DWARF info too.  As dwarf info
+           should not have PC, skip PC.  */
+        if (i != PC_REGNUM)
+          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        j++;
+      }
+
+  if (really_return)
+    par = emit_jump_insn (par);
+  else
+    par = emit_insn (par);
+
+  REG_NOTES (par) = dwarf;
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+
+  gcc_assert (num_regs && num_regs <= 32);
+
+  if (num_regs > 16)
+    {
+      thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+      thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+      return;
+    }
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* Increment the stack pointer, based on there being
+     num_regs 8-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, 0) = tmp;
+
+  /* now show EVERY reg that will be restored, using a SET for each.  */
+  for (j = 0, i=first_reg; j < num_regs; i += 2)
+    {
+      reg = gen_rtx_REG (DFmode, i);
+
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx,
+                                         8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, j + 1) = tmp;
+
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      j++;
+    }
+
+  par = emit_insn (par);
+  REG_NOTES (par) = dwarf;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -21622,7 +21884,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
 
 /* The bits which aren't usefully expanded as rtl.  */
 const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
 {
   arm_stack_offsets *offsets;
   int regno;
@@ -22191,7 +22453,6 @@ thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
-
 void
 thumb1_expand_epilogue (void)
 {
@@ -22246,6 +22507,242 @@ thumb1_expand_epilogue (void)
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+   POP instruction can be generated.  LR should be replaced by PC.  All
+   the checks required are already done by  USE_RETURN_INSN ().  Hence,
+   all we really need to check here is if single register is to be
+   returned, or multiple register return.  */
+void
+thumb2_expand_return (void)
+{
+  int i, num_regs;
+  unsigned long saved_regs_mask;
+  arm_stack_offsets *offsets;
+
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  if (saved_regs_mask)
+    {
+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (par) = 1;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          saved_regs_mask &= ~ (1 << LR_REGNUM);
+          saved_regs_mask |=   (1 << PC_REGNUM);
+          thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+        }
+    }
+  else
+    {
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+   Note that this RTL does not include the
+   Return insn, which is created separately and
+   handled in thumb2_output_return.  */
+void
+thumb2_expand_epilogue (void)
+{
+  HOST_WIDE_INT amount;
+  int reg;
+  unsigned long saved_regs_mask;
+  unsigned long func_type;
+  int i;
+  arm_stack_offsets *offsets;
+  int num_regs = 0;
+  bool really_return = false;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogues.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  /* At the end of the code of a function, the stack pointer will
+     be pointing at the outgoing args area, so we first need to
+     get it to point at the saved_regs area.  */
+
+  /* Determine how much to add to the stack pointer.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  /* In Thumb-2 mode, the frame pointer points to the last
+     saved register.  */
+  amount = offsets->outgoing_args - offsets->saved_regs;
+
+  if (frame_pointer_needed)
+    {
+      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+      amount = offsets->locals_base - offsets->saved_regs;
+    }
+
+  gcc_assert (amount >= 0);
+  if (amount)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           GEN_INT (amount)));
+
+  /* Emit a USE (stack_pointer_rtx), so that
+     the stack adjustment will not be deleted.  */
+  emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  /* Now handle any VFP restoration.  */
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         fldmdd operations.  The need to match groups is because,
+         unlike pop, fldmdd can only do consecutive regs.  */
+      for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+            && (!df_regs_ever_live_p (reg + 1)
+                || call_used_regs[reg + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to end_reg).  */
+            if (end_reg > reg + 2)
+              thumb2_emit_vfp_multi_reg_pop (reg + 2,
+                                             (end_reg - (reg + 2)) / 2);
+            end_reg = reg;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never fired).  */
+      if (end_reg > reg + 2)
+        thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+    }
+
+  /* iWMMXt is not supported when Thumb-2 in use.  If it were, we would
+     want to be restoring the appropriate iWMMXt regs here, in a similar
+     way to arm_output_epilogue.  */
+
+  /* If there are registers to restore, make it happen.  */
+  if (saved_regs_mask)
+    {
+      /* It's illegal to do a pop for only one reg, so generate an ldr.  */
+      if (num_regs == 1)
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx addr = gen_rtx_MEM (SImode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+              }
+        }
+
+      /* Two or more regs warrants the use of a multi-reg pop.  */
+      else
+        {
+          /* If multi-pop is last instruction, don't generate `branch to
+             return-address' instruction.  Instead, pop LR in PC.  */
+          if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+              && !IS_STACKALIGN (func_type)
+              && crtl->args.pretend_args_size == 0
+              && saved_regs_mask & (1 << LR_REGNUM)
+              && !crtl->calls_eh_return)
+            {
+              saved_regs_mask &= ~ (1 << LR_REGNUM);
+              saved_regs_mask |=   (1 << PC_REGNUM);
+              really_return = true;
+            }
+
+          thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+          if (really_return == true)
+            return;
+        }
+    }
+
+  /* Unwind the pre-pushed regs.  */
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  emit_jump_insn (ret_rtx);
+}
+
+
+/* Generate the appropriate instruction to return for Thumb-2.
+   OPERAND holds a condition, which must be passed to output_asm_insn.  */
+void
+thumb2_output_return (rtx operand)
+{
+  char instr[100];
+  unsigned long func_type;
+
+  func_type = arm_current_func_type ();
+
+  if (IS_NAKED (func_type))
+    /* Do nothing if naked function.  */
+    return;
+
+  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+    {
+      rtx op;
+
+      /* A volatile function should never return.  Call abort.  */
+      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+      assemble_external_libcall (op);
+      output_asm_insn ("bl\t%a0", &op);
+
+      return;
+    }
+
+  switch ((int) ARM_FUNC_TYPE (func_type))
+    {
+    case ARM_FT_ISR:
+    case ARM_FT_FIQ:
+      sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+      break;
+
+    case ARM_FT_EXCEPTION:
+      sprintf (instr, "movs\t%%|pc, %%|lr");
+      break;
+
+    default:
+      sprintf (instr, "bx\t%%|lr");
+      break;
+    }
+
+  output_asm_insn (instr, &operand);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..64444f2 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6669,6 +6669,151 @@
 			     FALSE, operands[1], &offset);
 })
 
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation"
+    [(set (match_operand:SI 1 "s_register_operand" "+rk")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    if (REGNO (operands[1]) == SP_REGNUM)
+      {
+        strcpy (pattern, \"pop\\t{\");
+      }
+    else
+      {
+        strcpy (pattern, \"ldm%(ia%)\\t\");
+        strcat (pattern, reg_names[REGNO (operands[1])]);
+        strcat (pattern, \"!, {\");
+      }
+
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+
+    /* Skip over the first element and the one we just generated.  */
+    for (i = 2; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+;; Pop with return (as used in epilogue RTL)
+;;
+;; This instruction is generated when the registers are popped at end of
+;; epilogue.  Here, instead of popping the value in LR and then generating
+;; jump to LR, value is popped in PC.  Hence, the pattern is combined with
+;; (return).
+(define_insn "*pop_multiple_with_stack_update_and_return"
+  [(match_parallel 0 "load_multiple_operation_return"
+    [(return)
+     (set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    strcpy (pattern, \"pop\\t{\");
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);
+
+    /* Skip over the first two elements and the one we just generated.  */
+    for (i = 3; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*vfp_pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_fp"
+    [(set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+     (set (match_operand:DF 3 "arm_hard_register_operand" "")
+          (mem:DF (match_dup 1)))])]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  "*
+  {
+    int num_regs = XVECLEN (operands[0], 0);
+    static const struct { const char *const name; } table[]
+                  = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},
+                      {\"d4\"}, {\"d5\"}, {\"d6\"}, {\"d7\"},
+                      {\"d8\"}, {\"d9\"}, {\"d10\"}, {\"d11\"},
+                      {\"d12\"}, {\"d13\"}, {\"d14\"}, {\"d15\"},
+                      {\"d16\"}, {\"d17\"}, {\"d18\"}, {\"d19\"},
+                      {\"d20\"}, {\"d21\"}, {\"d22\"}, {\"d23\"},
+                      {\"d24\"}, {\"d25\"}, {\"d26\"}, {\"d27\"},
+                      {\"d28\"}, {\"d29\"}, {\"d30\"}, {\"d31\"} };
+    char pattern[100];
+    strcpy (pattern, \"fldmfdd\\t\");
+    strcat (pattern,
+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+    strcat (pattern, \"!, {\");
+    strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))
+                           - FIRST_VFP_REGNUM) / 2].name);
+    if ((num_regs - 1) > 1)
+      {
+        strcat (pattern, \"-%|\");
+        strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0,
+                                                      num_regs - 1), 0))
+                               - FIRST_VFP_REGNUM) / 2].name);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
                           (match_operand:SI 1 "" ""))
@@ -8486,8 +8631,19 @@
 
 (define_expand "return"
   [(return)]
-  "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
-  "")
+  "(TARGET_ARM || (TARGET_THUMB2
+                   && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
+                   && !IS_STACKALIGN (arm_current_func_type ())))
+    && USE_RETURN_INSN (FALSE)"
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        thumb2_expand_return ();
+        DONE;
+      }
+  }
+  ")
 
 ;; Often the return insn will be the same as loading from memory, so set attr
 (define_insn "*arm_return"
@@ -10529,6 +10685,11 @@
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
     thumb1_expand_epilogue ();
+  else if (TARGET_THUMB2)
+  {
+    thumb2_expand_epilogue ();
+    DONE;
+  }
   else if (USE_RETURN_INSN (FALSE))
     {
       emit_jump_insn (gen_return ());
@@ -10572,12 +10733,12 @@
 
 (define_insn "*epilogue_insns"
   [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
-  "TARGET_EITHER"
+  "TARGET_ARM || TARGET_THUMB1"
   "*
   if (TARGET_32BIT)
     return arm_output_epilogue (NULL);
   else /* TARGET_THUMB1 */
-    return thumb_unexpanded_epilogue ();
+    return thumb1_unexpanded_epilogue ();
   "
   ; Length is absolute worst case
   [(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..7e2203d 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -369,84 +369,22 @@
 (define_special_predicate "load_multiple_operation"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  unsigned dest_regno;
-  rtx src_addr;
-  HOST_WIDE_INT i = 1, base = 0;
-  HOST_WIDE_INT offset = 0;
-  rtx elt;
-  bool addr_reg_loaded = false;
-  bool update = false;
-
-  if (count <= 1
-      || GET_CODE (XVECEXP (op, 0, 0)) != SET
-      || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
-    return false;
-
-  /* Check to see if this might be a write-back.  */
-  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
-    {
-      i++;
-      base = 1;
-      update = true;
-
-      /* Now check it more carefully.  */
-      if (GET_CODE (SET_DEST (elt)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
-          || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
-        return false;
-    }
-
-  /* Perform a quick check so we don't blow up below.  */
-  if (count <= i
-      || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
-      || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
-      || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
-    return false;
-
-  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
-  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
-  if (GET_CODE (src_addr) == PLUS)
-    {
-      if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
-	return false;
-      offset = INTVAL (XEXP (src_addr, 1));
-      src_addr = XEXP (src_addr, 0);
-    }
-  if (!REG_P (src_addr))
-    return false;
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/false);
+})
 
-  for (; i < count; i++)
-    {
-      elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_return"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/true);
+})
 
-      if (GET_CODE (elt) != SET
-          || GET_CODE (SET_DEST (elt)) != REG
-          || GET_MODE (SET_DEST (elt)) != SImode
-          || REGNO (SET_DEST (elt)) <= dest_regno
-          || GET_CODE (SET_SRC (elt)) != MEM
-          || GET_MODE (SET_SRC (elt)) != SImode
-          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
-	       || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
-	       || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
-	       || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
-	      && (!REG_P (XEXP (SET_SRC (elt), 0))
-		  || offset + (i - base) * 4 != 0)))
-        return false;
-      dest_regno = REGNO (SET_DEST (elt));
-      if (dest_regno == REGNO (src_addr))
-        addr_reg_loaded = true;
-    }
-  /* For Thumb, we only have updating instructions.  If the pattern does
-     not describe an update, it must be because the address register is
-     in the list of loaded registers - on the hardware, this has the effect
-     of overriding the update.  */
-  if (update && addr_reg_loaded)
-    return false;
-  if (TARGET_THUMB1)
-    return update || addr_reg_loaded;
-  return true;
+(define_special_predicate "load_multiple_operation_fp"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/true,
+                                    DFmode, /*return_pc*/false);
 })
 
 (define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..78f4e81 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
    (set_attr "length" "20")]
 )
 
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
   [(return)]
-  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+  "(TARGET_THUMB2)"
   "*
   {
-    return output_return_instruction (const_true_rtx, TRUE, FALSE);
+    thumb2_output_return (const_true_rtx);
+    return \"\";
   }"
-  [(set_attr "type" "load1")
-   (set_attr "length" "12")]
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")]
 )
 
 (define_insn_and_split "thumb2_eh_return"

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-07  9:49   ` Sameera Deshpande
@ 2011-11-07 10:07     ` Paul Brook
  2011-11-07 17:32       ` Sameera Deshpande
  0 siblings, 1 reply; 15+ messages in thread
From: Paul Brook @ 2011-11-07 10:07 UTC (permalink / raw)
  To: Sameera Deshpande
  Cc: Ramana Radhakrishnan, gcc-patches, nickc, Richard Earnshaw


> > >+    strcpy (pattern, \"fldmfdd\\t\");
> > >+    strcat (pattern,
> > >+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0,
> > >0)))]); +    strcat (pattern, \"!, {\");
> > >+    strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1),
> > >0)) +                           - FIRST_VFP_REGNUM) / 2].name);
> > 
> > Can't you reuse names from arm.h and avoid the table here ?
> 
> The array REGISTER_NAMES in aout.h use S0, S2, ...  names for double
> registers. Is there any way to use OVERLAPPING_REGISTER_NAMES? If that
> can be done, I can eliminate the table here.

You should be using %P.

Paul

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-07 10:07     ` Paul Brook
@ 2011-11-07 17:32       ` Sameera Deshpande
  0 siblings, 0 replies; 15+ messages in thread
From: Sameera Deshpande @ 2011-11-07 17:32 UTC (permalink / raw)
  To: Paul Brook; +Cc: Ramana Radhakrishnan, gcc-patches, nickc, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 457 bytes --]

On Mon, 2011-11-07 at 09:56 +0000, Paul Brook wrote:
> > The array REGISTER_NAMES in aout.h use S0, S2, ...  names for double
> > registers. Is there any way to use OVERLAPPING_REGISTER_NAMES? If that
> > can be done, I can eliminate the table here.
> 
> You should be using %P.
> 

Paul,

Thanks for your comment. Please find attached reworked patch. The patch
is tested with check-gcc without regression.

- Thanks and regards,
  Sameera D. 

[-- Attachment #2: thumb2_rtl_epilogue_complete-7Nov.patch --]
[-- Type: text/x-patch, Size: 30436 bytes --]

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..2c38883 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,6 +65,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
 					   int *);
@@ -176,10 +177,13 @@ extern int arm_float_words_big_endian (void);
 
 /* Thumb functions.  */
 extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
 extern void thumb1_expand_prologue (void);
 extern void thumb1_expand_epilogue (void);
 extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
 #ifdef TREE_CODE
 extern int is_called_in_ARM_mode (tree);
 #endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e07c8c3..ec87892 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8906,6 +8906,137 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
 #undef CHECK
 }
 
+/* Return true if OP is a valid load multiple operation for MODE mode.
+   CONSECUTIVE is true if the registers in the operation must form
+   a consecutive sequence in the register bank.  STACK_ONLY is true
+   if the base register must be the stack pointer.  RETURN_PC is true
+   if value is to be loaded in PC.  */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
+                           bool return_pc)
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  unsigned dest_regno, first_dest_regno;
+  rtx src_addr;
+  HOST_WIDE_INT i = 1, base = 0;
+  HOST_WIDE_INT offset = 0;
+  rtx elt;
+  bool addr_reg_loaded = false;
+  bool update = false;
+  int reg_increment, regs_per_val;
+  int offset_adj;
+
+  /* If DFmode, we must be asking for consecutive,
+     since fldmdd can only do consecutive regs.  */
+  gcc_assert ((mode != DFmode) || consecutive);
+
+  /* Set up the increments and the regs per val based on the mode.  */
+  reg_increment = GET_MODE_SIZE (mode);
+  regs_per_val = mode == DFmode ? 2 : 1;
+  offset_adj = return_pc ? 1 : 0;
+
+  if (count <= 1
+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+    return false;
+
+  /* Check to see if this might be a write-back.  */
+  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
+    {
+      i++;
+      base = 1;
+      update = true;
+
+      /* The offset adjustment should be same as number of registers being
+         popped * size of single register.  */
+      if (!REG_P (SET_DEST (elt))
+          || !REG_P (XEXP (SET_SRC (elt), 0))
+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
+              ((count - 1 - offset_adj) * reg_increment))
+        return false;
+    }
+
+  i = i + offset_adj;
+  base = base + offset_adj;
+  /* Perform a quick check so we don't blow up below.  */
+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+    return false;
+
+  /* If only one reg being loaded, success depends on the type:
+     FLDMDD can do just one reg, LDM must do at least two.  */
+  if (count <= i)
+    return mode == DFmode ? true : false;
+
+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+  dest_regno = first_dest_regno;
+
+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+  if (GET_CODE (src_addr) == PLUS)
+    {
+      if (!CONST_INT_P (XEXP (src_addr, 1)))
+        return false;
+      offset = INTVAL (XEXP (src_addr, 1));
+      src_addr = XEXP (src_addr, 0);
+    }
+
+  if (!REG_P (src_addr))
+    return false;
+
+  /* The pattern we are trying to match here is:
+     [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (src_addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * <reg_increment>))))
+     ]
+     Where,
+     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+     3.  If consecutive is TRUE, then for kth register being loaded,
+         REGNO (R_dk) = REGNO (R_d0) + k.  */
+  for (; i < count; i++)
+    {
+      elt = XVECEXP (op, 0, i);
+
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_DEST (elt))
+          || GET_MODE (SET_DEST (elt)) != mode
+          || (consecutive
+              && (REGNO (SET_DEST (elt))
+                  != (unsigned int) (first_dest_regno + regs_per_val * (i - base))))
+          || REGNO (SET_DEST (elt)) <= dest_regno
+          || !MEM_P (SET_SRC (elt))
+          || GET_MODE (SET_SRC (elt)) != mode
+          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+               || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) !=
+                           (offset + (i - base) * reg_increment))
+              && (!REG_P (XEXP (SET_SRC (elt), 0))
+		  || offset + (i - base) * reg_increment != 0)))
+        return false;
+
+      dest_regno = REGNO (SET_DEST (elt));
+      if (dest_regno == REGNO (src_addr))
+        addr_reg_loaded = true;
+    }
+
+  if (update && addr_reg_loaded)
+    return false;
+
+  /* For Thumb-1, address register is always modified - either by write-back
+     or by explicit load.  If the pattern does not describe an update, it must
+     be because the address register is in the list of loaded registers.  */
+  if (TARGET_THUMB1)
+    return update || addr_reg_loaded;
+
+  return true;
+}
+
 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
    float elements), and a modified constant (whatever should be output for a
@@ -16092,6 +16223,137 @@ emit_multi_reg_push (unsigned long mask)
   return par;
 }
 
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.
+
+   There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
+   but currently the pattern that matches this in the MD file is only enabled
+   for Thumb-2.  */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+  int offset_adj = really_return ? 1 : 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+  if (really_return)
+    {
+      tmp = ret_rtx;
+      XVECEXP (par, 0, 0) = tmp;
+    }
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, offset_adj) = tmp;
+
+  /* Now restore every reg, which may include PC.  */
+  for (j = 0, i = 0; j < num_regs; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        reg = gen_rtx_REG (SImode, i);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (stack_pointer_rtx, 4 * j)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+        /* We need to maintain a sequence for DWARF info too.  As dwarf info
+           should not have PC, skip PC.  */
+        if (i != PC_REGNUM)
+          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        j++;
+      }
+
+  if (really_return)
+    par = emit_jump_insn (par);
+  else
+    par = emit_insn (par);
+
+  REG_NOTES (par) = dwarf;
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+
+  gcc_assert (num_regs && num_regs <= 32);
+
+  if (num_regs > 16)
+    {
+      thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+      thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+      return;
+    }
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* Increment the stack pointer, based on there being
+     num_regs 8-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, 0) = tmp;
+
+  /* now show EVERY reg that will be restored, using a SET for each.  */
+  for (j = 0, i=first_reg; j < num_regs; i += 2)
+    {
+      reg = gen_rtx_REG (DFmode, i);
+
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx,
+                                         8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, j + 1) = tmp;
+
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      j++;
+    }
+
+  par = emit_insn (par);
+  REG_NOTES (par) = dwarf;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -21622,7 +21884,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
 
 /* The bits which aren't usefully expanded as rtl.  */
 const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
 {
   arm_stack_offsets *offsets;
   int regno;
@@ -22191,7 +22453,6 @@ thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
-
 void
 thumb1_expand_epilogue (void)
 {
@@ -22246,6 +22507,242 @@ thumb1_expand_epilogue (void)
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+   POP instruction can be generated.  LR should be replaced by PC.  All
+   the checks required are already done by  USE_RETURN_INSN ().  Hence,
+   all we really need to check here is if single register is to be
+   returned, or multiple register return.  */
+void
+thumb2_expand_return (void)
+{
+  int i, num_regs;
+  unsigned long saved_regs_mask;
+  arm_stack_offsets *offsets;
+
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  if (saved_regs_mask)
+    {
+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (par) = 1;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          saved_regs_mask &= ~ (1 << LR_REGNUM);
+          saved_regs_mask |=   (1 << PC_REGNUM);
+          thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+        }
+    }
+  else
+    {
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+   Note that this RTL does not include the
+   Return insn, which is created separately and
+   handled in thumb2_output_return.  */
+void
+thumb2_expand_epilogue (void)
+{
+  HOST_WIDE_INT amount;
+  int reg;
+  unsigned long saved_regs_mask;
+  unsigned long func_type;
+  int i;
+  arm_stack_offsets *offsets;
+  int num_regs = 0;
+  bool really_return = false;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogues.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  /* At the end of the code of a function, the stack pointer will
+     be pointing at the outgoing args area, so we first need to
+     get it to point at the saved_regs area.  */
+
+  /* Determine how much to add to the stack pointer.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  /* In Thumb-2 mode, the frame pointer points to the last
+     saved register.  */
+  amount = offsets->outgoing_args - offsets->saved_regs;
+
+  if (frame_pointer_needed)
+    {
+      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+      amount = offsets->locals_base - offsets->saved_regs;
+    }
+
+  gcc_assert (amount >= 0);
+  if (amount)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           GEN_INT (amount)));
+
+  /* Emit a USE (stack_pointer_rtx), so that
+     the stack adjustment will not be deleted.  */
+  emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  /* Now handle any VFP restoration.  */
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         fldmdd operations.  The need to match groups is because,
+         unlike pop, fldmdd can only do consecutive regs.  */
+      for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+            && (!df_regs_ever_live_p (reg + 1)
+                || call_used_regs[reg + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to end_reg).  */
+            if (end_reg > reg + 2)
+              thumb2_emit_vfp_multi_reg_pop (reg + 2,
+                                             (end_reg - (reg + 2)) / 2);
+            end_reg = reg;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never fired).  */
+      if (end_reg > reg + 2)
+        thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+    }
+
+  /* iWMMXt is not supported when Thumb-2 in use.  If it were, we would
+     want to be restoring the appropriate iWMMXt regs here, in a similar
+     way to arm_output_epilogue.  */
+
+  /* If there are registers to restore, make it happen.  */
+  if (saved_regs_mask)
+    {
+      /* It's illegal to do a pop for only one reg, so generate an ldr.  */
+      if (num_regs == 1)
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx addr = gen_rtx_MEM (SImode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+              }
+        }
+
+      /* Two or more regs warrants the use of a multi-reg pop.  */
+      else
+        {
+          /* If multi-pop is last instruction, don't generate `branch to
+             return-address' instruction.  Instead, pop LR in PC.  */
+          if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+              && !IS_STACKALIGN (func_type)
+              && crtl->args.pretend_args_size == 0
+              && saved_regs_mask & (1 << LR_REGNUM)
+              && !crtl->calls_eh_return)
+            {
+              saved_regs_mask &= ~ (1 << LR_REGNUM);
+              saved_regs_mask |=   (1 << PC_REGNUM);
+              really_return = true;
+            }
+
+          thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+          if (really_return == true)
+            return;
+        }
+    }
+
+  /* Unwind the pre-pushed regs.  */
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  emit_jump_insn (ret_rtx);
+}
+
+
+/* Generate the appropriate instruction to return for Thumb-2.
+   OPERAND holds a condition, which must be passed to output_asm_insn.  */
+void
+thumb2_output_return (rtx operand)
+{
+  char instr[100];
+  unsigned long func_type;
+
+  func_type = arm_current_func_type ();
+
+  if (IS_NAKED (func_type))
+    /* Do nothing if naked function.  */
+    return;
+
+  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+    {
+      rtx op;
+
+      /* A volatile function should never return.  Call abort.  */
+      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+      assemble_external_libcall (op);
+      output_asm_insn ("bl\t%a0", &op);
+
+      return;
+    }
+
+  switch ((int) ARM_FUNC_TYPE (func_type))
+    {
+    case ARM_FT_ISR:
+    case ARM_FT_FIQ:
+      sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+      break;
+
+    case ARM_FT_EXCEPTION:
+      sprintf (instr, "movs\t%%|pc, %%|lr");
+      break;
+
+    default:
+      sprintf (instr, "bx\t%%|lr");
+      break;
+    }
+
+  output_asm_insn (instr, &operand);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..6cfb2da 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6669,6 +6669,141 @@
 			     FALSE, operands[1], &offset);
 })
 
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation"
+    [(set (match_operand:SI 1 "s_register_operand" "+rk")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    if (REGNO (operands[1]) == SP_REGNUM)
+      {
+        strcpy (pattern, \"pop\\t{\");
+      }
+    else
+      {
+        strcpy (pattern, \"ldm%(ia%)\\t\");
+        strcat (pattern, reg_names[REGNO (operands[1])]);
+        strcat (pattern, \"!, {\");
+      }
+
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+
+    /* Skip over the first element and the one we just generated.  */
+    for (i = 2; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+;; Pop with return (as used in epilogue RTL)
+;;
+;; This instruction is generated when the registers are popped at end of
+;; epilogue.  Here, instead of popping the value in LR and then generating
+;; jump to LR, value is popped in PC.  Hence, the pattern is combined with
+;; (return).
+(define_insn "*pop_multiple_with_stack_update_and_return"
+  [(match_parallel 0 "load_multiple_operation_return"
+    [(return)
+     (set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    strcpy (pattern, \"pop\\t{\");
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);
+
+    /* Skip over the first two elements and the one we just generated.  */
+    for (i = 3; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*vfp_pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_fp"
+    [(set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+     (set (match_operand:DF 3 "arm_hard_register_operand" "")
+          (mem:DF (match_dup 1)))])]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  "*
+  {
+    int num_regs = XVECLEN (operands[0], 0);
+    char pattern[100];
+    rtx op_list[2];
+    strcpy (pattern, \"fldmfdd\\t\");
+    strcat (pattern,
+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+    strcat (pattern, \"!, {\");
+    op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0);
+    strcat (pattern, \"%P0\");
+    if ((num_regs - 1) > 1)
+      {
+        strcat (pattern, \"-%P1\");
+        op_list [1] = XEXP (XVECEXP (operands[0], 0, num_regs - 1), 0);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, op_list);
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
                           (match_operand:SI 1 "" ""))
@@ -8486,8 +8621,19 @@
 
 (define_expand "return"
   [(return)]
-  "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
-  "")
+  "(TARGET_ARM || (TARGET_THUMB2
+                   && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
+                   && !IS_STACKALIGN (arm_current_func_type ())))
+    && USE_RETURN_INSN (FALSE)"
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        thumb2_expand_return ();
+        DONE;
+      }
+  }
+  ")
 
 ;; Often the return insn will be the same as loading from memory, so set attr
 (define_insn "*arm_return"
@@ -10529,6 +10675,11 @@
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
     thumb1_expand_epilogue ();
+  else if (TARGET_THUMB2)
+  {
+    thumb2_expand_epilogue ();
+    DONE;
+  }
   else if (USE_RETURN_INSN (FALSE))
     {
       emit_jump_insn (gen_return ());
@@ -10572,12 +10723,12 @@
 
 (define_insn "*epilogue_insns"
   [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
-  "TARGET_EITHER"
+  "TARGET_ARM || TARGET_THUMB1"
   "*
   if (TARGET_32BIT)
     return arm_output_epilogue (NULL);
   else /* TARGET_THUMB1 */
-    return thumb_unexpanded_epilogue ();
+    return thumb1_unexpanded_epilogue ();
   "
   ; Length is absolute worst case
   [(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..7e2203d 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -369,84 +369,22 @@
 (define_special_predicate "load_multiple_operation"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  unsigned dest_regno;
-  rtx src_addr;
-  HOST_WIDE_INT i = 1, base = 0;
-  HOST_WIDE_INT offset = 0;
-  rtx elt;
-  bool addr_reg_loaded = false;
-  bool update = false;
-
-  if (count <= 1
-      || GET_CODE (XVECEXP (op, 0, 0)) != SET
-      || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
-    return false;
-
-  /* Check to see if this might be a write-back.  */
-  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
-    {
-      i++;
-      base = 1;
-      update = true;
-
-      /* Now check it more carefully.  */
-      if (GET_CODE (SET_DEST (elt)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
-          || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
-        return false;
-    }
-
-  /* Perform a quick check so we don't blow up below.  */
-  if (count <= i
-      || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
-      || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
-      || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
-    return false;
-
-  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
-  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
-  if (GET_CODE (src_addr) == PLUS)
-    {
-      if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
-	return false;
-      offset = INTVAL (XEXP (src_addr, 1));
-      src_addr = XEXP (src_addr, 0);
-    }
-  if (!REG_P (src_addr))
-    return false;
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/false);
+})
 
-  for (; i < count; i++)
-    {
-      elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_return"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/true);
+})
 
-      if (GET_CODE (elt) != SET
-          || GET_CODE (SET_DEST (elt)) != REG
-          || GET_MODE (SET_DEST (elt)) != SImode
-          || REGNO (SET_DEST (elt)) <= dest_regno
-          || GET_CODE (SET_SRC (elt)) != MEM
-          || GET_MODE (SET_SRC (elt)) != SImode
-          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
-	       || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
-	       || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
-	       || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
-	      && (!REG_P (XEXP (SET_SRC (elt), 0))
-		  || offset + (i - base) * 4 != 0)))
-        return false;
-      dest_regno = REGNO (SET_DEST (elt));
-      if (dest_regno == REGNO (src_addr))
-        addr_reg_loaded = true;
-    }
-  /* For Thumb, we only have updating instructions.  If the pattern does
-     not describe an update, it must be because the address register is
-     in the list of loaded registers - on the hardware, this has the effect
-     of overriding the update.  */
-  if (update && addr_reg_loaded)
-    return false;
-  if (TARGET_THUMB1)
-    return update || addr_reg_loaded;
-  return true;
+(define_special_predicate "load_multiple_operation_fp"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/true,
+                                    DFmode, /*return_pc*/false);
 })
 
 (define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..78f4e81 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
    (set_attr "length" "20")]
 )
 
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
   [(return)]
-  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+  "(TARGET_THUMB2)"
   "*
   {
-    return output_return_instruction (const_true_rtx, TRUE, FALSE);
+    thumb2_output_return (const_true_rtx);
+    return \"\";
   }"
-  [(set_attr "type" "load1")
-   (set_attr "length" "12")]
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")]
 )
 
 (define_insn_and_split "thumb2_eh_return"

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-12-01 11:50             ` Sameera Deshpande
@ 2011-12-09 11:10               ` Ramana Radhakrishnan
  0 siblings, 0 replies; 15+ messages in thread
From: Ramana Radhakrishnan @ 2011-12-09 11:10 UTC (permalink / raw)
  To: Sameera Deshpande; +Cc: Xinyu Qi, gcc-patches

On Thu, Dec 01, 2011 at 11:49:55AM +0000, Sameera Deshpande wrote:

<A large portion snipped>

>
> --
> -;; Note: this is not predicable, to avoid issues with linker-generated
> -;; interworking stubs.
> -(define_insn "*thumb2_return"
> +(define_insn "*thumb2_rtl_epilogue_return"

Rename to *thumb2_return

>    [(return)]
> -  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
> +  "(TARGET_THUMB2)"

No need for paranthesis here. Otherwise OK.

cheers
Ramana

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-22 13:07           ` Ramana Radhakrishnan
  2011-11-23 10:55             ` Xinyu Qi
@ 2011-12-01 11:50             ` Sameera Deshpande
  2011-12-09 11:10               ` Ramana Radhakrishnan
  1 sibling, 1 reply; 15+ messages in thread
From: Sameera Deshpande @ 2011-12-01 11:50 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: Xinyu Qi, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 812 bytes --]

On Tue, 2011-11-22 at 10:37 +0000, Ramana Radhakrishnan wrote:

> Xinyu: I seem to have mis-remembered that one of your patches was
> turning on Thumb2 for wMMX.
> >
> > Ramana, in that case, should I add the change you suggested in ARM RTL
> > epilogue patch only?
> 
> The comment in Thumb2 epilogues should remain and yes - it should be
> added to the ARM RTL epilogue patch only. I'm also ok with that being
> in with a #if 0 around it but given it's in the epilogue whoever tries
> turning on Thumb2 for iwMMX will surely notice that in the first
> testrun :)

Ramana,

Please find attached updated patch which sets CFA_RESTORE note for
single register pop and fixing new ICEs in check-gcc at trunk.

The patch is tested with check-gcc, bootstrap and check-gdb without
regression.

-- 

[-- Attachment #2: thumb2_rtl_epilogue_final.patch --]
[-- Type: text/x-patch, Size: 30751 bytes --]

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..2c38883 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,6 +65,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
 					   int *);
@@ -176,10 +177,13 @@ extern int arm_float_words_big_endian (void);
 
 /* Thumb functions.  */
 extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
 extern void thumb1_expand_prologue (void);
 extern void thumb1_expand_epilogue (void);
 extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
 #ifdef TREE_CODE
 extern int is_called_in_ARM_mode (tree);
 #endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e3b0b88..40c8b44 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8906,6 +8906,139 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
 #undef CHECK
 }
 
+/* Return true if OP is a valid load multiple operation for MODE mode.
+   CONSECUTIVE is true if the registers in the operation must form
+   a consecutive sequence in the register bank.  STACK_ONLY is true
+   if the base register must be the stack pointer.  RETURN_PC is true
+   if value is to be loaded in PC.  */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
+                           bool return_pc)
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  unsigned dest_regno, first_dest_regno;
+  rtx src_addr;
+  HOST_WIDE_INT i = 1, base = 0;
+  HOST_WIDE_INT offset = 0;
+  rtx elt;
+  bool addr_reg_loaded = false;
+  bool update = false;
+  int reg_increment, regs_per_val;
+  int offset_adj;
+
+  /* If DFmode, we must be asking for consecutive,
+     since fldmdd can only do consecutive regs.  */
+  gcc_assert ((mode != DFmode) || consecutive);
+
+  /* Set up the increments and the regs per val based on the mode.  */
+  reg_increment = GET_MODE_SIZE (mode);
+  regs_per_val = mode == DFmode ? 2 : 1;
+  offset_adj = return_pc ? 1 : 0;
+
+  if (count <= 1
+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+    return false;
+
+  /* Check to see if this might be a write-back.  */
+  elt = XVECEXP (op, 0, offset_adj);
+  if (GET_CODE (SET_SRC (elt)) == PLUS)
+    {
+      i++;
+      base = 1;
+      update = true;
+
+      /* The offset adjustment should be same as number of registers being
+         popped * size of single register.  */
+      if (!REG_P (SET_DEST (elt))
+          || !REG_P (XEXP (SET_SRC (elt), 0))
+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
+              ((count - 1 - offset_adj) * reg_increment))
+        return false;
+    }
+
+  i = i + offset_adj;
+  base = base + offset_adj;
+  /* Perform a quick check so we don't blow up below.  */
+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+    return false;
+
+  /* If only one reg being loaded, success depends on the type:
+     FLDMDD can do just one reg, LDM must do at least two.  */
+  if (count <= i)
+    return mode == DFmode ? true : false;
+
+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+  dest_regno = first_dest_regno;
+
+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+  if (GET_CODE (src_addr) == PLUS)
+    {
+      if (!CONST_INT_P (XEXP (src_addr, 1)))
+        return false;
+      offset = INTVAL (XEXP (src_addr, 1));
+      src_addr = XEXP (src_addr, 0);
+    }
+
+  if (!REG_P (src_addr))
+    return false;
+
+  /* The pattern we are trying to match here is:
+     [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (src_addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * <reg_increment>))))
+     ]
+     Where,
+     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+     3.  If consecutive is TRUE, then for kth register being loaded,
+         REGNO (R_dk) = REGNO (R_d0) + k.  */
+  for (; i < count; i++)
+    {
+      elt = XVECEXP (op, 0, i);
+
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_DEST (elt))
+          || GET_MODE (SET_DEST (elt)) != mode
+          || (consecutive
+              && (REGNO (SET_DEST (elt))
+                  != (unsigned int) (first_dest_regno
+                                     + regs_per_val * (i - base))))
+          || REGNO (SET_DEST (elt)) <= dest_regno
+          || !MEM_P (SET_SRC (elt))
+          || GET_MODE (SET_SRC (elt)) != mode
+          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+               || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) !=
+                           (offset + (i - base) * reg_increment))
+              && (!REG_P (XEXP (SET_SRC (elt), 0))
+		  || offset + (i - base) * reg_increment != 0)))
+        return false;
+
+      dest_regno = REGNO (SET_DEST (elt));
+      if (dest_regno == REGNO (src_addr))
+        addr_reg_loaded = true;
+    }
+
+  if (update && addr_reg_loaded)
+    return false;
+
+  /* For Thumb-1, address register is always modified - either by write-back
+     or by explicit load.  If the pattern does not describe an update, it must
+     be because the address register is in the list of loaded registers.  */
+  if (TARGET_THUMB1)
+    return update || addr_reg_loaded;
+
+  return true;
+}
+
 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
    float elements), and a modified constant (whatever should be output for a
@@ -16097,6 +16230,137 @@ emit_multi_reg_push (unsigned long mask)
   return par;
 }
 
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.
+
+   There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
+   but currently the pattern that matches this in the MD file is only enabled
+   for Thumb-2.  */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+  int offset_adj = really_return ? 1 : 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+  if (really_return)
+    {
+      tmp = ret_rtx;
+      XVECEXP (par, 0, 0) = tmp;
+    }
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, offset_adj) = tmp;
+
+  /* Now restore every reg, which may include PC.  */
+  for (j = 0, i = 0; j < num_regs; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        reg = gen_rtx_REG (SImode, i);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (stack_pointer_rtx, 4 * j)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+        /* We need to maintain a sequence for DWARF info too.  As dwarf info
+           should not have PC, skip PC.  */
+        if (i != PC_REGNUM)
+          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        j++;
+      }
+
+  if (really_return)
+    par = emit_jump_insn (par);
+  else
+    par = emit_insn (par);
+
+  REG_NOTES (par) = dwarf;
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+
+  gcc_assert (num_regs && num_regs <= 32);
+
+  if (num_regs > 16)
+    {
+      thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+      thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+      return;
+    }
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* Increment the stack pointer, based on there being
+     num_regs 8-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, 0) = tmp;
+
+  /* Now show every reg that will be restored, using a SET for each.  */
+  for (j = 0, i=first_reg; j < num_regs; i += 2)
+    {
+      reg = gen_rtx_REG (DFmode, i);
+
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx,
+                                         8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, j + 1) = tmp;
+
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      j++;
+    }
+
+  par = emit_insn (par);
+  REG_NOTES (par) = dwarf;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -21622,7 +21886,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
 
 /* The bits which aren't usefully expanded as rtl.  */
 const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
 {
   arm_stack_offsets *offsets;
   int regno;
@@ -22191,7 +22455,6 @@ thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
-
 void
 thumb1_expand_epilogue (void)
 {
@@ -22248,6 +22511,247 @@ thumb1_expand_epilogue (void)
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+   POP instruction can be generated.  LR should be replaced by PC.  All
+   the checks required are already done by  USE_RETURN_INSN ().  Hence,
+   all we really need to check here is if single register is to be
+   returned, or multiple register return.  */
+void
+thumb2_expand_return (void)
+{
+  int i, num_regs;
+  unsigned long saved_regs_mask;
+  arm_stack_offsets *offsets;
+
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  if (saved_regs_mask)
+    {
+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          saved_regs_mask &= ~ (1 << LR_REGNUM);
+          saved_regs_mask |=   (1 << PC_REGNUM);
+          thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+        }
+    }
+  else
+    {
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+   Note that this RTL does not include the
+   Return insn, which is created separately and
+   handled in thumb2_output_return.  */
+void
+thumb2_expand_epilogue (void)
+{
+  HOST_WIDE_INT amount;
+  int reg;
+  unsigned long saved_regs_mask;
+  unsigned long func_type;
+  int i;
+  arm_stack_offsets *offsets;
+  int num_regs = 0;
+  bool really_return = false;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogues.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  /* At the end of the code of a function, the stack pointer will
+     be pointing at the outgoing args area, so we first need to
+     get it to point at the saved_regs area.  */
+
+  /* Determine how much to add to the stack pointer.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  /* In Thumb-2 mode, the frame pointer points to the last
+     saved register.  */
+  amount = offsets->outgoing_args - offsets->saved_regs;
+
+  if (frame_pointer_needed)
+    {
+      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+      amount = offsets->locals_base - offsets->saved_regs;
+    }
+
+  gcc_assert (amount >= 0);
+  if (amount)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           GEN_INT (amount)));
+
+  /* Emit a USE (stack_pointer_rtx), so that
+     the stack adjustment will not be deleted.  */
+  emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  /* Now handle any VFP restoration.  */
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         fldmdd operations.  The need to match groups is because,
+         unlike pop, fldmdd can only do consecutive regs.  */
+      for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+            && (!df_regs_ever_live_p (reg + 1)
+                || call_used_regs[reg + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to end_reg).  */
+            if (end_reg > reg + 2)
+              thumb2_emit_vfp_multi_reg_pop (reg + 2,
+                                             (end_reg - (reg + 2)) / 2);
+            end_reg = reg;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never fired).  */
+      if (end_reg > reg + 2)
+        thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+    }
+
+  /* iWMMXt is not supported when Thumb-2 in use.  If it were, we would
+     want to be restoring the appropriate iWMMXt regs here, in a similar
+     way to arm_output_epilogue.  */
+
+  /* If there are registers to restore, make it happen.  */
+  if (saved_regs_mask)
+    {
+      /* Pop can only be used for more than one reg, so generate an ldr.  */
+      if (num_regs == 1)
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx insn;
+                rtx addr = gen_rtx_MEM (SImode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+                REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                                   gen_rtx_REG (SImode, i),
+                                                   NULL_RTX);
+              }
+        }
+
+      /* Two or more regs warrants the use of a multi-reg pop.  */
+      else
+        {
+          /* If multi-pop is last instruction, don't generate `branch to
+             return-address' instruction.  Instead, pop LR in PC.  */
+          if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+              && !IS_STACKALIGN (func_type)
+              && crtl->args.pretend_args_size == 0
+              && saved_regs_mask & (1 << LR_REGNUM)
+              && !crtl->calls_eh_return)
+            {
+              saved_regs_mask &= ~ (1 << LR_REGNUM);
+              saved_regs_mask |=   (1 << PC_REGNUM);
+              really_return = true;
+            }
+
+          thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+          if (really_return == true)
+            return;
+        }
+    }
+
+  /* Unwind the pre-pushed regs.  */
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  emit_jump_insn (ret_rtx);
+}
+
+
+/* Generate the appropriate instruction to return for Thumb-2.
+   OPERAND holds a condition, which must be passed to output_asm_insn.  */
+void
+thumb2_output_return (rtx operand)
+{
+  char instr[100];
+  unsigned long func_type;
+
+  func_type = arm_current_func_type ();
+
+  if (IS_NAKED (func_type))
+    /* Do nothing if naked function.  */
+    return;
+
+  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+    {
+      rtx op;
+
+      /* A volatile function should never return.  Call abort.  */
+      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+      assemble_external_libcall (op);
+      output_asm_insn ("bl\t%a0", &op);
+
+      return;
+    }
+
+  switch ((int) ARM_FUNC_TYPE (func_type))
+    {
+    case ARM_FT_ISR:
+    case ARM_FT_FIQ:
+      sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+      break;
+
+    case ARM_FT_EXCEPTION:
+      sprintf (instr, "movs\t%%|pc, %%|lr");
+      break;
+
+    default:
+      sprintf (instr, "bx\t%%|lr");
+      break;
+    }
+
+  output_asm_insn (instr, &operand);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..6cfb2da 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6669,6 +6669,141 @@
 			     FALSE, operands[1], &offset);
 })
 
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation"
+    [(set (match_operand:SI 1 "s_register_operand" "+rk")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    if (REGNO (operands[1]) == SP_REGNUM)
+      {
+        strcpy (pattern, \"pop\\t{\");
+      }
+    else
+      {
+        strcpy (pattern, \"ldm%(ia%)\\t\");
+        strcat (pattern, reg_names[REGNO (operands[1])]);
+        strcat (pattern, \"!, {\");
+      }
+
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+
+    /* Skip over the first element and the one we just generated.  */
+    for (i = 2; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+;; Pop with return (as used in epilogue RTL)
+;;
+;; This instruction is generated when the registers are popped at end of
+;; epilogue.  Here, instead of popping the value in LR and then generating
+;; jump to LR, value is popped in PC.  Hence, the pattern is combined with
+;; (return).
+(define_insn "*pop_multiple_with_stack_update_and_return"
+  [(match_parallel 0 "load_multiple_operation_return"
+    [(return)
+     (set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    strcpy (pattern, \"pop\\t{\");
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);
+
+    /* Skip over the first two elements and the one we just generated.  */
+    for (i = 3; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*vfp_pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_fp"
+    [(set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+     (set (match_operand:DF 3 "arm_hard_register_operand" "")
+          (mem:DF (match_dup 1)))])]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  "*
+  {
+    int num_regs = XVECLEN (operands[0], 0);
+    char pattern[100];
+    rtx op_list[2];
+    strcpy (pattern, \"fldmfdd\\t\");
+    strcat (pattern,
+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+    strcat (pattern, \"!, {\");
+    op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0);
+    strcat (pattern, \"%P0\");
+    if ((num_regs - 1) > 1)
+      {
+        strcat (pattern, \"-%P1\");
+        op_list [1] = XEXP (XVECEXP (operands[0], 0, num_regs - 1), 0);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, op_list);
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
                           (match_operand:SI 1 "" ""))
@@ -8486,8 +8621,19 @@
 
 (define_expand "return"
   [(return)]
-  "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
-  "")
+  "(TARGET_ARM || (TARGET_THUMB2
+                   && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
+                   && !IS_STACKALIGN (arm_current_func_type ())))
+    && USE_RETURN_INSN (FALSE)"
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        thumb2_expand_return ();
+        DONE;
+      }
+  }
+  ")
 
 ;; Often the return insn will be the same as loading from memory, so set attr
 (define_insn "*arm_return"
@@ -10529,6 +10675,11 @@
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
     thumb1_expand_epilogue ();
+  else if (TARGET_THUMB2)
+  {
+    thumb2_expand_epilogue ();
+    DONE;
+  }
   else if (USE_RETURN_INSN (FALSE))
     {
       emit_jump_insn (gen_return ());
@@ -10572,12 +10723,12 @@
 
 (define_insn "*epilogue_insns"
   [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
-  "TARGET_EITHER"
+  "TARGET_ARM || TARGET_THUMB1"
   "*
   if (TARGET_32BIT)
     return arm_output_epilogue (NULL);
   else /* TARGET_THUMB1 */
-    return thumb_unexpanded_epilogue ();
+    return thumb1_unexpanded_epilogue ();
   "
   ; Length is absolute worst case
   [(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..7e2203d 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -369,84 +369,22 @@
 (define_special_predicate "load_multiple_operation"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  unsigned dest_regno;
-  rtx src_addr;
-  HOST_WIDE_INT i = 1, base = 0;
-  HOST_WIDE_INT offset = 0;
-  rtx elt;
-  bool addr_reg_loaded = false;
-  bool update = false;
-
-  if (count <= 1
-      || GET_CODE (XVECEXP (op, 0, 0)) != SET
-      || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
-    return false;
-
-  /* Check to see if this might be a write-back.  */
-  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
-    {
-      i++;
-      base = 1;
-      update = true;
-
-      /* Now check it more carefully.  */
-      if (GET_CODE (SET_DEST (elt)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
-          || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
-        return false;
-    }
-
-  /* Perform a quick check so we don't blow up below.  */
-  if (count <= i
-      || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
-      || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
-      || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
-    return false;
-
-  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
-  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
-  if (GET_CODE (src_addr) == PLUS)
-    {
-      if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
-	return false;
-      offset = INTVAL (XEXP (src_addr, 1));
-      src_addr = XEXP (src_addr, 0);
-    }
-  if (!REG_P (src_addr))
-    return false;
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/false);
+})
 
-  for (; i < count; i++)
-    {
-      elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_return"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/true);
+})
 
-      if (GET_CODE (elt) != SET
-          || GET_CODE (SET_DEST (elt)) != REG
-          || GET_MODE (SET_DEST (elt)) != SImode
-          || REGNO (SET_DEST (elt)) <= dest_regno
-          || GET_CODE (SET_SRC (elt)) != MEM
-          || GET_MODE (SET_SRC (elt)) != SImode
-          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
-	       || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
-	       || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
-	       || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
-	      && (!REG_P (XEXP (SET_SRC (elt), 0))
-		  || offset + (i - base) * 4 != 0)))
-        return false;
-      dest_regno = REGNO (SET_DEST (elt));
-      if (dest_regno == REGNO (src_addr))
-        addr_reg_loaded = true;
-    }
-  /* For Thumb, we only have updating instructions.  If the pattern does
-     not describe an update, it must be because the address register is
-     in the list of loaded registers - on the hardware, this has the effect
-     of overriding the update.  */
-  if (update && addr_reg_loaded)
-    return false;
-  if (TARGET_THUMB1)
-    return update || addr_reg_loaded;
-  return true;
+(define_special_predicate "load_multiple_operation_fp"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/true,
+                                    DFmode, /*return_pc*/false);
 })
 
 (define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..78f4e81 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
    (set_attr "length" "20")]
 )
 
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
   [(return)]
-  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+  "(TARGET_THUMB2)"
   "*
   {
-    return output_return_instruction (const_true_rtx, TRUE, FALSE);
+    thumb2_output_return (const_true_rtx);
+    return \"\";
   }"
-  [(set_attr "type" "load1")
-   (set_attr "length" "12")]
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")]
 )
 
 (define_insn_and_split "thumb2_eh_return"

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-22 13:07           ` Ramana Radhakrishnan
@ 2011-11-23 10:55             ` Xinyu Qi
  2011-12-01 11:50             ` Sameera Deshpande
  1 sibling, 0 replies; 15+ messages in thread
From: Xinyu Qi @ 2011-11-23 10:55 UTC (permalink / raw)
  To: Ramana Radhakrishnan, Sameera Deshpande; +Cc: gcc-patches

At 2011-11-22 18:37:16,"Ramana Radhakrishnan" <ramana.radhakrishnan@linaro.org> wrote: 
> >>
> >> Though I don't fully understand this patch, I think it is unnecessary to
> consider turn on Thumb2 for iwmmxt,
> >> because there is no chip that supports WMMX instructions with thumb2
> mode.
> 
> Xinyu: I seem to have mis-remembered that one of your patches was
> turning on Thumb2 for wMMX.

Hi Ramana,
I remember I just enable iwmmxt with vfp in the first part of my patch.

> 
> >
> > Ramana, in that case, should I add the change you suggested in ARM RTL
> > epilogue patch only?
> 
> The comment in Thumb2 epilogues should remain and yes - it should be
> added to the ARM RTL epilogue patch only. I'm also ok with that being
> in with a #if 0 around it but given it's in the epilogue whoever tries
> turning on Thumb2 for iwMMX will surely notice that in the first
> testrun :)
> 
> I'll try and find some time later today to apply both the patches and
> see where I get to.
> 
> cheers
> Ramana
> 
> >
> > --
> >
> >
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-22 12:16         ` Sameera Deshpande
@ 2011-11-22 13:07           ` Ramana Radhakrishnan
  2011-11-23 10:55             ` Xinyu Qi
  2011-12-01 11:50             ` Sameera Deshpande
  0 siblings, 2 replies; 15+ messages in thread
From: Ramana Radhakrishnan @ 2011-11-22 13:07 UTC (permalink / raw)
  To: Sameera Deshpande; +Cc: Xinyu Qi, gcc-patches

>>
>> Though I don't fully understand this patch, I think it is unnecessary to consider turn on Thumb2 for iwmmxt,
>> because there is no chip that supports WMMX instructions with thumb2 mode.

Xinyu: I seem to have mis-remembered that one of your patches was
turning on Thumb2 for wMMX.

>
> Ramana, in that case, should I add the change you suggested in ARM RTL
> epilogue patch only?

The comment in Thumb2 epilogues should remain and yes - it should be
added to the ARM RTL epilogue patch only. I'm also ok with that being
in with a #if 0 around it but given it's in the epilogue whoever tries
turning on Thumb2 for iwMMX will surely notice that in the first
testrun :)

I'll try and find some time later today to apply both the patches and
see where I get to.

cheers
Ramana

>
> --
>
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-22  5:14       ` Xinyu Qi
@ 2011-11-22 12:16         ` Sameera Deshpande
  2011-11-22 13:07           ` Ramana Radhakrishnan
  0 siblings, 1 reply; 15+ messages in thread
From: Sameera Deshpande @ 2011-11-22 12:16 UTC (permalink / raw)
  To: Xinyu Qi; +Cc: Ramana Radhakrishnan, gcc-patches

On Tue, 2011-11-22 at 01:55 +0000, Xinyu Qi wrote:
> At 2011-11-19 07:11:17,"Ramana Radhakrishnan" <ramana.radhakrishnan@linaro.org> wrote:
> > On 10 November 2011 18:07, Sameera Deshpande
> > <sameera.deshpande@arm.com> wrote:
> > > Please find attached the reworked patch.
> > 
> > OK but for a very small bit .
> > 
> > I'll note that we need to add support for the iwMMXt registers but the
> > attached patch (untested) should be what is needed to support the iwMMxt
> > registers and this should be something that should be incorporated into your
> > ARM RTL epilogues patch as well. My understanding is that this is all you
> > should need to do as there is a wldrd as a part of *iwmmx*_movdi instruction
> > and therefore this should just work in this form.
> > 
> > Xinyu can you help in testing this patch once this is applied along with your
> > other patches to turn on Thumb2 for iwmmxt ?
> > 
> > cheers
> > Ramana
> 
> Hi, Ramana,
> 
> Though I don't fully understand this patch, I think it is unnecessary to consider turn on Thumb2 for iwmmxt,
> because there is no chip that supports WMMX instructions with thumb2 mode.
> 
> Thanks,
> Xinyu
> 

Ramana, in that case, should I add the change you suggested in ARM RTL
epilogue patch only?

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-19  0:01     ` Ramana Radhakrishnan
@ 2011-11-22  5:14       ` Xinyu Qi
  2011-11-22 12:16         ` Sameera Deshpande
  0 siblings, 1 reply; 15+ messages in thread
From: Xinyu Qi @ 2011-11-22  5:14 UTC (permalink / raw)
  To: Ramana Radhakrishnan, Sameera Deshpande; +Cc: gcc-patches

At 2011-11-19 07:11:17,"Ramana Radhakrishnan" <ramana.radhakrishnan@linaro.org> wrote:
> On 10 November 2011 18:07, Sameera Deshpande
> <sameera.deshpande@arm.com> wrote:
> > Please find attached the reworked patch.
> 
> OK but for a very small bit .
> 
> I'll note that we need to add support for the iwMMXt registers but the
> attached patch (untested) should be what is needed to support the iwMMxt
> registers and this should be something that should be incorporated into your
> ARM RTL epilogues patch as well. My understanding is that this is all you
> should need to do as there is a wldrd as a part of *iwmmx*_movdi instruction
> and therefore this should just work in this form.
> 
> Xinyu can you help in testing this patch once this is applied along with your
> other patches to turn on Thumb2 for iwmmxt ?
> 
> cheers
> Ramana

Hi, Ramana,

Though I don't fully understand this patch, I think it is unnecessary to consider turn on Thumb2 for iwmmxt,
because there is no chip that supports WMMX instructions with thumb2 mode.

Thanks,
Xinyu

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-10 19:07   ` Sameera Deshpande
@ 2011-11-19  0:01     ` Ramana Radhakrishnan
  2011-11-22  5:14       ` Xinyu Qi
  0 siblings, 1 reply; 15+ messages in thread
From: Ramana Radhakrishnan @ 2011-11-19  0:01 UTC (permalink / raw)
  To: Sameera Deshpande; +Cc: gcc-patches, Xinyu Qi

[-- Attachment #1: Type: text/plain, Size: 716 bytes --]

On 10 November 2011 18:07, Sameera Deshpande <sameera.deshpande@arm.com> wrote:
> Please find attached the reworked patch.

OK but for a very small bit .

I'll note that we need to add support for the iwMMXt registers but the
attached patch (untested) should be what is needed to support the
iwMMxt registers and this should be something that should be
incorporated into your ARM RTL epilogues patch as well. My
understanding is that this is all you should need to do as there is a
wldrd as a part of *iwmmx*_movdi instruction and therefore this should
just work in this form.

Xinyu can you help in testing this patch once this is applied along
with your other patches to turn on Thumb2 for iwmmxt ?

cheers
Ramana

[-- Attachment #2: marvell-patch-epilogue.txt --]
[-- Type: text/plain, Size: 1701 bytes --]

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 708142e..1fb86fd 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -264,7 +264,7 @@ static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
 static unsigned int arm_autovectorize_vector_sizes (void);
 static int arm_default_branch_cost (bool, bool);
 static int arm_cortex_a5_branch_cost (bool, bool);
-
+static void arm_epilogue_restore_iwmmxt_regs (void);
 \f
 /* Table of machine attributes.  */
 static const struct attribute_spec arm_attribute_table[] =
@@ -22563,6 +22563,30 @@ thumb2_expand_return (void)
     }
 }
 
+static void
+arm_epilogue_restore_iwmmxt_regs (void)
+{
+									
+  int reg;
+  for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
+    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+      {
+	rtx tmp = gen_rtx_SET (DImode,
+			       gen_rtx_REG (reg, DImode),
+			       gen_frame_mem (DImode, 
+					      gen_rtx_POST_INC 
+					      (DImode,
+					       stack_pointer_rtx)));
+	rtx dwarf = alloc_reg_note (REG_CFA_RESTORE,
+				    gen_rtx_REG (reg, DImode),
+				    NULL_RTX);
+	
+	RTX_FRAME_RELATED_P (tmp) = 1;
+	emit_insn (tmp);
+      }
+  
+}
+
 /* Generate RTL to represent a Thumb-2 epilogue.
 
    Note that this RTL does not include the
@@ -22652,6 +22676,10 @@ thumb2_expand_epilogue (void)
   /* iWMMXt is not supported when Thumb-2 in use.  If it were, we would
      want to be restoring the appropriate iWMMXt regs here, in a similar
      way to arm_output_epilogue.  */
+  if (TARGET_IWMMXT)
+    {
+      arm_epilogue_restore_iwmmxt_regs ();
+    }
 
   /* If there are registers to restore, make it happen.  */
   if (saved_regs_mask)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-10 14:28 ` Richard Earnshaw
  2011-11-10 15:28   ` Sameera Deshpande
@ 2011-11-10 19:07   ` Sameera Deshpande
  2011-11-19  0:01     ` Ramana Radhakrishnan
  1 sibling, 1 reply; 15+ messages in thread
From: Sameera Deshpande @ 2011-11-10 19:07 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5714 bytes --]

On Thu, 2011-11-10 at 13:44 +0000, Richard Earnshaw wrote:
> On 28/09/11 17:15, Sameera Deshpande wrote:
> > Hi!
> > 
> > This patch generates Thumb2 epilogues in RTL form.
> > 
> > The work involves defining new functions, predicates and patterns along with
> > few changes in existing code:
> > * The load_multiple_operation predicate was found to be too restrictive for
> > integer loads as it required consecutive destination regs, so this
> > restriction was lifted.
> > * Variations of load_multiple_operation were required to handle cases 
> >    - where SP must be the base register 
> >    - where FP values were being loaded (which do require consecutive
> > destination registers)
> >    - where PC can be in register-list (which requires return pattern along
> > with register loads).
> >   Hence, the common code was factored out into a new function in arm.c and
> > parameterised to show 
> >    - whether consecutive destination regs are needed
> >    - the data type being loaded 
> >    - whether the base register has to be SP
> >    - whether PC is in register-list
> > 
> > The patch is tested with arm-eabi with no regressions.
> > 
> > ChangeLog:
> > 
> > 2011-09-28  Ian Bolton         <ian.bolton@arm.com>
> >             Sameera Deshpande  <sameera.deshpande@arm.com>
> >            
> >        * config/arm/arm-protos.h (load_multiple_operation_p): New
> > declaration.
> >          (thumb2_expand_epilogue): Likewise.
> >          (thumb2_output_return): Likewise
> >          (thumb2_expand_return): Likewise.
> >          (thumb_unexpanded_epilogue): Rename to... 
> >          (thumb1_unexpanded_epilogue): ...this 
> >        * config/arm/arm.c (load_multiple_operation_p): New function. 
> >          (thumb2_emit_multi_reg_pop): Likewise.
> >          (thumb2_emit_vfp_multi_reg_pop): Likewise.
> >          (thumb2_expand_return): Likewise. 
> >          (thumb2_expand_epilogue): Likewise. 
> >          (thumb2_output_return): Likewise
> >          (thumb_unexpanded_epilogue): Rename to...
> >          ( thumb1_unexpanded_epilogue): ...this
> >        * config/arm/arm.md (pop_multiple_with_stack_update): New pattern. 
> >          (pop_multiple_with_stack_update_and_return): Likewise.
> >          (thumb2_ldr_with_return): Likewise.
> >          (floating_point_pop_multiple_with_stack_update): Likewise.
> >          (return): Update condition and code for pattern.
> >          (arm_return): Likewise.
> >          (epilogue_insns): Likewise.
> >        * config/arm/predicates.md (load_multiple_operation): Update
> > predicate.
> >          (load_multiple_operation_stack_and_return): New predicate. 
> >          (load_multiple_operation_stack): Likewise.
> >          (load_multiple_operation_stack_fp): Likewise.
> >        * config/arm/thumb2.md (thumb2_return): Remove.
> >          (thumb2_rtl_epilogue_return): New pattern.
> > 
> > 
> > - Thanks and regards,
> >   Sameera D.
> > 
> > 
> > thumb2_rtl_epilogue_complete-27Sept.patch
> > 
> 
> +  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
> 
> It's generally best not to use assignments within conditionals unless
> there is a strong reason otherwise (that normally implies something like
> being deep within a condition test where you only want to update the
> variable if some pre-conditions are true and that can't be easily
> factored out).
> 
> +                  != (unsigned int) (first_dest_regno + regs_per_val *
> (i - base))))
> 
> Line length (split the line just before the '+' operator.
> 
> +  /* now show EVERY reg that will be restored, using a SET for each.  */
> 
> Capital letter at start of sentence.  Why is EVERY in caps?
> 
> +  saved_regs_mask = offsets->saved_regs_mask;
> +  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
> 
> blank line before the for loop.
> 
> +      /* It's illegal to do a pop for only one reg, so generate an ldr.  */
> 
> GCC coding standards suggest avoiding the use of 'illegal'.  Suggest
> changing that to 'Pop can only be used for more than one reg; so...'
> 
> +                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2),
> 0))]);
> +
> +    /* Skip over the first two elements and the one we just generated.
>  */
> +    for (i = 3; i < (num_saves); i++)
> +      {
> +        strcat (pattern, \", %|\");
> 
> +        strcat (pattern,
> 
> +                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i),
> 0))]);
> +      }
> +
> +    strcat (pattern, \"}\");
> +    output_asm_insn (pattern, operands);
> +
> 
> +    return \"\";
> +  }
> +  "
> 
> +  [(set_attr "type" "load4")]
> 
> There's a lot of trailing white space here.  Please remove.
> 
> +(define_insn "*thumb2_ldr_with_return"
> +  [(return)
> +   (set (reg:SI PC_REGNUM)
> +        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand"
> "k"))))]
> +  "TARGET_THUMB2"
> +  "ldr%?\t%|pc, [%0], #4"
> +  [(set_attr "type" "load1")
> +   (set_attr "predicable" "yes")]
> +)
> +
> 
> This pattern doesn't seem to be used.  What's its purpose?
> 
> +    static const struct { const char *const name; } table[]
> +                  = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},
> 
> I'm not keen on having this table.  Generally the register names should
> be configurable depending on the assembler flavour and this patch
> defeats that.  Is there any way to rewrite this code so that it can use
> the standard operand methods for generating register names?
> 
> In summary, this is mostly OK, apart from the last two items.
> 
> R.

Richard,

Please find attached the reworked patch.

-- 

[-- Attachment #2: thumb2_rtl_epilogue_complete-10Nov.patch --]
[-- Type: text/x-patch, Size: 30488 bytes --]

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..2c38883 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,6 +65,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
 					   int *);
@@ -176,10 +177,13 @@ extern int arm_float_words_big_endian (void);
 
 /* Thumb functions.  */
 extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
 extern void thumb1_expand_prologue (void);
 extern void thumb1_expand_epilogue (void);
 extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
 #ifdef TREE_CODE
 extern int is_called_in_ARM_mode (tree);
 #endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e07c8c3..63de8bc 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8906,6 +8906,139 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
 #undef CHECK
 }
 
+/* Return true if OP is a valid load multiple operation for MODE mode.
+   CONSECUTIVE is true if the registers in the operation must form
+   a consecutive sequence in the register bank.  STACK_ONLY is true
+   if the base register must be the stack pointer.  RETURN_PC is true
+   if value is to be loaded in PC.  */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
+                           bool return_pc)
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  unsigned dest_regno, first_dest_regno;
+  rtx src_addr;
+  HOST_WIDE_INT i = 1, base = 0;
+  HOST_WIDE_INT offset = 0;
+  rtx elt;
+  bool addr_reg_loaded = false;
+  bool update = false;
+  int reg_increment, regs_per_val;
+  int offset_adj;
+
+  /* If DFmode, we must be asking for consecutive,
+     since fldmdd can only do consecutive regs.  */
+  gcc_assert ((mode != DFmode) || consecutive);
+
+  /* Set up the increments and the regs per val based on the mode.  */
+  reg_increment = GET_MODE_SIZE (mode);
+  regs_per_val = mode == DFmode ? 2 : 1;
+  offset_adj = return_pc ? 1 : 0;
+
+  if (count <= 1
+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+    return false;
+
+  /* Check to see if this might be a write-back.  */
+  elt = XVECEXP (op, 0, offset_adj);
+  if (GET_CODE (SET_SRC (elt)) == PLUS)
+    {
+      i++;
+      base = 1;
+      update = true;
+
+      /* The offset adjustment should be same as number of registers being
+         popped * size of single register.  */
+      if (!REG_P (SET_DEST (elt))
+          || !REG_P (XEXP (SET_SRC (elt), 0))
+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
+              ((count - 1 - offset_adj) * reg_increment))
+        return false;
+    }
+
+  i = i + offset_adj;
+  base = base + offset_adj;
+  /* Perform a quick check so we don't blow up below.  */
+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+    return false;
+
+  /* If only one reg being loaded, success depends on the type:
+     FLDMDD can do just one reg, LDM must do at least two.  */
+  if (count <= i)
+    return mode == DFmode ? true : false;
+
+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+  dest_regno = first_dest_regno;
+
+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+  if (GET_CODE (src_addr) == PLUS)
+    {
+      if (!CONST_INT_P (XEXP (src_addr, 1)))
+        return false;
+      offset = INTVAL (XEXP (src_addr, 1));
+      src_addr = XEXP (src_addr, 0);
+    }
+
+  if (!REG_P (src_addr))
+    return false;
+
+  /* The pattern we are trying to match here is:
+     [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (src_addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * <reg_increment>))))
+     ]
+     Where,
+     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+     3.  If consecutive is TRUE, then for kth register being loaded,
+         REGNO (R_dk) = REGNO (R_d0) + k.  */
+  for (; i < count; i++)
+    {
+      elt = XVECEXP (op, 0, i);
+
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_DEST (elt))
+          || GET_MODE (SET_DEST (elt)) != mode
+          || (consecutive
+              && (REGNO (SET_DEST (elt))
+                  != (unsigned int) (first_dest_regno
+                                     + regs_per_val * (i - base))))
+          || REGNO (SET_DEST (elt)) <= dest_regno
+          || !MEM_P (SET_SRC (elt))
+          || GET_MODE (SET_SRC (elt)) != mode
+          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+               || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) !=
+                           (offset + (i - base) * reg_increment))
+              && (!REG_P (XEXP (SET_SRC (elt), 0))
+		  || offset + (i - base) * reg_increment != 0)))
+        return false;
+
+      dest_regno = REGNO (SET_DEST (elt));
+      if (dest_regno == REGNO (src_addr))
+        addr_reg_loaded = true;
+    }
+
+  if (update && addr_reg_loaded)
+    return false;
+
+  /* For Thumb-1, address register is always modified - either by write-back
+     or by explicit load.  If the pattern does not describe an update, it must
+     be because the address register is in the list of loaded registers.  */
+  if (TARGET_THUMB1)
+    return update || addr_reg_loaded;
+
+  return true;
+}
+
 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
    float elements), and a modified constant (whatever should be output for a
@@ -16092,6 +16225,137 @@ emit_multi_reg_push (unsigned long mask)
   return par;
 }
 
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.
+
+   There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
+   but currently the pattern that matches this in the MD file is only enabled
+   for Thumb-2.  */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+  int offset_adj = really_return ? 1 : 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+  if (really_return)
+    {
+      tmp = ret_rtx;
+      XVECEXP (par, 0, 0) = tmp;
+    }
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, offset_adj) = tmp;
+
+  /* Now restore every reg, which may include PC.  */
+  for (j = 0, i = 0; j < num_regs; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        reg = gen_rtx_REG (SImode, i);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (stack_pointer_rtx, 4 * j)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+        /* We need to maintain a sequence for DWARF info too.  As dwarf info
+           should not have PC, skip PC.  */
+        if (i != PC_REGNUM)
+          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        j++;
+      }
+
+  if (really_return)
+    par = emit_jump_insn (par);
+  else
+    par = emit_insn (par);
+
+  REG_NOTES (par) = dwarf;
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+
+  gcc_assert (num_regs && num_regs <= 32);
+
+  if (num_regs > 16)
+    {
+      thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+      thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+      return;
+    }
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* Increment the stack pointer, based on there being
+     num_regs 8-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, 0) = tmp;
+
+  /* Now show every reg that will be restored, using a SET for each.  */
+  for (j = 0, i=first_reg; j < num_regs; i += 2)
+    {
+      reg = gen_rtx_REG (DFmode, i);
+
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx,
+                                         8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, j + 1) = tmp;
+
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      j++;
+    }
+
+  par = emit_insn (par);
+  REG_NOTES (par) = dwarf;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -21622,7 +21886,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
 
 /* The bits which aren't usefully expanded as rtl.  */
 const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
 {
   arm_stack_offsets *offsets;
   int regno;
@@ -22191,7 +22455,6 @@ thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
-
 void
 thumb1_expand_epilogue (void)
 {
@@ -22246,6 +22509,243 @@ thumb1_expand_epilogue (void)
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+   POP instruction can be generated.  LR should be replaced by PC.  All
+   the checks required are already done by  USE_RETURN_INSN ().  Hence,
+   all we really need to check here is if single register is to be
+   returned, or multiple register return.  */
+void
+thumb2_expand_return (void)
+{
+  int i, num_regs;
+  unsigned long saved_regs_mask;
+  arm_stack_offsets *offsets;
+
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  if (saved_regs_mask)
+    {
+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (par) = 1;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          saved_regs_mask &= ~ (1 << LR_REGNUM);
+          saved_regs_mask |=   (1 << PC_REGNUM);
+          thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+        }
+    }
+  else
+    {
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+   Note that this RTL does not include the
+   Return insn, which is created separately and
+   handled in thumb2_output_return.  */
+void
+thumb2_expand_epilogue (void)
+{
+  HOST_WIDE_INT amount;
+  int reg;
+  unsigned long saved_regs_mask;
+  unsigned long func_type;
+  int i;
+  arm_stack_offsets *offsets;
+  int num_regs = 0;
+  bool really_return = false;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogues.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  /* At the end of the code of a function, the stack pointer will
+     be pointing at the outgoing args area, so we first need to
+     get it to point at the saved_regs area.  */
+
+  /* Determine how much to add to the stack pointer.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  /* In Thumb-2 mode, the frame pointer points to the last
+     saved register.  */
+  amount = offsets->outgoing_args - offsets->saved_regs;
+
+  if (frame_pointer_needed)
+    {
+      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+      amount = offsets->locals_base - offsets->saved_regs;
+    }
+
+  gcc_assert (amount >= 0);
+  if (amount)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           GEN_INT (amount)));
+
+  /* Emit a USE (stack_pointer_rtx), so that
+     the stack adjustment will not be deleted.  */
+  emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  /* Now handle any VFP restoration.  */
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         fldmdd operations.  The need to match groups is because,
+         unlike pop, fldmdd can only do consecutive regs.  */
+      for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+            && (!df_regs_ever_live_p (reg + 1)
+                || call_used_regs[reg + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to end_reg).  */
+            if (end_reg > reg + 2)
+              thumb2_emit_vfp_multi_reg_pop (reg + 2,
+                                             (end_reg - (reg + 2)) / 2);
+            end_reg = reg;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never fired).  */
+      if (end_reg > reg + 2)
+        thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+    }
+
+  /* iWMMXt is not supported when Thumb-2 in use.  If it were, we would
+     want to be restoring the appropriate iWMMXt regs here, in a similar
+     way to arm_output_epilogue.  */
+
+  /* If there are registers to restore, make it happen.  */
+  if (saved_regs_mask)
+    {
+      /* Pop can only be used for more than one reg, so generate an ldr.  */
+      if (num_regs == 1)
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx addr = gen_rtx_MEM (SImode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+              }
+        }
+
+      /* Two or more regs warrants the use of a multi-reg pop.  */
+      else
+        {
+          /* If multi-pop is last instruction, don't generate `branch to
+             return-address' instruction.  Instead, pop LR in PC.  */
+          if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+              && !IS_STACKALIGN (func_type)
+              && crtl->args.pretend_args_size == 0
+              && saved_regs_mask & (1 << LR_REGNUM)
+              && !crtl->calls_eh_return)
+            {
+              saved_regs_mask &= ~ (1 << LR_REGNUM);
+              saved_regs_mask |=   (1 << PC_REGNUM);
+              really_return = true;
+            }
+
+          thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+          if (really_return == true)
+            return;
+        }
+    }
+
+  /* Unwind the pre-pushed regs.  */
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  emit_jump_insn (ret_rtx);
+}
+
+
+/* Generate the appropriate instruction to return for Thumb-2.
+   OPERAND holds a condition, which must be passed to output_asm_insn.  */
+void
+thumb2_output_return (rtx operand)
+{
+  char instr[100];
+  unsigned long func_type;
+
+  func_type = arm_current_func_type ();
+
+  if (IS_NAKED (func_type))
+    /* Do nothing if naked function.  */
+    return;
+
+  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+    {
+      rtx op;
+
+      /* A volatile function should never return.  Call abort.  */
+      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+      assemble_external_libcall (op);
+      output_asm_insn ("bl\t%a0", &op);
+
+      return;
+    }
+
+  switch ((int) ARM_FUNC_TYPE (func_type))
+    {
+    case ARM_FT_ISR:
+    case ARM_FT_FIQ:
+      sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+      break;
+
+    case ARM_FT_EXCEPTION:
+      sprintf (instr, "movs\t%%|pc, %%|lr");
+      break;
+
+    default:
+      sprintf (instr, "bx\t%%|lr");
+      break;
+    }
+
+  output_asm_insn (instr, &operand);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..6cfb2da 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6669,6 +6669,141 @@
 			     FALSE, operands[1], &offset);
 })
 
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation"
+    [(set (match_operand:SI 1 "s_register_operand" "+rk")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    if (REGNO (operands[1]) == SP_REGNUM)
+      {
+        strcpy (pattern, \"pop\\t{\");
+      }
+    else
+      {
+        strcpy (pattern, \"ldm%(ia%)\\t\");
+        strcat (pattern, reg_names[REGNO (operands[1])]);
+        strcat (pattern, \"!, {\");
+      }
+
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+
+    /* Skip over the first element and the one we just generated.  */
+    for (i = 2; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+;; Pop with return (as used in epilogue RTL)
+;;
+;; This instruction is generated when the registers are popped at end of
+;; epilogue.  Here, instead of popping the value in LR and then generating
+;; jump to LR, value is popped in PC.  Hence, the pattern is combined with
+;; (return).
+(define_insn "*pop_multiple_with_stack_update_and_return"
+  [(match_parallel 0 "load_multiple_operation_return"
+    [(return)
+     (set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    strcpy (pattern, \"pop\\t{\");
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);
+
+    /* Skip over the first two elements and the one we just generated.  */
+    for (i = 3; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*vfp_pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_fp"
+    [(set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+     (set (match_operand:DF 3 "arm_hard_register_operand" "")
+          (mem:DF (match_dup 1)))])]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  "*
+  {
+    int num_regs = XVECLEN (operands[0], 0);
+    char pattern[100];
+    rtx op_list[2];
+    strcpy (pattern, \"fldmfdd\\t\");
+    strcat (pattern,
+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+    strcat (pattern, \"!, {\");
+    op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0);
+    strcat (pattern, \"%P0\");
+    if ((num_regs - 1) > 1)
+      {
+        strcat (pattern, \"-%P1\");
+        op_list [1] = XEXP (XVECEXP (operands[0], 0, num_regs - 1), 0);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, op_list);
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
                           (match_operand:SI 1 "" ""))
@@ -8486,8 +8621,19 @@
 
 (define_expand "return"
   [(return)]
-  "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
-  "")
+  "(TARGET_ARM || (TARGET_THUMB2
+                   && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
+                   && !IS_STACKALIGN (arm_current_func_type ())))
+    && USE_RETURN_INSN (FALSE)"
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        thumb2_expand_return ();
+        DONE;
+      }
+  }
+  ")
 
 ;; Often the return insn will be the same as loading from memory, so set attr
 (define_insn "*arm_return"
@@ -10529,6 +10675,11 @@
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
     thumb1_expand_epilogue ();
+  else if (TARGET_THUMB2)
+  {
+    thumb2_expand_epilogue ();
+    DONE;
+  }
   else if (USE_RETURN_INSN (FALSE))
     {
       emit_jump_insn (gen_return ());
@@ -10572,12 +10723,12 @@
 
 (define_insn "*epilogue_insns"
   [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
-  "TARGET_EITHER"
+  "TARGET_ARM || TARGET_THUMB1"
   "*
   if (TARGET_32BIT)
     return arm_output_epilogue (NULL);
   else /* TARGET_THUMB1 */
-    return thumb_unexpanded_epilogue ();
+    return thumb1_unexpanded_epilogue ();
   "
   ; Length is absolute worst case
   [(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..7e2203d 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -369,84 +369,22 @@
 (define_special_predicate "load_multiple_operation"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  unsigned dest_regno;
-  rtx src_addr;
-  HOST_WIDE_INT i = 1, base = 0;
-  HOST_WIDE_INT offset = 0;
-  rtx elt;
-  bool addr_reg_loaded = false;
-  bool update = false;
-
-  if (count <= 1
-      || GET_CODE (XVECEXP (op, 0, 0)) != SET
-      || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
-    return false;
-
-  /* Check to see if this might be a write-back.  */
-  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
-    {
-      i++;
-      base = 1;
-      update = true;
-
-      /* Now check it more carefully.  */
-      if (GET_CODE (SET_DEST (elt)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
-          || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
-        return false;
-    }
-
-  /* Perform a quick check so we don't blow up below.  */
-  if (count <= i
-      || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
-      || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
-      || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
-    return false;
-
-  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
-  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
-  if (GET_CODE (src_addr) == PLUS)
-    {
-      if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
-	return false;
-      offset = INTVAL (XEXP (src_addr, 1));
-      src_addr = XEXP (src_addr, 0);
-    }
-  if (!REG_P (src_addr))
-    return false;
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/false);
+})
 
-  for (; i < count; i++)
-    {
-      elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_return"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc*/true);
+})
 
-      if (GET_CODE (elt) != SET
-          || GET_CODE (SET_DEST (elt)) != REG
-          || GET_MODE (SET_DEST (elt)) != SImode
-          || REGNO (SET_DEST (elt)) <= dest_regno
-          || GET_CODE (SET_SRC (elt)) != MEM
-          || GET_MODE (SET_SRC (elt)) != SImode
-          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
-	       || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
-	       || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
-	       || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
-	      && (!REG_P (XEXP (SET_SRC (elt), 0))
-		  || offset + (i - base) * 4 != 0)))
-        return false;
-      dest_regno = REGNO (SET_DEST (elt));
-      if (dest_regno == REGNO (src_addr))
-        addr_reg_loaded = true;
-    }
-  /* For Thumb, we only have updating instructions.  If the pattern does
-     not describe an update, it must be because the address register is
-     in the list of loaded registers - on the hardware, this has the effect
-     of overriding the update.  */
-  if (update && addr_reg_loaded)
-    return false;
-  if (TARGET_THUMB1)
-    return update || addr_reg_loaded;
-  return true;
+(define_special_predicate "load_multiple_operation_fp"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/true,
+                                    DFmode, /*return_pc*/false);
 })
 
 (define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..78f4e81 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
    (set_attr "length" "20")]
 )
 
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
   [(return)]
-  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+  "(TARGET_THUMB2)"
   "*
   {
-    return output_return_instruction (const_true_rtx, TRUE, FALSE);
+    thumb2_output_return (const_true_rtx);
+    return \"\";
   }"
-  [(set_attr "type" "load1")
-   (set_attr "length" "12")]
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")]
 )
 
 (define_insn_and_split "thumb2_eh_return"

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-11-10 14:28 ` Richard Earnshaw
@ 2011-11-10 15:28   ` Sameera Deshpande
  2011-11-10 19:07   ` Sameera Deshpande
  1 sibling, 0 replies; 15+ messages in thread
From: Sameera Deshpande @ 2011-11-10 15:28 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches, nickc, paul, Ramana Radhakrishnan

Hi Richard,

thanks for your comments.

-- 



> +  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
> 
> It's generally best not to use assignments within conditionals unless
> there is a strong reason otherwise (that normally implies something like
> being deep within a condition test where you only want to update the
> variable if some pre-conditions are true and that can't be easily
> factored out).
> 
> +                  != (unsigned int) (first_dest_regno + regs_per_val *
> (i - base))))
> 
> Line length (split the line just before the '+' operator.
> 
> +  /* now show EVERY reg that will be restored, using a SET for each.  */
> 
> Capital letter at start of sentence.  Why is EVERY in caps?
> 
> +  saved_regs_mask = offsets->saved_regs_mask;
> +  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
> 
> blank line before the for loop.
> 
> +      /* It's illegal to do a pop for only one reg, so generate an ldr.  */
> 
> GCC coding standards suggest avoiding the use of 'illegal'.  Suggest
> changing that to 'Pop can only be used for more than one reg; so...'
> 
> +                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2),
> 0))]);
> +
> +    /* Skip over the first two elements and the one we just generated.
>  */
> +    for (i = 3; i < (num_saves); i++)
> +      {
> +        strcat (pattern, \", %|\");
> 
> +        strcat (pattern,
> 
> +                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i),
> 0))]);
> +      }
> +
> +    strcat (pattern, \"}\");
> +    output_asm_insn (pattern, operands);
> +
> 
> +    return \"\";
> +  }
> +  "
> 
> +  [(set_attr "type" "load4")]
> 
> There's a lot of trailing white space here.  Please remove.

Removed white spaces in reworked patch
http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01009.html

> 
> +(define_insn "*thumb2_ldr_with_return"
> +  [(return)
> +   (set (reg:SI PC_REGNUM)
> +        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand"
> "k"))))]
> +  "TARGET_THUMB2"
> +  "ldr%?\t%|pc, [%0], #4"
> +  [(set_attr "type" "load1")
> +   (set_attr "predicable" "yes")]
> +)
> +
> 
> This pattern doesn't seem to be used.  What's its purpose?

This pattern is generated from thumb2_expand_return in 

+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+
stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (par) = 1;
+          emit_jump_insn (par);
+        }

> 
> +    static const struct { const char *const name; } table[]
> +                  = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},
> 
> I'm not keen on having this table.  Generally the register names should
> be configurable depending on the assembler flavour and this patch
> defeats that.  Is there any way to rewrite this code so that it can use
> the standard operand methods for generating register names?

The updated patch was resent after comments from Ramana and Paul which
eliminates this table.

http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01009.html

I will take care of other formatting issues and will resend the patch.

> 
> In summary, this is mostly OK, apart from the last two items.
> 
> R.

- Thanks and regards,
  Sameera D.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
  2011-09-28 17:29 Sameera Deshpande
@ 2011-11-10 14:28 ` Richard Earnshaw
  2011-11-10 15:28   ` Sameera Deshpande
  2011-11-10 19:07   ` Sameera Deshpande
  0 siblings, 2 replies; 15+ messages in thread
From: Richard Earnshaw @ 2011-11-10 14:28 UTC (permalink / raw)
  To: Sameera Deshpande; +Cc: gcc-patches, nickc, paul, Ramana Radhakrishnan

On 28/09/11 17:15, Sameera Deshpande wrote:
> Hi!
> 
> This patch generates Thumb2 epilogues in RTL form.
> 
> The work involves defining new functions, predicates and patterns along with
> few changes in existing code:
> * The load_multiple_operation predicate was found to be too restrictive for
> integer loads as it required consecutive destination regs, so this
> restriction was lifted.
> * Variations of load_multiple_operation were required to handle cases 
>    - where SP must be the base register 
>    - where FP values were being loaded (which do require consecutive
> destination registers)
>    - where PC can be in register-list (which requires return pattern along
> with register loads).
>   Hence, the common code was factored out into a new function in arm.c and
> parameterised to show 
>    - whether consecutive destination regs are needed
>    - the data type being loaded 
>    - whether the base register has to be SP
>    - whether PC is in register-list
> 
> The patch is tested with arm-eabi with no regressions.
> 
> ChangeLog:
> 
> 2011-09-28  Ian Bolton         <ian.bolton@arm.com>
>             Sameera Deshpande  <sameera.deshpande@arm.com>
>            
>        * config/arm/arm-protos.h (load_multiple_operation_p): New
> declaration.
>          (thumb2_expand_epilogue): Likewise.
>          (thumb2_output_return): Likewise
>          (thumb2_expand_return): Likewise.
>          (thumb_unexpanded_epilogue): Rename to... 
>          (thumb1_unexpanded_epilogue): ...this 
>        * config/arm/arm.c (load_multiple_operation_p): New function. 
>          (thumb2_emit_multi_reg_pop): Likewise.
>          (thumb2_emit_vfp_multi_reg_pop): Likewise.
>          (thumb2_expand_return): Likewise. 
>          (thumb2_expand_epilogue): Likewise. 
>          (thumb2_output_return): Likewise
>          (thumb_unexpanded_epilogue): Rename to...
>          ( thumb1_unexpanded_epilogue): ...this
>        * config/arm/arm.md (pop_multiple_with_stack_update): New pattern. 
>          (pop_multiple_with_stack_update_and_return): Likewise.
>          (thumb2_ldr_with_return): Likewise.
>          (floating_point_pop_multiple_with_stack_update): Likewise.
>          (return): Update condition and code for pattern.
>          (arm_return): Likewise.
>          (epilogue_insns): Likewise.
>        * config/arm/predicates.md (load_multiple_operation): Update
> predicate.
>          (load_multiple_operation_stack_and_return): New predicate. 
>          (load_multiple_operation_stack): Likewise.
>          (load_multiple_operation_stack_fp): Likewise.
>        * config/arm/thumb2.md (thumb2_return): Remove.
>          (thumb2_rtl_epilogue_return): New pattern.
> 
> 
> - Thanks and regards,
>   Sameera D.
> 
> 
> thumb2_rtl_epilogue_complete-27Sept.patch
> 

+  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)

It's generally best not to use assignments within conditionals unless
there is a strong reason otherwise (that normally implies something like
being deep within a condition test where you only want to update the
variable if some pre-conditions are true and that can't be easily
factored out).

+                  != (unsigned int) (first_dest_regno + regs_per_val *
(i - base))))

Line length (split the line just before the '+' operator.

+  /* now show EVERY reg that will be restored, using a SET for each.  */

Capital letter at start of sentence.  Why is EVERY in caps?

+  saved_regs_mask = offsets->saved_regs_mask;
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)

blank line before the for loop.

+      /* It's illegal to do a pop for only one reg, so generate an ldr.  */

GCC coding standards suggest avoiding the use of 'illegal'.  Suggest
changing that to 'Pop can only be used for more than one reg; so...'

+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2),
0))]);
+
+    /* Skip over the first two elements and the one we just generated.
 */
+    for (i = 3; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");

+        strcat (pattern,

+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i),
0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+

+    return \"\";
+  }
+  "

+  [(set_attr "type" "load4")]

There's a lot of trailing white space here.  Please remove.

+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand"
"k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+

This pattern doesn't seem to be used.  What's its purpose?

+    static const struct { const char *const name; } table[]
+                  = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},

I'm not keen on having this table.  Generally the register names should
be configurable depending on the assembler flavour and this patch
defeats that.  Is there any way to rewrite this code so that it can use
the standard operand methods for generating register names?

In summary, this is mostly OK, apart from the last two items.

R.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
@ 2011-09-28 17:29 Sameera Deshpande
  2011-11-10 14:28 ` Richard Earnshaw
  0 siblings, 1 reply; 15+ messages in thread
From: Sameera Deshpande @ 2011-09-28 17:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: nickc, Richard Earnshaw, paul, Ramana Radhakrishnan

[-- Attachment #1: Type: text/plain, Size: 2637 bytes --]

Hi!

This patch generates Thumb2 epilogues in RTL form.

The work involves defining new functions, predicates and patterns along with
few changes in existing code:
* The load_multiple_operation predicate was found to be too restrictive for
integer loads as it required consecutive destination regs, so this
restriction was lifted.
* Variations of load_multiple_operation were required to handle cases 
   - where SP must be the base register 
   - where FP values were being loaded (which do require consecutive
destination registers)
   - where PC can be in register-list (which requires return pattern along
with register loads).
  Hence, the common code was factored out into a new function in arm.c and
parameterised to show 
   - whether consecutive destination regs are needed
   - the data type being loaded 
   - whether the base register has to be SP
   - whether PC is in register-list

The patch is tested with arm-eabi with no regressions.

ChangeLog:

2011-09-28  Ian Bolton         <ian.bolton@arm.com>
            Sameera Deshpande  <sameera.deshpande@arm.com>
           
       * config/arm/arm-protos.h (load_multiple_operation_p): New
declaration.
         (thumb2_expand_epilogue): Likewise.
         (thumb2_output_return): Likewise
         (thumb2_expand_return): Likewise.
         (thumb_unexpanded_epilogue): Rename to... 
         (thumb1_unexpanded_epilogue): ...this 
       * config/arm/arm.c (load_multiple_operation_p): New function. 
         (thumb2_emit_multi_reg_pop): Likewise.
         (thumb2_emit_vfp_multi_reg_pop): Likewise.
         (thumb2_expand_return): Likewise. 
         (thumb2_expand_epilogue): Likewise. 
         (thumb2_output_return): Likewise
         (thumb_unexpanded_epilogue): Rename to...
         ( thumb1_unexpanded_epilogue): ...this
       * config/arm/arm.md (pop_multiple_with_stack_update): New pattern. 
         (pop_multiple_with_stack_update_and_return): Likewise.
         (thumb2_ldr_with_return): Likewise.
         (floating_point_pop_multiple_with_stack_update): Likewise.
         (return): Update condition and code for pattern.
         (arm_return): Likewise.
         (epilogue_insns): Likewise.
       * config/arm/predicates.md (load_multiple_operation): Update
predicate.
         (load_multiple_operation_stack_and_return): New predicate. 
         (load_multiple_operation_stack): Likewise.
         (load_multiple_operation_stack_fp): Likewise.
       * config/arm/thumb2.md (thumb2_return): Remove.
         (thumb2_rtl_epilogue_return): New pattern.


- Thanks and regards,
  Sameera D.

[-- Attachment #2: thumb2_rtl_epilogue_complete-27Sept.patch --]
[-- Type: application/octet-stream, Size: 33328 bytes --]

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2353704..d964b85 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -63,6 +63,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool, bool);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
 					   int *);
@@ -174,10 +175,13 @@ extern int arm_float_words_big_endian (void);
 
 /* Thumb functions.  */
 extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
 extern void thumb1_expand_prologue (void);
 extern void thumb1_expand_epilogue (void);
 extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
 #ifdef TREE_CODE
 extern int is_called_in_ARM_mode (tree);
 #endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 3162b30..f86a3e6 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8754,6 +8754,140 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
 #undef CHECK
 }
 
+/* Return true if OP is a valid load multiple operation for MODE mode.
+   CONSECUTIVE is true if the registers in the operation must form
+   a consecutive sequence in the register bank.  STACK_ONLY is true
+   if the base register must be the stack pointer.  RETURN_PC is true
+   if value is to be loaded in PC.  */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode, 
+                           bool stack_only, bool return_pc)
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  unsigned dest_regno, first_dest_regno;
+  rtx src_addr;
+  HOST_WIDE_INT i = 1, base = 0;
+  HOST_WIDE_INT offset = 0;
+  rtx elt;
+  bool addr_reg_loaded = false;
+  bool update = false;
+  int reg_increment, regs_per_val;
+  int offset_adj;
+
+  /* If DFMode, we must be asking for consecutive,
+     since FLDMDD can only do consecutive regs.  */
+  gcc_assert ((mode != DFmode) || consecutive);
+
+  /* Set up the increments and the regs per val based on the mode.  */
+  reg_increment = mode == DFmode ? 8 : 4;
+  regs_per_val = mode == DFmode ? 2 : 1;
+  offset_adj = return_pc ? 1 : 0;
+
+  if (count <= 1
+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+    return false;
+
+  /* Check to see if this might be a write-back.  */
+  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
+    {
+      i++;
+      base = 1;
+      update = true;
+
+      /* Now check it more carefully.  */
+      if (!REG_P (SET_DEST (elt))
+          || !REG_P (XEXP (SET_SRC (elt), 0))
+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+          || INTVAL (XEXP (SET_SRC (elt), 1)) != 
+              ((count - 1 - offset_adj) * reg_increment))
+        return false;
+
+      /* Check the nature of the base_register being written to.  */
+      if (stack_only && (REGNO (SET_DEST (elt)) != SP_REGNUM))
+        return false;
+    }
+
+  i = i + offset_adj;
+  base = base + offset_adj;
+  /* Perform a quick check so we don't blow up below.  */
+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+    return false;
+
+  /* If only one reg being loaded, success depends on the type:
+     FLDMDD can do just one reg, LDM must do at least two.  */
+  if (count <= i)
+    return mode == DFmode ? true : false;
+
+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+  dest_regno = first_dest_regno; 
+  
+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+  if (GET_CODE (src_addr) == PLUS)
+    {
+      if (!CONST_INT_P (XEXP (src_addr, 1)))
+	return false;
+      offset = INTVAL (XEXP (src_addr, 1));
+      src_addr = XEXP (src_addr, 0);
+    }
+
+  if (!REG_P (src_addr))
+    return false;
+
+  /* The pattern we are trying to match here is:
+     [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (src_addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * <reg_increment>))))
+     ]  
+     Where, 
+     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+     3.  If consecutive is TRUE, then for kth register being loaded, 
+         REGNO (R_dk) = REGNO (R_d0) + k.  */
+  for (; i < count; i++)
+    {
+      elt = XVECEXP (op, 0, i);
+
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_DEST (elt))
+          || GET_MODE (SET_DEST (elt)) != mode
+          || (consecutive
+              && (REGNO (SET_DEST (elt))
+                  != (unsigned int) (first_dest_regno + regs_per_val * (i - base))))
+          || REGNO (SET_DEST (elt)) <= dest_regno
+          || !MEM_P (SET_SRC (elt))
+          || GET_MODE (SET_SRC (elt)) != mode
+          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+               || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != 
+                           (offset + (i - base) * reg_increment))
+              && (!REG_P (XEXP (SET_SRC (elt), 0))
+		  || offset + (i - base) * reg_increment != 0)))
+        return false;
+
+      dest_regno = REGNO (SET_DEST (elt));
+      if (dest_regno == REGNO (src_addr))
+        addr_reg_loaded = true; 
+    }
+
+  if (update && addr_reg_loaded)
+    return false;
+
+  /* For Thumb-1, address register is always modified - either by write-back
+     or by explicit load.  If the pattern does not describe an update, it must 
+     be because the address register is in the list of loaded registers.  */
+  if (TARGET_THUMB1)
+    return update || addr_reg_loaded;
+
+  return true;
+}
+
 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
    float elements), and a modified constant (whatever should be output for a
@@ -15609,6 +15743,186 @@ emit_multi_reg_push (unsigned long mask)
   return par;
 }
 
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.
+
+   There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
+   but currently the pattern that matches this in the MD file is only enabled
+   for Thumb-2.  */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int num_dwarf_regs;
+  int i, j;
+  rtx par;
+  rtx dwarf;
+  int dwarf_par_index;
+  rtx tmp, reg;
+  int offset_adj = really_return ? 1 : 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* We don't record the PC in the dwarf frame information.  */
+  num_dwarf_regs = num_regs;
+  if (saved_regs_mask & (1 << PC_REGNUM))
+    num_dwarf_regs--;
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+  /* We need to maintain a sequence for DWARF info too.  */
+  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
+
+  if (really_return) 
+    {
+      tmp = ret_rtx;
+      XVECEXP (par, 0, 0) = tmp;
+    }
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, offset_adj) = tmp;
+
+  /* Need to make new rtx for dwarf because they are not sharable.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (dwarf, 0, 0) = tmp;
+
+  /* Because we don't hold DWARF info for PC, the dwarf index
+     can get out of step with the par index (j).  */
+  dwarf_par_index = 1;
+
+  /* Now restore every reg, which may include PC.  */
+  for (j = 0, i = 0; j < num_regs; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        reg = gen_rtx_REG (SImode, i);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (stack_pointer_rtx, 4 * j)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+        /* Restoration of PC is not recorded in the DWARF info.  */
+        if (i != PC_REGNUM)
+          {
+            tmp = gen_rtx_SET (VOIDmode,
+                               reg,
+                               gen_frame_mem
+                               (SImode,
+                                plus_constant (stack_pointer_rtx, 4 * j)));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
+          }
+
+        j++;
+      }
+
+  if (really_return) 
+    par = emit_jump_insn (par);
+  else
+    par = emit_insn (par);
+
+  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+  int num_dwarf_regs;
+  int i, j;
+  rtx par;
+  rtx dwarf;
+  rtx tmp, reg;
+
+  gcc_assert (num_regs && num_regs <= 32);
+
+  num_dwarf_regs = num_regs;
+  
+  if (num_regs > 16)
+    {
+      thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+      thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+      return;
+    }
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* We need to maintain a sequence for DWARF info too.  */
+  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
+
+  /* Increment the stack pointer, based on there being
+     num_regs 8-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, 0) = tmp;
+
+  /* Need to make new rtx for dwarf because they are not sharable.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (dwarf, 0, 0) = tmp;
+
+  /* now show EVERY reg that will be restored, using a SET for each.  */
+  for (j = 0, i=first_reg; j < num_regs; i += 2)
+    {
+      reg = gen_rtx_REG (DFmode, i);
+
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx,
+                                         8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, j + 1) = tmp;
+
+      /* Need to make new rtx for dwarf because they are not sharable.  */
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx,
+                                         8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (dwarf, 0, j + 1) = tmp;
+
+      j++;
+    }
+
+  par = emit_insn (par);
+  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -21101,7 +21415,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
 
 /* The bits which aren't usefully expanded as rtl.  */
 const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
 {
   arm_stack_offsets *offsets;
   int regno;
@@ -21669,7 +21983,6 @@ thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
-
 void
 thumb1_expand_epilogue (void)
 {
@@ -21724,9 +22037,244 @@ thumb1_expand_epilogue (void)
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+   POP instruction can be generated.  LR should be replaced by PC.  All 
+   the checks required are already done by  USE_RETURN_INSN ().  Hence,  
+   all we really need to check here is if single register is to be 
+   returned, or multiple register return.  */
+void 
+thumb2_expand_return (void)
+{
+  int i, num_regs;
+  unsigned long saved_regs_mask;
+  arm_stack_offsets *offsets; 
+
+  offsets = arm_get_frame_offsets (); 
+  saved_regs_mask = offsets->saved_regs_mask;
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  if (saved_regs_mask)
+    {
+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode, 
+                                  gen_rtx_POST_INC (SImode, 
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (par) = 1;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          saved_regs_mask &= ~ (1 << LR_REGNUM);
+          saved_regs_mask |=   (1 << PC_REGNUM); 
+          thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+        }
+    }
+  else 
+    {
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+   Note that this RTL does not include the
+   Return insn, which is created separately and
+   handled in thumb2_output_return.  */
+void
+thumb2_expand_epilogue (void)
+{
+  HOST_WIDE_INT amount;
+  int reg;
+  unsigned long saved_regs_mask;
+  unsigned long func_type;
+  int i;
+  arm_stack_offsets *offsets;
+  int num_regs = 0;
+  bool really_return = false;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogues.  */
+  if (IS_NAKED (func_type) 
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  /* At the end of the code of a function, the stack pointer will
+     be pointing at the outgoing args area, so we first need to
+     get it to point at the saved_regs area.  */
+
+  /* Determine how much to add to the stack pointer.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  /* In Thumb-2 mode, the frame pointer points to the last
+     saved register.  */
+  amount = offsets->outgoing_args - offsets->saved_regs;
+
+  if (frame_pointer_needed)
+    {
+      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+      amount = offsets->locals_base - offsets->saved_regs;
+    }
+
+  gcc_assert (amount >= 0);
+  if (amount)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 
+                           GEN_INT (amount)));
+
+  /* Emit a USE (stack_pointer_rtx), so that
+     the stack adjustment will not be deleted.  */
+  emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  /* Now handle any VFP restoration.  */
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         fldmdd operations.  The need to match groups is because,
+         unlike pop, fldmdd can only do consecutive regs.  */
+      for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+            && (!df_regs_ever_live_p (reg + 1)
+                || call_used_regs[reg + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to end_reg).  */
+            if (end_reg > reg + 2)
+              thumb2_emit_vfp_multi_reg_pop (reg + 2,
+                                             (end_reg - (reg + 2)) / 2);
+            end_reg = reg;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never fired).  */
+      if (end_reg > reg + 2)
+        thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+    } 
+
+  /* iWMMXt is not supported when Thumb-2 in use.  If it were, we would
+     want to be restoring the appropriate iWMMXt regs here, in a similar
+     way to arm_output_epilogue.  */
+
+  /* If there are registers to restore, make it happen.  */
+  if (saved_regs_mask)
+    {
+      /* It's illegal to do a pop for only one reg, so generate an ldr.  */
+      if (num_regs == 1)
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx addr = gen_rtx_MEM (SImode, 
+                                        gen_rtx_POST_INC (SImode, 
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+              }
+        }
+
+      /* Two or more regs warrants the use of a multi-reg pop.  */
+      else 
+        {
+          /* If multi-pop is last instruction, don't generate `branch to 
+             return-address' instruction.  Instead, pop LR in PC.  */
+          if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+              && !IS_STACKALIGN (func_type)
+              && crtl->args.pretend_args_size == 0
+              && saved_regs_mask & (1 << LR_REGNUM)
+              && !crtl->calls_eh_return)
+            {
+              saved_regs_mask &= ~ (1 << LR_REGNUM);
+              saved_regs_mask |=   (1 << PC_REGNUM);
+              really_return = true;
+            }
+
+          thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+          if (really_return == true)
+            return;
+        }
+    }
+
+  /* Unwind the pre-pushed regs.  */
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  emit_jump_insn (ret_rtx);
+}
+
+
+/* Generate the appropriate instruction to return for Thumb-2.
+   OPERAND holds a condition, which must be passed to output_asm_insn.  */
+void
+thumb2_output_return (rtx operand)
+{
+  char instr[100];
+  unsigned long func_type;
+
+  func_type = arm_current_func_type ();
+
+  if (IS_NAKED (func_type))
+    /* Do nothing if naked function.  */
+    return;
+
+  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+    {
+      rtx op;
+      
+      /* A volatile function should never return.  Call abort.  */
+      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+      assemble_external_libcall (op);
+      output_asm_insn ("bl\t%a0", &op);
+      
+      return;
+    }
+
+  switch ((int) ARM_FUNC_TYPE (func_type))
+    {
+    case ARM_FT_ISR:
+    case ARM_FT_FIQ:
+      sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+      break;
+      
+    case ARM_FT_EXCEPTION:
+      sprintf (instr, "movs\t%%|pc, %%|lr");
+      break;
+      
+    default:
+      sprintf (instr, "bx\t%%|lr");
+      break;
+    }
+  
+  output_asm_insn (instr, &operand);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
-
 const char *
 thumb1_output_interwork (void)
 {
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0f23400..01ac7ca 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6433,6 +6433,140 @@
 			     FALSE, operands[1], &offset);
 })
 
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_stack"
+    [(set (match_operand:SI 1 "s_register_operand" "=k")
+          (plus:SI (match_operand:SI 2 "s_register_operand" "1")
+                   (match_operand:SI 3 "const_int_operand" "I")))
+        ])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int i;
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);
+
+    strcpy (pattern, \"pop\\t{\");
+    strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+
+    /* Skip over the first element and the one we just generated.  */
+    for (i = 2; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+;; Pop with return (as used in epilogue RTL)                                      
+;;
+;; This instruction is generated when the registers are popped at end of 
+;; epilogue.  Here, instead of popping the value in LR and then generating 
+;; jump to LR, value is popped in PC.  Hence, the pattern is combined with 
+;; (return).
+(define_insn "*pop_multiple_with_stack_update_and_return"                   
+  [(match_parallel 0 "load_multiple_operation_stack_and_return" 
+    [(return)
+     (set (match_operand:SI 1 "s_register_operand" "=k")
+          (plus:SI (match_operand:SI 2 "s_register_operand" "1")     
+                   (match_operand:SI 3 "const_int_operand" "I")))    
+        ])]
+  "TARGET_THUMB2"
+  "*   
+  {         
+    int i;                         
+    char pattern[100];
+    int num_saves = XVECLEN (operands[0], 0);        
+
+    strcpy (pattern, \"pop\\t{\"); 
+    strcat (pattern,  
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);                         
+
+    /* Skip over the first two elements and the one we just generated.  */                         
+    for (i = 3; i < (num_saves); i++)
+      {
+        strcat (pattern, \", %|\");                                                                
+        strcat (pattern,                                                                           
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);                         
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+                                                                                                       
+    return \"\";
+  }
+  "                                                                                                    
+  [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*floating_point_pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_stack_fp"
+    [(set (match_operand:SI 1 "s_register_operand" "=k")
+          (plus:SI (match_operand:SI 2 "s_register_operand" "1")
+                   (match_operand:SI 3 "const_int_operand" "I")))
+     (set (match_operand:DF 4 "arm_hard_register_operand" "")
+          (mem:DF (match_dup 2)))])]
+  "TARGET_THUMB2"
+  "*
+  {
+    int num_regs = XVECLEN (operands[0], 0);
+    static const struct { const char *const name; } table[]
+                  = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},
+                      {\"d4\"}, {\"d5\"}, {\"d6\"}, {\"d7\"},
+                      {\"d8\"}, {\"d9\"}, {\"d10\"}, {\"d11\"},
+                      {\"d12\"}, {\"d13\"}, {\"d14\"}, {\"d15\"},
+                      {\"d16\"}, {\"d17\"}, {\"d18\"}, {\"d19\"},
+                      {\"d20\"}, {\"d21\"}, {\"d22\"}, {\"d23\"},
+                      {\"d24\"}, {\"d25\"}, {\"d26\"}, {\"d27\"},
+                      {\"d28\"}, {\"d29\"}, {\"d30\"}, {\"d31\"} };
+    int i;
+    char pattern[100];
+    strcpy (pattern, \"fldmfdd\\t\");
+    strcat (pattern,
+                    reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+    strcat (pattern, \"!, {\");
+    strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))
+                           - FIRST_VFP_REGNUM) / 2].name);
+    for (i = 2; i < num_regs; i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))
+                               - FIRST_VFP_REGNUM) / 2].name);
+      }
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
                           (match_operand:SI 1 "" ""))
@@ -8250,8 +8384,19 @@
 
 (define_expand "return"
   [(return)]
-  "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
-  "")
+  "(TARGET_ARM || (TARGET_THUMB2 
+                   && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL 
+                   && !IS_STACKALIGN (arm_current_func_type ()))) 
+    && USE_RETURN_INSN (FALSE)"
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        thumb2_expand_return ();
+        DONE;
+      }
+  }
+  ")
 
 ;; Often the return insn will be the same as loading from memory, so set attr
 (define_insn "*arm_return"
@@ -10110,6 +10255,11 @@
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
     thumb1_expand_epilogue ();
+  else if (TARGET_THUMB2)
+  {
+    thumb2_expand_epilogue ();
+    DONE;
+  }
   else if (USE_RETURN_INSN (FALSE))
     {
       emit_jump_insn (gen_return ());
@@ -10153,12 +10303,12 @@
 
 (define_insn "*epilogue_insns"
   [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
-  "TARGET_EITHER"
+  "TARGET_ARM || TARGET_THUMB1"
   "*
   if (TARGET_32BIT)
     return arm_output_epilogue (NULL);
   else /* TARGET_THUMB1 */
-    return thumb_unexpanded_epilogue ();
+    return thumb1_unexpanded_epilogue ();
   "
   ; Length is absolute worst case
   [(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index cfe8d33..4704120 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -350,88 +350,36 @@
        (ior (match_operand 0 "power_of_two_operand")
 	    (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 32"))))
 
-
 (define_special_predicate "load_multiple_operation"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  unsigned dest_regno;
-  rtx src_addr;
-  HOST_WIDE_INT i = 1, base = 0;
-  HOST_WIDE_INT offset = 0;
-  rtx elt;
-  bool addr_reg_loaded = false;
-  bool update = false;
-
-  if (count <= 1
-      || GET_CODE (XVECEXP (op, 0, 0)) != SET
-      || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
-    return false;
-
-  /* Check to see if this might be a write-back.  */
-  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
-    {
-      i++;
-      base = 1;
-      update = true;
-
-      /* Now check it more carefully.  */
-      if (GET_CODE (SET_DEST (elt)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
-          || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
-        return false;
-    }
-
-  /* Perform a quick check so we don't blow up below.  */
-  if (count <= i
-      || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
-      || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
-      || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
-    return false;
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*stack_only=*/false, 
+                                    /*return_pc*/false);
+})
 
-  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
-  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
-  if (GET_CODE (src_addr) == PLUS)
-    {
-      if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
-	return false;
-      offset = INTVAL (XEXP (src_addr, 1));
-      src_addr = XEXP (src_addr, 0);
-    }
-  if (!REG_P (src_addr))
-    return false;
+(define_special_predicate "load_multiple_operation_stack_and_return"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*stack_only=*/true, 
+                                    /*return_pc*/true);
+})
 
-  for (; i < count; i++)
-    {
-      elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_stack"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*stack_only=*/true, 
+                                    /*return_pc*/false);
+})
 
-      if (GET_CODE (elt) != SET
-          || GET_CODE (SET_DEST (elt)) != REG
-          || GET_MODE (SET_DEST (elt)) != SImode
-          || REGNO (SET_DEST (elt)) <= dest_regno
-          || GET_CODE (SET_SRC (elt)) != MEM
-          || GET_MODE (SET_SRC (elt)) != SImode
-          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
-	       || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
-	       || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
-	       || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
-	      && (!REG_P (XEXP (SET_SRC (elt), 0))
-		  || offset + (i - base) * 4 != 0)))
-        return false;
-      dest_regno = REGNO (SET_DEST (elt));
-      if (dest_regno == REGNO (src_addr))
-        addr_reg_loaded = true;
-    }
-  /* For Thumb, we only have updating instructions.  If the pattern does
-     not describe an update, it must be because the address register is
-     in the list of loaded registers - on the hardware, this has the effect
-     of overriding the update.  */
-  if (update && addr_reg_loaded)
-    return false;
-  if (TARGET_THUMB1)
-    return update || addr_reg_loaded;
-  return true;
+(define_special_predicate "load_multiple_operation_stack_fp"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/true,
+                                    DFmode, /*stack_only=*/true, 
+                                    /*return_pc*/false);
 })
 
 (define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 9a11012..661ffa2 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
    (set_attr "length" "20")]
 )
 
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
   [(return)]
-  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+  "(TARGET_THUMB2)"
   "*
   {
-    return output_return_instruction (const_true_rtx, TRUE, FALSE);
+    thumb2_output_return (const_true_rtx);
+    return \"\";
   }"
-  [(set_attr "type" "load1")
-   (set_attr "length" "12")]
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")]
 )
 
 (define_insn_and_split "thumb2_eh_return"

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-12-09 11:10 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <4e83484c.03c7640a.2591.10bdSMTPIN_ADDED@mx.google.com>
2011-10-21 12:52 ` [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL Ramana Radhakrishnan
2011-11-07  9:49   ` Sameera Deshpande
2011-11-07 10:07     ` Paul Brook
2011-11-07 17:32       ` Sameera Deshpande
2011-09-28 17:29 Sameera Deshpande
2011-11-10 14:28 ` Richard Earnshaw
2011-11-10 15:28   ` Sameera Deshpande
2011-11-10 19:07   ` Sameera Deshpande
2011-11-19  0:01     ` Ramana Radhakrishnan
2011-11-22  5:14       ` Xinyu Qi
2011-11-22 12:16         ` Sameera Deshpande
2011-11-22 13:07           ` Ramana Radhakrishnan
2011-11-23 10:55             ` Xinyu Qi
2011-12-01 11:50             ` Sameera Deshpande
2011-12-09 11:10               ` Ramana Radhakrishnan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).