public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* unwind info for epilogues
@ 2009-05-06 20:21 Richard Henderson
  2009-05-06 20:32 ` Joseph S. Myers
  2009-05-20  0:49 ` Ian Lance Taylor
  0 siblings, 2 replies; 26+ messages in thread
From: Richard Henderson @ 2009-05-06 20:21 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1137 bytes --]

Since the beginning, -fasynchronous-unwind-tables has not held correct 
information for function epilogues.  This is an attempt at adding that.

In developing this patch, I tried several different approaches before 
settling on this.  In particular, a code interpretation scheme such as
we use for prologues defeated me.  So in the end I've added enough REG 
notes to let the port maintainer effectively control the dwarf2 
output directly.  Frankly, I'm considering converting the prologue to 
use the same notes and scrapping the existing interpretation code, which 
is extraordinarily complex.  And incorrect in the middle of the prologue 
for the stack realignment case (the state at the end of the prologue is 
correct, but one intermediate state isn't).

Since this does require changes from port maintainers to their epilogue 
generation code, this currently only has any effect for x86, but should 
not actively fail for other targets.  Although I'd appreciate folks 
checking that, since I do fiddle around with NOTE_INSN_EPILOGUE_BEG.

I'm on vacation for the next week; I'll have a look at comments when I 
get back.


r~

[-- Attachment #2: d-epilogue-1 --]
[-- Type: text/plain, Size: 44286 bytes --]

--- cfgcleanup.c	(revision 147209)
+++ cfgcleanup.c	(local)
@@ -1672,8 +1672,7 @@ try_crossjump_to_edge (int mode, edge e1
   /* Skip possible basic block header.  */
   if (LABEL_P (newpos1))
     newpos1 = NEXT_INSN (newpos1);
-
-  if (NOTE_P (newpos1))
+  if (NOTE_INSN_BASIC_BLOCK_P (newpos1))
     newpos1 = NEXT_INSN (newpos1);
 
   redirect_from = split_block (src1, PREV_INSN (newpos1))->src;
--- cfglayout.c	(revision 147209)
+++ cfglayout.c	(local)
@@ -1153,23 +1153,19 @@ duplicate_insn_chain (rtx from, rtx to)
 	    case NOTE_INSN_DELETED:
 	    case NOTE_INSN_DELETED_LABEL:
 	      /* No problem to strip these.  */
-	    case NOTE_INSN_EPILOGUE_BEG:
-	      /* Debug code expect these notes to exist just once.
-		 Keep them in the master copy.
-		 ??? It probably makes more sense to duplicate them for each
-		 epilogue copy.  */
 	    case NOTE_INSN_FUNCTION_BEG:
 	      /* There is always just single entry to function.  */
 	    case NOTE_INSN_BASIC_BLOCK:
 	      break;
 
+	    case NOTE_INSN_EPILOGUE_BEG:
+	    case NOTE_INSN_SIBCALL_BEG:
 	    case NOTE_INSN_SWITCH_TEXT_SECTIONS:
 	      emit_note_copy (insn);
 	      break;
 
 	    default:
-	      /* All other notes should have already been eliminated.
-	       */
+	      /* All other notes should have already been eliminated.  */
 	      gcc_unreachable ();
 	    }
 	  break;
--- cfgrtl.c	(revision 147209)
+++ cfgrtl.c	(local)
@@ -86,8 +86,17 @@ static void rtl_make_forwarder_block (ed
 static int
 can_delete_note_p (const_rtx note)
 {
-  return (NOTE_KIND (note) == NOTE_INSN_DELETED
-	  || NOTE_KIND (note) == NOTE_INSN_BASIC_BLOCK);
+  switch (NOTE_KIND (note))
+    {
+    case NOTE_INSN_DELETED:
+    case NOTE_INSN_BASIC_BLOCK:
+    case NOTE_INSN_EPILOGUE_BEG:
+    case NOTE_INSN_SIBCALL_BEG:
+      return true;
+
+    default:
+      return false;
+    }
 }
 
 /* True if a given label can be deleted.  */
--- config/i386/i386.c	(revision 147209)
+++ config/i386/i386.c	(local)
@@ -7587,6 +7587,9 @@ output_set_got (rtx dest, rtx label ATTR
 static rtx
 gen_push (rtx arg)
 {
+  if (ix86_cfa_state->reg == stack_pointer_rtx)
+    ix86_cfa_state->offset += UNITS_PER_WORD;
+
   return gen_rtx_SET (VOIDmode,
 		      gen_rtx_MEM (Pmode,
 				   gen_rtx_PRE_DEC (Pmode,
@@ -7646,8 +7649,7 @@ ix86_save_reg (unsigned int regno, int m
 	}
     }
 
-  if (crtl->drap_reg
-      && regno == REGNO (crtl->drap_reg))
+  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
     return 1;
 
   return (df_regs_ever_live_p (regno)
@@ -7983,7 +7985,8 @@ ix86_emit_save_sse_regs_using_mov (rtx p
    otherwise.  */
 
 static void
-pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
+			   int style, bool set_cfa)
 {
   rtx insn;
 
@@ -8001,13 +8004,23 @@ pro_epilogue_adjust_stack (rtx dest, rtx
       gcc_assert (style);
       r11 = gen_rtx_REG (DImode, R11_REG);
       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
-      if (style < 0)
-	RTX_FRAME_RELATED_P (insn) = 1;
       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
 							       offset));
     }
-  if (style < 0)
-    RTX_FRAME_RELATED_P (insn) = 1;
+
+  if (set_cfa)
+    {
+      rtx r;
+
+      gcc_assert (ix86_cfa_state->reg == src);
+      ix86_cfa_state->offset += INTVAL (offset);
+      ix86_cfa_state->reg = dest;
+    
+      r = gen_rtx_PLUS (Pmode, src, offset);
+      r = gen_rtx_SET (VOIDmode, dest, r);
+      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
 }
 
 /* Find an available register to be used as dynamic realign argument
@@ -8142,30 +8155,6 @@ ix86_internal_arg_pointer (void)
   return virtual_incoming_args_rtx;
 }
 
-/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
-   This is called from dwarf2out.c to emit call frame instructions
-   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
-static void
-ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
-{
-  rtx unspec = SET_SRC (pattern);
-  gcc_assert (GET_CODE (unspec) == UNSPEC);
-
-  switch (index)
-    {
-    case UNSPEC_REG_SAVE:
-      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
-			      SET_DEST (pattern));
-      break;
-    case UNSPEC_DEF_CFA:
-      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
-			 INTVAL (XVECEXP (unspec, 0, 0)));
-      break;
-    default:
-      gcc_unreachable ();
-    }
-}
-
 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
    to be generated in correct form.  */
 static void 
@@ -8209,6 +8198,10 @@ ix86_expand_prologue (void)
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
 
+  /* Initialize CFA state for before the prologue.  */
+  ix86_cfa_state->reg = stack_pointer_rtx;
+  ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
+
   ix86_compute_frame_layout (&frame);
 
   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
@@ -8238,6 +8231,7 @@ ix86_expand_prologue (void)
 
       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
       RTX_FRAME_RELATED_P (insn) = 1; 
+      ix86_cfa_state->reg = crtl->drap_reg;
 
       /* Align the stack.  */
       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
@@ -8266,6 +8260,9 @@ ix86_expand_prologue (void)
 
       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
       RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+        ix86_cfa_state->reg = hard_frame_pointer_rtx;
     }
 
   if (stack_realign_fp)
@@ -8304,7 +8301,8 @@ ix86_expand_prologue (void)
     ;
   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-			       GEN_INT (-allocate), -1);
+			       GEN_INT (-allocate), -1,
+			       ix86_cfa_state->reg == stack_pointer_rtx);
   else
     {
       /* Only valid for Win32.  */
@@ -8332,10 +8330,15 @@ ix86_expand_prologue (void)
       else
 	insn = gen_allocate_stack_worker_32 (eax, eax);
       insn = emit_insn (insn);
-      RTX_FRAME_RELATED_P (insn) = 1;
-      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
-      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
-      add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+	{
+	  ix86_cfa_state->offset += allocate;
+	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
+	  t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
 
       if (eax_live)
 	{
@@ -8442,18 +8445,96 @@ ix86_expand_prologue (void)
     emit_insn (gen_cld ());
 }
 
+/* Emit code to restore REG using a POP insn.  */
+
+static void
+ix86_emit_restore_reg_using_pop (rtx reg)
+{
+  rtx insn = emit_insn (ix86_gen_pop1 (reg));
+
+  if (ix86_cfa_state->reg == crtl->drap_reg
+      && REGNO (reg) == REGNO (crtl->drap_reg))
+    {
+      /* Previously we'd represented the CFA as an expression
+	 like *(%ebp - 8).  We've just popped that value from
+	 the stack, which means we need to reset the CFA to
+	 the drap register.  This will remain until we restore
+	 the stack pointer.  */
+      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      return;
+    }
+
+  if (ix86_cfa_state->reg == stack_pointer_rtx)
+    {
+      ix86_cfa_state->offset -= UNITS_PER_WORD;
+      add_reg_note (insn, REG_CFA_ADJUST_CFA,
+		    copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+    }
+
+  /* When the frame pointer is the CFA, and we pop it, we are
+     swapping back to the stack pointer as the CFA.  This happens
+     for stack frames that don't allocate other data, so we assume
+     the stack pointer is now pointing at the return address, i.e.
+     the function entry state, which makes the offset be 1 word.  */
+  else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
+	   && reg == hard_frame_pointer_rtx)
+    {
+      ix86_cfa_state->reg = stack_pointer_rtx;
+      ix86_cfa_state->offset = UNITS_PER_WORD;
+
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+				  GEN_INT (UNITS_PER_WORD)));
+    }
+
+  add_reg_note (insn, REG_CFA_RESTORE, reg);
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* Emit code to restore saved registers using POP insns.  */
+
+static void
+ix86_emit_restore_regs_using_pop (void)
+{
+  int regno;
+
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
+      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
+}
+
+/* Emit code and notes for the LEAVE instruction.  */
+
+static void
+ix86_emit_leave (void)
+{
+  rtx insn = emit_insn (ix86_gen_leave ());
+
+  if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
+    {
+      add_reg_note (insn, REG_CFA_ADJUST_CFA, 
+		    copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
+      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+}
+
 /* Emit code to restore saved registers using MOV insns.  First register
    is restored from POINTER + OFFSET.  */
 static void
 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
 				  int maybe_eh_return)
 {
-  int regno;
+  unsigned int regno;
   rtx base_address = gen_rtx_MEM (Pmode, pointer);
+  rtx insn;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
       {
+	rtx reg = gen_rtx_REG (Pmode, regno);
+
 	/* Ensure that adjust_address won't be forced to produce pointer
 	   out of range allowed by x86-64 instruction set.  */
 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
@@ -8466,9 +8547,23 @@ ix86_emit_restore_regs_using_mov (rtx po
 	    base_address = gen_rtx_MEM (Pmode, r11);
 	    offset = 0;
 	  }
-	emit_move_insn (gen_rtx_REG (Pmode, regno),
-	                adjust_address (base_address, Pmode, offset));
+	insn = emit_move_insn (reg,
+			       adjust_address (base_address, Pmode, offset));
 	offset += UNITS_PER_WORD;
+
+        if (ix86_cfa_state->reg == crtl->drap_reg
+	    && regno == REGNO (crtl->drap_reg))
+	  {
+	    /* Previously we'd represented the CFA as an expression
+	       like *(%ebp - 8).  We've just popped that value from
+	       the stack, which means we need to reset the CFA to
+	       the drap register.  This will remain until we restore
+	       the stack pointer.  */
+	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+	  }
+	else
+	  add_reg_note (insn, REG_CFA_RESTORE, reg);
+	RTX_FRAME_RELATED_P (insn) = 1;
       }
 }
 
@@ -8480,11 +8575,13 @@ ix86_emit_restore_sse_regs_using_mov (rt
 {
   int regno;
   rtx base_address = gen_rtx_MEM (TImode, pointer);
-  rtx mem;
+  rtx mem, insn;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
       {
+	rtx reg = gen_rtx_REG (TImode, regno);
+
 	/* Ensure that adjust_address won't be forced to produce pointer
 	   out of range allowed by x86-64 instruction set.  */
 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
@@ -8499,8 +8596,11 @@ ix86_emit_restore_sse_regs_using_mov (rt
 	  }
 	mem = adjust_address (base_address, TImode, offset);
 	set_mem_align (mem, 128);
-	emit_move_insn (gen_rtx_REG (TImode, regno), mem);
+	insn = emit_move_insn (reg, mem);
 	offset += 16;
+
+	add_reg_note (insn, REG_CFA_RESTORE, reg);
+	RTX_FRAME_RELATED_P (insn) = 1;
       }
 }
 
@@ -8509,10 +8609,11 @@ ix86_emit_restore_sse_regs_using_mov (rt
 void
 ix86_expand_epilogue (int style)
 {
-  int regno;
   int sp_valid;
   struct ix86_frame frame;
   HOST_WIDE_INT offset;
+  struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
+  bool using_drap;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -8538,6 +8639,9 @@ ix86_expand_epilogue (int style)
   offset *= -UNITS_PER_WORD;
   offset -= frame.nsseregs * 16 + frame.padding0;
 
+  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
+  gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
+
   /* If we're only restoring one register and sp is not valid then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.
@@ -8552,7 +8656,8 @@ ix86_expand_epilogue (int style)
       || (TARGET_EPILOGUE_USING_MOVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
-      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
+      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
+	  && frame.to_allocate)
       || (frame_pointer_needed && TARGET_USE_LEAVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && (frame.nregs + frame.nsseregs) == 1)
@@ -8606,7 +8711,7 @@ ix86_expand_epilogue (int style)
 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
 
 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
-					 const0_rtx, style);
+					 const0_rtx, style, false);
 	    }
 	  else
 	    {
@@ -8624,18 +8729,18 @@ ix86_expand_epilogue (int style)
 					    + frame.nregs * UNITS_PER_WORD
 					    + frame.nsseregs * 16
 					    + frame.padding0),
-				   style);
+				   style, !using_drap);
       /* If not an i386, mov & pop is faster than "leave".  */
       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	emit_insn ((*ix86_gen_leave) ());
+	ix86_emit_leave ();
       else
 	{
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
 				     hard_frame_pointer_rtx,
-				     const0_rtx, style);
+				     const0_rtx, style, !using_drap);
 
-	  emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
 	}
     }
   else
@@ -8653,11 +8758,12 @@ ix86_expand_epilogue (int style)
           gcc_assert (!stack_realign_fp);
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
 				     hard_frame_pointer_rtx,
-				     GEN_INT (offset), style);
+				     GEN_INT (offset), style, false);
           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
 					        frame.to_allocate, style == 2);
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-				     GEN_INT (frame.nsseregs * 16), style);
+				     GEN_INT (frame.nsseregs * 16),
+				     style, false);
 	}
       else if (frame.to_allocate || frame.nsseregs)
 	{
@@ -8667,18 +8773,18 @@ ix86_expand_epilogue (int style)
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				     GEN_INT (frame.to_allocate
 				     	      + frame.nsseregs * 16
-					      + frame.padding0), style);
+					      + frame.padding0), style,
+				     !using_drap && !frame_pointer_needed);
 	}
 
-      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-	if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
-	  emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
+      ix86_emit_restore_regs_using_pop ();
+
       if (frame_pointer_needed)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
 	     able to grok it fast.  */
 	  if (TARGET_USE_LEAVE)
-	    emit_insn ((*ix86_gen_leave) ());
+	    ix86_emit_leave ();
 	  else
             {
               /* For stack realigned really happens, recover stack 
@@ -8687,47 +8793,70 @@ ix86_expand_epilogue (int style)
               if (stack_realign_fp)
 		pro_epilogue_adjust_stack (stack_pointer_rtx,
 					   hard_frame_pointer_rtx,
-					   const0_rtx, style);
-	      emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+					   const0_rtx, style, !using_drap);
+	      ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
             }
 	}
     }
 
-  if (crtl->drap_reg && crtl->stack_realign_needed)
+  if (using_drap)
     {
       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
 			      ? 0 : UNITS_PER_WORD);
+      rtx insn;
+
       gcc_assert (stack_realign_drap);
-      emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
-				   crtl->drap_reg,
-				   GEN_INT (-(UNITS_PER_WORD
-					      + param_ptr_offset))));
-      if (!call_used_regs[REGNO (crtl->drap_reg)])
-	emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
-      
+
+      insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
+					  crtl->drap_reg,
+					  GEN_INT (-(UNITS_PER_WORD
+						     + param_ptr_offset))));
+
+      ix86_cfa_state->reg = stack_pointer_rtx;
+      ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
+
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
+				  GEN_INT (ix86_cfa_state->offset)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (param_ptr_offset)
+	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
     }
 
   /* Sibcall epilogues don't want a return instruction.  */
   if (style == 0)
-    return;
+    {
+      *ix86_cfa_state = cfa_state_save;
+      return;
+    }
 
   if (crtl->args.pops_args && crtl->args.size)
     {
       rtx popc = GEN_INT (crtl->args.pops_args);
 
-      /* i386 can only pop 64K bytes.  If asked to pop more, pop
-	 return address, do explicit add, and jump indirectly to the
-	 caller.  */
+      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
+	 address, do explicit add, and jump indirectly to the caller.  */
 
       if (crtl->args.pops_args >= 65536)
 	{
 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
+	  rtx insn;
 
 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
 	  gcc_assert (!TARGET_64BIT);
 
-	  emit_insn (gen_popsi1 (ecx));
-	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
+	  insn = emit_insn (gen_popsi1 (ecx));
+	  ix86_cfa_state->offset -= UNITS_PER_WORD;
+
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
+			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+	  add_reg_note (insn, REG_CFA_REGISTER,
+			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+
+	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				     popc, -1, true);
 	  emit_jump_insn (gen_return_indirect_internal (ecx));
 	}
       else
@@ -8735,6 +8864,10 @@ ix86_expand_epilogue (int style)
     }
   else
     emit_jump_insn (gen_return_internal ());
+
+  /* Restore the state back to the state from the prologue,
+     so that it's correct for the next epilogue.  */
+  *ix86_cfa_state = cfa_state_save;
 }
 
 /* Reset from the function's potential modifications.  */
@@ -30203,8 +30336,6 @@ ix86_enum_va_list (int idx, const char *
 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
 #undef TARGET_GET_DRAP_RTX
 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
-#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
-#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
 #undef TARGET_STRICT_ARGUMENT_NAMING
 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
 
--- config/i386/i386.h	(revision 147209)
+++ config/i386/i386.h	(local)
@@ -2392,6 +2392,15 @@ enum ix86_stack_slot
 \f
 #define FASTCALL_PREFIX '@'
 \f
+/* Machine specific CFA tracking during prologue/epilogue generation.  */
+
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS)
+struct GTY(()) machine_cfa_state
+{
+  rtx reg;
+  HOST_WIDE_INT offset;
+};
+
 struct GTY(()) machine_function {
   struct stack_local_entry *stack_locals;
   const char *some_ld_name;
@@ -2418,8 +2427,10 @@ struct GTY(()) machine_function {
   int tls_descriptor_call_expanded_p;
   /* This value is used for amd64 targets and specifies the current abi
      to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi.  */
-   enum calling_abi call_abi;
+  enum calling_abi call_abi;
+  struct machine_cfa_state cfa;
 };
+#endif
 
 #define ix86_stack_locals (cfun->machine->stack_locals)
 #define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
@@ -2435,6 +2446,7 @@ struct GTY(()) machine_function {
    REG_SP is live.  */
 #define ix86_current_function_calls_tls_descriptor \
   (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
+#define ix86_cfa_state (&cfun->machine->cfa)
 
 /* Control behavior of x86_file_start.  */
 #define X86_FILE_START_VERSION_DIRECTIVE false
--- dwarf2out.c	(revision 147209)
+++ dwarf2out.c	(local)
@@ -247,7 +247,8 @@ typedef struct GTY(()) cfa_loc {
   HOST_WIDE_INT offset;
   HOST_WIDE_INT base_offset;
   unsigned int reg;
-  int indirect;            /* 1 if CFA is accessed via a dereference.  */
+  BOOL_BITFIELD indirect : 1;  /* 1 if CFA is accessed via a dereference.  */
+  BOOL_BITFIELD in_use : 1;    /* 1 if a saved cfa is stored here.  */
 } dw_cfa_location;
 
 /* All call frame descriptions (FDE's) in the GCC generated DWARF
@@ -404,7 +405,7 @@ static const char *dwarf_cfi_name (unsig
 static dw_cfi_ref new_cfi (void);
 static void add_cfi (dw_cfi_ref *, dw_cfi_ref);
 static void add_fde_cfi (const char *, dw_cfi_ref);
-static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *);
+static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *, dw_cfa_location *);
 static void lookup_cfa (dw_cfa_location *);
 static void reg_save (const char *, unsigned, unsigned, HOST_WIDE_INT);
 #ifdef DWARF2_UNWIND_INFO
@@ -668,7 +669,10 @@ add_cfi (dw_cfi_ref *list_head, dw_cfi_r
 
   /* When DRAP is used, CFA is defined with an expression.  Redefine
      CFA may lead to a different CFA value.   */
-  if (fde && fde->drap_reg != INVALID_REGNUM)
+  /* ??? Of course, this heuristic fails when we're annotating epilogues,
+     because of course we'll always want to redefine the CFA back to the
+     stack pointer on the way out.  Where should we move this check?  */
+  if (0 && fde && fde->drap_reg != INVALID_REGNUM)
     switch (cfi->dw_cfi_opc)
       {
         case DW_CFA_def_cfa_register:
@@ -774,7 +778,7 @@ add_fde_cfi (const char *label, dw_cfi_r
 /* Subroutine of lookup_cfa.  */
 
 static void
-lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_location *loc)
+lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_location *loc, dw_cfa_location *remember)
 {
   switch (cfi->dw_cfi_opc)
     {
@@ -793,6 +797,18 @@ lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_loc
     case DW_CFA_def_cfa_expression:
       get_cfa_from_loc_descr (loc, cfi->dw_cfi_oprnd1.dw_cfi_loc);
       break;
+
+    case DW_CFA_remember_state:
+      gcc_assert (!remember->in_use);
+      *remember = *loc;
+      remember->in_use = 1;
+      break;
+    case DW_CFA_restore_state:
+      gcc_assert (remember->in_use);
+      *loc = *remember;
+      remember->in_use = 0;
+      break;
+
     default:
       break;
     }
@@ -805,19 +821,19 @@ lookup_cfa (dw_cfa_location *loc)
 {
   dw_cfi_ref cfi;
   dw_fde_ref fde;
+  dw_cfa_location remember;
 
+  memset (loc, 0, sizeof (*loc));
   loc->reg = INVALID_REGNUM;
-  loc->offset = 0;
-  loc->indirect = 0;
-  loc->base_offset = 0;
+  remember = *loc;
 
   for (cfi = cie_cfi_head; cfi; cfi = cfi->dw_cfi_next)
-    lookup_cfa_1 (cfi, loc);
+    lookup_cfa_1 (cfi, loc, &remember);
 
   fde = current_fde ();
   if (fde)
     for (cfi = fde->dw_fde_cfi; cfi; cfi = cfi->dw_cfi_next)
-      lookup_cfa_1 (cfi, loc);
+      lookup_cfa_1 (cfi, loc, &remember);
 }
 
 /* The current rule for calculating the DWARF2 canonical frame address.  */
@@ -827,6 +843,9 @@ static dw_cfa_location cfa;
    from the CFA.  */
 static dw_cfa_location cfa_store;
 
+/* The current save location around an epilogue.  */
+static dw_cfa_location cfa_remember;
+
 /* The running total of the size of arguments pushed onto the stack.  */
 static HOST_WIDE_INT args_size;
 
@@ -1660,6 +1679,156 @@ reg_saved_in (rtx reg)
    value, not an offset.  */
 static dw_cfa_location cfa_temp;
 
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_DEF_CFA note.  */
+
+static void
+dwarf2out_frame_debug_def_cfa (rtx pat, const char *label)
+{
+  memset (&cfa, 0, sizeof (cfa));
+
+  switch (GET_CODE (pat))
+    {
+    case PLUS:
+      cfa.reg = REGNO (XEXP (pat, 0));
+      cfa.offset = INTVAL (XEXP (pat, 1));
+      break;
+
+    case REG:
+      cfa.reg = REGNO (pat);
+      break;
+
+    default:
+      /* Recurse and define an expression.  */
+      gcc_unreachable ();
+    }
+
+  def_cfa_1 (label, &cfa);
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_ADJUST_CFA note.  */
+
+static void
+dwarf2out_frame_debug_adjust_cfa (rtx pat, const char *label)
+{
+  rtx src, dest;
+
+  gcc_assert (GET_CODE (pat) == SET);
+  dest = XEXP (pat, 0);
+  src = XEXP (pat, 1);
+
+  switch (GET_CODE (src))
+    {
+    case PLUS:
+      gcc_assert (REGNO (XEXP (src, 0)) == cfa.reg);
+      cfa.offset -= INTVAL (XEXP (src, 1));
+      break;
+
+    case REG:
+	break;
+
+    default:
+	gcc_unreachable ();
+    }
+
+  cfa.reg = REGNO (dest);
+  gcc_assert (cfa.indirect == 0);
+
+  def_cfa_1 (label, &cfa);
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_OFFSET note.  */
+
+static void
+dwarf2out_frame_debug_cfa_offset (rtx set, const char *label)
+{
+  HOST_WIDE_INT offset;
+  rtx src, addr, span;
+
+  src = XEXP (set, 1);
+  addr = XEXP (set, 0);
+  gcc_assert (MEM_P (addr));
+  addr = XEXP (addr, 0);
+  
+  /* As documented, only consider extremely simple addresses.  */
+  switch (GET_CODE (addr))
+    {
+    case REG:
+      gcc_assert (REGNO (addr) == cfa.reg);
+      offset = -cfa.offset;
+      break;
+    case PLUS:
+      gcc_assert (REGNO (XEXP (addr, 0)) == cfa.reg);
+      offset = INTVAL (XEXP (addr, 1)) - cfa.offset;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  span = targetm.dwarf_register_span (src);
+
+  /* ??? We'd like to use queue_reg_save, but we need to come up with
+     a different flushing heuristic for epilogues.  */
+  if (!span)
+    reg_save (label, DWARF_FRAME_REGNUM (REGNO (src)), INVALID_REGNUM, offset);
+  else
+    {
+      /* We have a PARALLEL describing where the contents of SRC live.
+   	 Queue register saves for each piece of the PARALLEL.  */
+      int par_index;
+      int limit;
+      HOST_WIDE_INT span_offset = offset;
+
+      gcc_assert (GET_CODE (span) == PARALLEL);
+
+      limit = XVECLEN (span, 0);
+      for (par_index = 0; par_index < limit; par_index++)
+	{
+	  rtx elem = XVECEXP (span, 0, par_index);
+
+	  reg_save (label, DWARF_FRAME_REGNUM (REGNO (elem)),
+		    INVALID_REGNUM, span_offset);
+	  span_offset += GET_MODE_SIZE (GET_MODE (elem));
+	}
+    }
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_REGISTER note.  */
+
+static void
+dwarf2out_frame_debug_cfa_register (rtx set, const char *label)
+{
+  rtx src, dest;
+  unsigned sregno, dregno;
+
+  src = XEXP (set, 1);
+  dest = XEXP (set, 0);
+
+  if (src == pc_rtx)
+    sregno = DWARF_FRAME_RETURN_COLUMN;
+  else
+    sregno = DWARF_FRAME_REGNUM (REGNO (src));
+
+  dregno = DWARF_FRAME_REGNUM (REGNO (dest));
+
+  /* ??? We'd like to use queue_reg_save, but we need to come up with
+     a different flushing heuristic for epilogues.  */
+  reg_save (label, sregno, dregno, 0);
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE note.  */
+
+static void
+dwarf2out_frame_debug_cfa_restore (rtx reg, const char *label)
+{
+  dw_cfi_ref cfi = new_cfi ();
+  unsigned int regno = DWARF_FRAME_REGNUM (REGNO (reg));
+
+  cfi->dw_cfi_opc = (regno & ~0x3f ? DW_CFA_restore_extended : DW_CFA_restore);
+  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = regno;
+
+  add_fde_cfi (label, cfi);
+}
+
 /* Record call frame debugging information for an expression EXPR,
    which either sets SP or FP (adjusting how we calculate the frame
    address) or saves a register to the stack or another register.
@@ -2367,7 +2536,8 @@ void
 dwarf2out_frame_debug (rtx insn, bool after_p)
 {
   const char *label;
-  rtx src;
+  rtx note, n;
+  bool handled_one = false;
 
   if (insn == NULL_RTX)
     {
@@ -2412,15 +2582,160 @@ dwarf2out_frame_debug (rtx insn, bool af
     }
 
   label = dwarf2out_cfi_label ();
-  src = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
-  if (src)
-    insn = XEXP (src, 0);
-  else
-    insn = PATTERN (insn);
 
+  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+    switch (REG_NOTE_KIND (note))
+      {
+      case REG_FRAME_RELATED_EXPR:
+	insn = XEXP (note, 0);
+	goto found;
+
+      case REG_CFA_DEF_CFA:
+	dwarf2out_frame_debug_def_cfa (XEXP (note, 0), label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_ADJUST_CFA:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  {
+	    n = PATTERN (insn);
+	    if (GET_CODE (n) == PARALLEL)
+	      n = XVECEXP (n, 0, 0);
+	  }
+	dwarf2out_frame_debug_adjust_cfa (n, label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_OFFSET:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  n = single_set (insn);
+	dwarf2out_frame_debug_cfa_offset (n, label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_REGISTER:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  {
+	    n = PATTERN (insn);
+	    if (GET_CODE (n) == PARALLEL)
+	      n = XVECEXP (n, 0, 0);
+	  }
+	dwarf2out_frame_debug_cfa_register (n, label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_RESTORE:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  {
+	    n = PATTERN (insn);
+	    if (GET_CODE (n) == PARALLEL)
+	      n = XVECEXP (n, 0, 0);
+	    n = XEXP (n, 0);
+	  }
+	dwarf2out_frame_debug_cfa_restore (n, label);
+	handled_one = true;
+	break;
+
+      default:
+	break;
+      }
+  if (handled_one)
+    return;
+
+  insn = PATTERN (insn);
+ found:
   dwarf2out_frame_debug_expr (insn, label);
 }
 
+/* Determine if we need to save and restore CFI information around this
+   epilogue.  If SIBCALL is true, then this is a sibcall epilogue.  If
+   we do need to save/restore, then emit the save now, and insert a
+   NOTE_INSN_CFA_RESTORE_STATE at the appropriate place in the stream.  */
+
+void
+dwarf2out_begin_epilogue (rtx insn, bool sibcall)
+{
+  bool saw_frp = false;
+  rtx i;
+  dw_cfi_ref cfi;
+
+  /* Scan forward to the return insn, noticing if there are possible
+     frame related insns.  */
+  for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
+    {
+      if (!INSN_P (i))
+	continue;
+
+      if (sibcall)
+	{
+	  if (CALL_P (i) && SIBLING_CALL_P (i))
+	    break;
+	}
+      else if (returnjump_p (i))
+	break;
+
+      if (RTX_FRAME_RELATED_P (i))
+	saw_frp = true;
+    }
+
+  /* If the port doesn't emit epilogue unwind info, we don't need a
+     save/restore pair.  */
+  if (!saw_frp)
+    return;
+
+  /* Otherwise, search forward to see if the return insn was the last
+     basic block of the function.  If so, we don't need save/restore.  */
+  gcc_assert (i != NULL);
+  i = next_real_insn (i);
+  if (i == NULL)
+    return;
+
+  /* Insert the restore before that next real insn in the stream, and before
+     a potential NOTE_INSN_EPILOGUE_BEG -- we do need these notes to be
+     properly nested.  This should be after any label or alignment.  This
+     will be pushed into the CFI stream by the function below.  */
+  while (1)
+    {
+      rtx p = PREV_INSN (i);
+      if (!NOTE_P (p))
+	break;
+      if (NOTE_KIND (p) == NOTE_INSN_BASIC_BLOCK)
+	break;
+      i = p;
+    }
+  emit_note_before (NOTE_INSN_CFA_RESTORE_STATE, i);
+
+  /* Emit the state save.  */
+  cfi = new_cfi (); 
+  cfi->dw_cfi_opc = DW_CFA_remember_state;
+  add_fde_cfi (dwarf2out_cfi_label (), cfi);
+
+  /* And emulate the state save.  */
+  gcc_assert (!cfa_remember.in_use);
+  cfa_remember = cfa;
+  cfa_remember.in_use = 1;
+}
+
+/* A "subroutine" of dwarf2out_begin_epilogue.  Emit the restore required.  */
+
+void
+dwarf2out_frame_debug_restore_state (void)
+{
+  dw_cfi_ref cfi = new_cfi (); 
+  const char *label = dwarf2out_cfi_label ();
+
+  cfi->dw_cfi_opc = DW_CFA_restore_state;
+  add_fde_cfi (label, cfi);
+
+  gcc_assert (cfa_remember.in_use);
+  cfa = cfa_remember;
+  cfa_remember.in_use = 0;
+}
+
 #endif
 
 /* Describe for the GTY machinery what parts of dw_cfi_oprnd1 are used.  */
@@ -2434,6 +2749,8 @@ dw_cfi_oprnd1_desc (enum dwarf_call_fram
     {
     case DW_CFA_nop:
     case DW_CFA_GNU_window_save:
+    case DW_CFA_remember_state:
+    case DW_CFA_restore_state:
       return dw_cfi_oprnd_unused;
 
     case DW_CFA_set_loc:
@@ -2448,6 +2765,7 @@ dw_cfi_oprnd1_desc (enum dwarf_call_fram
     case DW_CFA_def_cfa:
     case DW_CFA_offset_extended_sf:
     case DW_CFA_def_cfa_sf:
+    case DW_CFA_restore:
     case DW_CFA_restore_extended:
     case DW_CFA_undefined:
     case DW_CFA_same_value:
@@ -2771,6 +3089,13 @@ output_cfi_directive (dw_cfi_ref cfi)
 	       cfi->dw_cfi_oprnd1.dw_cfi_offset);
       break;
 
+    case DW_CFA_remember_state:
+      fprintf (asm_out_file, "\t.cfi_remember_state\n");
+      break;
+    case DW_CFA_restore_state:
+      fprintf (asm_out_file, "\t.cfi_restore_state\n");
+      break;
+
     case DW_CFA_GNU_args_size:
       fprintf (asm_out_file, "\t.cfi_escape 0x%x,", DW_CFA_GNU_args_size);
       dw2_asm_output_data_uleb128_raw (cfi->dw_cfi_oprnd1.dw_cfi_offset);
@@ -12108,6 +12433,7 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
   dw_cfi_ref cfi;
   dw_cfa_location last_cfa, next_cfa;
   const char *start_label, *last_label, *section;
+  dw_cfa_location remember;
 
   fde = current_fde ();
   gcc_assert (fde != NULL);
@@ -12116,17 +12442,16 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
   list_tail = &list;
   list = NULL;
 
+  memset (&next_cfa, 0, sizeof (next_cfa));
   next_cfa.reg = INVALID_REGNUM;
-  next_cfa.offset = 0;
-  next_cfa.indirect = 0;
-  next_cfa.base_offset = 0;
+  remember = next_cfa;
 
   start_label = fde->dw_fde_begin;
 
   /* ??? Bald assumption that the CIE opcode list does not contain
      advance opcodes.  */
   for (cfi = cie_cfi_head; cfi; cfi = cfi->dw_cfi_next)
-    lookup_cfa_1 (cfi, &next_cfa);
+    lookup_cfa_1 (cfi, &next_cfa, &remember);
 
   last_cfa = next_cfa;
   last_label = start_label;
@@ -12153,14 +12478,10 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
 
       case DW_CFA_advance_loc:
 	/* The encoding is complex enough that we should never emit this.  */
-      case DW_CFA_remember_state:
-      case DW_CFA_restore_state:
-	/* We don't handle these two in this function.  It would be possible
-	   if it were to be required.  */
 	gcc_unreachable ();
 
       default:
-	lookup_cfa_1 (cfi, &next_cfa);
+	lookup_cfa_1 (cfi, &next_cfa, &remember);
 	break;
       }
 
--- dwarf2out.h	(revision 147209)
+++ dwarf2out.h	(local)
@@ -20,6 +20,8 @@ along with GCC; see the file COPYING3.  
 
 extern void dwarf2out_decl (tree);
 extern void dwarf2out_frame_debug (rtx, bool);
+extern void dwarf2out_begin_epilogue (rtx, bool);
+extern void dwarf2out_frame_debug_restore_state (void);
 
 extern void debug_dwarf (void);
 struct die_struct;
--- emit-rtl.c	(revision 147209)
+++ emit-rtl.c	(local)
@@ -3335,6 +3335,10 @@ try_split (rtx pat, rtx trial, int last)
   rtx insn_last, insn;
   int njumps = 0;
 
+  /* We're not good at redistributing frame information.  */
+  if (RTX_FRAME_RELATED_P (trial))
+    return trial;
+
   if (any_condjump_p (trial)
       && (note = find_reg_note (trial, REG_BR_PROB, 0)))
     split_branch_probability = INTVAL (XEXP (note, 0));
@@ -5050,6 +5054,9 @@ copy_insn_1 (rtx orig)
   RTX_CODE code;
   const char *format_ptr;
 
+  if (orig == NULL)
+    return NULL;
+
   code = GET_CODE (orig);
 
   switch (code)
--- final.c	(revision 147209)
+++ final.c	(local)
@@ -1869,9 +1869,26 @@ final_scan_insn (rtx insn, FILE *file, i
 	  break;
 
 	case NOTE_INSN_EPILOGUE_BEG:
+#if defined (DWARF2_UNWIND_INFO) && defined (HAVE_epilogue)
+	  if (dwarf2out_do_frame ())
+	    dwarf2out_begin_epilogue (insn, false);
+#endif
 	  targetm.asm_out.function_begin_epilogue (file);
 	  break;
 
+	case NOTE_INSN_SIBCALL_BEG:
+#if defined (DWARF2_UNWIND_INFO) && defined (HAVE_sibcall)
+	  if (dwarf2out_do_frame ())
+	    dwarf2out_begin_epilogue (insn, true);
+#endif
+	  break;
+
+	case NOTE_INSN_CFA_RESTORE_STATE:
+#if defined (DWARF2_UNWIND_INFO)
+	  dwarf2out_frame_debug_restore_state ();
+#endif
+	  break;
+
 	case NOTE_INSN_FUNCTION_BEG:
 	  app_disable ();
 	  (*debug_hooks->end_prologue) (last_linenum, last_filename);
--- function.c	(revision 147209)
+++ function.c	(local)
@@ -5199,6 +5199,7 @@ epilogue_done:
 	}
 
       start_sequence ();
+      emit_note (NOTE_INSN_SIBCALL_BEG);
       emit_insn (gen_sibcall_epilogue ());
       seq = get_insns ();
       end_sequence ();
@@ -5240,24 +5241,28 @@ epilogue_done:
   df_update_entry_exit_and_calls ();
 }
 
-/* Reposition the prologue-end and epilogue-begin notes after instruction
-   scheduling and delayed branch scheduling.  */
+/* Reposition the prologue-end and epilogue-begin notes after
+   instruction scheduling.  */
 
 void
 reposition_prologue_and_epilogue_notes (void)
 {
-#if defined (HAVE_prologue) || defined (HAVE_epilogue)
+#if defined (HAVE_prologue) || defined (HAVE_epilogue) \
+    || defined (HAVE_sibcall_epilogue)
   rtx insn, last, note;
-  int len;
+  basic_block bb;
 
-  if ((len = VEC_length (int, prologue)) > 0)
+  if (!VEC_empty (int, prologue))
     {
       last = 0, note = 0;
 
-      /* Scan from the beginning until we reach the last prologue insn.
-	 We apparently can't depend on basic_block_{head,end} after
-	 reorg has run.  */
-      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+      /* ??? This won't work if the prologue generates a loop.  */
+      bb = BASIC_BLOCK (0);
+
+      /* Scan backward looking for the last prologue insn.  Hopefully
+	 this isn't a gigantic BB, so we aren't doing extra work.  On
+	 average this would seem to be a win.  */
+      FOR_BB_INSNS_REVERSE (bb, insn)
 	{
 	  if (NOTE_P (insn))
 	    {
@@ -5267,21 +5272,27 @@ reposition_prologue_and_epilogue_notes (
 	  else if (contains (insn, &prologue))
 	    {
 	      last = insn;
-	      if (--len == 0)
-		break;
+	      break;
 	    }
 	}
 
       if (last)
 	{
-	  /* Find the prologue-end note if we haven't already, and
-	     move it to just after the last prologue insn.  */
-	  if (note == 0)
+	  if (note == NULL)
 	    {
-	      for (note = last; (note = NEXT_INSN (note));)
-		if (NOTE_P (note)
-		    && NOTE_KIND (note) == NOTE_INSN_PROLOGUE_END)
-		  break;
+	      /* Scan forward looking for the PROLOGUE_END note.  It should
+		 be right at the beginning of the block, possibly with other
+		 insn notes that got moved there.  */
+	      FOR_BB_INSNS (bb, insn)
+		{
+		  if (NOTE_P (insn)
+		      && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+		    {
+		      note = insn;
+		      break;
+		    }
+		}
+	      gcc_assert (note != NULL);
 	    }
 
 	  /* Avoid placing note between CODE_LABEL and BASIC_BLOCK note.  */
@@ -5291,42 +5302,70 @@ reposition_prologue_and_epilogue_notes (
 	}
     }
 
-  if ((len = VEC_length (int, epilogue)) > 0)
+  if (!VEC_empty (int, epilogue) || !VEC_empty (int, sibcall_epilogue))
     {
-      last = 0, note = 0;
+      edge_iterator ei;
+      edge e;
+      int kind;
 
-      /* Scan from the end until we reach the first epilogue insn.
-	 We apparently can't depend on basic_block_{head,end} after
-	 reorg has run.  */
-      for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
 	{
-	  if (NOTE_P (insn))
-	    {
-	      if (NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
-		note = insn;
-	    }
-	  else if (contains (insn, &epilogue))
+	  last = 0, note = 0, kind = 0;
+	  bb = e->src;
+
+	  /* Scan from the beginning until we reach the first epilogue insn.
+	     Take the cue for whether this is a plain or sibcall epilogue
+	     from the kind of note we find first.  */
+	  FOR_BB_INSNS (bb, insn)
 	    {
-	      last = insn;
-	      if (--len == 0)
-		break;
+	      if (NOTE_P (insn))
+		{
+		  if (NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG
+		      || NOTE_KIND (insn) == NOTE_INSN_SIBCALL_BEG)
+		    {
+		      gcc_assert (kind == 0);
+		      kind = NOTE_KIND (insn);
+		      note = insn;
+		      if (last)
+			break;
+		    }
+		}
+	      else
+		{
+		  int c;
+
+		  switch (kind)
+		    {
+		    case NOTE_INSN_EPILOGUE_BEG:
+		      c = contains (insn, &epilogue);
+		      break;
+		    case NOTE_INSN_SIBCALL_BEG:
+		      c = contains (insn, &sibcall_epilogue);
+		      break;
+		    default:
+		      /* ??? Sometimes the note doesn't get sorted to the
+			 beginning like we'd expect.  Search both lists.  */
+		      c = contains (insn, &epilogue);
+		      if (!c)
+			c = contains (insn, &sibcall_epilogue);
+		      break;
+		    }
+
+		  if (c)
+		    {
+		      last = insn;
+		      if (kind != 0)
+			break;
+		    }
+		}
 	    }
-	}
-
-      if (last)
-	{
-	  /* Find the epilogue-begin note if we haven't already, and
-	     move it to just before the first epilogue insn.  */
-	  if (note == 0)
+	     
+	  if (last)
 	    {
-	      for (note = insn; (note = PREV_INSN (note));)
-		if (NOTE_P (note)
-		    && NOTE_KIND (note) == NOTE_INSN_EPILOGUE_BEG)
-		  break;
+	      gcc_assert (note != NULL);
+	      if (PREV_INSN (last) != note)
+		reorder_insns (note, note, PREV_INSN (last));
 	    }
-
-	  if (PREV_INSN (last) != note)
-	    reorder_insns (note, note, PREV_INSN (last));
 	}
     }
 #endif /* HAVE_prologue or HAVE_epilogue */
--- insn-notes.def	(revision 147209)
+++ insn-notes.def	(local)
@@ -53,6 +53,9 @@ INSN_NOTE (PROLOGUE_END)
 /* This marks the point immediately prior to the first epilogue insn.  */
 INSN_NOTE (EPILOGUE_BEG)
 
+/* This marks the point just before the first insn of a sibcall epilogue.  */
+INSN_NOTE (SIBCALL_BEG)
+
 /* These note where exception handling regions begin and end.
    Uses NOTE_EH_HANDLER to identify the region in question.  */
 INSN_NOTE (EH_REGION_BEG)
@@ -70,4 +73,8 @@ INSN_NOTE (BASIC_BLOCK)
    between hot and cold text sections.  */
 INSN_NOTE (SWITCH_TEXT_SECTIONS)
 
+/* Mark the restore point after an epilogue changed CFI data.  Used only
+   when an epilogue appears in the middle of a function.  */
+INSN_NOTE (CFA_RESTORE_STATE)
+
 #undef INSN_NOTE
--- reg-notes.def	(revision 147209)
+++ reg-notes.def	(local)
@@ -118,6 +118,41 @@ REG_NOTE (BR_PRED)
    instead of intuition.  */
 REG_NOTE (FRAME_RELATED_EXPR)
 
+/* Attached to insns that are RTX_FRAME_RELATED_P, but are too complex
+   for FRAME_RELATED_EXPR intuition.  The insn's first pattern must be
+   a SET, and the destination must be the CFA register.  The attached
+   rtx is an expression that defines the CFA.  In the simplest case, the
+   rtx could be just the stack_pointer_rtx; more common would be a PLUS
+   with a base register and a constant offset.  In the most complicated
+   cases, this will result in a DW_CFA_def_cfa_expression with the rtx
+   expression rendered in a dwarf location expression.  */
+REG_NOTE (CFA_DEF_CFA)
+
+/* Attached to insns that are RTX_FRAME_RELATED_P, but are too complex
+   for FRAME_RELATED_EXPR intuition.  This note adjusts the expression
+   from which the CFA is computed.  The attached rtx defines a new CFA
+   expression, relative to the old CFA expression.  This rtx must be of
+   the form (SET new-cfa-reg (PLUS old-cfa-reg const_int)).  If the note
+   rtx is NULL, we use the first SET of the insn.  */
+REG_NOTE (CFA_ADJUST_CFA)
+
+/* Similar to FRAME_RELATED_EXPR, with the additional information that
+   this is a save to memory, i.e. will result in DW_CFA_offset or the
+   like.  The pattern or the insn should be a simple store relative to
+   the CFA.  */
+REG_NOTE (CFA_OFFSET)
+
+/* Similar to FRAME_RELATED_EXPR, with the additional information that this
+   is a save to a register, i.e. will result in DW_CFA_register.  The insn
+   or the pattern should be a simple reg-reg move.  */
+REG_NOTE (CFA_REGISTER)
+
+/* Attached to insns that are RTX_FRAME_RELATED_P, with the information
+   that this is a restore operation, i.e. will result in DW_CFA_restore
+   or the like.  Either the attached rtx, or the destination of the insn's
+   first pattern is the register to be restored.  */
+REG_NOTE (CFA_RESTORE)
+
 /* Indicates that REG holds the exception context for the function.
    This context is shared by inline functions, so the code to acquire
    the real exception context is delayed until after inlining.  */

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-06 20:21 unwind info for epilogues Richard Henderson
@ 2009-05-06 20:32 ` Joseph S. Myers
  2009-05-30  0:49   ` Richard Henderson
  2009-05-20  0:49 ` Ian Lance Taylor
  1 sibling, 1 reply; 26+ messages in thread
From: Joseph S. Myers @ 2009-05-06 20:32 UTC (permalink / raw)
  To: gcc-patches

On Wed, 6 May 2009, Richard Henderson wrote:

> Since the beginning, -fasynchronous-unwind-tables has not held correct
> information for function epilogues.  This is an attempt at adding that.

See also Nathan Froyd's patch (doing this for x86_64) described in his 
2006 Summit paper.  I haven't compared the approaches (but fully support 
adding this feature).

http://gcc.gnu.org/ml/gcc-patches/2006-03/msg00426.html
http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01091.html

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-06 20:21 unwind info for epilogues Richard Henderson
  2009-05-06 20:32 ` Joseph S. Myers
@ 2009-05-20  0:49 ` Ian Lance Taylor
  2009-05-30  1:02   ` Richard Henderson
  1 sibling, 1 reply; 26+ messages in thread
From: Ian Lance Taylor @ 2009-05-20  0:49 UTC (permalink / raw)
  To: gcc-patches

Richard Henderson <rth@redhat.com> writes:

> Since the beginning, -fasynchronous-unwind-tables has not held correct
> information for function epilogues.  This is an attempt at adding
> that.
>
> In developing this patch, I tried several different approaches before
> settling on this.  In particular, a code interpretation scheme such as
> we use for prologues defeated me.  So in the end I've added enough REG
> notes to let the port maintainer to effectively control the dwarf2
> output directly.  Frankly, I'm considering converting the prologue to
> use the same notes and scrapping the existing interpretation code,
> which is extraordinarily complex.  And incorrect in the middle of the
> prologue for the stack realignment case (the state at the end of the
> prologue is correct, but one intermediate state isn't).
>
> Since this does require changes from port maintainers to their
> epilogue generation code, this currently only has any effect for x86,
> but should not actively fail for other targets.  Although I'd
> appreaciate folks checking that, since I do fiddle around with
> NOTE_INSN_EPILOGUE_BEG.

This approach seems clearly superior.  I wonder if you can provide some
simple functions for the backends to call to add notes and such.  One
could also imagine a more complex function which simply walked the
prologue and did something like what the current dwarf2out.c code does,
except adding reg notes.  That might make it relatively easy to
transition the backends to the new scheme.

Ian

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-06 20:32 ` Joseph S. Myers
@ 2009-05-30  0:49   ` Richard Henderson
  2009-05-30 12:24     ` Jakub Jelinek
                       ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Richard Henderson @ 2009-05-30  0:49 UTC (permalink / raw)
  To: gcc-patches; +Cc: Joseph S. Myers

[-- Attachment #1: Type: text/plain, Size: 1272 bytes --]

Joseph S. Myers wrote:
> On Wed, 6 May 2009, Richard Henderson wrote:
> 
>> Since the beginning, -fasynchronous-unwind-tables has not held correct
>> information for function epilogues.  This is an attempt at adding that.
> 
> See also Nathan Froyd's patch (doing this for x86_64) described in his 
> 2006 Summit paper.  I haven't compared the approaches (but fully support 
> adding this feature).
> 
> http://gcc.gnu.org/ml/gcc-patches/2006-03/msg00426.html
> http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01091.html
> 

Thanks for the pointers.  I did incorporate a few of Nathan's ideas into
this version of the patch -- primarily, when duplicating the epilogue,
make sure the duplicated insns are also recorded as epilogue insns.

The other large change from the previous patch is the ability to have
the eh_return epilogue from _Unwind_Resume (and friends) marked
properly.  This required the addition of an EH_RETURN rtx, so that
the middle-end could recognize when epilogue expansion should happen,
rather than the ad hoc unspecs that ports had been using.  As it
happens, only i386 and bfin implement eh_return via special epilogues;
most ports only need to overwrite one or more registers before using
a normal epilogue.

Tested on x86_64, i686; committed.


r~

[-- Attachment #2: d-epilogue-2 --]
[-- Type: text/plain, Size: 61349 bytes --]

	* cfgcleanup.c (try_crossjump_to_edge): Only skip past
	NOTE_INSN_BASIC_BLOCK.
	* cfglayout.c (duplicate_insn_chain): Copy epilogue insn marks.
	Duplicate NOTE_INSN_EPILOGUE_BEG notes.
	* cfgrtl.c (can_delete_note_p): Allow NOTE_INSN_EPILOGUE_BEG
	to be deleted.
	* dwarf2out.c (struct cfa_loc): Change indirect field to bitfield,
	add in_use field.
	(add_cfi): Disable check redefining cfa away from drap.
	(lookup_cfa_1): Add remember argument; handle remember/restore.
	(lookup_cfa): Pass remember argument.
	(cfa_remember): New.
	(compute_barrier_args_size_1): Remove sibcall check.
	(dwarf2out_frame_debug_def_cfa): New.
	(dwarf2out_frame_debug_adjust_cfa): New.
	(dwarf2out_frame_debug_cfa_offset): New.
	(dwarf2out_frame_debug_cfa_register): New.
	(dwarf2out_frame_debug_cfa_restore): New.
	(dwarf2out_frame_debug): Handle REG_CFA_* notes.
	(dwarf2out_begin_epilogue): New.
	(dwarf2out_frame_debug_restore_state): New.
	(dw_cfi_oprnd1_desc): Handle DW_CFA_remember_state,
	DW_CFA_restore_state.
	(output_cfi_directive): Likewise.
	(convert_cfa_to_fb_loc_list): Likewise.
	(dw_cfi_oprnd1_desc): Handle DW_CFA_restore.
	* dwarf2out.h: Update.
	* emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
	(copy_insn_1): Early out for null.
	* final.c (final_scan_insn): Call dwarf2out_begin_epilogue
	and dwarf2out_frame_debug_restore_state.
	* function.c (prologue, epilogue, sibcall_epilogue): Remove.
	(prologue_insn_hash, epilogue_insn_hash): New.
	(free_after_compilation): Adjust freeing accordingly.
	(record_insns): Create hash table if needed; push insns into
	hash instead of array.
	(maybe_copy_epilogue_insn): New.
	(contains): Search hash table instead of array.
	(sibcall_epilogue_contains): Remove.
	(thread_prologue_and_epilogue_insns): Split eh_return insns
	and mark them as epilogues.
	(reposition_prologue_and_epilogue_notes): Rewrite epilogue
	scanning in terms of basic blocks.
	* insn-notes.def (CFA_RESTORE_STATE): New.
	* jump.c (returnjump_p_1): Accept EH_RETURN.
	(eh_returnjump_p_1, eh_returnjump_p): New.
	* reg-notes.def (CFA_DEF_CFA, CFA_ADJUST_CFA, CFA_OFFSET,
	CFA_REGISTER, CFA_RESTORE): New.
	* rtl.def (EH_RETURN): New.
	* rtl.h (eh_returnjump_p, maybe_copy_epilogue_insn): Declare.

	* config/bfin/bfin.md (UNSPEC_VOLATILE_EH_RETURN): Remove.
	(eh_return_internal): Use eh_return rtx; split w/ epilogue.

	* config/i386/i386.c (gen_push): Update cfa state.
	(pro_epilogue_adjust_stack): Add set_cfa argument.  When true,
	add a CFA_ADJUST_CFA note.
	(ix86_dwarf_handle_frame_unspec): Remove.
	(ix86_expand_prologue): Update cfa state.
	(ix86_emit_restore_reg_using_pop): New.
	(ix86_emit_restore_regs_using_pop): New.
	(ix86_emit_leave): New.
	(ix86_emit_restore_regs_using_mov): Add CFA_RESTORE notes.
	(ix86_expand_epilogue): Add notes for unwinding the epilogue.
	* config/i386/i386.h (struct machine_cfa_state): New.
	(ix86_cfa_state): New.
	* config/i386/i386.md (UNSPEC_EH_RETURN): Remove.
	(eh_return_internal): Merge from eh_return_<mode>,
	use eh_return rtx, split w/ epilogue.

--- gcc/cfgcleanup.c	(revision 148000)
+++ gcc/cfgcleanup.c	(local)
@@ -1672,8 +1672,7 @@ try_crossjump_to_edge (int mode, edge e1
   /* Skip possible basic block header.  */
   if (LABEL_P (newpos1))
     newpos1 = NEXT_INSN (newpos1);
-
-  if (NOTE_P (newpos1))
+  if (NOTE_INSN_BASIC_BLOCK_P (newpos1))
     newpos1 = NEXT_INSN (newpos1);
 
   redirect_from = split_block (src1, PREV_INSN (newpos1))->src;
--- gcc/cfglayout.c	(revision 148000)
+++ gcc/cfglayout.c	(local)
@@ -1112,7 +1112,7 @@ cfg_layout_can_duplicate_bb_p (const_bas
 rtx
 duplicate_insn_chain (rtx from, rtx to)
 {
-  rtx insn, last;
+  rtx insn, last, copy;
 
   /* Avoid updating of boundaries of previous basic block.  The
      note will get removed from insn stream in fixup.  */
@@ -1133,7 +1133,8 @@ duplicate_insn_chain (rtx from, rtx to)
 	  if (GET_CODE (PATTERN (insn)) == ADDR_VEC
 	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
 	    break;
-	  emit_copy_of_insn_after (insn, get_last_insn ());
+	  copy = emit_copy_of_insn_after (insn, get_last_insn ());
+          maybe_copy_epilogue_insn (insn, copy);
 	  break;
 
 	case CODE_LABEL:
@@ -1153,23 +1154,18 @@ duplicate_insn_chain (rtx from, rtx to)
 	    case NOTE_INSN_DELETED:
 	    case NOTE_INSN_DELETED_LABEL:
 	      /* No problem to strip these.  */
-	    case NOTE_INSN_EPILOGUE_BEG:
-	      /* Debug code expect these notes to exist just once.
-		 Keep them in the master copy.
-		 ??? It probably makes more sense to duplicate them for each
-		 epilogue copy.  */
 	    case NOTE_INSN_FUNCTION_BEG:
 	      /* There is always just single entry to function.  */
 	    case NOTE_INSN_BASIC_BLOCK:
 	      break;
 
+	    case NOTE_INSN_EPILOGUE_BEG:
 	    case NOTE_INSN_SWITCH_TEXT_SECTIONS:
 	      emit_note_copy (insn);
 	      break;
 
 	    default:
-	      /* All other notes should have already been eliminated.
-	       */
+	      /* All other notes should have already been eliminated.  */
 	      gcc_unreachable ();
 	    }
 	  break;
--- gcc/cfgrtl.c	(revision 148000)
+++ gcc/cfgrtl.c	(local)
@@ -86,8 +86,16 @@ static void rtl_make_forwarder_block (ed
 static int
 can_delete_note_p (const_rtx note)
 {
-  return (NOTE_KIND (note) == NOTE_INSN_DELETED
-	  || NOTE_KIND (note) == NOTE_INSN_BASIC_BLOCK);
+  switch (NOTE_KIND (note))
+    {
+    case NOTE_INSN_DELETED:
+    case NOTE_INSN_BASIC_BLOCK:
+    case NOTE_INSN_EPILOGUE_BEG:
+      return true;
+
+    default:
+      return false;
+    }
 }
 
 /* True if a given label can be deleted.  */
--- gcc/config/bfin/bfin.md	(revision 148000)
+++ gcc/config/bfin/bfin.md	(local)
@@ -141,8 +141,7 @@
    (UNSPEC_ONES 12)])
 
 (define_constants
-  [(UNSPEC_VOLATILE_EH_RETURN 0)
-   (UNSPEC_VOLATILE_CSYNC 1)
+  [(UNSPEC_VOLATILE_CSYNC 1)
    (UNSPEC_VOLATILE_SSYNC 2)
    (UNSPEC_VOLATILE_LOAD_FUNCDESC 3)
    (UNSPEC_VOLATILE_STORE_EH_HANDLER 4)
@@ -2573,8 +2572,7 @@
   "bfin_expand_epilogue (0, 0, 1); DONE;")
 
 (define_expand "eh_return"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "")]
-		    UNSPEC_VOLATILE_EH_RETURN)]
+  [(use (match_operand:SI 0 "register_operand" ""))]
   ""
 {
   emit_insn (gen_eh_store_handler (EH_RETURN_HANDLER_RTX, operands[0]));
@@ -2592,11 +2590,10 @@
   [(set_attr "type" "mcst")])
 
 (define_insn_and_split "eh_return_internal"
-  [(set (pc)
-	(unspec_volatile [(reg:SI REG_P2)] UNSPEC_VOLATILE_EH_RETURN))]
+  [(eh_return)]
   ""
   "#"
-  "reload_completed"
+  "epilogue_completed"
   [(const_int 1)]
   "bfin_expand_epilogue (1, 1, 0); DONE;")
 
--- gcc/config/i386/i386.c	(revision 148000)
+++ gcc/config/i386/i386.c	(local)
@@ -7609,6 +7609,9 @@ output_set_got (rtx dest, rtx label ATTR
 static rtx
 gen_push (rtx arg)
 {
+  if (ix86_cfa_state->reg == stack_pointer_rtx)
+    ix86_cfa_state->offset += UNITS_PER_WORD;
+
   return gen_rtx_SET (VOIDmode,
 		      gen_rtx_MEM (Pmode,
 				   gen_rtx_PRE_DEC (Pmode,
@@ -7668,8 +7671,7 @@ ix86_save_reg (unsigned int regno, int m
 	}
     }
 
-  if (crtl->drap_reg
-      && regno == REGNO (crtl->drap_reg))
+  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
     return 1;
 
   return (df_regs_ever_live_p (regno)
@@ -8005,7 +8007,8 @@ ix86_emit_save_sse_regs_using_mov (rtx p
    otherwise.  */
 
 static void
-pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
+			   int style, bool set_cfa)
 {
   rtx insn;
 
@@ -8023,13 +8026,23 @@ pro_epilogue_adjust_stack (rtx dest, rtx
       gcc_assert (style);
       r11 = gen_rtx_REG (DImode, R11_REG);
       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
-      if (style < 0)
-	RTX_FRAME_RELATED_P (insn) = 1;
       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
 							       offset));
     }
-  if (style < 0)
-    RTX_FRAME_RELATED_P (insn) = 1;
+
+  if (set_cfa)
+    {
+      rtx r;
+
+      gcc_assert (ix86_cfa_state->reg == src);
+      ix86_cfa_state->offset += INTVAL (offset);
+      ix86_cfa_state->reg = dest;
+    
+      r = gen_rtx_PLUS (Pmode, src, offset);
+      r = gen_rtx_SET (VOIDmode, dest, r);
+      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
 }
 
 /* Find an available register to be used as dynamic realign argument
@@ -8164,30 +8177,6 @@ ix86_internal_arg_pointer (void)
   return virtual_incoming_args_rtx;
 }
 
-/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
-   This is called from dwarf2out.c to emit call frame instructions
-   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
-static void
-ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
-{
-  rtx unspec = SET_SRC (pattern);
-  gcc_assert (GET_CODE (unspec) == UNSPEC);
-
-  switch (index)
-    {
-    case UNSPEC_REG_SAVE:
-      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
-			      SET_DEST (pattern));
-      break;
-    case UNSPEC_DEF_CFA:
-      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
-			 INTVAL (XVECEXP (unspec, 0, 0)));
-      break;
-    default:
-      gcc_unreachable ();
-    }
-}
-
 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
    to be generated in correct form.  */
 static void 
@@ -8231,6 +8220,10 @@ ix86_expand_prologue (void)
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
 
+  /* Initialize CFA state for before the prologue.  */
+  ix86_cfa_state->reg = stack_pointer_rtx;
+  ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
+
   ix86_compute_frame_layout (&frame);
 
   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
@@ -8260,6 +8253,7 @@ ix86_expand_prologue (void)
 
       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
       RTX_FRAME_RELATED_P (insn) = 1; 
+      ix86_cfa_state->reg = crtl->drap_reg;
 
       /* Align the stack.  */
       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
@@ -8288,6 +8282,9 @@ ix86_expand_prologue (void)
 
       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
       RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+        ix86_cfa_state->reg = hard_frame_pointer_rtx;
     }
 
   if (stack_realign_fp)
@@ -8326,7 +8323,8 @@ ix86_expand_prologue (void)
     ;
   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-			       GEN_INT (-allocate), -1);
+			       GEN_INT (-allocate), -1,
+			       ix86_cfa_state->reg == stack_pointer_rtx);
   else
     {
       /* Only valid for Win32.  */
@@ -8354,10 +8352,15 @@ ix86_expand_prologue (void)
       else
 	insn = gen_allocate_stack_worker_32 (eax, eax);
       insn = emit_insn (insn);
-      RTX_FRAME_RELATED_P (insn) = 1;
-      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
-      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
-      add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+	{
+	  ix86_cfa_state->offset += allocate;
+	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
+	  t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
 
       if (eax_live)
 	{
@@ -8464,18 +8467,96 @@ ix86_expand_prologue (void)
     emit_insn (gen_cld ());
 }
 
+/* Emit code to restore REG using a POP insn.  */
+
+static void
+ix86_emit_restore_reg_using_pop (rtx reg)
+{
+  rtx insn = emit_insn (ix86_gen_pop1 (reg));
+
+  if (ix86_cfa_state->reg == crtl->drap_reg
+      && REGNO (reg) == REGNO (crtl->drap_reg))
+    {
+      /* Previously we'd represented the CFA as an expression
+	 like *(%ebp - 8).  We've just popped that value from
+	 the stack, which means we need to reset the CFA to
+	 the drap register.  This will remain until we restore
+	 the stack pointer.  */
+      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      return;
+    }
+
+  if (ix86_cfa_state->reg == stack_pointer_rtx)
+    {
+      ix86_cfa_state->offset -= UNITS_PER_WORD;
+      add_reg_note (insn, REG_CFA_ADJUST_CFA,
+		    copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+    }
+
+  /* When the frame pointer is the CFA, and we pop it, we are
+     swapping back to the stack pointer as the CFA.  This happens
+     for stack frames that don't allocate other data, so we assume
+     the stack pointer is now pointing at the return address, i.e.
+     the function entry state, which makes the offset be 1 word.  */
+  else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
+	   && reg == hard_frame_pointer_rtx)
+    {
+      ix86_cfa_state->reg = stack_pointer_rtx;
+      ix86_cfa_state->offset = UNITS_PER_WORD;
+
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+				  GEN_INT (UNITS_PER_WORD)));
+    }
+
+  add_reg_note (insn, REG_CFA_RESTORE, reg);
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* Emit code to restore saved registers using POP insns.  */
+
+static void
+ix86_emit_restore_regs_using_pop (void)
+{
+  int regno;
+
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
+      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
+}
+
+/* Emit code and notes for the LEAVE instruction.  */
+
+static void
+ix86_emit_leave (void)
+{
+  rtx insn = emit_insn (ix86_gen_leave ());
+
+  if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
+    {
+      add_reg_note (insn, REG_CFA_ADJUST_CFA, 
+		    copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
+      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+}
+
 /* Emit code to restore saved registers using MOV insns.  First register
    is restored from POINTER + OFFSET.  */
 static void
 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
 				  int maybe_eh_return)
 {
-  int regno;
+  unsigned int regno;
   rtx base_address = gen_rtx_MEM (Pmode, pointer);
+  rtx insn;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
       {
+	rtx reg = gen_rtx_REG (Pmode, regno);
+
 	/* Ensure that adjust_address won't be forced to produce pointer
 	   out of range allowed by x86-64 instruction set.  */
 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
@@ -8488,9 +8569,23 @@ ix86_emit_restore_regs_using_mov (rtx po
 	    base_address = gen_rtx_MEM (Pmode, r11);
 	    offset = 0;
 	  }
-	emit_move_insn (gen_rtx_REG (Pmode, regno),
-	                adjust_address (base_address, Pmode, offset));
+	insn = emit_move_insn (reg,
+			       adjust_address (base_address, Pmode, offset));
 	offset += UNITS_PER_WORD;
+
+        if (ix86_cfa_state->reg == crtl->drap_reg
+	    && regno == REGNO (crtl->drap_reg))
+	  {
+	    /* Previously we'd represented the CFA as an expression
+	       like *(%ebp - 8).  We've just loaded that value from
+	       the stack, which means we need to reset the CFA to
+	       the drap register.  This will remain until we restore
+	       the stack pointer.  */
+	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+	  }
+	else
+	  add_reg_note (insn, REG_CFA_RESTORE, reg);
+	RTX_FRAME_RELATED_P (insn) = 1;
       }
 }
 
@@ -8502,11 +8597,13 @@ ix86_emit_restore_sse_regs_using_mov (rt
 {
   int regno;
   rtx base_address = gen_rtx_MEM (TImode, pointer);
-  rtx mem;
+  rtx mem, insn;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
       {
+	rtx reg = gen_rtx_REG (TImode, regno);
+
 	/* Ensure that adjust_address won't be forced to produce pointer
 	   out of range allowed by x86-64 instruction set.  */
 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
@@ -8521,8 +8618,11 @@ ix86_emit_restore_sse_regs_using_mov (rt
 	  }
 	mem = adjust_address (base_address, TImode, offset);
 	set_mem_align (mem, 128);
-	emit_move_insn (gen_rtx_REG (TImode, regno), mem);
+	insn = emit_move_insn (reg, mem);
 	offset += 16;
+
+	add_reg_note (insn, REG_CFA_RESTORE, reg);
+	RTX_FRAME_RELATED_P (insn) = 1;
       }
 }
 
@@ -8531,10 +8631,11 @@ ix86_emit_restore_sse_regs_using_mov (rt
 void
 ix86_expand_epilogue (int style)
 {
-  int regno;
   int sp_valid;
   struct ix86_frame frame;
   HOST_WIDE_INT offset;
+  struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
+  bool using_drap;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -8560,6 +8661,9 @@ ix86_expand_epilogue (int style)
   offset *= -UNITS_PER_WORD;
   offset -= frame.nsseregs * 16 + frame.padding0;
 
+  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
+  gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
+
   /* If we're only restoring one register and sp is not valid then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.
@@ -8574,7 +8678,8 @@ ix86_expand_epilogue (int style)
       || (TARGET_EPILOGUE_USING_MOVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
-      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
+      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
+	  && frame.to_allocate)
       || (frame_pointer_needed && TARGET_USE_LEAVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && (frame.nregs + frame.nsseregs) == 1)
@@ -8622,13 +8727,28 @@ ix86_expand_epilogue (int style)
 	    {
 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
-	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
+	      tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
 
 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
-	      emit_move_insn (hard_frame_pointer_rtx, tmp);
+	      tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
+
+	      /* Note that we use SA as a temporary CFA, as the return
+		 address is at the proper place relative to it.  We
+		 pretend this happens at the FP restore insn because
+		 prior to this insn the FP would be stored at the wrong
+		 offset relative to SA, and after this insn we have no
+		 other reasonable register to use for the CFA.  We don't
+		 bother resetting the CFA to the SP for the duration of
+		 the return insn.  */
+	      add_reg_note (tmp, REG_CFA_DEF_CFA,
+			    plus_constant (sa, UNITS_PER_WORD));
+	      add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+	      RTX_FRAME_RELATED_P (tmp) = 1;
+	      ix86_cfa_state->reg = sa;
+	      ix86_cfa_state->offset = UNITS_PER_WORD;
 
 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
-					 const0_rtx, style);
+					 const0_rtx, style, false);
 	    }
 	  else
 	    {
@@ -8637,7 +8757,17 @@ ix86_expand_epilogue (int style)
                                          + frame.nregs * UNITS_PER_WORD
 					 + frame.nsseregs * 16
 					 + frame.padding0));
-	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+	      tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+
+	      gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
+	      if (ix86_cfa_state->offset != UNITS_PER_WORD)
+		{
+		  ix86_cfa_state->offset = UNITS_PER_WORD;
+		  add_reg_note (tmp, REG_CFA_DEF_CFA,
+				plus_constant (stack_pointer_rtx,
+					       UNITS_PER_WORD));
+		  RTX_FRAME_RELATED_P (tmp) = 1;
+		}
 	    }
 	}
       else if (!frame_pointer_needed)
@@ -8646,18 +8776,18 @@ ix86_expand_epilogue (int style)
 					    + frame.nregs * UNITS_PER_WORD
 					    + frame.nsseregs * 16
 					    + frame.padding0),
-				   style);
+				   style, !using_drap);
       /* If not an i386, mov & pop is faster than "leave".  */
       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	emit_insn ((*ix86_gen_leave) ());
+	ix86_emit_leave ();
       else
 	{
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
 				     hard_frame_pointer_rtx,
-				     const0_rtx, style);
+				     const0_rtx, style, !using_drap);
 
-	  emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
 	}
     }
   else
@@ -8675,11 +8805,12 @@ ix86_expand_epilogue (int style)
           gcc_assert (!stack_realign_fp);
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
 				     hard_frame_pointer_rtx,
-				     GEN_INT (offset), style);
+				     GEN_INT (offset), style, false);
           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
 					        frame.to_allocate, style == 2);
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
-				     GEN_INT (frame.nsseregs * 16), style);
+				     GEN_INT (frame.nsseregs * 16),
+				     style, false);
 	}
       else if (frame.to_allocate || frame.nsseregs)
 	{
@@ -8689,18 +8820,18 @@ ix86_expand_epilogue (int style)
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				     GEN_INT (frame.to_allocate
 				     	      + frame.nsseregs * 16
-					      + frame.padding0), style);
+					      + frame.padding0), style,
+				     !using_drap && !frame_pointer_needed);
 	}
 
-      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-	if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
-	  emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
+      ix86_emit_restore_regs_using_pop ();
+
       if (frame_pointer_needed)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
 	     able to grok it fast.  */
 	  if (TARGET_USE_LEAVE)
-	    emit_insn ((*ix86_gen_leave) ());
+	    ix86_emit_leave ();
 	  else
             {
               /* For stack realigned really happens, recover stack 
@@ -8709,47 +8840,70 @@ ix86_expand_epilogue (int style)
               if (stack_realign_fp)
 		pro_epilogue_adjust_stack (stack_pointer_rtx,
 					   hard_frame_pointer_rtx,
-					   const0_rtx, style);
-	      emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+					   const0_rtx, style, !using_drap);
+	      ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
             }
 	}
     }
 
-  if (crtl->drap_reg && crtl->stack_realign_needed)
+  if (using_drap)
     {
       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
 			      ? 0 : UNITS_PER_WORD);
+      rtx insn;
+
       gcc_assert (stack_realign_drap);
-      emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
-				   crtl->drap_reg,
-				   GEN_INT (-(UNITS_PER_WORD
-					      + param_ptr_offset))));
-      if (!call_used_regs[REGNO (crtl->drap_reg)])
-	emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
-      
+
+      insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
+					  crtl->drap_reg,
+					  GEN_INT (-(UNITS_PER_WORD
+						     + param_ptr_offset))));
+
+      ix86_cfa_state->reg = stack_pointer_rtx;
+      ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
+
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
+				  GEN_INT (ix86_cfa_state->offset)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (param_ptr_offset)
+	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
     }
 
   /* Sibcall epilogues don't want a return instruction.  */
   if (style == 0)
-    return;
+    {
+      *ix86_cfa_state = cfa_state_save;
+      return;
+    }
 
   if (crtl->args.pops_args && crtl->args.size)
     {
       rtx popc = GEN_INT (crtl->args.pops_args);
 
-      /* i386 can only pop 64K bytes.  If asked to pop more, pop
-	 return address, do explicit add, and jump indirectly to the
-	 caller.  */
+      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
+	 address, do explicit add, and jump indirectly to the caller.  */
 
       if (crtl->args.pops_args >= 65536)
 	{
 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
+	  rtx insn;
 
 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
 	  gcc_assert (!TARGET_64BIT);
 
-	  emit_insn (gen_popsi1 (ecx));
-	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
+	  insn = emit_insn (gen_popsi1 (ecx));
+	  ix86_cfa_state->offset -= UNITS_PER_WORD;
+
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
+			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+	  add_reg_note (insn, REG_CFA_REGISTER,
+			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
+	  RTX_FRAME_RELATED_P (insn) = 1;
+
+	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				     popc, -1, true);
 	  emit_jump_insn (gen_return_indirect_internal (ecx));
 	}
       else
@@ -8757,6 +8911,10 @@ ix86_expand_epilogue (int style)
     }
   else
     emit_jump_insn (gen_return_internal ());
+
+  /* Restore the state back to the state from the prologue,
+     so that it's correct for the next epilogue.  */
+  *ix86_cfa_state = cfa_state_save;
 }
 
 /* Reset from the function's potential modifications.  */
@@ -30361,8 +30519,6 @@ ix86_enum_va_list (int idx, const char *
 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
 #undef TARGET_GET_DRAP_RTX
 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
-#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
-#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
 #undef TARGET_STRICT_ARGUMENT_NAMING
 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
 
--- gcc/config/i386/i386.h	(revision 148000)
+++ gcc/config/i386/i386.h	(local)
@@ -2393,6 +2393,15 @@ enum ix86_stack_slot
 \f
 #define FASTCALL_PREFIX '@'
 \f
+/* Machine specific CFA tracking during prologue/epilogue generation.  */
+
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS)
+struct GTY(()) machine_cfa_state
+{
+  rtx reg;
+  HOST_WIDE_INT offset;
+};
+
 struct GTY(()) machine_function {
   struct stack_local_entry *stack_locals;
   const char *some_ld_name;
@@ -2419,8 +2428,10 @@ struct GTY(()) machine_function {
   int tls_descriptor_call_expanded_p;
   /* This value is used for amd64 targets and specifies the current abi
      to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi.  */
-   enum calling_abi call_abi;
+  enum calling_abi call_abi;
+  struct machine_cfa_state cfa;
 };
+#endif
 
 #define ix86_stack_locals (cfun->machine->stack_locals)
 #define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
@@ -2436,6 +2447,7 @@ struct GTY(()) machine_function {
    REG_SP is live.  */
 #define ix86_current_function_calls_tls_descriptor \
   (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
+#define ix86_cfa_state (&cfun->machine->cfa)
 
 /* Control behavior of x86_file_start.  */
 #define X86_FILE_START_VERSION_DIRECTIVE false
--- gcc/config/i386/i386.md	(revision 148000)
+++ gcc/config/i386/i386.md	(local)
@@ -101,7 +101,6 @@
    (UNSPEC_ADD_CARRY		34)
    (UNSPEC_FLDCW		35)
    (UNSPEC_REP			36)
-   (UNSPEC_EH_RETURN		37)
    (UNSPEC_LD_MPIC		38)	; load_macho_picbase
    (UNSPEC_TRUNC_NOOP		39)
 
@@ -15982,21 +15981,16 @@
   tmp = gen_rtx_MEM (Pmode, tmp);
   emit_move_insn (tmp, ra);
 
-  if (Pmode == SImode)
-    emit_jump_insn (gen_eh_return_si (sa));
-  else
-    emit_jump_insn (gen_eh_return_di (sa));
+  emit_jump_insn (gen_eh_return_internal ());
   emit_barrier ();
   DONE;
 })
 
-(define_insn_and_split "eh_return_<mode>"
-  [(set (pc)
-        (unspec [(match_operand:P 0 "register_operand" "c")]
-	         UNSPEC_EH_RETURN))]
+(define_insn_and_split "eh_return_internal"
+  [(eh_return)]
   ""
   "#"
-  "reload_completed"
+  "epilogue_completed"
   [(const_int 0)]
   "ix86_expand_epilogue (2); DONE;")
 
--- gcc/dwarf2out.c	(revision 148000)
+++ gcc/dwarf2out.c	(local)
@@ -247,7 +247,8 @@ typedef struct GTY(()) cfa_loc {
   HOST_WIDE_INT offset;
   HOST_WIDE_INT base_offset;
   unsigned int reg;
-  int indirect;            /* 1 if CFA is accessed via a dereference.  */
+  BOOL_BITFIELD indirect : 1;  /* 1 if CFA is accessed via a dereference.  */
+  BOOL_BITFIELD in_use : 1;    /* 1 if a saved cfa is stored here.  */
 } dw_cfa_location;
 
 /* All call frame descriptions (FDE's) in the GCC generated DWARF
@@ -404,7 +405,7 @@ static const char *dwarf_cfi_name (unsig
 static dw_cfi_ref new_cfi (void);
 static void add_cfi (dw_cfi_ref *, dw_cfi_ref);
 static void add_fde_cfi (const char *, dw_cfi_ref);
-static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *);
+static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *, dw_cfa_location *);
 static void lookup_cfa (dw_cfa_location *);
 static void reg_save (const char *, unsigned, unsigned, HOST_WIDE_INT);
 #ifdef DWARF2_UNWIND_INFO
@@ -668,7 +669,10 @@ add_cfi (dw_cfi_ref *list_head, dw_cfi_r
 
   /* When DRAP is used, CFA is defined with an expression.  Redefine
      CFA may lead to a different CFA value.   */
-  if (fde && fde->drap_reg != INVALID_REGNUM)
+  /* ??? This heuristic fails when we're annotating epilogues, because
+     there we'll always want to redefine the CFA back to the stack
+     pointer on the way out.  Where should we move this check?  */
+  if (0 && fde && fde->drap_reg != INVALID_REGNUM)
     switch (cfi->dw_cfi_opc)
       {
         case DW_CFA_def_cfa_register:
@@ -774,7 +778,7 @@ add_fde_cfi (const char *label, dw_cfi_r
 /* Subroutine of lookup_cfa.  */
 
 static void
-lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_location *loc)
+lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_location *loc, dw_cfa_location *remember)
 {
   switch (cfi->dw_cfi_opc)
     {
@@ -793,6 +797,18 @@ lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_loc
     case DW_CFA_def_cfa_expression:
       get_cfa_from_loc_descr (loc, cfi->dw_cfi_oprnd1.dw_cfi_loc);
       break;
+
+    case DW_CFA_remember_state:
+      gcc_assert (!remember->in_use);
+      *remember = *loc;
+      remember->in_use = 1;
+      break;
+    case DW_CFA_restore_state:
+      gcc_assert (remember->in_use);
+      *loc = *remember;
+      remember->in_use = 0;
+      break;
+
     default:
       break;
     }
@@ -805,19 +821,19 @@ lookup_cfa (dw_cfa_location *loc)
 {
   dw_cfi_ref cfi;
   dw_fde_ref fde;
+  dw_cfa_location remember;
 
+  memset (loc, 0, sizeof (*loc));
   loc->reg = INVALID_REGNUM;
-  loc->offset = 0;
-  loc->indirect = 0;
-  loc->base_offset = 0;
+  remember = *loc;
 
   for (cfi = cie_cfi_head; cfi; cfi = cfi->dw_cfi_next)
-    lookup_cfa_1 (cfi, loc);
+    lookup_cfa_1 (cfi, loc, &remember);
 
   fde = current_fde ();
   if (fde)
     for (cfi = fde->dw_fde_cfi; cfi; cfi = cfi->dw_cfi_next)
-      lookup_cfa_1 (cfi, loc);
+      lookup_cfa_1 (cfi, loc, &remember);
 }
 
 /* The current rule for calculating the DWARF2 canonical frame address.  */
@@ -827,6 +843,9 @@ static dw_cfa_location cfa;
    from the CFA.  */
 static dw_cfa_location cfa_store;
 
+/* The current save location around an epilogue.  */
+static dw_cfa_location cfa_remember;
+
 /* The running total of the size of arguments pushed onto the stack.  */
 static HOST_WIDE_INT args_size;
 
@@ -1212,8 +1231,7 @@ compute_barrier_args_size_1 (rtx insn, H
 
   if (! RTX_FRAME_RELATED_P (insn))
     {
-      if (prologue_epilogue_contains (insn)
-	  || sibcall_epilogue_contains (insn))
+      if (prologue_epilogue_contains (insn))
 	/* Nothing */;
       else if (GET_CODE (PATTERN (insn)) == SET)
 	offset = stack_adjust_offset (PATTERN (insn), cur_args_size, 0);
@@ -1386,7 +1404,7 @@ dwarf2out_stack_adjust (rtx insn, bool a
      with this function.  Proper support would require all frame-related
      insns to be marked, and to be able to handle saving state around
      epilogues textually in the middle of the function.  */
-  if (prologue_epilogue_contains (insn) || sibcall_epilogue_contains (insn))
+  if (prologue_epilogue_contains (insn))
     return;
 
   /* If INSN is an instruction from target of an annulled branch, the
@@ -1660,6 +1678,156 @@ reg_saved_in (rtx reg)
    value, not an offset.  */
 static dw_cfa_location cfa_temp;
 
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_DEF_CFA note.  */
+
+static void
+dwarf2out_frame_debug_def_cfa (rtx pat, const char *label)
+{
+  memset (&cfa, 0, sizeof (cfa));
+
+  switch (GET_CODE (pat))
+    {
+    case PLUS:
+      cfa.reg = REGNO (XEXP (pat, 0));
+      cfa.offset = INTVAL (XEXP (pat, 1));
+      break;
+
+    case REG:
+      cfa.reg = REGNO (pat);
+      break;
+
+    default:
+      /* Recurse and define an expression.  */
+      gcc_unreachable ();
+    }
+
+  def_cfa_1 (label, &cfa);
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_ADJUST_CFA note.  */
+
+static void
+dwarf2out_frame_debug_adjust_cfa (rtx pat, const char *label)
+{
+  rtx src, dest;
+
+  gcc_assert (GET_CODE (pat) == SET);
+  dest = XEXP (pat, 0);
+  src = XEXP (pat, 1);
+
+  switch (GET_CODE (src))
+    {
+    case PLUS:
+      gcc_assert (REGNO (XEXP (src, 0)) == cfa.reg);
+      cfa.offset -= INTVAL (XEXP (src, 1));
+      break;
+
+    case REG:
+	break;
+
+    default:
+	gcc_unreachable ();
+    }
+
+  cfa.reg = REGNO (dest);
+  gcc_assert (cfa.indirect == 0);
+
+  def_cfa_1 (label, &cfa);
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_OFFSET note.  */
+
+static void
+dwarf2out_frame_debug_cfa_offset (rtx set, const char *label)
+{
+  HOST_WIDE_INT offset;
+  rtx src, addr, span;
+
+  src = XEXP (set, 1);
+  addr = XEXP (set, 0);
+  gcc_assert (MEM_P (addr));
+  addr = XEXP (addr, 0);
+  
+  /* As documented, only consider extremely simple addresses.  */
+  switch (GET_CODE (addr))
+    {
+    case REG:
+      gcc_assert (REGNO (addr) == cfa.reg);
+      offset = -cfa.offset;
+      break;
+    case PLUS:
+      gcc_assert (REGNO (XEXP (addr, 0)) == cfa.reg);
+      offset = INTVAL (XEXP (addr, 1)) - cfa.offset;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  span = targetm.dwarf_register_span (src);
+
+  /* ??? We'd like to use queue_reg_save, but we need to come up with
+     a different flushing heuristic for epilogues.  */
+  if (!span)
+    reg_save (label, DWARF_FRAME_REGNUM (REGNO (src)), INVALID_REGNUM, offset);
+  else
+    {
+      /* We have a PARALLEL describing where the contents of SRC live.
+	 Emit a register save for each piece of the PARALLEL.  */
+      int par_index;
+      int limit;
+      HOST_WIDE_INT span_offset = offset;
+
+      gcc_assert (GET_CODE (span) == PARALLEL);
+
+      limit = XVECLEN (span, 0);
+      for (par_index = 0; par_index < limit; par_index++)
+	{
+	  rtx elem = XVECEXP (span, 0, par_index);
+
+	  reg_save (label, DWARF_FRAME_REGNUM (REGNO (elem)),
+		    INVALID_REGNUM, span_offset);
+	  span_offset += GET_MODE_SIZE (GET_MODE (elem));
+	}
+    }
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_REGISTER note.  */
+
+static void
+dwarf2out_frame_debug_cfa_register (rtx set, const char *label)
+{
+  rtx src, dest;
+  unsigned sregno, dregno;
+
+  src = XEXP (set, 1);
+  dest = XEXP (set, 0);
+
+  if (src == pc_rtx)
+    sregno = DWARF_FRAME_RETURN_COLUMN;
+  else
+    sregno = DWARF_FRAME_REGNUM (REGNO (src));
+
+  dregno = DWARF_FRAME_REGNUM (REGNO (dest));
+
+  /* ??? We'd like to use queue_reg_save, but we need to come up with
+     a different flushing heuristic for epilogues.  */
+  reg_save (label, sregno, dregno, 0);
+}
+
+/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE note.  */
+
+static void
+dwarf2out_frame_debug_cfa_restore (rtx reg, const char *label)
+{
+  dw_cfi_ref cfi = new_cfi ();
+  unsigned int regno = DWARF_FRAME_REGNUM (REGNO (reg));
+
+  cfi->dw_cfi_opc = (regno & ~0x3f ? DW_CFA_restore_extended : DW_CFA_restore);
+  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = regno;
+
+  add_fde_cfi (label, cfi);
+}
+
 /* Record call frame debugging information for an expression EXPR,
    which either sets SP or FP (adjusting how we calculate the frame
    address) or saves a register to the stack or another register.
@@ -2367,7 +2535,8 @@ void
 dwarf2out_frame_debug (rtx insn, bool after_p)
 {
   const char *label;
-  rtx src;
+  rtx note, n;
+  bool handled_one = false;
 
   if (insn == NULL_RTX)
     {
@@ -2412,15 +2581,158 @@ dwarf2out_frame_debug (rtx insn, bool af
     }
 
   label = dwarf2out_cfi_label ();
-  src = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
-  if (src)
-    insn = XEXP (src, 0);
-  else
-    insn = PATTERN (insn);
 
+  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+    switch (REG_NOTE_KIND (note))
+      {
+      case REG_FRAME_RELATED_EXPR:
+	insn = XEXP (note, 0);
+	goto found;
+
+      case REG_CFA_DEF_CFA:
+	dwarf2out_frame_debug_def_cfa (XEXP (note, 0), label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_ADJUST_CFA:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  {
+	    n = PATTERN (insn);
+	    if (GET_CODE (n) == PARALLEL)
+	      n = XVECEXP (n, 0, 0);
+	  }
+	dwarf2out_frame_debug_adjust_cfa (n, label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_OFFSET:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  n = single_set (insn);
+	dwarf2out_frame_debug_cfa_offset (n, label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_REGISTER:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  {
+	    n = PATTERN (insn);
+	    if (GET_CODE (n) == PARALLEL)
+	      n = XVECEXP (n, 0, 0);
+	  }
+	dwarf2out_frame_debug_cfa_register (n, label);
+	handled_one = true;
+	break;
+
+      case REG_CFA_RESTORE:
+	n = XEXP (note, 0);
+	if (n == NULL)
+	  {
+	    n = PATTERN (insn);
+	    if (GET_CODE (n) == PARALLEL)
+	      n = XVECEXP (n, 0, 0);
+	    n = XEXP (n, 0);
+	  }
+	dwarf2out_frame_debug_cfa_restore (n, label);
+	handled_one = true;
+	break;
+
+      default:
+	break;
+      }
+  if (handled_one)
+    return;
+
+  insn = PATTERN (insn);
+ found:
   dwarf2out_frame_debug_expr (insn, label);
 }
 
+/* Determine if we need to save and restore CFI information around this
+   epilogue.  INSN is the NOTE_INSN_EPILOGUE_BEG note that starts it.  If
+   we do need to save/restore, then emit the save now, and insert a
+   NOTE_INSN_CFA_RESTORE_STATE at the appropriate place in the stream.  */
+
+void
+dwarf2out_begin_epilogue (rtx insn)
+{
+  bool saw_frp = false;
+  rtx i;
+  dw_cfi_ref cfi;
+
+  /* Scan forward to the return insn, noticing if there are possible
+     frame related insns.  */
+  for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
+    {
+      if (!INSN_P (i))
+	continue;
+
+      /* Look for both regular and sibcalls to end the block.  */
+      if (returnjump_p (i))
+	break;
+      if (CALL_P (i) && SIBLING_CALL_P (i))
+	break;
+
+      if (RTX_FRAME_RELATED_P (i))
+	saw_frp = true;
+    }
+
+  /* If the port doesn't emit epilogue unwind info, we don't need a
+     save/restore pair.  */
+  if (!saw_frp)
+    return;
+
+  /* Otherwise, search forward to see if the return insn was in the last
+     basic block of the function.  If so, we don't need save/restore.  */
+  gcc_assert (i != NULL);
+  i = next_real_insn (i);
+  if (i == NULL)
+    return;
+
+  /* Insert the restore before that next real insn in the stream, and before
+     a potential NOTE_INSN_EPILOGUE_BEG -- we do need these notes to be
+     properly nested.  This should be after any label or alignment.  This
+     will be pushed into the CFI stream by the function below.  */
+  while (1)
+    {
+      rtx p = PREV_INSN (i);
+      if (!NOTE_P (p))
+	break;
+      if (NOTE_KIND (p) == NOTE_INSN_BASIC_BLOCK)
+	break;
+      i = p;
+    }
+  emit_note_before (NOTE_INSN_CFA_RESTORE_STATE, i);
+
+  /* Emit the state save.  */
+  cfi = new_cfi (); 
+  cfi->dw_cfi_opc = DW_CFA_remember_state;
+  add_fde_cfi (dwarf2out_cfi_label (), cfi);
+
+  /* And emulate the state save.  */
+  gcc_assert (!cfa_remember.in_use);
+  cfa_remember = cfa;
+  cfa_remember.in_use = 1;
+}
+
+/* A "subroutine" of dwarf2out_begin_epilogue.  Emit the restore required.  */
+
+void
+dwarf2out_frame_debug_restore_state (void)
+{
+  dw_cfi_ref cfi = new_cfi (); 
+  const char *label = dwarf2out_cfi_label ();
+
+  cfi->dw_cfi_opc = DW_CFA_restore_state;
+  add_fde_cfi (label, cfi);
+
+  gcc_assert (cfa_remember.in_use);
+  cfa = cfa_remember;
+  cfa_remember.in_use = 0;
+}
+
 #endif
 
 /* Describe for the GTY machinery what parts of dw_cfi_oprnd1 are used.  */
@@ -2434,6 +2746,8 @@ dw_cfi_oprnd1_desc (enum dwarf_call_fram
     {
     case DW_CFA_nop:
     case DW_CFA_GNU_window_save:
+    case DW_CFA_remember_state:
+    case DW_CFA_restore_state:
       return dw_cfi_oprnd_unused;
 
     case DW_CFA_set_loc:
@@ -2448,6 +2762,7 @@ dw_cfi_oprnd1_desc (enum dwarf_call_fram
     case DW_CFA_def_cfa:
     case DW_CFA_offset_extended_sf:
     case DW_CFA_def_cfa_sf:
+    case DW_CFA_restore:
     case DW_CFA_restore_extended:
     case DW_CFA_undefined:
     case DW_CFA_same_value:
@@ -2771,6 +3086,13 @@ output_cfi_directive (dw_cfi_ref cfi)
 	       cfi->dw_cfi_oprnd1.dw_cfi_offset);
       break;
 
+    case DW_CFA_remember_state:
+      fprintf (asm_out_file, "\t.cfi_remember_state\n");
+      break;
+    case DW_CFA_restore_state:
+      fprintf (asm_out_file, "\t.cfi_restore_state\n");
+      break;
+
     case DW_CFA_GNU_args_size:
       fprintf (asm_out_file, "\t.cfi_escape 0x%x,", DW_CFA_GNU_args_size);
       dw2_asm_output_data_uleb128_raw (cfi->dw_cfi_oprnd1.dw_cfi_offset);
@@ -12056,6 +12378,7 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
   dw_cfi_ref cfi;
   dw_cfa_location last_cfa, next_cfa;
   const char *start_label, *last_label, *section;
+  dw_cfa_location remember;
 
   fde = current_fde ();
   gcc_assert (fde != NULL);
@@ -12064,17 +12387,16 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
   list_tail = &list;
   list = NULL;
 
+  memset (&next_cfa, 0, sizeof (next_cfa));
   next_cfa.reg = INVALID_REGNUM;
-  next_cfa.offset = 0;
-  next_cfa.indirect = 0;
-  next_cfa.base_offset = 0;
+  remember = next_cfa;
 
   start_label = fde->dw_fde_begin;
 
   /* ??? Bald assumption that the CIE opcode list does not contain
      advance opcodes.  */
   for (cfi = cie_cfi_head; cfi; cfi = cfi->dw_cfi_next)
-    lookup_cfa_1 (cfi, &next_cfa);
+    lookup_cfa_1 (cfi, &next_cfa, &remember);
 
   last_cfa = next_cfa;
   last_label = start_label;
@@ -12101,14 +12423,10 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
 
       case DW_CFA_advance_loc:
 	/* The encoding is complex enough that we should never emit this.  */
-      case DW_CFA_remember_state:
-      case DW_CFA_restore_state:
-	/* We don't handle these two in this function.  It would be possible
-	   if it were to be required.  */
 	gcc_unreachable ();
 
       default:
-	lookup_cfa_1 (cfi, &next_cfa);
+	lookup_cfa_1 (cfi, &next_cfa, &remember);
 	break;
       }
 
--- gcc/dwarf2out.h	(revision 148000)
+++ gcc/dwarf2out.h	(local)
@@ -20,6 +20,8 @@ along with GCC; see the file COPYING3.  
 
 extern void dwarf2out_decl (tree);
 extern void dwarf2out_frame_debug (rtx, bool);
+extern void dwarf2out_begin_epilogue (rtx);
+extern void dwarf2out_frame_debug_restore_state (void);
 
 extern void debug_dwarf (void);
 struct die_struct;
--- gcc/emit-rtl.c	(revision 148000)
+++ gcc/emit-rtl.c	(local)
@@ -3335,6 +3335,10 @@ try_split (rtx pat, rtx trial, int last)
   rtx insn_last, insn;
   int njumps = 0;
 
+  /* We're not good at redistributing frame information.  */
+  if (RTX_FRAME_RELATED_P (trial))
+    return trial;
+
   if (any_condjump_p (trial)
       && (note = find_reg_note (trial, REG_BR_PROB, 0)))
     split_branch_probability = INTVAL (XEXP (note, 0));
@@ -5050,6 +5054,9 @@ copy_insn_1 (rtx orig)
   RTX_CODE code;
   const char *format_ptr;
 
+  if (orig == NULL)
+    return NULL;
+
   code = GET_CODE (orig);
 
   switch (code)
--- gcc/final.c	(revision 148000)
+++ gcc/final.c	(local)
@@ -1879,9 +1879,19 @@ final_scan_insn (rtx insn, FILE *file, i
 	  break;
 
 	case NOTE_INSN_EPILOGUE_BEG:
+#if defined (DWARF2_UNWIND_INFO) && defined (HAVE_epilogue)
+	  if (dwarf2out_do_frame ())
+	    dwarf2out_begin_epilogue (insn);
+#endif
 	  targetm.asm_out.function_begin_epilogue (file);
 	  break;
 
+	case NOTE_INSN_CFA_RESTORE_STATE:
+#if defined (DWARF2_UNWIND_INFO)
+	  dwarf2out_frame_debug_restore_state ();
+#endif
+	  break;
+
 	case NOTE_INSN_FUNCTION_BEG:
 	  app_disable ();
 	  (*debug_hooks->end_prologue) (last_linenum, last_filename);
--- gcc/function.c	(revision 148000)
+++ gcc/function.c	(local)
@@ -124,13 +124,11 @@ struct machine_function * (*init_machine
 /* The currently compiled function.  */
 struct function *cfun = 0;
 
-/* These arrays record the INSN_UIDs of the prologue and epilogue insns.  */
-static VEC(int,heap) *prologue;
-static VEC(int,heap) *epilogue;
-
-/* Array of INSN_UIDs to hold the INSN_UIDs for each sibcall epilogue
-   in this function.  */
-static VEC(int,heap) *sibcall_epilogue;
+/* These hashes record the prologue and epilogue insns.  */
+static GTY((if_marked ("ggc_marked_p"), param_is (struct rtx_def)))
+  htab_t prologue_insn_hash;
+static GTY((if_marked ("ggc_marked_p"), param_is (struct rtx_def)))
+  htab_t epilogue_insn_hash;
 \f
 /* Forward declarations.  */
 
@@ -143,8 +141,8 @@ static tree *get_block_vector (tree, int
 extern tree debug_find_var_in_block_tree (tree, tree);
 /* We always define `record_insns' even if it's not used so that we
    can always export `prologue_epilogue_contains'.  */
-static void record_insns (rtx, VEC(int,heap) **) ATTRIBUTE_UNUSED;
-static int contains (const_rtx, VEC(int,heap) **);
+static void record_insns (rtx, rtx, htab_t *) ATTRIBUTE_UNUSED;
+static bool contains (const_rtx, htab_t);
 #ifdef HAVE_return
 static void emit_return_into_block (basic_block);
 #endif
@@ -207,9 +205,9 @@ free_after_parsing (struct function *f)
 void
 free_after_compilation (struct function *f)
 {
-  VEC_free (int, heap, prologue);
-  VEC_free (int, heap, epilogue);
-  VEC_free (int, heap, sibcall_epilogue);
+  prologue_insn_hash = NULL;
+  epilogue_insn_hash = NULL;
+
   if (crtl->emit.regno_pointer_align)
     free (crtl->emit.regno_pointer_align);
 
@@ -4196,18 +4194,11 @@ init_function_start (tree subr)
     warning (OPT_Waggregate_return, "function returns an aggregate");
 }
 
-/* Make sure all values used by the optimization passes have sane
-   defaults.  */
+/* Make sure all values used by the optimization passes have sane defaults.  */
 unsigned int
 init_function_for_compilation (void)
 {
   reg_renumber = 0;
-
-  /* No prologue/epilogue insns yet.  Make sure that these vectors are
-     empty.  */
-  gcc_assert (VEC_length (int, prologue) == 0);
-  gcc_assert (VEC_length (int, epilogue) == 0);
-  gcc_assert (VEC_length (int, sibcall_epilogue) == 0);
   return 0;
 }
 
@@ -4873,16 +4864,42 @@ get_arg_pointer_save_area (void)
   return ret;
 }
 \f
-/* Extend a vector that records the INSN_UIDs of INSNS
-   (a list of one or more insns).  */
+/* Add the insns from INSNS up to (but not including) END to the hash
+   HASHP, possibly allocating HASHP for the first time.  */
 
 static void
-record_insns (rtx insns, VEC(int,heap) **vecp)
+record_insns (rtx insns, rtx end, htab_t *hashp)
 {
   rtx tmp;
+  htab_t hash = *hashp;
+
+  if (hash == NULL)
+    *hashp = hash
+      = htab_create_ggc (17, htab_hash_pointer, htab_eq_pointer, NULL);
 
-  for (tmp = insns; tmp != NULL_RTX; tmp = NEXT_INSN (tmp))
-    VEC_safe_push (int, heap, *vecp, INSN_UID (tmp));
+  for (tmp = insns; tmp != end; tmp = NEXT_INSN (tmp))
+    {
+      void **slot = htab_find_slot (hash, tmp, INSERT);
+      gcc_assert (*slot == NULL);
+      *slot = tmp;
+    }
+}
+
+/* INSN has been duplicated as COPY, as part of duping a basic block.
+   If INSN is an epilogue insn, then record COPY as epilogue as well.  */
+
+void
+maybe_copy_epilogue_insn (rtx insn, rtx copy)
+{
+  void **slot;
+
+  if (epilogue_insn_hash == NULL
+      || htab_find (epilogue_insn_hash, insn) == NULL)
+    return;
+
+  slot = htab_find_slot (epilogue_insn_hash, copy, INSERT);
+  gcc_assert (*slot == NULL);
+  *slot = copy;
 }
 
 /* Set the locator of the insn chain starting at INSN to LOC.  */
@@ -4897,52 +4914,37 @@ set_insn_locators (rtx insn, int loc)
     }
 }
 
-/* Determine how many INSN_UIDs in VEC are part of INSN.  Because we can
-   be running after reorg, SEQUENCE rtl is possible.  */
+/* Determine if any INSNs in HASH are, or are part of, INSN.  Because
+   we can be running after reorg, SEQUENCE rtl is possible.  */
 
-static int
-contains (const_rtx insn, VEC(int,heap) **vec)
+static bool
+contains (const_rtx insn, htab_t hash)
 {
-  int i, j;
+  if (hash == NULL)
+    return false;
 
-  if (NONJUMP_INSN_P (insn)
-      && GET_CODE (PATTERN (insn)) == SEQUENCE)
+  if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
     {
-      int count = 0;
+      int i;
       for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
-	for (j = VEC_length (int, *vec) - 1; j >= 0; --j)
-	  if (INSN_UID (XVECEXP (PATTERN (insn), 0, i))
-	      == VEC_index (int, *vec, j))
-	    count++;
-      return count;
+	if (htab_find (hash, XVECEXP (PATTERN (insn), 0, i)))
+	  return true;
+      return false;
     }
-  else
-    {
-      for (j = VEC_length (int, *vec) - 1; j >= 0; --j)
-	if (INSN_UID (insn) == VEC_index (int, *vec, j))
-	  return 1;
-    }
-  return 0;
+
+  return htab_find (hash, insn) != NULL;
 }
 
 int
 prologue_epilogue_contains (const_rtx insn)
 {
-  if (contains (insn, &prologue))
+  if (contains (insn, prologue_insn_hash))
     return 1;
-  if (contains (insn, &epilogue))
+  if (contains (insn, epilogue_insn_hash))
     return 1;
   return 0;
 }
 
-int
-sibcall_epilogue_contains (const_rtx insn)
-{
-  if (sibcall_epilogue)
-    return contains (insn, &sibcall_epilogue);
-  return 0;
-}
-
 #ifdef HAVE_return
 /* Insert gen_return at the end of block BB.  This also means updating
    block_for_insn appropriately.  */
@@ -4985,7 +4987,7 @@ thread_prologue_and_epilogue_insns (void
 	emit_use (hard_frame_pointer_rtx);
 
       /* Retain a map of the prologue insns.  */
-      record_insns (seq, &prologue);
+      record_insns (seq, NULL, &prologue_insn_hash);
       emit_note (NOTE_INSN_PROLOGUE_END);
  
 #ifndef PROFILE_BEFORE_PROLOGUE
@@ -5117,6 +5119,38 @@ thread_prologue_and_epilogue_insns (void
 	}
     }
 #endif
+
+  /* A small fib -- epilogue is not yet completed, but we wish to re-use
+     this marker for the splits of EH_RETURN patterns, and nothing else
+     uses the flag in the meantime.  */
+  epilogue_completed = 1;
+
+#ifdef HAVE_eh_return
+  /* Find non-fallthru edges that end with EH_RETURN instructions.  On
+     some targets, these get split to a special version of the epilogue
+     code.  In order to be able to properly annotate these with unwind
+     info, try to split them now.  If we get a valid split, drop an
+     EPILOGUE_BEG note and mark the insns as epilogue insns.  */
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+    {
+      rtx prev, last, trial;
+
+      if (e->flags & EDGE_FALLTHRU)
+	continue;
+      last = BB_END (e->src);
+      if (!eh_returnjump_p (last))
+	continue;
+
+      prev = PREV_INSN (last);
+      trial = try_split (PATTERN (last), last, 1);
+      if (trial == last)
+	continue;
+
+      record_insns (NEXT_INSN (prev), NEXT_INSN (trial), &epilogue_insn_hash);
+      emit_note_after (NOTE_INSN_EPILOGUE_BEG, prev);
+    }
+#endif
+
   /* Find the edge that falls through to EXIT.  Other edges may exist
      due to RETURN instructions, but those don't need epilogues.
      There really shouldn't be a mixture -- either all should have
@@ -5137,7 +5171,7 @@ thread_prologue_and_epilogue_insns (void
       emit_jump_insn (seq);
 
       /* Retain a map of the epilogue insns.  */
-      record_insns (seq, &epilogue);
+      record_insns (seq, NULL, &epilogue_insn_hash);
       set_insn_locators (seq, epilogue_locator);
 
       seq = get_insns ();
@@ -5199,6 +5233,7 @@ epilogue_done:
 	}
 
       start_sequence ();
+      emit_note (NOTE_INSN_EPILOGUE_BEG);
       emit_insn (gen_sibcall_epilogue ());
       seq = get_insns ();
       end_sequence ();
@@ -5206,7 +5241,7 @@ epilogue_done:
       /* Retain a map of the epilogue insns.  Used in life analysis to
 	 avoid getting rid of sibcall epilogue insns.  Do this before we
 	 actually emit the sequence.  */
-      record_insns (seq, &sibcall_epilogue);
+      record_insns (seq, NULL, &epilogue_insn_hash);
       set_insn_locators (seq, epilogue_locator);
 
       emit_insn_before (seq, insn);
@@ -5240,23 +5275,29 @@ epilogue_done:
   df_update_entry_exit_and_calls ();
 }
 
-/* Reposition the prologue-end and epilogue-begin notes after instruction
-   scheduling and delayed branch scheduling.  */
+/* Reposition the prologue-end and epilogue-begin notes after
+   instruction scheduling.  */
 
 void
 reposition_prologue_and_epilogue_notes (void)
 {
-#if defined (HAVE_prologue) || defined (HAVE_epilogue)
+#if defined (HAVE_prologue) || defined (HAVE_epilogue) \
+    || defined (HAVE_sibcall_epilogue)
   rtx insn, last, note;
-  int len;
+  basic_block bb;
 
-  if ((len = VEC_length (int, prologue)) > 0)
+  /* Since the hash table is created on demand, the fact that it is
+     non-null is a signal that it is non-empty.  */
+  if (prologue_insn_hash != NULL)
     {
+      size_t len = htab_elements (prologue_insn_hash);
       last = 0, note = 0;
 
-      /* Scan from the beginning until we reach the last prologue insn.
-	 We apparently can't depend on basic_block_{head,end} after
-	 reorg has run.  */
+      /* Scan from the beginning until we reach the last prologue insn.  */
+      /* ??? While we do have the CFG intact, there are two problems:
+	 (1) The prologue can contain loops (typically probing the stack),
+	     which means that the end of the prologue isn't in the first bb.
+	 (2) Sometimes the PROLOGUE_END note gets pushed into the next bb.  */
       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
 	{
 	  if (NOTE_P (insn))
@@ -5264,7 +5305,7 @@ reposition_prologue_and_epilogue_notes (
 	      if (NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
 		note = insn;
 	    }
-	  else if (contains (insn, &prologue))
+	  else if (contains (insn, prologue_insn_hash))
 	    {
 	      last = insn;
 	      if (--len == 0)
@@ -5274,14 +5315,17 @@ reposition_prologue_and_epilogue_notes (
 
       if (last)
 	{
-	  /* Find the prologue-end note if we haven't already, and
-	     move it to just after the last prologue insn.  */
-	  if (note == 0)
+	  if (note == NULL)
 	    {
-	      for (note = last; (note = NEXT_INSN (note));)
-		if (NOTE_P (note)
-		    && NOTE_KIND (note) == NOTE_INSN_PROLOGUE_END)
-		  break;
+	      /* Scan forward looking for the PROLOGUE_END note.  It should
+		 be right at the beginning of the block, possibly with other
+		 insn notes that got moved there.  */
+	      for (note = NEXT_INSN (last); ; note = NEXT_INSN (note))
+		{
+		  if (NOTE_P (note)
+		      && NOTE_KIND (note) == NOTE_INSN_PROLOGUE_END)
+		    break;
+		}
 	    }
 
 	  /* Avoid placing note between CODE_LABEL and BASIC_BLOCK note.  */
@@ -5291,41 +5335,39 @@ reposition_prologue_and_epilogue_notes (
 	}
     }
 
-  if ((len = VEC_length (int, epilogue)) > 0)
+  if (epilogue_insn_hash != NULL)
     {
-      last = 0, note = 0;
+      edge_iterator ei;
+      edge e;
 
-      /* Scan from the end until we reach the first epilogue insn.
-	 We apparently can't depend on basic_block_{head,end} after
-	 reorg has run.  */
-      for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
 	{
-	  if (NOTE_P (insn))
-	    {
-	      if (NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
-		note = insn;
-	    }
-	  else if (contains (insn, &epilogue))
-	    {
-	      last = insn;
-	      if (--len == 0)
-		break;
-	    }
-	}
+	  last = 0, note = 0;
+	  bb = e->src;
 
-      if (last)
-	{
-	  /* Find the epilogue-begin note if we haven't already, and
-	     move it to just before the first epilogue insn.  */
-	  if (note == 0)
+	  /* Scan from the beginning until we reach the first epilogue insn.
+	     Take the cue for whether this is a plain or sibcall epilogue
+	     from the kind of note we find first.  */
+	  FOR_BB_INSNS (bb, insn)
 	    {
-	      for (note = insn; (note = PREV_INSN (note));)
-		if (NOTE_P (note)
-		    && NOTE_KIND (note) == NOTE_INSN_EPILOGUE_BEG)
-		  break;
+	      if (NOTE_P (insn))
+		{
+		  if (NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
+		    {
+		      note = insn;
+		      if (last)
+			break;
+		    }
+		}
+	      else if (contains (insn, epilogue_insn_hash))
+		{
+		  last = insn;
+		  if (note != NULL)
+		    break;
+		}
 	    }
-
-	  if (PREV_INSN (last) != note)
+	     
+	  if (last && note && PREV_INSN (last) != note)
 	    reorder_insns (note, note, PREV_INSN (last));
 	}
     }
--- gcc/insn-notes.def	(revision 148000)
+++ gcc/insn-notes.def	(local)
@@ -70,4 +70,8 @@ INSN_NOTE (BASIC_BLOCK)
    between hot and cold text sections.  */
 INSN_NOTE (SWITCH_TEXT_SECTIONS)
 
+/* Mark the restore point after an epilogue changed CFI data.  Used only
+   when an epilogue appears in the middle of a function.  */
+INSN_NOTE (CFA_RESTORE_STATE)
+
 #undef INSN_NOTE
--- gcc/jump.c	(revision 148000)
+++ gcc/jump.c	(local)
@@ -869,8 +869,21 @@ returnjump_p_1 (rtx *loc, void *data ATT
 {
   rtx x = *loc;
 
-  return x && (GET_CODE (x) == RETURN
-	       || (GET_CODE (x) == SET && SET_IS_RETURN_P (x)));
+  if (x == NULL)
+    return false;
+
+  switch (GET_CODE (x))
+    {
+    case RETURN:
+    case EH_RETURN:
+      return true;
+
+    case SET:
+      return SET_IS_RETURN_P (x);
+
+    default:
+      return false;
+    }
 }
 
 int
@@ -881,6 +894,22 @@ returnjump_p (rtx insn)
   return for_each_rtx (&PATTERN (insn), returnjump_p_1, NULL);
 }
 
+/* Return true if INSN is a (possibly conditional) EH return insn.  */
+
+static int
+eh_returnjump_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+  return *loc && GET_CODE (*loc) == EH_RETURN;
+}
+
+int
+eh_returnjump_p (rtx insn)
+{
+  if (!JUMP_P (insn))
+    return 0;
+  return for_each_rtx (&PATTERN (insn), eh_returnjump_p_1, NULL);
+}
+
 /* Return true if INSN is a jump that only transfers control and
    nothing more.  */
 
--- gcc/reg-notes.def	(revision 148000)
+++ gcc/reg-notes.def	(local)
@@ -118,6 +118,41 @@ REG_NOTE (BR_PRED)
    instead of intuition.  */
 REG_NOTE (FRAME_RELATED_EXPR)
 
+/* Attached to insns that are RTX_FRAME_RELATED_P, but are too complex
+   for FRAME_RELATED_EXPR intuition.  The insn's first pattern must be
+   a SET, and the destination must be the CFA register.  The attached
+   rtx is an expression that defines the CFA.  In the simplest case, the
+   rtx could be just the stack_pointer_rtx; more common would be a PLUS
+   with a base register and a constant offset.  In the most complicated
+   cases, this will result in a DW_CFA_def_cfa_expression with the rtx
+   expression rendered in a dwarf location expression.  */
+REG_NOTE (CFA_DEF_CFA)
+
+/* Attached to insns that are RTX_FRAME_RELATED_P, but are too complex
+   for FRAME_RELATED_EXPR intuition.  This note adjusts the expression
+   from which the CFA is computed.  The attached rtx defines a new CFA
+   expression, relative to the old CFA expression.  This rtx must be of
+   the form (SET new-cfa-reg (PLUS old-cfa-reg const_int)).  If the note
+   rtx is NULL, we use the first SET of the insn.  */
+REG_NOTE (CFA_ADJUST_CFA)
+
+/* Similar to FRAME_RELATED_EXPR, with the additional information that
+   this is a save to memory, i.e. will result in DW_CFA_offset or the
+   like.  The pattern or the insn should be a simple store relative to
+   the CFA.  */
+REG_NOTE (CFA_OFFSET)
+
+/* Similar to FRAME_RELATED_EXPR, with the additional information that this
+   is a save to a register, i.e. will result in DW_CFA_register.  The insn
+   or the pattern should be a simple reg-reg move.  */
+REG_NOTE (CFA_REGISTER)
+
+/* Attached to insns that are RTX_FRAME_RELATED_P, with the information
+   that this is a restore operation, i.e. will result in DW_CFA_restore
+   or the like.  Either the attached rtx, or the destination of the insn's
+   first pattern is the register to be restored.  */
+REG_NOTE (CFA_RESTORE)
+
 /* Indicates that REG holds the exception context for the function.
    This context is shared by inline functions, so the code to acquire
    the real exception context is delayed until after inlining.  */
--- gcc/rtl.def	(revision 148000)
+++ gcc/rtl.def	(local)
@@ -281,6 +281,10 @@ DEF_RTL_EXPR(CALL, "call", "ee", RTX_EXT
 
 DEF_RTL_EXPR(RETURN, "return", "", RTX_EXTRA)
 
+/* Special for EH return from subroutine.  */
+
+DEF_RTL_EXPR(EH_RETURN, "eh_return", "", RTX_EXTRA)
+
 /* Conditional trap.
    Operand 1 is the condition.
    Operand 2 is the trap code.
--- gcc/rtl.h	(revision 148000)
+++ gcc/rtl.h	(local)
@@ -2086,6 +2086,7 @@ extern rtx pc_set (const_rtx);
 extern rtx condjump_label (const_rtx);
 extern int simplejump_p (const_rtx);
 extern int returnjump_p (rtx);
+extern int eh_returnjump_p (rtx);
 extern int onlyjump_p (const_rtx);
 extern int only_sets_cc0_p (const_rtx);
 extern int sets_cc0_p (const_rtx);
@@ -2198,6 +2199,7 @@ extern int prologue_epilogue_contains (c
 extern int sibcall_epilogue_contains (const_rtx);
 extern void mark_temp_addr_taken (rtx);
 extern void update_temp_slot_address (rtx, rtx);
+extern void maybe_copy_epilogue_insn (rtx, rtx);
 
 /* In stmt.c */
 extern void expand_null_return (void);

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-20  0:49 ` Ian Lance Taylor
@ 2009-05-30  1:02   ` Richard Henderson
  2009-05-30 14:37     ` Jan Hubicka
  0 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-05-30  1:02 UTC (permalink / raw)
  To: Ian Lance Taylor; +Cc: gcc-patches

Ian Lance Taylor wrote:
>  This approach seems clearly superior.  I wonder if you can provide some
> simple functions for the backends to call to add notes and such.

Possibly.  At the moment I can't really think of any, or at least
anything more complicated than the existing add_reg_note.  I would
expect these to become more apparent as ports are converted.

> One could also imagine a more complex function which simply walked the
> prologue and did something like what the current dwarf2out.c code does,
> except adding reg notes.  That might make it relatively easy to
> transition the backends to the new scheme.

This same thought occurred to me.  Combine that with tracking the CFA
state across basic block boundaries, and markers for the CFA state to
be placed at hot/cold section boundaries and we'll fix two outstanding
bugs with unwind code.

Now if only someone would rewrite delayed-branch opt to not hork the
CFG, and the aforementioned pass will be easy...


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30  0:49   ` Richard Henderson
@ 2009-05-30 12:24     ` Jakub Jelinek
  2009-05-30 21:24       ` Richard Henderson
  2009-05-30 13:26     ` H.J. Lu
  2009-06-04 20:38     ` Laurent GUERBY
  2 siblings, 1 reply; 26+ messages in thread
From: Jakub Jelinek @ 2009-05-30 12:24 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches

On Fri, May 29, 2009 at 05:42:12PM -0700, Richard Henderson wrote:
> The other large change from the previous patch is the ability to have
> the eh_return epilogue from _Unwind_Resume (and friends) marked
> properly.  This required the addition of an EH_RETURN rtx, so that
> the middle-end could recognize when epilogue expansion should happen,
> rather than the ad-hoc unspecs that ports had been using.  As it
> happens, only i386 and bfin implement eh_return via special epilogues;
> most ports only need to overwrite one or more registers before using
> a normal epilogue.
>
> Tested on x86_64, i686; committed.

Thanks.  Just a nit.
For (-O2 -fasynchronous-unwind-tables):
void bar (void);
void bar2 (int, int, int, int);
void bar3 (int, int, int, int, int, int, int);
long foo (int x, int y)
{
  long a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p;
  asm volatile ("" : "=rm" (a), "=rm" (b), "=rm" (c), "=rm" (d), "=rm" (e),
		     "=rm" (f), "=rm" (g), "=rm" (h), "=rm" (i), "=rm" (j),
		     "=rm" (k), "=rm" (l), "=rm" (m), "=rm" (n), "=rm" (o),
		     "=rm" (p));
  bar ();
  bar2 (0, 1, 2, 3);
  bar ();
  bar3 (0, 1, 2, 3, 4, 5, 6);
  bar ();
  bar2 (0, 1, 2, 3);
  return a + b + c + d + e + f + g + h + i + j + k + l + m + n + o + p;
}
On x86_64 we get:
        movq    112(%rsp), %rbp
        .cfi_restore 6
        movq    120(%rsp), %r12
        .cfi_restore 12
        addq    %rbx, %rax
        movq    104(%rsp), %rbx
        .cfi_restore 3
        addq    %r13, %rax
        movq    128(%rsp), %r13
        .cfi_restore 13
        addq    %r14, %rax
        movq    136(%rsp), %r14
        .cfi_restore 14
        addq    %r15, %rax
        addq    16(%rsp), %rax
        movq    144(%rsp), %r15
        .cfi_restore 15
        addq    24(%rsp), %rax
        addq    32(%rsp), %rax
        addq    40(%rsp), %rax
        addq    48(%rsp), %rax
        addq    56(%rsp), %rax
        addq    64(%rsp), %rax
        addq    72(%rsp), %rax
        addq    80(%rsp), %rax
        addq    88(%rsp), %rax
        addq    $152, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
Couldn't we avoid the .cfi_restore directives altogether on x86_64
in this case?
If the target has a red-zone and all saved registers are within the
red-zone after the stack is adjusted up, the unwinders can IMHO use the stack
slots just as well as registers.  If the registers weren't saved within
the red-zone or the target doesn't have any (such as i386):
        movl    -32(%ebp), %eax
        addl    -28(%ebp), %eax
        addl    -36(%ebp), %eax
        addl    %edi, %eax
        movl    -4(%ebp), %edi
        .cfi_restore 7
        addl    %esi, %eax
        movl    -8(%ebp), %esi
        .cfi_restore 6
        addl    %ebx, %eax
        movl    -12(%ebp), %ebx
        .cfi_restore 3
        addl    -40(%ebp), %eax
        addl    -44(%ebp), %eax
        addl    -48(%ebp), %eax
        addl    -52(%ebp), %eax
        addl    -56(%ebp), %eax
        addl    -60(%ebp), %eax
        addl    -64(%ebp), %eax
        addl    -68(%ebp), %eax
        addl    -72(%ebp), %eax
        addl    -76(%ebp), %eax
        movl    %ebp, %esp
        .cfi_def_cfa_register 4
        popl    %ebp
        .cfi_restore 5
        .cfi_def_cfa_offset 4
        ret
        .cfi_endproc
then can't the .cfi_restore directives be just moved down to the
movl %ebp, %esp instruction (the stack slots still contain the saved
register content until movl %ebp, %esp is executed)?  This would save at
least a couple of DW_CFA_advance_loc* opcodes.

	Jakub

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30  0:49   ` Richard Henderson
  2009-05-30 12:24     ` Jakub Jelinek
@ 2009-05-30 13:26     ` H.J. Lu
  2009-05-30 20:08       ` [PATCH] Fix i?86 eh regressions (PR middle-end/40304) Jakub Jelinek
  2009-06-29 23:25       ` unwind info for epilogues H.J. Lu
  2009-06-04 20:38     ` Laurent GUERBY
  2 siblings, 2 replies; 26+ messages in thread
From: H.J. Lu @ 2009-05-30 13:26 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Joseph S. Myers

On Fri, May 29, 2009 at 5:42 PM, Richard Henderson <rth@redhat.com> wrote:
> Joseph S. Myers wrote:
>>
>> On Wed, 6 May 2009, Richard Henderson wrote:
>>
>>> Since the beginning, -fasynchronous-unwind-tables has not held correct
>>> information for function epilogues.  This is an attempt at adding that.
>>
>> See also Nathan Froyd's patch (doing this for x86_64) described in his
>> 2006 Summit paper.  I haven't compared the approaches (but fully support
>> adding this feature).
>>
>> http://gcc.gnu.org/ml/gcc-patches/2006-03/msg00426.html
>> http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01091.html
>>
>
> Thanks for the pointers.  I did incorporate a few of Nathan's ideas into
> this version of the patch -- primarily, when duplicating the epilogue,
> make sure the duplicated insns are also recorded as epilogue insns.
>
> The other large change from the previous patch is the ability to have
> the eh_return epilogue from _Unwind_Resume (and friends) marked
> properly.  This required the addition of an EH_RETURN rtx, so that
> the middle-end could recognize when epilogue expansion should happen,
> rather than the ad-hoc unspecs that ports had been using.  As it
> happens, only i386 and bfin implement eh_return via special epilogues;
> most ports only need to overwrite one or more registers before using
> a normal epilogue.
>
> Tested on x86_64, i686; committed.
>
> r~
>
>        * cfgcleanup.c (try_crossjump_to_edge): Only skip past
>        NOTE_INSN_BASIC_BLOCK.
>        * cfglayout.c (duplicate_insn_chain): Copy epilogue insn marks.
>        Duplicate NOTE_INSN_EPILOGUE_BEG notes.
>        * cfgrtl.c (can_delete_note_p): Allow NOTE_INSN_EPILOGUE_BEG
>        to be deleted.
>        * dwarf2out.c (struct cfa_loc): Change indirect field to bitfield,
>        add in_use field.
>        (add_cfi): Disable check redefining cfa away from drap.
>        (lookup_cfa_1): Add remember argument; handle remember/restore.
>        (lookup_cfa): Pass remember argument.
>        (cfa_remember): New.
>        (compute_barrier_args_size_1): Remove sibcall check.
>        (dwarf2out_frame_debug_def_cfa): New.
>        (dwarf2out_frame_debug_adjust_cfa): New.
>        (dwarf2out_frame_debug_cfa_offset): New.
>        (dwarf2out_frame_debug_cfa_register): New.
>        (dwarf2out_frame_debug_cfa_restore): New.
>        (dwarf2out_frame_debug): Handle REG_CFA_* notes.
>        (dwarf2out_begin_epilogue): New.
>        (dwarf2out_frame_debug_restore_state): New.
>        (dw_cfi_oprnd1_desc): Handle DW_CFA_remember_state,
>        DW_CFA_restore_state.
>        (output_cfi_directive): Likewise.
>        (convert_cfa_to_fb_loc_list): Likewise.
>        (dw_cfi_oprnd1_desc): Handle DW_CFA_restore.
>        * dwarf2out.h: Update.
>        * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
>        (copy_insn_1): Early out for null.
>        * final.c (final_scan_insn): Call dwarf2out_begin_epilogue
>        and dwarf2out_frame_debug_restore_state.
>        * function.c (prologue, epilogue, sibcall_epilogue): Remove.
>        (prologue_insn_hash, epilogue_insn_hash): New.
>        (free_after_compilation): Adjust freeing accordingly.
>        (record_insns): Create hash table if needed; push insns into
>        hash instead of array.
>        (maybe_copy_epilogue_insn): New.
>        (contains): Search hash table instead of array.
>        (sibcall_epilogue_contains): Remove.
>        (thread_prologue_and_epilogue_insns): Split eh_return insns
>        and mark them as epilogues.
>        (reposition_prologue_and_epilogue_notes): Rewrite epilogue
>        scanning in terms of basic blocks.
>        * insn-notes.def (CFA_RESTORE_STATE): New.
>        * jump.c (returnjump_p_1): Accept EH_RETURN.
>        (eh_returnjump_p_1, eh_returnjump_p): New.
>        * reg-notes.def (CFA_DEF_CFA, CFA_ADJUST_CFA, CFA_OFFSET,
>        CFA_REGISTER, CFA_RESTORE): New.
>        * rtl.def (EH_RETURN): New.
>        * rtl.h (eh_returnjump_p, maybe_copy_epilogue_insn): Declare.
>
>        * config/bfin/bfin.md (UNSPEC_VOLATILE_EH_RETURN): Remove.
>        (eh_return_internal): Use eh_return rtx; split w/ epilogue.
>
>        * config/i386/i386.c (gen_push): Update cfa state.
>        (pro_epilogue_adjust_stack): Add set_cfa argument.  When true,
>        add a CFA_ADJUST_CFA note.
>        (ix86_dwarf_handle_frame_unspec): Remove.
>        (ix86_expand_prologue): Update cfa state.
>        (ix86_emit_restore_reg_using_pop): New.
>        (ix86_emit_restore_regs_using_pop): New.
>        (ix86_emit_leave): New.
>        (ix86_emit_restore_regs_using_mov): Add CFA_RESTORE notes.
>        (ix86_expand_epilogue): Add notes for unwinding the epilogue.
>        * config/i386/i386.h (struct machine_cfa_state): New.
>        (ix86_cfa_state): New.
>        * config/i386/i386.md (UNSPEC_EH_RETURN): Remove.
>        (eh_return_internal): Merge from eh_return_<mode>,
>        use eh_return rtx, split w/ epilogue.
>

This breaks stack unwind on Linux/ia32:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40304


-- 
H.J.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30  1:02   ` Richard Henderson
@ 2009-05-30 14:37     ` Jan Hubicka
  2009-05-30 20:17       ` Eric Botcazou
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Hubicka @ 2009-05-30 14:37 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Ian Lance Taylor, gcc-patches

> Ian Lance Taylor wrote:
> > This approach seems clearly superior.  I wonder if you can provide some
> >simple functions for the backends to call to add notes and such.
> 
> Possibly.  At the moment I can't really think of any, or at least
> anything more complicated than the existing add_reg_note.  I would
> expect these to become more apparent as ports are converted.
> 
> > One could also imagine a more complex function which simply walked the
> >prologue and did something like what the current dwarf2out.c code does,
> >except adding reg notes.  That might make it relatively easy to
> >transition the backends to the new scheme.
> 
> This same thought occurred to me.  Combine that with tracking the CFA
> state across basic block boundaries, and markers for the CFA state to
> be placed at hot/cold section boundaries and we'll fix two outstanding
> bugs with unwind code.

Needless to say that we need to add support for multiple entry points :)
> 
> Now if only someone would rewrite delayed-branch opt to not hork the
> CFG, and the aforementioned pass will be easy...

Sadly, reorg.c was always a black box for me. How many active targets use
it?

Honza
> 
> 
> r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-05-30 13:26     ` H.J. Lu
@ 2009-05-30 20:08       ` Jakub Jelinek
  2009-05-31  0:48         ` Richard Henderson
  2009-06-29 23:25       ` unwind info for epilogues H.J. Lu
  1 sibling, 1 reply; 26+ messages in thread
From: Jakub Jelinek @ 2009-05-30 20:08 UTC (permalink / raw)
  To: Richard Henderson, H.J. Lu; +Cc: gcc-patches

On Sat, May 30, 2009 at 06:21:24AM -0700, H.J. Lu wrote:
> This breaks stack unwind on Linux/ia32:
> 
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40304

The C++ regressions can be fixed by making sure the prologue
stack adjustment is again marked frame related.
The difference e.g. on async-unwind1.C between mainline before Richard's
changes and current trunk with this patch is just in a couple of additional
.cfi_* directives, while unpatched current trunk emits worse code
(csa pass doesn't merge insns that it should) and also DW_CFA_GNU_args_size
ops are wrong, because they shouldn't be changing on prologue insns, but
as the prologue stack adjust isn't marked frame related, they are off by 8.

lea.c regression seems to be unrelated, seems the presence of a frame
related insn later in the bb confuses following peepholing in the same bb.
peephole2_optimize does:
              if (RTX_FRAME_RELATED_P (insn))
                {
                  /* If an insn has RTX_FRAME_RELATED_P set, peephole
                     substitution would lose the
                     REG_FRAME_RELATED_EXPR that is attached.  */
                  peep2_current_count = 0;
                  attempt = NULL;
                }
and the clearing of peep2_current_count apparently affects even peepholing
of 2 insns earlier in the same bb.  I'm not familiar enough with peephole2
pass to understand why yet.

2009-05-30  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/40304
	* config/i386/i386.c (pro_epilogue_adjust_stack): Mark insns
	frame related even if !set_cfa && style < 0.

--- gcc/config/i386/i386.c.jj	2009-05-30 10:13:01.000000000 +0200
+++ gcc/config/i386/i386.c	2009-05-30 20:43:15.000000000 +0200
@@ -8026,6 +8026,8 @@ pro_epilogue_adjust_stack (rtx dest, rtx
       gcc_assert (style);
       r11 = gen_rtx_REG (DImode, R11_REG);
       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+      if (style < 0)
+	RTX_FRAME_RELATED_P (insn) = 1;
       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
 							       offset));
     }
@@ -8043,6 +8045,8 @@ pro_epilogue_adjust_stack (rtx dest, rtx
       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
       RTX_FRAME_RELATED_P (insn) = 1;
     }
+  else if (style < 0)
+    RTX_FRAME_RELATED_P (insn) = 1;
 }
 
 /* Find an available register to be used as dynamic realign argument


	Jakub

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30 14:37     ` Jan Hubicka
@ 2009-05-30 20:17       ` Eric Botcazou
  0 siblings, 0 replies; 26+ messages in thread
From: Eric Botcazou @ 2009-05-30 20:17 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches, Richard Henderson, Ian Lance Taylor

> Sadly, reorg.c was always a black box for me. How many active targets use
> it?

MIPS, PA and SPARC are the mainstream ones.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30 12:24     ` Jakub Jelinek
@ 2009-05-30 21:24       ` Richard Henderson
  2009-06-01 17:10         ` Jakub Jelinek
  0 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-05-30 21:24 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Jakub Jelinek wrote:
> Couldn't we avoid the .cfi_restore directives altogether on x86_64
> in this case?

Yes, we could.

> ... then can't the .cfi_restore directives be just moved down to the
> movl %ebp, %esp instruction (the stack slots still contain the saved
> register content until movl %ebp, %esp is executed)?  This would save at
> least a couple of DW_CFA_advance_loc* opcodes.

Yes, this is also possible.  Indeed, the two optimizations are related.
I thought about the latter of these two briefly, but then put it off for
future work.

I haven't thought about all the possible ways this can be optimized, or
what all the constraints are.  I've an idea that we might be able to attack
this with an unwind generation and optimization pass, as mentioned by
Ian Taylor elsewhere in this thread.


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-05-30 20:08       ` [PATCH] Fix i?86 eh regressions (PR middle-end/40304) Jakub Jelinek
@ 2009-05-31  0:48         ` Richard Henderson
  2009-05-31 10:52           ` Jakub Jelinek
  0 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-05-31  0:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: H.J. Lu, gcc-patches

Jakub Jelinek wrote:
> lea.c regression seems to be unrelated, seems the presence of a frame
> related insn later in the bb confuses following peepholing in the same bb.

I haven't looked (as I don't remember seeing that error), but that
may be due to

         * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.

which I put in because try_split was throwing away my notes.  I
suppose I could work harder in that function to preserve them
somehow...

I'll look at this more Monday.


> 2009-05-30  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/40304
> 	* config/i386/i386.c (pro_epilogue_adjust_stack): Mark insns
> 	frame related even if !set_cfa && style < 0.

Ok.  Thanks for fixing this one.


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-05-31  0:48         ` Richard Henderson
@ 2009-05-31 10:52           ` Jakub Jelinek
  2009-05-31 14:11             ` H.J. Lu
  2009-06-01  4:33             ` Ian Lance Taylor
  0 siblings, 2 replies; 26+ messages in thread
From: Jakub Jelinek @ 2009-05-31 10:52 UTC (permalink / raw)
  To: Richard Henderson; +Cc: H.J. Lu, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1256 bytes --]

On Sat, May 30, 2009 at 02:24:42PM -0700, Richard Henderson wrote:
> Jakub Jelinek wrote:
>> lea.c regression seems to be unrelated, seems the presence of a frame
>> related insn later in the bb confuses following peepholing in the same bb.
>
> I havn't looked (as i don't remember seeing that error), but that
> may be due to
>
>         * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
>
> which I put in because try_split was throwing away my notes.  I
> suppose I could work harder in that function to preserve them
> somehow...
>
> I'll look at this more Monday.

The following patch seems to fix the peephole2 pass.  Just clearing
peep2_current_count appears to be insufficient, so I've tried to copy what
is initialized at the end of each basic block.  Bootstrapped/regtested on
x86_64-linux and i686-linux.

The second patch is needed to fix bootstrap with Ada,
apparently some of ada/rts/ files include tconfig.h and thus i386.h, without
including hwint.h, but it defines neither IN_LIBGCC2, nor IN_TARGET_LIBS,
but IN_RTS.  Instead of listing each such IN_* macro it seems better to test
for USED_FOR_TARGET (I've verified that whenever i386.h was included with
IN_LIBGCC2 or IN_TARGET_LIBS defined, USED_FOR_TARGET was defined as well).

	Jakub

[-- Attachment #2: Y320 --]
[-- Type: text/plain, Size: 879 bytes --]

2009-05-31  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/40304
	* recog.c (peephole2_optimize): When seeing a RTX_FRAME_RELATED_P
	insn, reinitialize peep2_insn_data and peep2_current in addition
	to clearing peep2_current_count.

--- gcc/recog.c.jj	2009-05-19 10:51:33.000000000 +0200
+++ gcc/recog.c	2009-05-30 21:23:44.000000000 +0200
@@ -3118,7 +3118,13 @@ peephole2_optimize (void)
 		  /* If an insn has RTX_FRAME_RELATED_P set, peephole
 		     substitution would lose the
 		     REG_FRAME_RELATED_EXPR that is attached.  */
+		  for (i = 0; i < MAX_INSNS_PER_PEEP2; ++i)
+		    peep2_insn_data[i].insn = NULL_RTX;
 		  peep2_current_count = 0;
+		  peep2_insn_data[MAX_INSNS_PER_PEEP2].insn = PEEP2_EOB;
+		  peep2_current = MAX_INSNS_PER_PEEP2;
+		  bitmap_copy (peep2_insn_data[peep2_current].live_before,
+			       live);
 		  attempt = NULL;
 		}
 	      else

[-- Attachment #3: Y321 --]
[-- Type: text/plain, Size: 592 bytes --]

2009-05-31  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.h (struct machine_cfa_state,
	struct machine_function): Guard with ifndef USED_FOR_TARGET
	instead of not IN_LIBGCC2 and not IN_TARGET_LIBS.

--- gcc/config/i386/i386.h.jj	2009-05-30 15:27:20.000000000 +0200
+++ gcc/config/i386/i386.h	2009-05-31 11:21:28.000000000 +0200
@@ -2395,7 +2395,7 @@ enum ix86_stack_slot
 \f
 /* Machine specific CFA tracking during prologue/epilogue generation.  */
 
-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS)
+#ifndef USED_FOR_TARGET
 struct GTY(()) machine_cfa_state
 {
   rtx reg;

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-05-31 10:52           ` Jakub Jelinek
@ 2009-05-31 14:11             ` H.J. Lu
  2009-06-01  4:33             ` Ian Lance Taylor
  1 sibling, 0 replies; 26+ messages in thread
From: H.J. Lu @ 2009-05-31 14:11 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, gcc-patches

On Sun, May 31, 2009 at 2:31 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Sat, May 30, 2009 at 02:24:42PM -0700, Richard Henderson wrote:
>> Jakub Jelinek wrote:
>>> lea.c regression seems to be unrelated, seems the presence of a frame
>>> related insn later in the bb confuses following peepholing in the same bb.
>>
>> I havn't looked (as i don't remember seeing that error), but that
>> may be due to
>>
>>         * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
>>
>> which I put in because try_split was throwing away my notes.  I
>> suppose I could work harder in that function to preserve them
>> somehow...
>>
>> I'll look at this more Monday.
>
> The following patch seems to fix the peephole2 pass, seems just clearing
> peep2_current_count is insufficient, I've tried to copy what is initialized
> at the end of each basic block, bootstrapped/regtested on x86_64-linux and
> i686-linux.
>
> The second patch is needed to fix bootstrap with Ada,
> apparently some of ada/rts/ files include tconfig.h and thus i386.h, without
> including hwint.h, but it defines neither IN_LIBGCC2, nor IN_TARGET_LIBS,
> but IN_RTS.  Instead of listing each such IN_* macro it seems better to test
> for USED_FOR_TARGET (I've verified that whenever i386.h was included with
> IN_LIBGCC2 or IN_TARGET_LIBS defined, USED_FOR_TARGET was defined as well).
>

I opened:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40316

for the gcc.target/i386/lea.c regression.


-- 
H.J.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-05-31 10:52           ` Jakub Jelinek
  2009-05-31 14:11             ` H.J. Lu
@ 2009-06-01  4:33             ` Ian Lance Taylor
  2009-06-01 13:46               ` Jakub Jelinek
  1 sibling, 1 reply; 26+ messages in thread
From: Ian Lance Taylor @ 2009-06-01  4:33 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, H.J. Lu, gcc-patches

Jakub Jelinek <jakub@redhat.com> writes:

> 2009-05-31  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR middle-end/40304
> 	* recog.c (peephole2_optimize): When seeing a RTX_FRAME_RELATED_P
> 	insn, reinitialize peep2_insn_data and peep2_current in addition
> 	to clearing peep2_current_count.
>
> --- gcc/recog.c.jj	2009-05-19 10:51:33.000000000 +0200
> +++ gcc/recog.c	2009-05-30 21:23:44.000000000 +0200
> @@ -3118,7 +3118,13 @@ peephole2_optimize (void)
>  		  /* If an insn has RTX_FRAME_RELATED_P set, peephole
>  		     substitution would lose the
>  		     REG_FRAME_RELATED_EXPR that is attached.  */
> +		  for (i = 0; i < MAX_INSNS_PER_PEEP2; ++i)
> +		    peep2_insn_data[i].insn = NULL_RTX;
>  		  peep2_current_count = 0;
> +		  peep2_insn_data[MAX_INSNS_PER_PEEP2].insn = PEEP2_EOB;
> +		  peep2_current = MAX_INSNS_PER_PEEP2;
> +		  bitmap_copy (peep2_insn_data[peep2_current].live_before,
> +			       live);

Put this in a helper function, I think, one that is also called from the
loop at the start of each block, passing in the live bitmap to use.

Ian

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-06-01  4:33             ` Ian Lance Taylor
@ 2009-06-01 13:46               ` Jakub Jelinek
  2009-06-01 15:53                 ` Richard Henderson
  2009-06-01 16:49                 ` Ian Lance Taylor
  0 siblings, 2 replies; 26+ messages in thread
From: Jakub Jelinek @ 2009-06-01 13:46 UTC (permalink / raw)
  To: Ian Lance Taylor; +Cc: Richard Henderson, H.J. Lu, gcc-patches

On Sun, May 31, 2009 at 09:33:19PM -0700, Ian Lance Taylor wrote:
> Jakub Jelinek <jakub@redhat.com> writes:
> 
> > 2009-05-31  Jakub Jelinek  <jakub@redhat.com>
> >
> > 	PR middle-end/40304
> > 	* recog.c (peephole2_optimize): When seeing a RTX_FRAME_RELATED_P
> > 	insn, reinitialize peep2_insn_data and peep2_current in addition
> > 	to clearing peep2_current_count.
> >
> > --- gcc/recog.c.jj	2009-05-19 10:51:33.000000000 +0200
> > +++ gcc/recog.c	2009-05-30 21:23:44.000000000 +0200
> > @@ -3118,7 +3118,13 @@ peephole2_optimize (void)
> >  		  /* If an insn has RTX_FRAME_RELATED_P set, peephole
> >  		     substitution would lose the
> >  		     REG_FRAME_RELATED_EXPR that is attached.  */
> > +		  for (i = 0; i < MAX_INSNS_PER_PEEP2; ++i)
> > +		    peep2_insn_data[i].insn = NULL_RTX;
> >  		  peep2_current_count = 0;
> > +		  peep2_insn_data[MAX_INSNS_PER_PEEP2].insn = PEEP2_EOB;
> > +		  peep2_current = MAX_INSNS_PER_PEEP2;
> > +		  bitmap_copy (peep2_insn_data[peep2_current].live_before,
> > +			       live);
> 
> Put this in a helper function, I think, one that is also called from the
> loop at the start of each block, passing in the live bitmap to use.

Like this?  Bootstrapped/regtested on x86_64-linux and i686-linux:

2009-06-01  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/40316
	* recog.c (peep2_reinit_state): New function.
	(peephole2_optimize): Use it at the end of a basic block and also
	when seeing a RTX_FRAME_RELATED_P insn.

--- gcc/recog.c.jj	2009-05-19 10:51:33.000000000 +0200
+++ gcc/recog.c	2009-06-01 10:06:26.000000000 +0200
@@ -3056,6 +3056,26 @@ peep2_find_free_register (int from, int 
   return NULL_RTX;
 }
 
+/* Forget all currently tracked instructions, only remember current
+   LIVE regset.  */
+
+static void
+peep2_reinit_state (regset live)
+{
+  int i;
+
+  /* Indicate that all slots except the last holds invalid data.  */
+  for (i = 0; i < MAX_INSNS_PER_PEEP2; ++i)
+    peep2_insn_data[i].insn = NULL_RTX;
+  peep2_current_count = 0;
+
+  /* Indicate that the last slot contains live_after data.  */
+  peep2_insn_data[MAX_INSNS_PER_PEEP2].insn = PEEP2_EOB;
+  peep2_current = MAX_INSNS_PER_PEEP2;
+
+  COPY_REG_SET (peep2_insn_data[MAX_INSNS_PER_PEEP2].live_before, live);
+}
+
 /* Perform the peephole2 optimization pass.  */
 
 static void
@@ -3079,19 +3099,11 @@ peephole2_optimize (void)
   FOR_EACH_BB_REVERSE (bb)
     {
       rtl_profile_for_bb (bb);
-      /* Indicate that all slots except the last holds invalid data.  */
-      for (i = 0; i < MAX_INSNS_PER_PEEP2; ++i)
-	peep2_insn_data[i].insn = NULL_RTX;
-      peep2_current_count = 0;
-
-      /* Indicate that the last slot contains live_after data.  */
-      peep2_insn_data[MAX_INSNS_PER_PEEP2].insn = PEEP2_EOB;
-      peep2_current = MAX_INSNS_PER_PEEP2;
 
       /* Start up propagation.  */
       bitmap_copy (live, DF_LR_OUT (bb));
       df_simulate_initialize_backwards (bb, live);
-      bitmap_copy (peep2_insn_data[MAX_INSNS_PER_PEEP2].live_before, live);
+      peep2_reinit_state (live);
 
       for (insn = BB_END (bb); ; insn = prev)
 	{
@@ -3118,7 +3130,7 @@ peephole2_optimize (void)
 		  /* If an insn has RTX_FRAME_RELATED_P set, peephole
 		     substitution would lose the
 		     REG_FRAME_RELATED_EXPR that is attached.  */
-		  peep2_current_count = 0;
+		  peep2_reinit_state (live);
 		  attempt = NULL;
 		}
 	      else


	Jakub

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-06-01 13:46               ` Jakub Jelinek
@ 2009-06-01 15:53                 ` Richard Henderson
  2009-06-01 16:49                 ` Ian Lance Taylor
  1 sibling, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2009-06-01 15:53 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Ian Lance Taylor, H.J. Lu, gcc-patches

Jakub Jelinek wrote:
> 	PR middle-end/40316
> 	* recog.c (peep2_reinit_state): New function.
> 	(peephole2_init_state): Use it at the end of a basic block and also
> 	when seeing a RTX_FRAME_RELATED_P insn.

Ok.


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] Fix i?86 eh regressions (PR middle-end/40304)
  2009-06-01 13:46               ` Jakub Jelinek
  2009-06-01 15:53                 ` Richard Henderson
@ 2009-06-01 16:49                 ` Ian Lance Taylor
  1 sibling, 0 replies; 26+ messages in thread
From: Ian Lance Taylor @ 2009-06-01 16:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, H.J. Lu, gcc-patches

Jakub Jelinek <jakub@redhat.com> writes:

> 2009-06-01  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR middle-end/40316
> 	* recog.c (peep2_reinit_state): New function.
> 	(peephole2_init_state): Use it at the end of a basic block and also
> 	when seeing a RTX_FRAME_RELATED_P insn.

This is OK.

Thanks.

Ian

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30 21:24       ` Richard Henderson
@ 2009-06-01 17:10         ` Jakub Jelinek
  2009-06-01 18:20           ` Richard Henderson
  0 siblings, 1 reply; 26+ messages in thread
From: Jakub Jelinek @ 2009-06-01 17:10 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches

On Sat, May 30, 2009 at 02:12:00PM -0700, Richard Henderson wrote:
> Jakub Jelinek wrote:
>> Couldn't we avoid the .cfi_restore directives altogether on x86_64
>> in this case?
>
> Yes, we could.
>
>> ... then can't the .cfi_restore directives be just moved down to the
>> movl %ebp, %esp instruction (the stack slots still contain the saved
>> register content until movl %ebp, %esp is executed)?  This would save at
>> least a couple of DW_CFA_advance_loc* opcodes.
>
> Yes, this is also possible.  Indeed, the two optimizations are related.
> I thought about the later of these two briefly, but then put it off for
> future work.
>
> I havn't thought about all the possible ways this can be optimized, or
> what all the constraints are.  I've an idea that we might could attack
> this with an unwind generation and optimization pass, as mentioned by
> Ian Taylor elsewhere in this thread.

Here is a patch that does both of these optimizations in the i386 backend
(I believe at least the removal of .cfi_restore because of red-zone
is something that is difficult to do in the middle-end, since it doesn't
know anything about red-zone).

The patch bootstrapped/regtested (together with the other patches I've
posted yesterday/today) on x86_64-linux and i686-linux and decreases
.eh_frame size significantly at least on x86_64.  Comparing size of
cc1plus .eh_frame sections from
		Friday,	today	and today + patch shows:
i686-linux	416656	669980	654980
x86_64-linux	459332	721172	650508

The reason why I'd like to settle this early is that other targets could do
similarly (e.g. on rs6000 only add REG_CFA_RESTOREs for register restores
before the stack restore and queue them all to that stack restore, further
restores are from red-zone and thus are safe, etc.).

2009-06-01  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (queued_cfa_restores): New static variable.
	(ix86_add_cfa_restore_note, ix86_add_queued_cfa_restore_notes): New
	functions.
	(pro_epilogue_adjust_stack): Call ix86_add_queued_cfa_restore_notes.
	(ix86_emit_restore_reg_using_pop): Add RED_OFFSET argument.
	Set RTX_FRAME_RELATED_P immediately after adding a REG_CFA_* note.
	Call ix86_add_cfa_restore_note instead of adding REG_CFA_RESTORE
	note unconditionally.
	(ix86_emit_restore_regs_using_mov): Likewise.
	(ix86_emit_restore_sse_regs_using_mov): Likewise.
	(ix86_emit_restore_regs_using_pop): Add RED_OFFSET argument, pass
	it through to ix86_emit_restore_reg_using_pop.
	(ix86_emit_leave): Add RED_OFFSET argument.  Call
	ix86_add_queued_cfa_restore_notes.  Call ix86_add_cfa_restore_note
	instead of adding REG_CFA_RESTORE note unconditionally.
	(ix86_expand_epilogue): Compute RED_OFFSET, pass it down to
	the above functions.  Call ix86_add_queued_cfa_restore_notes when
	needed.

--- gcc/config/i386/i386.c.jj	2009-05-30 20:43:15.000000000 +0200
+++ gcc/config/i386/i386.c	2009-06-01 17:15:17.000000000 +0200
@@ -8000,6 +8000,49 @@ ix86_emit_save_sse_regs_using_mov (rtx p
       }
 }
 
+static GTY(()) rtx queued_cfa_restores;
+
+/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
+   manipulation insn.  Don't add it if the previously
+   saved value will be left untouched within stack red-zone till return,
+   as unwinders can find the same value in the register and
+   on the stack.  */
+
+static void
+ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
+{
+  if (TARGET_RED_ZONE
+      && !TARGET_64BIT_MS_ABI
+      && red_offset + RED_ZONE_SIZE >= 0
+      && crtl->args.pops_args < 65536)
+    return;
+
+  if (insn)
+    {
+      add_reg_note (insn, REG_CFA_RESTORE, reg);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+  else
+    queued_cfa_restores
+      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
+}
+
+/* Add queued REG_CFA_RESTORE notes if any to INSN.  */
+
+static void
+ix86_add_queued_cfa_restore_notes (rtx insn)
+{
+  rtx last;
+  if (!queued_cfa_restores)
+    return;
+  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
+    ;
+  XEXP (last, 1) = REG_NOTES (insn);
+  REG_NOTES (insn) = queued_cfa_restores;
+  queued_cfa_restores = NULL_RTX;
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+
 /* Expand prologue or epilogue stack adjustment.
    The pattern exist to put a dependency on all ebp-based memory accesses.
    STYLE should be negative if instructions should be marked as frame related,
@@ -8032,6 +8075,9 @@ pro_epilogue_adjust_stack (rtx dest, rtx
 							       offset));
     }
 
+  if (style >= 0)
+    ix86_add_queued_cfa_restore_notes (insn);
+
   if (set_cfa)
     {
       rtx r;
@@ -8474,7 +8520,7 @@ ix86_expand_prologue (void)
 /* Emit code to restore REG using a POP insn.  */
 
 static void
-ix86_emit_restore_reg_using_pop (rtx reg)
+ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
 {
   rtx insn = emit_insn (ix86_gen_pop1 (reg));
 
@@ -8496,6 +8542,7 @@ ix86_emit_restore_reg_using_pop (rtx reg
       ix86_cfa_state->offset -= UNITS_PER_WORD;
       add_reg_note (insn, REG_CFA_ADJUST_CFA,
 		    copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+      RTX_FRAME_RELATED_P (insn) = 1;
     }
 
   /* When the frame pointer is the CFA, and we pop it, we are
@@ -8512,37 +8559,43 @@ ix86_emit_restore_reg_using_pop (rtx reg
       add_reg_note (insn, REG_CFA_DEF_CFA,
 		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
 				  GEN_INT (UNITS_PER_WORD)));
+      RTX_FRAME_RELATED_P (insn) = 1;
     }
 
-  add_reg_note (insn, REG_CFA_RESTORE, reg);
-  RTX_FRAME_RELATED_P (insn) = 1;
+  ix86_add_cfa_restore_note (insn, reg, red_offset);
 }
 
 /* Emit code to restore saved registers using POP insns.  */
 
 static void
-ix86_emit_restore_regs_using_pop (void)
+ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
 {
   int regno;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
-      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
+      {
+	ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
+					 red_offset);
+	red_offset += UNITS_PER_WORD;
+      }
 }
 
 /* Emit code and notes for the LEAVE instruction.  */
 
 static void
-ix86_emit_leave (void)
+ix86_emit_leave (HOST_WIDE_INT red_offset)
 {
   rtx insn = emit_insn (ix86_gen_leave ());
 
+  ix86_add_queued_cfa_restore_notes (insn);
+
   if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
     {
       add_reg_note (insn, REG_CFA_ADJUST_CFA, 
 		    copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
-      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
       RTX_FRAME_RELATED_P (insn) = 1;
+      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
     }
 }
 
@@ -8550,6 +8603,7 @@ ix86_emit_leave (void)
    is restored from POINTER + OFFSET.  */
 static void
 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+				  HOST_WIDE_INT red_offset,
 				  int maybe_eh_return)
 {
   unsigned int regno;
@@ -8586,10 +8640,12 @@ ix86_emit_restore_regs_using_mov (rtx po
 	       the drap register.  This will remain until we restore
 	       the stack pointer.  */
 	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+	    RTX_FRAME_RELATED_P (insn) = 1;
 	  }
 	else
-	  add_reg_note (insn, REG_CFA_RESTORE, reg);
-	RTX_FRAME_RELATED_P (insn) = 1;
+	  ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
+
+	red_offset += UNITS_PER_WORD;
       }
 }
 
@@ -8597,6 +8653,7 @@ ix86_emit_restore_regs_using_mov (rtx po
    is restored from POINTER + OFFSET.  */
 static void
 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+				      HOST_WIDE_INT red_offset,
 				      int maybe_eh_return)
 {
   int regno;
@@ -8625,8 +8682,9 @@ ix86_emit_restore_sse_regs_using_mov (rt
 	insn = emit_move_insn (reg, mem);
 	offset += 16;
 
-	add_reg_note (insn, REG_CFA_RESTORE, reg);
-	RTX_FRAME_RELATED_P (insn) = 1;
+	ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
+
+	red_offset += 16;
       }
 }
 
@@ -8637,7 +8695,7 @@ ix86_expand_epilogue (int style)
 {
   int sp_valid;
   struct ix86_frame frame;
-  HOST_WIDE_INT offset;
+  HOST_WIDE_INT offset, red_offset;
   struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
   bool using_drap;
 
@@ -8655,6 +8713,9 @@ ix86_expand_epilogue (int style)
   if (frame_pointer_needed && frame.red_zone_size)
     emit_insn (gen_memory_blockage ()); 
 
+  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
+  gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
+
   /* Calculate start of saved registers relative to ebp.  Special care
      must be taken for the normal return case of a function using
      eh_return: the eax and edx registers are marked as saved, but not
@@ -8665,8 +8726,18 @@ ix86_expand_epilogue (int style)
   offset *= -UNITS_PER_WORD;
   offset -= frame.nsseregs * 16 + frame.padding0;
 
-  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
-  gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
+  /* Calculate start of saved registers relative to esp on entry of the
+     function.  When realigning stack, this needs to be smallest possible
+     value at runtime.  */
+  red_offset = offset;
+  if (using_drap)
+    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
+		  + UNITS_PER_WORD;
+  if (frame_pointer_needed)
+    red_offset -= UNITS_PER_WORD;
+  if (stack_realign_fp)
+    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
+		  - UNITS_PER_WORD;
 
   /* If we're only restoring one register and sp is not valid then
      using a move instruction to restore the register since it's
@@ -8703,22 +8774,32 @@ ix86_expand_epilogue (int style)
 	  || stack_realign_fp)
 	{
 	  ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
-					        frame.to_allocate, style == 2);
+						frame.to_allocate, red_offset,
+						style == 2);
 	  ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
 					    frame.to_allocate
 					    + frame.nsseregs * 16
+					    + frame.padding0,
+					    red_offset
+					    + frame.nsseregs * 16
 					    + frame.padding0, style == 2);
 	}
       else
         {
 	  ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
-					        offset, style == 2);
+						offset, red_offset,
+						style == 2);
 	  ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
 					    offset
 					    + frame.nsseregs * 16
+					    + frame.padding0,
+					    red_offset
+					    + frame.nsseregs * 16
 					    + frame.padding0, style == 2);
         }
 
+      red_offset -= offset;
+
       /* eh_return epilogues need %ecx added to the stack pointer.  */
       if (style == 2)
 	{
@@ -8746,6 +8827,7 @@ ix86_expand_epilogue (int style)
 		 the return insn.  */
 	      add_reg_note (tmp, REG_CFA_DEF_CFA,
 			    plus_constant (sa, UNITS_PER_WORD));
+	      ix86_add_queued_cfa_restore_notes (tmp);
 	      add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
 	      RTX_FRAME_RELATED_P (tmp) = 1;
 	      ix86_cfa_state->reg = sa;
@@ -8762,6 +8844,7 @@ ix86_expand_epilogue (int style)
 					 + frame.nsseregs * 16
 					 + frame.padding0));
 	      tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+	      ix86_add_queued_cfa_restore_notes (tmp);
 
 	      gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
 	      if (ix86_cfa_state->offset != UNITS_PER_WORD)
@@ -8784,14 +8867,14 @@ ix86_expand_epilogue (int style)
       /* If not an i386, mov & pop is faster than "leave".  */
       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
 	       || !cfun->machine->use_fast_prologue_epilogue)
-	ix86_emit_leave ();
+	ix86_emit_leave (red_offset);
       else
 	{
 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
 				     hard_frame_pointer_rtx,
 				     const0_rtx, style, !using_drap);
 
-	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
+	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
 	}
     }
   else
@@ -8811,7 +8894,8 @@ ix86_expand_epilogue (int style)
 				     hard_frame_pointer_rtx,
 				     GEN_INT (offset), style, false);
           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
-					        frame.to_allocate, style == 2);
+						frame.to_allocate, red_offset,
+						style == 2);
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				     GEN_INT (frame.nsseregs * 16),
 				     style, false);
@@ -8819,7 +8903,7 @@ ix86_expand_epilogue (int style)
       else if (frame.to_allocate || frame.nsseregs)
 	{
           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
-					        frame.to_allocate,
+						frame.to_allocate, red_offset,
 						style == 2);
 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				     GEN_INT (frame.to_allocate
@@ -8828,14 +8912,16 @@ ix86_expand_epilogue (int style)
 				     !using_drap && !frame_pointer_needed);
 	}
 
-      ix86_emit_restore_regs_using_pop ();
+      ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
+					+ frame.padding0);
+      red_offset -= offset;
 
       if (frame_pointer_needed)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
 	     able to grok it fast.  */
 	  if (TARGET_USE_LEAVE)
-	    ix86_emit_leave ();
+	    ix86_emit_leave (red_offset);
 	  else
             {
               /* For stack realigned really happens, recover stack 
@@ -8845,7 +8931,8 @@ ix86_expand_epilogue (int style)
 		pro_epilogue_adjust_stack (stack_pointer_rtx,
 					   hard_frame_pointer_rtx,
 					   const0_rtx, style, !using_drap);
-	      ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
+	      ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
+					       red_offset);
             }
 	}
     }
@@ -8872,7 +8959,7 @@ ix86_expand_epilogue (int style)
       RTX_FRAME_RELATED_P (insn) = 1;
 
       if (param_ptr_offset)
-	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
+	ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
     }
 
   /* Sibcall epilogues don't want a return instruction.  */


	Jakub

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-06-01 17:10         ` Jakub Jelinek
@ 2009-06-01 18:20           ` Richard Henderson
  2009-06-01 18:39             ` Jakub Jelinek
  0 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-06-01 18:20 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Jakub Jelinek wrote:
> (I believe at least the removal of .cfi_restore because of red-zone
> is something that is difficult to do in the middle-end, since it doesn't
> know anything about red-zone).

Possibly, though it wouldn't be that hard to teach the middle-end
about a red-zone.  I think this approach is certainly useful in
the short-term before a middle-end pass exists.

> +  /* Calculate start of saved registers relative to esp on entry of the
> +     function.  When realigning stack, this needs to be smallest possible
> +     value at runtime.  */
> +  red_offset = offset;
> +  if (using_drap)
> +    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
> +		  + UNITS_PER_WORD;
> +  if (frame_pointer_needed)
> +    red_offset -= UNITS_PER_WORD;
> +  if (stack_realign_fp)
> +    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
> +		  - UNITS_PER_WORD;

Only one of USING_DRAP or STACK_REALIGN_FP, surely?  Also, I think
"smallest" is misleading, since it implies (to me) smallest absolute
value, whereas what I think you mean is "most negative" or "largest".


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-06-01 18:20           ` Richard Henderson
@ 2009-06-01 18:39             ` Jakub Jelinek
  2009-06-01 19:15               ` Richard Henderson
  0 siblings, 1 reply; 26+ messages in thread
From: Jakub Jelinek @ 2009-06-01 18:39 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches

On Mon, Jun 01, 2009 at 11:20:12AM -0700, Richard Henderson wrote:
>> +  /* Calculate start of saved registers relative to esp on entry of the
>> +     function.  When realigning stack, this needs to be smallest possible
>> +     value at runtime.  */
>> +  red_offset = offset;
>> +  if (using_drap)
>> +    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
>> +		  + UNITS_PER_WORD;
>> +  if (frame_pointer_needed)
>> +    red_offset -= UNITS_PER_WORD;
>> +  if (stack_realign_fp)
>> +    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
>> +		  - UNITS_PER_WORD;
>
> Only one of USING_DRAP or STACK_REALIGN_FP, surely?  Also, I think

There are assertions earlier on that ensure that stack_realign_fp is
not true when using_drap:

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

When using_drap we need to account for the padding from the "and" insn
(0 to crtl->stack_alignment_needed / BITS_PER_UNIT - UNITS_PER_WORD
bytes) and one or two word pushes before that (so + UNITS_PER_WORD),
while for stack_realign_fp we only need to account for the padding
created by the "and" insn.
In the above I've tried to mirror what ix86_expand_prologue does
(also 3 ifs, no else/else if), but if you want, I can surely
swap stack_realign_fp and frame_pointer_needed case and add else
before if (stack_realign_fp).

> "smallest" is misleading, since it implies (to me) smallest absolute
> value, whereas what I think you mean is "most negative" or "largest".

Will change to most negative.  Ok with that change?

	Jakub

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-06-01 18:39             ` Jakub Jelinek
@ 2009-06-01 19:15               ` Richard Henderson
  0 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2009-06-01 19:15 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Jakub Jelinek wrote:
> In the above I've tried to mirror what ix86_expand_prologue does
> (also 3 ifs, no else/else if), but if you want, I can surely
> swap stack_realign_fp and frame_pointer_needed case and add else
> before if (stack_realign_fp).

I think I would prefer the else if.  Same change in prologue, I guess.

> Will change to most negative.  Ok with that change?

Yes, thanks.


r~

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30  0:49   ` Richard Henderson
  2009-05-30 12:24     ` Jakub Jelinek
  2009-05-30 13:26     ` H.J. Lu
@ 2009-06-04 20:38     ` Laurent GUERBY
  2 siblings, 0 replies; 26+ messages in thread
From: Laurent GUERBY @ 2009-06-04 20:38 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Joseph S. Myers

Hi,

This patch might have caused/triggered the following on hppa-linux:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40338

<<
Last revision that did bootstrap on hppa-linux is 147972, starting with
147996
I get on compile farm gcc61:

/home/guerby/build/./gcc/xgcc -B/home/guerby/build/./gcc/
-B/n/61/guerby/install-trunk/hppa2.0-unknown-linux-gnu/bin/
-B/n/61/guerby/install-trunk/hppa2.0-unknown-linux-gnu/lib/ -isystem
/n/61/guerby/instal\
l-trunk/hppa2.0-unknown-linux-gnu/include -isystem
/n/61/guerby/install-trunk/hppa2.0-unknown-linux-gnu/sys-include    -g -O2 -O2 
-g -O2 -DIN_GCC   -W -Wall -Wwrite-strings -Wstrict-prototypes -Wmissing-pr\
ototypes -Wcast-qual -Wold-style-definition  -isystem ./include  -fPIC -DELF=1
-DLINUX=1 -g -DHAVE_GTHR_DEFAULT -DIN_LIBGCC2 -D__GCC_FLOAT_NOT_NEEDED   -I.
-I. -I../.././gcc -I../../../trunk/libgcc -I../../\
../trunk/libgcc/. -I../../../trunk/libgcc/../gcc
-I../../../trunk/libgcc/../include  -DHAVE_CC_TLS -o _bswapdi2.o -MT
_bswapdi2.o -MD -MP -MF _bswapdi2.dep -DL_bswapdi2 -c
../../../trunk/libgcc/../gcc/libgc\
c2.c \
          -fvisibility=hidden -DHIDE_EXPORTS
../../../trunk/libgcc/../gcc/libgcc2.c: In function '__bswapdi2':
../../../trunk/libgcc/../gcc/libgcc2.c:513: internal compiler error: in
dwarf2out_begin_epilogue, at dwarf2out.c:2689
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.
make[3]: *** [_bswapdi2.o] Error 1
make[3]: Leaving directory
`/home/guerby/build/hppa2.0-unknown-linux-gnu/libgcc'
make[2]: *** [all-stage1-target-libgcc] Error 2
make[2]: Leaving directory `/home/guerby/build'
make[1]: *** [stage1-bubble] Error 2
make[1]: Leaving directory `/home/guerby/build'
make: *** [bootstrap] Error 2
>>

Laurent

On Fri, 2009-05-29 at 17:42 -0700, Richard Henderson wrote:
> Joseph S. Myers wrote:
> > On Wed, 6 May 2009, Richard Henderson wrote:
> > 
> >> Since the beginning, -fasynchronous-unwind-tables has not held correct
> >> information for function epilogues.  This is an attempt at adding that.
> > 
> > See also Nathan Froyd's patch (doing this for x86_64) described in his 
> > 2006 Summit paper.  I haven't compared the approaches (but fully support 
> > adding this feature).
> > 
> > http://gcc.gnu.org/ml/gcc-patches/2006-03/msg00426.html
> > http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01091.html
> > 
> 
> Thanks for the pointers.  I did incorporate a few of Nathan's ideas into
> this version of the patch -- primarily, when duplicating the epilogue,
> make sure the duplicated insns are also recorded as epilogue insns.
> 
> The other large change from the previous patch is the ability to have
> the eh_return epilogue from _Unwind_Resume (and friends) marked
> properly.  This required the addition of an EH_RETURN rtx, so that
> the middle-end could recognize when epilogue expansion should happen,
> rather than the ad-hoc unspecs that ports had been using.  As it
> happens, only i386 and bfin implement eh_return via special epilogues;
> most ports only need to overwrite one or more registers before using
> a normal epilogue.
> 
> Tested on x86_64, i686; committed.
> 
> 
> r~
> plain text document attachment (d-epilogue-2)
> 	* cfgcleanup.c (try_crossjump_to_edge): Only skip past
> 	NOTE_INSN_BASIC_BLOCK.
> 	* cfglayout.c (duplicate_insn_chain): Copy epilogue insn marks.
> 	Duplicate NOTE_INSN_EPILOGUE_BEG notes.
> 	* cfgrtl.c (can_delete_note_p): Allow NOTE_INSN_EPILOGUE_BEG
> 	to be deleted.
> 	* dwarf2out.c (struct cfa_loc): Change indirect field to bitfield,
> 	add in_use field.
> 	(add_cfi): Disable check redefining cfa away from drap.
> 	(lookup_cfa_1): Add remember argument; handle remember/restore.
> 	(lookup_cfa): Pass remember argument.
> 	(cfa_remember): New.
> 	(compute_barrier_args_size_1): Remove sibcall check.
> 	(dwarf2out_frame_debug_def_cfa): New.
> 	(dwarf2out_frame_debug_adjust_cfa): New.
> 	(dwarf2out_frame_debug_cfa_offset): New.
> 	(dwarf2out_frame_debug_cfa_register): New.
> 	(dwarf2out_frame_debug_cfa_restore): New.
> 	(dwarf2out_frame_debug): Handle REG_CFA_* notes.
> 	(dwarf2out_begin_epilogue): New.
> 	(dwarf2out_frame_debug_restore_state): New.
> 	(dw_cfi_oprnd1_desc): Handle DW_CFA_remember_state,
> 	DW_CFA_restore_state.
> 	(output_cfi_directive): Likewise.
> 	(convert_cfa_to_fb_loc_list): Likewise.
> 	(dw_cfi_oprnd1_desc): Handle DW_CFA_restore.
> 	* dwarf2out.h: Update.
> 	* emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
> 	(copy_insn_1): Early out for null.
> 	* final.c (final_scan_insn): Call dwarf2out_begin_epilogue
> 	and dwarf2out_frame_debug_restore_state.
> 	* function.c (prologue, epilogue, sibcall_epilogue): Remove.
> 	(prologue_insn_hash, epilogue_insn_hash): New.
> 	(free_after_compilation): Adjust freeing accordingly.
> 	(record_insns): Create hash table if needed; push insns into
> 	hash instead of array.
> 	(maybe_copy_epilogue_insn): New.
> 	(contains): Search hash table instead of array.
> 	(sibcall_epilogue_contains): Remove.
> 	(thread_prologue_and_epilogue_insns): Split eh_return insns
> 	and mark them as epilogues.
> 	(reposition_prologue_and_epilogue_notes): Rewrite epilogue
> 	scanning in terms of basic blocks.
> 	* insn-notes.def (CFA_RESTORE_STATE): New.
> 	* jump.c (returnjump_p_1): Accept EH_RETURN.
> 	(eh_returnjump_p_1, eh_returnjump_p): New.
> 	* reg-notes.def (CFA_DEF_CFA, CFA_ADJUST_CFA, CFA_OFFSET,
> 	CFA_REGISTER, CFA_RESTORE): New.
> 	* rtl.def (EH_RETURN): New.
> 	* rtl.h (eh_returnjump_p, maybe_copy_epilogue_insn): Declare.
> 
> 	* config/bfin/bfin.md (UNSPEC_VOLATILE_EH_RETURN): Remove.
> 	(eh_return_internal): Use eh_return rtx; split w/ epilogue.
> 
> 	* config/i386/i386.c (gen_push): Update cfa state.
> 	(pro_epilogue_adjust_stack): Add set_cfa argument.  When true,
> 	add a CFA_ADJUST_CFA note.
> 	(ix86_dwarf_handle_frame_unspec): Remove.
> 	(ix86_expand_prologue): Update cfa state.
> 	(ix86_emit_restore_reg_using_pop): New.
> 	(ix86_emit_restore_regs_using_pop): New.
> 	(ix86_emit_leave): New.
> 	(ix86_emit_restore_regs_using_mov): Add CFA_RESTORE notes.
> 	(ix86_expand_epilogue): Add notes for unwinding the epilogue.
> 	* config/i386/i386.h (struct machine_cfa_state): New.
> 	(ix86_cfa_state): New.
> 	* config/i386/i386.md (UNSPEC_EH_RETURN): Remove.
> 	(eh_return_internal): Merge from eh_return_<mode>,
> 	use eh_return rtx, split w/ epilogue.
> 
> --- gcc/cfgcleanup.c	(revision 148000)
> +++ gcc/cfgcleanup.c	(local)
> @@ -1672,8 +1672,7 @@ try_crossjump_to_edge (int mode, edge e1
>    /* Skip possible basic block header.  */
>    if (LABEL_P (newpos1))
>      newpos1 = NEXT_INSN (newpos1);
> -
> -  if (NOTE_P (newpos1))
> +  if (NOTE_INSN_BASIC_BLOCK_P (newpos1))
>      newpos1 = NEXT_INSN (newpos1);
>  
>    redirect_from = split_block (src1, PREV_INSN (newpos1))->src;
> --- gcc/cfglayout.c	(revision 148000)
> +++ gcc/cfglayout.c	(local)
> @@ -1112,7 +1112,7 @@ cfg_layout_can_duplicate_bb_p (const_bas
>  rtx
>  duplicate_insn_chain (rtx from, rtx to)
>  {
> -  rtx insn, last;
> +  rtx insn, last, copy;
>  
>    /* Avoid updating of boundaries of previous basic block.  The
>       note will get removed from insn stream in fixup.  */
> @@ -1133,7 +1133,8 @@ duplicate_insn_chain (rtx from, rtx to)
>  	  if (GET_CODE (PATTERN (insn)) == ADDR_VEC
>  	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
>  	    break;
> -	  emit_copy_of_insn_after (insn, get_last_insn ());
> +	  copy = emit_copy_of_insn_after (insn, get_last_insn ());
> +          maybe_copy_epilogue_insn (insn, copy);
>  	  break;
>  
>  	case CODE_LABEL:
> @@ -1153,23 +1154,18 @@ duplicate_insn_chain (rtx from, rtx to)
>  	    case NOTE_INSN_DELETED:
>  	    case NOTE_INSN_DELETED_LABEL:
>  	      /* No problem to strip these.  */
> -	    case NOTE_INSN_EPILOGUE_BEG:
> -	      /* Debug code expect these notes to exist just once.
> -		 Keep them in the master copy.
> -		 ??? It probably makes more sense to duplicate them for each
> -		 epilogue copy.  */
>  	    case NOTE_INSN_FUNCTION_BEG:
>  	      /* There is always just single entry to function.  */
>  	    case NOTE_INSN_BASIC_BLOCK:
>  	      break;
>  
> +	    case NOTE_INSN_EPILOGUE_BEG:
>  	    case NOTE_INSN_SWITCH_TEXT_SECTIONS:
>  	      emit_note_copy (insn);
>  	      break;
>  
>  	    default:
> -	      /* All other notes should have already been eliminated.
> -	       */
> +	      /* All other notes should have already been eliminated.  */
>  	      gcc_unreachable ();
>  	    }
>  	  break;
> --- gcc/cfgrtl.c	(revision 148000)
> +++ gcc/cfgrtl.c	(local)
> @@ -86,8 +86,16 @@ static void rtl_make_forwarder_block (ed
>  static int
>  can_delete_note_p (const_rtx note)
>  {
> -  return (NOTE_KIND (note) == NOTE_INSN_DELETED
> -	  || NOTE_KIND (note) == NOTE_INSN_BASIC_BLOCK);
> +  switch (NOTE_KIND (note))
> +    {
> +    case NOTE_INSN_DELETED:
> +    case NOTE_INSN_BASIC_BLOCK:
> +    case NOTE_INSN_EPILOGUE_BEG:
> +      return true;
> +
> +    default:
> +      return false;
> +    }
>  }
>  
>  /* True if a given label can be deleted.  */
> --- gcc/config/bfin/bfin.md	(revision 148000)
> +++ gcc/config/bfin/bfin.md	(local)
> @@ -141,8 +141,7 @@
>     (UNSPEC_ONES 12)])
>  
>  (define_constants
> -  [(UNSPEC_VOLATILE_EH_RETURN 0)
> -   (UNSPEC_VOLATILE_CSYNC 1)
> +  [(UNSPEC_VOLATILE_CSYNC 1)
>     (UNSPEC_VOLATILE_SSYNC 2)
>     (UNSPEC_VOLATILE_LOAD_FUNCDESC 3)
>     (UNSPEC_VOLATILE_STORE_EH_HANDLER 4)
> @@ -2573,8 +2572,7 @@
>    "bfin_expand_epilogue (0, 0, 1); DONE;")
>  
>  (define_expand "eh_return"
> -  [(unspec_volatile [(match_operand:SI 0 "register_operand" "")]
> -		    UNSPEC_VOLATILE_EH_RETURN)]
> +  [(use (match_operand:SI 0 "register_operand" ""))]
>    ""
>  {
>    emit_insn (gen_eh_store_handler (EH_RETURN_HANDLER_RTX, operands[0]));
> @@ -2592,11 +2590,10 @@
>    [(set_attr "type" "mcst")])
>  
>  (define_insn_and_split "eh_return_internal"
> -  [(set (pc)
> -	(unspec_volatile [(reg:SI REG_P2)] UNSPEC_VOLATILE_EH_RETURN))]
> +  [(eh_return)]
>    ""
>    "#"
> -  "reload_completed"
> +  "epilogue_completed"
>    [(const_int 1)]
>    "bfin_expand_epilogue (1, 1, 0); DONE;")
>  
> --- gcc/config/i386/i386.c	(revision 148000)
> +++ gcc/config/i386/i386.c	(local)
> @@ -7609,6 +7609,9 @@ output_set_got (rtx dest, rtx label ATTR
>  static rtx
>  gen_push (rtx arg)
>  {
> +  if (ix86_cfa_state->reg == stack_pointer_rtx)
> +    ix86_cfa_state->offset += UNITS_PER_WORD;
> +
>    return gen_rtx_SET (VOIDmode,
>  		      gen_rtx_MEM (Pmode,
>  				   gen_rtx_PRE_DEC (Pmode,
> @@ -7668,8 +7671,7 @@ ix86_save_reg (unsigned int regno, int m
>  	}
>      }
>  
> -  if (crtl->drap_reg
> -      && regno == REGNO (crtl->drap_reg))
> +  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
>      return 1;
>  
>    return (df_regs_ever_live_p (regno)
> @@ -8005,7 +8007,8 @@ ix86_emit_save_sse_regs_using_mov (rtx p
>     otherwise.  */
>  
>  static void
> -pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
> +pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
> +			   int style, bool set_cfa)
>  {
>    rtx insn;
>  
> @@ -8023,13 +8026,23 @@ pro_epilogue_adjust_stack (rtx dest, rtx
>        gcc_assert (style);
>        r11 = gen_rtx_REG (DImode, R11_REG);
>        insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
> -      if (style < 0)
> -	RTX_FRAME_RELATED_P (insn) = 1;
>        insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
>  							       offset));
>      }
> -  if (style < 0)
> -    RTX_FRAME_RELATED_P (insn) = 1;
> +
> +  if (set_cfa)
> +    {
> +      rtx r;
> +
> +      gcc_assert (ix86_cfa_state->reg == src);
> +      ix86_cfa_state->offset += INTVAL (offset);
> +      ix86_cfa_state->reg = dest;
> +    
> +      r = gen_rtx_PLUS (Pmode, src, offset);
> +      r = gen_rtx_SET (VOIDmode, dest, r);
> +      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
> +      RTX_FRAME_RELATED_P (insn) = 1;
> +    }
>  }
>  
>  /* Find an available register to be used as dynamic realign argument
> @@ -8164,30 +8177,6 @@ ix86_internal_arg_pointer (void)
>    return virtual_incoming_args_rtx;
>  }
>  
> -/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
> -   This is called from dwarf2out.c to emit call frame instructions
> -   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
> -static void
> -ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
> -{
> -  rtx unspec = SET_SRC (pattern);
> -  gcc_assert (GET_CODE (unspec) == UNSPEC);
> -
> -  switch (index)
> -    {
> -    case UNSPEC_REG_SAVE:
> -      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
> -			      SET_DEST (pattern));
> -      break;
> -    case UNSPEC_DEF_CFA:
> -      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
> -			 INTVAL (XVECEXP (unspec, 0, 0)));
> -      break;
> -    default:
> -      gcc_unreachable ();
> -    }
> -}
> -
>  /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
>     to be generated in correct form.  */
>  static void 
> @@ -8231,6 +8220,10 @@ ix86_expand_prologue (void)
>    /* DRAP should not coexist with stack_realign_fp */
>    gcc_assert (!(crtl->drap_reg && stack_realign_fp));
>  
> +  /* Initialize CFA state for before the prologue.  */
> +  ix86_cfa_state->reg = stack_pointer_rtx;
> +  ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
> +
>    ix86_compute_frame_layout (&frame);
>  
>    /* Emit prologue code to adjust stack alignment and setup DRAP, in case
> @@ -8260,6 +8253,7 @@ ix86_expand_prologue (void)
>  
>        insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
>        RTX_FRAME_RELATED_P (insn) = 1; 
> +      ix86_cfa_state->reg = crtl->drap_reg;
>  
>        /* Align the stack.  */
>        insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
> @@ -8288,6 +8282,9 @@ ix86_expand_prologue (void)
>  
>        insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
>        RTX_FRAME_RELATED_P (insn) = 1;
> +
> +      if (ix86_cfa_state->reg == stack_pointer_rtx)
> +        ix86_cfa_state->reg = hard_frame_pointer_rtx;
>      }
>  
>    if (stack_realign_fp)
> @@ -8326,7 +8323,8 @@ ix86_expand_prologue (void)
>      ;
>    else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
>      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
> -			       GEN_INT (-allocate), -1);
> +			       GEN_INT (-allocate), -1,
> +			       ix86_cfa_state->reg == stack_pointer_rtx);
>    else
>      {
>        /* Only valid for Win32.  */
> @@ -8354,10 +8352,15 @@ ix86_expand_prologue (void)
>        else
>  	insn = gen_allocate_stack_worker_32 (eax, eax);
>        insn = emit_insn (insn);
> -      RTX_FRAME_RELATED_P (insn) = 1;
> -      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
> -      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
> -      add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
> +
> +      if (ix86_cfa_state->reg == stack_pointer_rtx)
> +	{
> +	  ix86_cfa_state->offset += allocate;
> +	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
> +	  t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
> +	  add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
> +	  RTX_FRAME_RELATED_P (insn) = 1;
> +	}
>  
>        if (eax_live)
>  	{
> @@ -8464,18 +8467,96 @@ ix86_expand_prologue (void)
>      emit_insn (gen_cld ());
>  }
>  
> +/* Emit code to restore REG using a POP insn.  */
> +
> +static void
> +ix86_emit_restore_reg_using_pop (rtx reg)
> +{
> +  rtx insn = emit_insn (ix86_gen_pop1 (reg));
> +
> +  if (ix86_cfa_state->reg == crtl->drap_reg
> +      && REGNO (reg) == REGNO (crtl->drap_reg))
> +    {
> +      /* Previously we'd represented the CFA as an expression
> +	 like *(%ebp - 8).  We've just popped that value from
> +	 the stack, which means we need to reset the CFA to
> +	 the drap register.  This will remain until we restore
> +	 the stack pointer.  */
> +      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
> +      RTX_FRAME_RELATED_P (insn) = 1;
> +      return;
> +    }
> +
> +  if (ix86_cfa_state->reg == stack_pointer_rtx)
> +    {
> +      ix86_cfa_state->offset -= UNITS_PER_WORD;
> +      add_reg_note (insn, REG_CFA_ADJUST_CFA,
> +		    copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
> +    }
> +
> +  /* When the frame pointer is the CFA, and we pop it, we are
> +     swapping back to the stack pointer as the CFA.  This happens
> +     for stack frames that don't allocate other data, so we assume
> +     the stack pointer is now pointing at the return address, i.e.
> +     the function entry state, which makes the offset be 1 word.  */
> +  else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
> +	   && reg == hard_frame_pointer_rtx)
> +    {
> +      ix86_cfa_state->reg = stack_pointer_rtx;
> +      ix86_cfa_state->offset = UNITS_PER_WORD;
> +
> +      add_reg_note (insn, REG_CFA_DEF_CFA,
> +		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
> +				  GEN_INT (UNITS_PER_WORD)));
> +    }
> +
> +  add_reg_note (insn, REG_CFA_RESTORE, reg);
> +  RTX_FRAME_RELATED_P (insn) = 1;
> +}
> +
> +/* Emit code to restore saved registers using POP insns.  */
> +
> +static void
> +ix86_emit_restore_regs_using_pop (void)
> +{
> +  int regno;
> +
> +  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
> +      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
> +}
> +
> +/* Emit code and notes for the LEAVE instruction.  */
> +
> +static void
> +ix86_emit_leave (void)
> +{
> +  rtx insn = emit_insn (ix86_gen_leave ());
> +
> +  if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
> +    {
> +      add_reg_note (insn, REG_CFA_ADJUST_CFA, 
> +		    copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
> +      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
> +      RTX_FRAME_RELATED_P (insn) = 1;
> +    }
> +}
> +
>  /* Emit code to restore saved registers using MOV insns.  First register
>     is restored from POINTER + OFFSET.  */
>  static void
>  ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
>  				  int maybe_eh_return)
>  {
> -  int regno;
> +  unsigned int regno;
>    rtx base_address = gen_rtx_MEM (Pmode, pointer);
> +  rtx insn;
>  
>    for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>      if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
>        {
> +	rtx reg = gen_rtx_REG (Pmode, regno);
> +
>  	/* Ensure that adjust_address won't be forced to produce pointer
>  	   out of range allowed by x86-64 instruction set.  */
>  	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
> @@ -8488,9 +8569,23 @@ ix86_emit_restore_regs_using_mov (rtx po
>  	    base_address = gen_rtx_MEM (Pmode, r11);
>  	    offset = 0;
>  	  }
> -	emit_move_insn (gen_rtx_REG (Pmode, regno),
> -	                adjust_address (base_address, Pmode, offset));
> +	insn = emit_move_insn (reg,
> +			       adjust_address (base_address, Pmode, offset));
>  	offset += UNITS_PER_WORD;
> +
> +        if (ix86_cfa_state->reg == crtl->drap_reg
> +	    && regno == REGNO (crtl->drap_reg))
> +	  {
> +	    /* Previously we'd represented the CFA as an expression
> +	       like *(%ebp - 8).  We've just popped that value from
> +	       the stack, which means we need to reset the CFA to
> +	       the drap register.  This will remain until we restore
> +	       the stack pointer.  */
> +	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
> +	  }
> +	else
> +	  add_reg_note (insn, REG_CFA_RESTORE, reg);
> +	RTX_FRAME_RELATED_P (insn) = 1;
>        }
>  }
>  
> @@ -8502,11 +8597,13 @@ ix86_emit_restore_sse_regs_using_mov (rt
>  {
>    int regno;
>    rtx base_address = gen_rtx_MEM (TImode, pointer);
> -  rtx mem;
> +  rtx mem, insn;
>  
>    for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>      if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
>        {
> +	rtx reg = gen_rtx_REG (TImode, regno);
> +
>  	/* Ensure that adjust_address won't be forced to produce pointer
>  	   out of range allowed by x86-64 instruction set.  */
>  	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
> @@ -8521,8 +8618,11 @@ ix86_emit_restore_sse_regs_using_mov (rt
>  	  }
>  	mem = adjust_address (base_address, TImode, offset);
>  	set_mem_align (mem, 128);
> -	emit_move_insn (gen_rtx_REG (TImode, regno), mem);
> +	insn = emit_move_insn (reg, mem);
>  	offset += 16;
> +
> +	add_reg_note (insn, REG_CFA_RESTORE, reg);
> +	RTX_FRAME_RELATED_P (insn) = 1;
>        }
>  }
>  
> @@ -8531,10 +8631,11 @@ ix86_emit_restore_sse_regs_using_mov (rt
>  void
>  ix86_expand_epilogue (int style)
>  {
> -  int regno;
>    int sp_valid;
>    struct ix86_frame frame;
>    HOST_WIDE_INT offset;
> +  struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
> +  bool using_drap;
>  
>    ix86_finalize_stack_realign_flags ();
>  
> @@ -8560,6 +8661,9 @@ ix86_expand_epilogue (int style)
>    offset *= -UNITS_PER_WORD;
>    offset -= frame.nsseregs * 16 + frame.padding0;
>  
> +  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
> +  gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
> +
>    /* If we're only restoring one register and sp is not valid then
>       using a move instruction to restore the register since it's
>       less work than reloading sp and popping the register.
> @@ -8574,7 +8678,8 @@ ix86_expand_epilogue (int style)
>        || (TARGET_EPILOGUE_USING_MOVE
>  	  && cfun->machine->use_fast_prologue_epilogue
>  	  && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
> -      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
> +      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
> +	  && frame.to_allocate)
>        || (frame_pointer_needed && TARGET_USE_LEAVE
>  	  && cfun->machine->use_fast_prologue_epilogue
>  	  && (frame.nregs + frame.nsseregs) == 1)
> @@ -8622,13 +8727,28 @@ ix86_expand_epilogue (int style)
>  	    {
>  	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
>  	      tmp = plus_constant (tmp, UNITS_PER_WORD);
> -	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
> +	      tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
>  
>  	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
> -	      emit_move_insn (hard_frame_pointer_rtx, tmp);
> +	      tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
> +
> +	      /* Note that we use SA as a temporary CFA, as the return
> +		 address is at the proper place relative to it.  We
> +		 pretend this happens at the FP restore insn because
> +		 prior to this insn the FP would be stored at the wrong
> +		 offset relative to SA, and after this insn we have no
> +		 other reasonable register to use for the CFA.  We don't
> +		 bother resetting the CFA to the SP for the duration of
> +		 the return insn.  */
> +	      add_reg_note (tmp, REG_CFA_DEF_CFA,
> +			    plus_constant (sa, UNITS_PER_WORD));
> +	      add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
> +	      RTX_FRAME_RELATED_P (tmp) = 1;
> +	      ix86_cfa_state->reg = sa;
> +	      ix86_cfa_state->offset = UNITS_PER_WORD;
>  
>  	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
> -					 const0_rtx, style);
> +					 const0_rtx, style, false);
>  	    }
>  	  else
>  	    {
> @@ -8637,7 +8757,17 @@ ix86_expand_epilogue (int style)
>                                           + frame.nregs * UNITS_PER_WORD
>  					 + frame.nsseregs * 16
>  					 + frame.padding0));
> -	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
> +	      tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
> +
> +	      gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
> +	      if (ix86_cfa_state->offset != UNITS_PER_WORD)
> +		{
> +		  ix86_cfa_state->offset = UNITS_PER_WORD;
> +		  add_reg_note (tmp, REG_CFA_DEF_CFA,
> +				plus_constant (stack_pointer_rtx,
> +					       UNITS_PER_WORD));
> +		  RTX_FRAME_RELATED_P (tmp) = 1;
> +		}
>  	    }
>  	}
>        else if (!frame_pointer_needed)
> @@ -8646,18 +8776,18 @@ ix86_expand_epilogue (int style)
>  					    + frame.nregs * UNITS_PER_WORD
>  					    + frame.nsseregs * 16
>  					    + frame.padding0),
> -				   style);
> +				   style, !using_drap);
>        /* If not an i386, mov & pop is faster than "leave".  */
>        else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
>  	       || !cfun->machine->use_fast_prologue_epilogue)
> -	emit_insn ((*ix86_gen_leave) ());
> +	ix86_emit_leave ();
>        else
>  	{
>  	  pro_epilogue_adjust_stack (stack_pointer_rtx,
>  				     hard_frame_pointer_rtx,
> -				     const0_rtx, style);
> +				     const0_rtx, style, !using_drap);
>  
> -	  emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
> +	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
>  	}
>      }
>    else
> @@ -8675,11 +8805,12 @@ ix86_expand_epilogue (int style)
>            gcc_assert (!stack_realign_fp);
>  	  pro_epilogue_adjust_stack (stack_pointer_rtx,
>  				     hard_frame_pointer_rtx,
> -				     GEN_INT (offset), style);
> +				     GEN_INT (offset), style, false);
>            ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
>  					        frame.to_allocate, style == 2);
>  	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
> -				     GEN_INT (frame.nsseregs * 16), style);
> +				     GEN_INT (frame.nsseregs * 16),
> +				     style, false);
>  	}
>        else if (frame.to_allocate || frame.nsseregs)
>  	{
> @@ -8689,18 +8820,18 @@ ix86_expand_epilogue (int style)
>  	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
>  				     GEN_INT (frame.to_allocate
>  				     	      + frame.nsseregs * 16
> -					      + frame.padding0), style);
> +					      + frame.padding0), style,
> +				     !using_drap && !frame_pointer_needed);
>  	}
>  
> -      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> -	if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
> -	  emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
> +      ix86_emit_restore_regs_using_pop ();
> +
>        if (frame_pointer_needed)
>  	{
>  	  /* Leave results in shorter dependency chains on CPUs that are
>  	     able to grok it fast.  */
>  	  if (TARGET_USE_LEAVE)
> -	    emit_insn ((*ix86_gen_leave) ());
> +	    ix86_emit_leave ();
>  	  else
>              {
>                /* For stack realigned really happens, recover stack 
> @@ -8709,47 +8840,70 @@ ix86_expand_epilogue (int style)
>                if (stack_realign_fp)
>  		pro_epilogue_adjust_stack (stack_pointer_rtx,
>  					   hard_frame_pointer_rtx,
> -					   const0_rtx, style);
> -	      emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
> +					   const0_rtx, style, !using_drap);
> +	      ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
>              }
>  	}
>      }
>  
> -  if (crtl->drap_reg && crtl->stack_realign_needed)
> +  if (using_drap)
>      {
>        int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
>  			      ? 0 : UNITS_PER_WORD);
> +      rtx insn;
> +
>        gcc_assert (stack_realign_drap);
> -      emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
> -				   crtl->drap_reg,
> -				   GEN_INT (-(UNITS_PER_WORD
> -					      + param_ptr_offset))));
> -      if (!call_used_regs[REGNO (crtl->drap_reg)])
> -	emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
> -      
> +
> +      insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
> +					  crtl->drap_reg,
> +					  GEN_INT (-(UNITS_PER_WORD
> +						     + param_ptr_offset))));
> +
> +      ix86_cfa_state->reg = stack_pointer_rtx;
> +      ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
> +
> +      add_reg_note (insn, REG_CFA_DEF_CFA,
> +		    gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
> +				  GEN_INT (ix86_cfa_state->offset)));
> +      RTX_FRAME_RELATED_P (insn) = 1;
> +
> +      if (param_ptr_offset)
> +	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
>      }
>  
>    /* Sibcall epilogues don't want a return instruction.  */
>    if (style == 0)
> -    return;
> +    {
> +      *ix86_cfa_state = cfa_state_save;
> +      return;
> +    }
>  
>    if (crtl->args.pops_args && crtl->args.size)
>      {
>        rtx popc = GEN_INT (crtl->args.pops_args);
>  
> -      /* i386 can only pop 64K bytes.  If asked to pop more, pop
> -	 return address, do explicit add, and jump indirectly to the
> -	 caller.  */
> +      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
> +	 address, do explicit add, and jump indirectly to the caller.  */
>  
>        if (crtl->args.pops_args >= 65536)
>  	{
>  	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
> +	  rtx insn;
>  
>  	  /* There is no "pascal" calling convention in any 64bit ABI.  */
>  	  gcc_assert (!TARGET_64BIT);
>  
> -	  emit_insn (gen_popsi1 (ecx));
> -	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
> +	  insn = emit_insn (gen_popsi1 (ecx));
> +	  ix86_cfa_state->offset -= UNITS_PER_WORD;
> +
> +	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
> +			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
> +	  add_reg_note (insn, REG_CFA_REGISTER,
> +			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
> +	  RTX_FRAME_RELATED_P (insn) = 1;
> +
> +	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
> +				     popc, -1, true);
>  	  emit_jump_insn (gen_return_indirect_internal (ecx));
>  	}
>        else
> @@ -8757,6 +8911,10 @@ ix86_expand_epilogue (int style)
>      }
>    else
>      emit_jump_insn (gen_return_internal ());
> +
> +  /* Restore the state back to the state from the prologue,
> +     so that it's correct for the next epilogue.  */
> +  *ix86_cfa_state = cfa_state_save;
>  }
>  
>  /* Reset from the function's potential modifications.  */
> @@ -30361,8 +30519,6 @@ ix86_enum_va_list (int idx, const char *
>  #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
>  #undef TARGET_GET_DRAP_RTX
>  #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
> -#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
> -#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
>  #undef TARGET_STRICT_ARGUMENT_NAMING
>  #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
>  
> --- gcc/config/i386/i386.h	(revision 148000)
> +++ gcc/config/i386/i386.h	(local)
> @@ -2393,6 +2393,15 @@ enum ix86_stack_slot
>  \f
>  #define FASTCALL_PREFIX '@'
>  \f
> +/* Machine specific CFA tracking during prologue/epilogue generation.  */
> +
> +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS)
> +struct GTY(()) machine_cfa_state
> +{
> +  rtx reg;
> +  HOST_WIDE_INT offset;
> +};
> +
>  struct GTY(()) machine_function {
>    struct stack_local_entry *stack_locals;
>    const char *some_ld_name;
> @@ -2419,8 +2428,10 @@ struct GTY(()) machine_function {
>    int tls_descriptor_call_expanded_p;
>    /* This value is used for amd64 targets and specifies the current abi
>       to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi.  */
> -   enum calling_abi call_abi;
> +  enum calling_abi call_abi;
> +  struct machine_cfa_state cfa;
>  };
> +#endif
>  
>  #define ix86_stack_locals (cfun->machine->stack_locals)
>  #define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
> @@ -2436,6 +2447,7 @@ struct GTY(()) machine_function {
>     REG_SP is live.  */
>  #define ix86_current_function_calls_tls_descriptor \
>    (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
> +#define ix86_cfa_state (&cfun->machine->cfa)
>  
>  /* Control behavior of x86_file_start.  */
>  #define X86_FILE_START_VERSION_DIRECTIVE false
> --- gcc/config/i386/i386.md	(revision 148000)
> +++ gcc/config/i386/i386.md	(local)
> @@ -101,7 +101,6 @@
>     (UNSPEC_ADD_CARRY		34)
>     (UNSPEC_FLDCW		35)
>     (UNSPEC_REP			36)
> -   (UNSPEC_EH_RETURN		37)
>     (UNSPEC_LD_MPIC		38)	; load_macho_picbase
>     (UNSPEC_TRUNC_NOOP		39)
>  
> @@ -15982,21 +15981,16 @@
>    tmp = gen_rtx_MEM (Pmode, tmp);
>    emit_move_insn (tmp, ra);
>  
> -  if (Pmode == SImode)
> -    emit_jump_insn (gen_eh_return_si (sa));
> -  else
> -    emit_jump_insn (gen_eh_return_di (sa));
> +  emit_jump_insn (gen_eh_return_internal ());
>    emit_barrier ();
>    DONE;
>  })
>  
> -(define_insn_and_split "eh_return_<mode>"
> -  [(set (pc)
> -        (unspec [(match_operand:P 0 "register_operand" "c")]
> -	         UNSPEC_EH_RETURN))]
> +(define_insn_and_split "eh_return_internal"
> +  [(eh_return)]
>    ""
>    "#"
> -  "reload_completed"
> +  "epilogue_completed"
>    [(const_int 0)]
>    "ix86_expand_epilogue (2); DONE;")
>  
> --- gcc/dwarf2out.c	(revision 148000)
> +++ gcc/dwarf2out.c	(local)
> @@ -247,7 +247,8 @@ typedef struct GTY(()) cfa_loc {
>    HOST_WIDE_INT offset;
>    HOST_WIDE_INT base_offset;
>    unsigned int reg;
> -  int indirect;            /* 1 if CFA is accessed via a dereference.  */
> +  BOOL_BITFIELD indirect : 1;  /* 1 if CFA is accessed via a dereference.  */
> +  BOOL_BITFIELD in_use : 1;    /* 1 if a saved cfa is stored here.  */
>  } dw_cfa_location;
>  
>  /* All call frame descriptions (FDE's) in the GCC generated DWARF
> @@ -404,7 +405,7 @@ static const char *dwarf_cfi_name (unsig
>  static dw_cfi_ref new_cfi (void);
>  static void add_cfi (dw_cfi_ref *, dw_cfi_ref);
>  static void add_fde_cfi (const char *, dw_cfi_ref);
> -static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *);
> +static void lookup_cfa_1 (dw_cfi_ref, dw_cfa_location *, dw_cfa_location *);
>  static void lookup_cfa (dw_cfa_location *);
>  static void reg_save (const char *, unsigned, unsigned, HOST_WIDE_INT);
>  #ifdef DWARF2_UNWIND_INFO
> @@ -668,7 +669,10 @@ add_cfi (dw_cfi_ref *list_head, dw_cfi_r
>  
>    /* When DRAP is used, CFA is defined with an expression.  Redefine
>       CFA may lead to a different CFA value.   */
> -  if (fde && fde->drap_reg != INVALID_REGNUM)
> +  /* ??? Of course, this heuristic fails when we're annotating epilogues,
> +     because of course we'll always want to redefine the CFA back to the
> +     stack pointer on the way out.  Where should we move this check?  */
> +  if (0 && fde && fde->drap_reg != INVALID_REGNUM)
>      switch (cfi->dw_cfi_opc)
>        {
>          case DW_CFA_def_cfa_register:
> @@ -774,7 +778,7 @@ add_fde_cfi (const char *label, dw_cfi_r
>  /* Subroutine of lookup_cfa.  */
>  
>  static void
> -lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_location *loc)
> +lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_location *loc, dw_cfa_location *remember)
>  {
>    switch (cfi->dw_cfi_opc)
>      {
> @@ -793,6 +797,18 @@ lookup_cfa_1 (dw_cfi_ref cfi, dw_cfa_loc
>      case DW_CFA_def_cfa_expression:
>        get_cfa_from_loc_descr (loc, cfi->dw_cfi_oprnd1.dw_cfi_loc);
>        break;
> +
> +    case DW_CFA_remember_state:
> +      gcc_assert (!remember->in_use);
> +      *remember = *loc;
> +      remember->in_use = 1;
> +      break;
> +    case DW_CFA_restore_state:
> +      gcc_assert (remember->in_use);
> +      *loc = *remember;
> +      remember->in_use = 0;
> +      break;
> +
>      default:
>        break;
>      }
> @@ -805,19 +821,19 @@ lookup_cfa (dw_cfa_location *loc)
>  {
>    dw_cfi_ref cfi;
>    dw_fde_ref fde;
> +  dw_cfa_location remember;
>  
> +  memset (loc, 0, sizeof (*loc));
>    loc->reg = INVALID_REGNUM;
> -  loc->offset = 0;
> -  loc->indirect = 0;
> -  loc->base_offset = 0;
> +  remember = *loc;
>  
>    for (cfi = cie_cfi_head; cfi; cfi = cfi->dw_cfi_next)
> -    lookup_cfa_1 (cfi, loc);
> +    lookup_cfa_1 (cfi, loc, &remember);
>  
>    fde = current_fde ();
>    if (fde)
>      for (cfi = fde->dw_fde_cfi; cfi; cfi = cfi->dw_cfi_next)
> -      lookup_cfa_1 (cfi, loc);
> +      lookup_cfa_1 (cfi, loc, &remember);
>  }
>  
>  /* The current rule for calculating the DWARF2 canonical frame address.  */
> @@ -827,6 +843,9 @@ static dw_cfa_location cfa;
>     from the CFA.  */
>  static dw_cfa_location cfa_store;
>  
> +/* The current save location around an epilogue.  */
> +static dw_cfa_location cfa_remember;
> +
>  /* The running total of the size of arguments pushed onto the stack.  */
>  static HOST_WIDE_INT args_size;
>  
> @@ -1212,8 +1231,7 @@ compute_barrier_args_size_1 (rtx insn, H
>  
>    if (! RTX_FRAME_RELATED_P (insn))
>      {
> -      if (prologue_epilogue_contains (insn)
> -	  || sibcall_epilogue_contains (insn))
> +      if (prologue_epilogue_contains (insn))
>  	/* Nothing */;
>        else if (GET_CODE (PATTERN (insn)) == SET)
>  	offset = stack_adjust_offset (PATTERN (insn), cur_args_size, 0);
> @@ -1386,7 +1404,7 @@ dwarf2out_stack_adjust (rtx insn, bool a
>       with this function.  Proper support would require all frame-related
>       insns to be marked, and to be able to handle saving state around
>       epilogues textually in the middle of the function.  */
> -  if (prologue_epilogue_contains (insn) || sibcall_epilogue_contains (insn))
> +  if (prologue_epilogue_contains (insn))
>      return;
>  
>    /* If INSN is an instruction from target of an annulled branch, the
> @@ -1660,6 +1678,156 @@ reg_saved_in (rtx reg)
>     value, not an offset.  */
>  static dw_cfa_location cfa_temp;
>  
> +/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_DEF_CFA note.  */
> +
> +static void
> +dwarf2out_frame_debug_def_cfa (rtx pat, const char *label)
> +{
> +  memset (&cfa, 0, sizeof (cfa));
> +
> +  switch (GET_CODE (pat))
> +    {
> +    case PLUS:
> +      cfa.reg = REGNO (XEXP (pat, 0));
> +      cfa.offset = INTVAL (XEXP (pat, 1));
> +      break;
> +
> +    case REG:
> +      cfa.reg = REGNO (pat);
> +      break;
> +
> +    default:
> +      /* Recurse and define an expression.  */
> +      gcc_unreachable ();
> +    }
> +
> +  def_cfa_1 (label, &cfa);
> +}
> +
> +/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_ADJUST_CFA note.  */
> +
> +static void
> +dwarf2out_frame_debug_adjust_cfa (rtx pat, const char *label)
> +{
> +  rtx src, dest;
> +
> +  gcc_assert (GET_CODE (pat) == SET);
> +  dest = XEXP (pat, 0);
> +  src = XEXP (pat, 1);
> +
> +  switch (GET_CODE (src))
> +    {
> +    case PLUS:
> +      gcc_assert (REGNO (XEXP (src, 0)) == cfa.reg);
> +      cfa.offset -= INTVAL (XEXP (src, 1));
> +      break;
> +
> +    case REG:
> +	break;
> +
> +    default:
> +	gcc_unreachable ();
> +    }
> +
> +  cfa.reg = REGNO (dest);
> +  gcc_assert (cfa.indirect == 0);
> +
> +  def_cfa_1 (label, &cfa);
> +}
> +
> +/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_OFFSET note.  */
> +
> +static void
> +dwarf2out_frame_debug_cfa_offset (rtx set, const char *label)
> +{
> +  HOST_WIDE_INT offset;
> +  rtx src, addr, span;
> +
> +  src = XEXP (set, 1);
> +  addr = XEXP (set, 0);
> +  gcc_assert (MEM_P (addr));
> +  addr = XEXP (addr, 0);
> +  
> +  /* As documented, only consider extremely simple addresses.  */
> +  switch (GET_CODE (addr))
> +    {
> +    case REG:
> +      gcc_assert (REGNO (addr) == cfa.reg);
> +      offset = -cfa.offset;
> +      break;
> +    case PLUS:
> +      gcc_assert (REGNO (XEXP (addr, 0)) == cfa.reg);
> +      offset = INTVAL (XEXP (addr, 1)) - cfa.offset;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  span = targetm.dwarf_register_span (src);
> +
> +  /* ??? We'd like to use queue_reg_save, but we need to come up with
> +     a different flushing heuristic for epilogues.  */
> +  if (!span)
> +    reg_save (label, DWARF_FRAME_REGNUM (REGNO (src)), INVALID_REGNUM, offset);
> +  else
> +    {
> +      /* We have a PARALLEL describing where the contents of SRC live.
> +   	 Queue register saves for each piece of the PARALLEL.  */
> +      int par_index;
> +      int limit;
> +      HOST_WIDE_INT span_offset = offset;
> +
> +      gcc_assert (GET_CODE (span) == PARALLEL);
> +
> +      limit = XVECLEN (span, 0);
> +      for (par_index = 0; par_index < limit; par_index++)
> +	{
> +	  rtx elem = XVECEXP (span, 0, par_index);
> +
> +	  reg_save (label, DWARF_FRAME_REGNUM (REGNO (elem)),
> +		    INVALID_REGNUM, span_offset);
> +	  span_offset += GET_MODE_SIZE (GET_MODE (elem));
> +	}
> +    }
> +}
> +
> +/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_REGISTER note.  */
> +
> +static void
> +dwarf2out_frame_debug_cfa_register (rtx set, const char *label)
> +{
> +  rtx src, dest;
> +  unsigned sregno, dregno;
> +
> +  src = XEXP (set, 1);
> +  dest = XEXP (set, 0);
> +
> +  if (src == pc_rtx)
> +    sregno = DWARF_FRAME_RETURN_COLUMN;
> +  else
> +    sregno = DWARF_FRAME_REGNUM (REGNO (src));
> +
> +  dregno = DWARF_FRAME_REGNUM (REGNO (dest));
> +
> +  /* ??? We'd like to use queue_reg_save, but we need to come up with
> +     a different flushing heuristic for epilogues.  */
> +  reg_save (label, sregno, dregno, 0);
> +}
> +
> +/* A subroutine of dwarf2out_frame_debug, process a REG_CFA_RESTORE note.  */
> +
> +static void
> +dwarf2out_frame_debug_cfa_restore (rtx reg, const char *label)
> +{
> +  dw_cfi_ref cfi = new_cfi ();
> +  unsigned int regno = DWARF_FRAME_REGNUM (REGNO (reg));
> +
> +  cfi->dw_cfi_opc = (regno & ~0x3f ? DW_CFA_restore_extended : DW_CFA_restore);
> +  cfi->dw_cfi_oprnd1.dw_cfi_reg_num = regno;
> +
> +  add_fde_cfi (label, cfi);
> +}
> +
>  /* Record call frame debugging information for an expression EXPR,
>     which either sets SP or FP (adjusting how we calculate the frame
>     address) or saves a register to the stack or another register.
> @@ -2367,7 +2535,8 @@ void
>  dwarf2out_frame_debug (rtx insn, bool after_p)
>  {
>    const char *label;
> -  rtx src;
> +  rtx note, n;
> +  bool handled_one = false;
>  
>    if (insn == NULL_RTX)
>      {
> @@ -2412,15 +2581,158 @@ dwarf2out_frame_debug (rtx insn, bool af
>      }
>  
>    label = dwarf2out_cfi_label ();
> -  src = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
> -  if (src)
> -    insn = XEXP (src, 0);
> -  else
> -    insn = PATTERN (insn);
>  
> +  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
> +    switch (REG_NOTE_KIND (note))
> +      {
> +      case REG_FRAME_RELATED_EXPR:
> +	insn = XEXP (note, 0);
> +	goto found;
> +
> +      case REG_CFA_DEF_CFA:
> +	dwarf2out_frame_debug_def_cfa (XEXP (note, 0), label);
> +	handled_one = true;
> +	break;
> +
> +      case REG_CFA_ADJUST_CFA:
> +	n = XEXP (note, 0);
> +	if (n == NULL)
> +	  {
> +	    n = PATTERN (insn);
> +	    if (GET_CODE (n) == PARALLEL)
> +	      n = XVECEXP (n, 0, 0);
> +	  }
> +	dwarf2out_frame_debug_adjust_cfa (n, label);
> +	handled_one = true;
> +	break;
> +
> +      case REG_CFA_OFFSET:
> +	n = XEXP (note, 0);
> +	if (n == NULL)
> +	  n = single_set (insn);
> +	dwarf2out_frame_debug_cfa_offset (n, label);
> +	handled_one = true;
> +	break;
> +
> +      case REG_CFA_REGISTER:
> +	n = XEXP (note, 0);
> +	if (n == NULL)
> +	  {
> +	    n = PATTERN (insn);
> +	    if (GET_CODE (n) == PARALLEL)
> +	      n = XVECEXP (n, 0, 0);
> +	  }
> +	dwarf2out_frame_debug_cfa_register (n, label);
> +	handled_one = true;
> +	break;
> +
> +      case REG_CFA_RESTORE:
> +	n = XEXP (note, 0);
> +	if (n == NULL)
> +	  {
> +	    n = PATTERN (insn);
> +	    if (GET_CODE (n) == PARALLEL)
> +	      n = XVECEXP (n, 0, 0);
> +	    n = XEXP (n, 0);
> +	  }
> +	dwarf2out_frame_debug_cfa_restore (n, label);
> +	handled_one = true;
> +	break;
> +
> +      default:
> +	break;
> +      }
> +  if (handled_one)
> +    return;
> +
> +  insn = PATTERN (insn);
> + found:
>    dwarf2out_frame_debug_expr (insn, label);
>  }
>  
> +/* Determine if we need to save and restore CFI information around this
> +   epilogue, which begins at the NOTE_INSN_EPILOGUE_BEG note INSN.  If
> +   we do need to save/restore, then emit the save now, and insert a
> +   NOTE_INSN_CFA_RESTORE_STATE at the appropriate place in the stream.  */
> +
> +void
> +dwarf2out_begin_epilogue (rtx insn)
> +{
> +  bool saw_frp = false;
> +  rtx i;
> +  dw_cfi_ref cfi;
> +
> +  /* Scan forward to the return insn, noticing if there are possible
> +     frame related insns.  */
> +  for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
> +    {
> +      if (!INSN_P (i))
> +	continue;
> +
> +      /* Look for both regular and sibcalls to end the block.  */
> +      if (returnjump_p (i))
> +	break;
> +      if (CALL_P (i) && SIBLING_CALL_P (i))
> +	break;
> +
> +      if (RTX_FRAME_RELATED_P (i))
> +	saw_frp = true;
> +    }
> +
> +  /* If the port doesn't emit epilogue unwind info, we don't need a
> +     save/restore pair.  */
> +  if (!saw_frp)
> +    return;
> +
> +  /* Otherwise, search forward to see if the return insn was the last
> +     basic block of the function.  If so, we don't need save/restore.  */
> +  gcc_assert (i != NULL);
> +  i = next_real_insn (i);
> +  if (i == NULL)
> +    return;
> +
> +  /* Insert the restore before that next real insn in the stream, and before
> +     a potential NOTE_INSN_EPILOGUE_BEG -- we do need these notes to be
> +     properly nested.  This should be after any label or alignment.  This
> +     will be pushed into the CFI stream by the function below.  */
> +  while (1)
> +    {
> +      rtx p = PREV_INSN (i);
> +      if (!NOTE_P (p))
> +	break;
> +      if (NOTE_KIND (p) == NOTE_INSN_BASIC_BLOCK)
> +	break;
> +      i = p;
> +    }
> +  emit_note_before (NOTE_INSN_CFA_RESTORE_STATE, i);
> +
> +  /* Emit the state save.  */
> +  cfi = new_cfi (); 
> +  cfi->dw_cfi_opc = DW_CFA_remember_state;
> +  add_fde_cfi (dwarf2out_cfi_label (), cfi);
> +
> +  /* And emulate the state save.  */
> +  gcc_assert (!cfa_remember.in_use);
> +  cfa_remember = cfa;
> +  cfa_remember.in_use = 1;
> +}
> +
> +/* A "subroutine" of dwarf2out_begin_epilogue.  Emit the restore required.  */
> +
> +void
> +dwarf2out_frame_debug_restore_state (void)
> +{
> +  dw_cfi_ref cfi = new_cfi (); 
> +  const char *label = dwarf2out_cfi_label ();
> +
> +  cfi->dw_cfi_opc = DW_CFA_restore_state;
> +  add_fde_cfi (label, cfi);
> +
> +  gcc_assert (cfa_remember.in_use);
> +  cfa = cfa_remember;
> +  cfa_remember.in_use = 0;
> +}
> +
>  #endif
>  
>  /* Describe for the GTY machinery what parts of dw_cfi_oprnd1 are used.  */
> @@ -2434,6 +2746,8 @@ dw_cfi_oprnd1_desc (enum dwarf_call_fram
>      {
>      case DW_CFA_nop:
>      case DW_CFA_GNU_window_save:
> +    case DW_CFA_remember_state:
> +    case DW_CFA_restore_state:
>        return dw_cfi_oprnd_unused;
>  
>      case DW_CFA_set_loc:
> @@ -2448,6 +2762,7 @@ dw_cfi_oprnd1_desc (enum dwarf_call_fram
>      case DW_CFA_def_cfa:
>      case DW_CFA_offset_extended_sf:
>      case DW_CFA_def_cfa_sf:
> +    case DW_CFA_restore:
>      case DW_CFA_restore_extended:
>      case DW_CFA_undefined:
>      case DW_CFA_same_value:
> @@ -2771,6 +3086,13 @@ output_cfi_directive (dw_cfi_ref cfi)
>  	       cfi->dw_cfi_oprnd1.dw_cfi_offset);
>        break;
>  
> +    case DW_CFA_remember_state:
> +      fprintf (asm_out_file, "\t.cfi_remember_state\n");
> +      break;
> +    case DW_CFA_restore_state:
> +      fprintf (asm_out_file, "\t.cfi_restore_state\n");
> +      break;
> +
>      case DW_CFA_GNU_args_size:
>        fprintf (asm_out_file, "\t.cfi_escape 0x%x,", DW_CFA_GNU_args_size);
>        dw2_asm_output_data_uleb128_raw (cfi->dw_cfi_oprnd1.dw_cfi_offset);
> @@ -12056,6 +12378,7 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
>    dw_cfi_ref cfi;
>    dw_cfa_location last_cfa, next_cfa;
>    const char *start_label, *last_label, *section;
> +  dw_cfa_location remember;
>  
>    fde = current_fde ();
>    gcc_assert (fde != NULL);
> @@ -12064,17 +12387,16 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
>    list_tail = &list;
>    list = NULL;
>  
> +  memset (&next_cfa, 0, sizeof (next_cfa));
>    next_cfa.reg = INVALID_REGNUM;
> -  next_cfa.offset = 0;
> -  next_cfa.indirect = 0;
> -  next_cfa.base_offset = 0;
> +  remember = next_cfa;
>  
>    start_label = fde->dw_fde_begin;
>  
>    /* ??? Bald assumption that the CIE opcode list does not contain
>       advance opcodes.  */
>    for (cfi = cie_cfi_head; cfi; cfi = cfi->dw_cfi_next)
> -    lookup_cfa_1 (cfi, &next_cfa);
> +    lookup_cfa_1 (cfi, &next_cfa, &remember);
>  
>    last_cfa = next_cfa;
>    last_label = start_label;
> @@ -12101,14 +12423,10 @@ convert_cfa_to_fb_loc_list (HOST_WIDE_IN
>  
>        case DW_CFA_advance_loc:
>  	/* The encoding is complex enough that we should never emit this.  */
> -      case DW_CFA_remember_state:
> -      case DW_CFA_restore_state:
> -	/* We don't handle these two in this function.  It would be possible
> -	   if it were to be required.  */
>  	gcc_unreachable ();
>  
>        default:
> -	lookup_cfa_1 (cfi, &next_cfa);
> +	lookup_cfa_1 (cfi, &next_cfa, &remember);
>  	break;
>        }
>  
> --- gcc/dwarf2out.h	(revision 148000)
> +++ gcc/dwarf2out.h	(local)
> @@ -20,6 +20,8 @@ along with GCC; see the file COPYING3.  
>  
>  extern void dwarf2out_decl (tree);
>  extern void dwarf2out_frame_debug (rtx, bool);
> +extern void dwarf2out_begin_epilogue (rtx);
> +extern void dwarf2out_frame_debug_restore_state (void);
>  
>  extern void debug_dwarf (void);
>  struct die_struct;
> --- gcc/emit-rtl.c	(revision 148000)
> +++ gcc/emit-rtl.c	(local)
> @@ -3335,6 +3335,10 @@ try_split (rtx pat, rtx trial, int last)
>    rtx insn_last, insn;
>    int njumps = 0;
>  
> +  /* We're not good at redistributing frame information.  */
> +  if (RTX_FRAME_RELATED_P (trial))
> +    return trial;
> +
>    if (any_condjump_p (trial)
>        && (note = find_reg_note (trial, REG_BR_PROB, 0)))
>      split_branch_probability = INTVAL (XEXP (note, 0));
> @@ -5050,6 +5054,9 @@ copy_insn_1 (rtx orig)
>    RTX_CODE code;
>    const char *format_ptr;
>  
> +  if (orig == NULL)
> +    return NULL;
> +
>    code = GET_CODE (orig);
>  
>    switch (code)
> --- gcc/final.c	(revision 148000)
> +++ gcc/final.c	(local)
> @@ -1879,9 +1879,19 @@ final_scan_insn (rtx insn, FILE *file, i
>  	  break;
>  
>  	case NOTE_INSN_EPILOGUE_BEG:
> +#if defined (DWARF2_UNWIND_INFO) && defined (HAVE_epilogue)
> +	  if (dwarf2out_do_frame ())
> +	    dwarf2out_begin_epilogue (insn);
> +#endif
>  	  targetm.asm_out.function_begin_epilogue (file);
>  	  break;
>  
> +	case NOTE_INSN_CFA_RESTORE_STATE:
> +#if defined (DWARF2_UNWIND_INFO)
> +	  dwarf2out_frame_debug_restore_state ();
> +#endif
> +	  break;
> +
>  	case NOTE_INSN_FUNCTION_BEG:
>  	  app_disable ();
>  	  (*debug_hooks->end_prologue) (last_linenum, last_filename);
> --- gcc/function.c	(revision 148000)
> +++ gcc/function.c	(local)
> @@ -124,13 +124,11 @@ struct machine_function * (*init_machine
>  /* The currently compiled function.  */
>  struct function *cfun = 0;
>  
> -/* These arrays record the INSN_UIDs of the prologue and epilogue insns.  */
> -static VEC(int,heap) *prologue;
> -static VEC(int,heap) *epilogue;
> -
> -/* Array of INSN_UIDs to hold the INSN_UIDs for each sibcall epilogue
> -   in this function.  */
> -static VEC(int,heap) *sibcall_epilogue;
> +/* These hashes record the prologue and epilogue insns.  */
> +static GTY((if_marked ("ggc_marked_p"), param_is (struct rtx_def)))
> +  htab_t prologue_insn_hash;
> +static GTY((if_marked ("ggc_marked_p"), param_is (struct rtx_def)))
> +  htab_t epilogue_insn_hash;
>  \f
>  /* Forward declarations.  */
>  
> @@ -143,8 +141,8 @@ static tree *get_block_vector (tree, int
>  extern tree debug_find_var_in_block_tree (tree, tree);
>  /* We always define `record_insns' even if it's not used so that we
>     can always export `prologue_epilogue_contains'.  */
> -static void record_insns (rtx, VEC(int,heap) **) ATTRIBUTE_UNUSED;
> -static int contains (const_rtx, VEC(int,heap) **);
> +static void record_insns (rtx, rtx, htab_t *) ATTRIBUTE_UNUSED;
> +static bool contains (const_rtx, htab_t);
>  #ifdef HAVE_return
>  static void emit_return_into_block (basic_block);
>  #endif
> @@ -207,9 +205,9 @@ free_after_parsing (struct function *f)
>  void
>  free_after_compilation (struct function *f)
>  {
> -  VEC_free (int, heap, prologue);
> -  VEC_free (int, heap, epilogue);
> -  VEC_free (int, heap, sibcall_epilogue);
> +  prologue_insn_hash = NULL;
> +  epilogue_insn_hash = NULL;
> +
>    if (crtl->emit.regno_pointer_align)
>      free (crtl->emit.regno_pointer_align);
>  
> @@ -4196,18 +4194,11 @@ init_function_start (tree subr)
>      warning (OPT_Waggregate_return, "function returns an aggregate");
>  }
>  
> -/* Make sure all values used by the optimization passes have sane
> -   defaults.  */
> +/* Make sure all values used by the optimization passes have sane defaults.  */
>  unsigned int
>  init_function_for_compilation (void)
>  {
>    reg_renumber = 0;
> -
> -  /* No prologue/epilogue insns yet.  Make sure that these vectors are
> -     empty.  */
> -  gcc_assert (VEC_length (int, prologue) == 0);
> -  gcc_assert (VEC_length (int, epilogue) == 0);
> -  gcc_assert (VEC_length (int, sibcall_epilogue) == 0);
>    return 0;
>  }
>  
> @@ -4873,16 +4864,42 @@ get_arg_pointer_save_area (void)
>    return ret;
>  }
>  \f
> -/* Extend a vector that records the INSN_UIDs of INSNS
> -   (a list of one or more insns).  */
> +/* Add a list of INSNS to the hash HASHP, possibly allocating HASHP
> +   for the first time.  */
>  
>  static void
> -record_insns (rtx insns, VEC(int,heap) **vecp)
> +record_insns (rtx insns, rtx end, htab_t *hashp)
>  {
>    rtx tmp;
> +  htab_t hash = *hashp;
> +
> +  if (hash == NULL)
> +    *hashp = hash
> +      = htab_create_ggc (17, htab_hash_pointer, htab_eq_pointer, NULL);
>  
> -  for (tmp = insns; tmp != NULL_RTX; tmp = NEXT_INSN (tmp))
> -    VEC_safe_push (int, heap, *vecp, INSN_UID (tmp));
> +  for (tmp = insns; tmp != end; tmp = NEXT_INSN (tmp))
> +    {
> +      void **slot = htab_find_slot (hash, tmp, INSERT);
> +      gcc_assert (*slot == NULL);
> +      *slot = tmp;
> +    }
> +}
> +
> +/* INSN has been duplicated as COPY, as part of duping a basic block.
> +   If INSN is an epilogue insn, then record COPY as epilogue as well.  */
> +
> +void
> +maybe_copy_epilogue_insn (rtx insn, rtx copy)
> +{
> +  void **slot;
> +
> +  if (epilogue_insn_hash == NULL
> +      || htab_find (epilogue_insn_hash, insn) == NULL)
> +    return;
> +
> +  slot = htab_find_slot (epilogue_insn_hash, copy, INSERT);
> +  gcc_assert (*slot == NULL);
> +  *slot = copy;
>  }
>  
>  /* Set the locator of the insn chain starting at INSN to LOC.  */
> @@ -4897,52 +4914,37 @@ set_insn_locators (rtx insn, int loc)
>      }
>  }
>  
> -/* Determine how many INSN_UIDs in VEC are part of INSN.  Because we can
> -   be running after reorg, SEQUENCE rtl is possible.  */
> +/* Determine if any INSNs in HASH are, or are part of, INSN.  Because
> +   we can be running after reorg, SEQUENCE rtl is possible.  */
>  
> -static int
> -contains (const_rtx insn, VEC(int,heap) **vec)
> +static bool
> +contains (const_rtx insn, htab_t hash)
>  {
> -  int i, j;
> +  if (hash == NULL)
> +    return false;
>  
> -  if (NONJUMP_INSN_P (insn)
> -      && GET_CODE (PATTERN (insn)) == SEQUENCE)
> +  if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
>      {
> -      int count = 0;
> +      int i;
>        for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
> -	for (j = VEC_length (int, *vec) - 1; j >= 0; --j)
> -	  if (INSN_UID (XVECEXP (PATTERN (insn), 0, i))
> -	      == VEC_index (int, *vec, j))
> -	    count++;
> -      return count;
> +	if (htab_find (hash, XVECEXP (PATTERN (insn), 0, i)))
> +	  return true;
> +      return false;
>      }
> -  else
> -    {
> -      for (j = VEC_length (int, *vec) - 1; j >= 0; --j)
> -	if (INSN_UID (insn) == VEC_index (int, *vec, j))
> -	  return 1;
> -    }
> -  return 0;
> +
> +  return htab_find (hash, insn) != NULL;
>  }
>  
>  int
>  prologue_epilogue_contains (const_rtx insn)
>  {
> -  if (contains (insn, &prologue))
> +  if (contains (insn, prologue_insn_hash))
>      return 1;
> -  if (contains (insn, &epilogue))
> +  if (contains (insn, epilogue_insn_hash))
>      return 1;
>    return 0;
>  }
>  
> -int
> -sibcall_epilogue_contains (const_rtx insn)
> -{
> -  if (sibcall_epilogue)
> -    return contains (insn, &sibcall_epilogue);
> -  return 0;
> -}
> -
>  #ifdef HAVE_return
>  /* Insert gen_return at the end of block BB.  This also means updating
>     block_for_insn appropriately.  */
> @@ -4985,7 +4987,7 @@ thread_prologue_and_epilogue_insns (void
>  	emit_use (hard_frame_pointer_rtx);
>  
>        /* Retain a map of the prologue insns.  */
> -      record_insns (seq, &prologue);
> +      record_insns (seq, NULL, &prologue_insn_hash);
>        emit_note (NOTE_INSN_PROLOGUE_END);
>   
>  #ifndef PROFILE_BEFORE_PROLOGUE
> @@ -5117,6 +5119,38 @@ thread_prologue_and_epilogue_insns (void
>  	}
>      }
>  #endif
> +
> +  /* A small fib -- epilogue is not yet completed, but we wish to re-use
> +     this marker for the splits of EH_RETURN patterns, and nothing else
> +     uses the flag in the meantime.  */
> +  epilogue_completed = 1;
> +
> +#ifdef HAVE_eh_return
> +  /* Find non-fallthru edges that end with EH_RETURN instructions.  On
> +     some targets, these get split to a special version of the epilogue
> +     code.  In order to be able to properly annotate these with unwind
> +     info, try to split them now.  If we get a valid split, drop an
> +     EPILOGUE_BEG note and mark the insns as epilogue insns.  */
> +  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
> +    {
> +      rtx prev, last, trial;
> +
> +      if (e->flags & EDGE_FALLTHRU)
> +	continue;
> +      last = BB_END (e->src);
> +      if (!eh_returnjump_p (last))
> +	continue;
> +
> +      prev = PREV_INSN (last);
> +      trial = try_split (PATTERN (last), last, 1);
> +      if (trial == last)
> +	continue;
> +
> +      record_insns (NEXT_INSN (prev), NEXT_INSN (trial), &epilogue_insn_hash);
> +      emit_note_after (NOTE_INSN_EPILOGUE_BEG, prev);
> +    }
> +#endif
> +
>    /* Find the edge that falls through to EXIT.  Other edges may exist
>       due to RETURN instructions, but those don't need epilogues.
>       There really shouldn't be a mixture -- either all should have
> @@ -5137,7 +5171,7 @@ thread_prologue_and_epilogue_insns (void
>        emit_jump_insn (seq);
>  
>        /* Retain a map of the epilogue insns.  */
> -      record_insns (seq, &epilogue);
> +      record_insns (seq, NULL, &epilogue_insn_hash);
>        set_insn_locators (seq, epilogue_locator);
>  
>        seq = get_insns ();
> @@ -5199,6 +5233,7 @@ epilogue_done:
>  	}
>  
>        start_sequence ();
> +      emit_note (NOTE_INSN_EPILOGUE_BEG);
>        emit_insn (gen_sibcall_epilogue ());
>        seq = get_insns ();
>        end_sequence ();
> @@ -5206,7 +5241,7 @@ epilogue_done:
>        /* Retain a map of the epilogue insns.  Used in life analysis to
>  	 avoid getting rid of sibcall epilogue insns.  Do this before we
>  	 actually emit the sequence.  */
> -      record_insns (seq, &sibcall_epilogue);
> +      record_insns (seq, NULL, &epilogue_insn_hash);
>        set_insn_locators (seq, epilogue_locator);
>  
>        emit_insn_before (seq, insn);
> @@ -5240,23 +5275,29 @@ epilogue_done:
>    df_update_entry_exit_and_calls ();
>  }
>  
> -/* Reposition the prologue-end and epilogue-begin notes after instruction
> -   scheduling and delayed branch scheduling.  */
> +/* Reposition the prologue-end and epilogue-begin notes after
> +   instruction scheduling.  */
>  
>  void
>  reposition_prologue_and_epilogue_notes (void)
>  {
> -#if defined (HAVE_prologue) || defined (HAVE_epilogue)
> +#if defined (HAVE_prologue) || defined (HAVE_epilogue) \
> +    || defined (HAVE_sibcall_epilogue)
>    rtx insn, last, note;
> -  int len;
> +  basic_block bb;
>  
> -  if ((len = VEC_length (int, prologue)) > 0)
> +  /* Since the hash table is created on demand, the fact that it is
> +     non-null is a signal that it is non-empty.  */
> +  if (prologue_insn_hash != NULL)
>      {
> +      size_t len = htab_elements (prologue_insn_hash);
>        last = 0, note = 0;
>  
> -      /* Scan from the beginning until we reach the last prologue insn.
> -	 We apparently can't depend on basic_block_{head,end} after
> -	 reorg has run.  */
> +      /* Scan from the beginning until we reach the last prologue insn.  */
> +      /* ??? While we do have the CFG intact, there are two problems:
> +	 (1) The prologue can contain loops (typically probing the stack),
> +	     which means that the end of the prologue isn't in the first bb.
> +	 (2) Sometimes the PROLOGUE_END note gets pushed into the next bb.  */
>        for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
>  	{
>  	  if (NOTE_P (insn))
> @@ -5264,7 +5305,7 @@ reposition_prologue_and_epilogue_notes (
>  	      if (NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
>  		note = insn;
>  	    }
> -	  else if (contains (insn, &prologue))
> +	  else if (contains (insn, prologue_insn_hash))
>  	    {
>  	      last = insn;
>  	      if (--len == 0)
> @@ -5274,14 +5315,17 @@ reposition_prologue_and_epilogue_notes (
>  
>        if (last)
>  	{
> -	  /* Find the prologue-end note if we haven't already, and
> -	     move it to just after the last prologue insn.  */
> -	  if (note == 0)
> +	  if (note == NULL)
>  	    {
> -	      for (note = last; (note = NEXT_INSN (note));)
> -		if (NOTE_P (note)
> -		    && NOTE_KIND (note) == NOTE_INSN_PROLOGUE_END)
> -		  break;
> +	      /* Scan forward looking for the PROLOGUE_END note.  It should
> +		 be right at the beginning of the block, possibly with other
> +		 insn notes that got moved there.  */
> +	      for (note = NEXT_INSN (last); ; note = NEXT_INSN (note))
> +		{
> +		  if (NOTE_P (note)
> +		      && NOTE_KIND (note) == NOTE_INSN_PROLOGUE_END)
> +		    break;
> +		}
>  	    }
>  
>  	  /* Avoid placing note between CODE_LABEL and BASIC_BLOCK note.  */
> @@ -5291,41 +5335,39 @@ reposition_prologue_and_epilogue_notes (
>  	}
>      }
>  
> -  if ((len = VEC_length (int, epilogue)) > 0)
> +  if (epilogue_insn_hash != NULL)
>      {
> -      last = 0, note = 0;
> +      edge_iterator ei;
> +      edge e;
>  
> -      /* Scan from the end until we reach the first epilogue insn.
> -	 We apparently can't depend on basic_block_{head,end} after
> -	 reorg has run.  */
> -      for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
> +      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
>  	{
> -	  if (NOTE_P (insn))
> -	    {
> -	      if (NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
> -		note = insn;
> -	    }
> -	  else if (contains (insn, &epilogue))
> -	    {
> -	      last = insn;
> -	      if (--len == 0)
> -		break;
> -	    }
> -	}
> +	  last = 0, note = 0;
> +	  bb = e->src;
>  
> -      if (last)
> -	{
> -	  /* Find the epilogue-begin note if we haven't already, and
> -	     move it to just before the first epilogue insn.  */
> -	  if (note == 0)
> +	  /* Scan from the beginning until we reach the first epilogue insn.
> +	     Take the cue for whether this is a plain or sibcall epilogue
> +	     from the kind of note we find first.  */
> +	  FOR_BB_INSNS (bb, insn)
>  	    {
> -	      for (note = insn; (note = PREV_INSN (note));)
> -		if (NOTE_P (note)
> -		    && NOTE_KIND (note) == NOTE_INSN_EPILOGUE_BEG)
> -		  break;
> +	      if (NOTE_P (insn))
> +		{
> +		  if (NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
> +		    {
> +		      note = insn;
> +		      if (last)
> +			break;
> +		    }
> +		}
> +	      else if (contains (insn, epilogue_insn_hash))
> +		{
> +		  last = insn;
> +		  if (note != NULL)
> +		    break;
> +		}
>  	    }
> -
> -	  if (PREV_INSN (last) != note)
> +	     
> +	  if (last && note && PREV_INSN (last) != note)
>  	    reorder_insns (note, note, PREV_INSN (last));
>  	}
>      }
> --- gcc/insn-notes.def	(revision 148000)
> +++ gcc/insn-notes.def	(local)
> @@ -70,4 +70,8 @@ INSN_NOTE (BASIC_BLOCK)
>     between hot and cold text sections.  */
>  INSN_NOTE (SWITCH_TEXT_SECTIONS)
>  
> +/* Mark the restore point after an epilogue changed CFI data.  Used only
> +   when an epilogue appears in the middle of a function.  */
> +INSN_NOTE (CFA_RESTORE_STATE)
> +
>  #undef INSN_NOTE
> --- gcc/jump.c	(revision 148000)
> +++ gcc/jump.c	(local)
> @@ -869,8 +869,21 @@ returnjump_p_1 (rtx *loc, void *data ATT
>  {
>    rtx x = *loc;
>  
> -  return x && (GET_CODE (x) == RETURN
> -	       || (GET_CODE (x) == SET && SET_IS_RETURN_P (x)));
> +  if (x == NULL)
> +    return false;
> +
> +  switch (GET_CODE (x))
> +    {
> +    case RETURN:
> +    case EH_RETURN:
> +      return true;
> +
> +    case SET:
> +      return SET_IS_RETURN_P (x);
> +
> +    default:
> +      return false;
> +    }
>  }
>  
>  int
> @@ -881,6 +894,22 @@ returnjump_p (rtx insn)
>    return for_each_rtx (&PATTERN (insn), returnjump_p_1, NULL);
>  }
>  
> +/* Return true if INSN is a jump whose pattern contains an EH_RETURN.  */
> +
> +static int
> +eh_returnjump_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
> +{
> +  return *loc && GET_CODE (*loc) == EH_RETURN;
> +}
> +
> +int
> +eh_returnjump_p (rtx insn)
> +{
> +  if (!JUMP_P (insn))
> +    return 0;
> +  return for_each_rtx (&PATTERN (insn), eh_returnjump_p_1, NULL);
> +}
> +
>  /* Return true if INSN is a jump that only transfers control and
>     nothing more.  */
>  
> --- gcc/reg-notes.def	(revision 148000)
> +++ gcc/reg-notes.def	(local)
> @@ -118,6 +118,41 @@ REG_NOTE (BR_PRED)
>     instead of intuition.  */
>  REG_NOTE (FRAME_RELATED_EXPR)
>  
> +/* Attached to insns that are RTX_FRAME_RELATED_P, but are too complex
> +   for FRAME_RELATED_EXPR intuition.  The insn's first pattern must be
> +   a SET, and the destination must be the CFA register.  The attached
> +   rtx is an expression that defines the CFA.  In the simplest case, the
> +   rtx could be just the stack_pointer_rtx; more common would be a PLUS
> +   with a base register and a constant offset.  In the most complicated
> +   cases, this will result in a DW_CFA_def_cfa_expression with the rtx
> +   expression rendered in a dwarf location expression.  */
> +REG_NOTE (CFA_DEF_CFA)
> +
> +/* Attached to insns that are RTX_FRAME_RELATED_P, but are too complex
> +   for FRAME_RELATED_EXPR intuition.  This note adjusts the expression
> +   from which the CFA is computed.  The attached rtx defines a new CFA
> +   expression, relative to the old CFA expression.  This rtx must be of
> +   the form (SET new-cfa-reg (PLUS old-cfa-reg const_int)).  If the note
> +   rtx is NULL, we use the first SET of the insn.  */
> +REG_NOTE (CFA_ADJUST_CFA)
> +
> +/* Similar to FRAME_RELATED_EXPR, with the additional information that
> +   this is a save to memory, i.e. will result in DW_CFA_offset or the
> +   like.  The pattern or the insn should be a simple store relative to
> +   the CFA.  */
> +REG_NOTE (CFA_OFFSET)
> +
> +/* Similar to FRAME_RELATED_EXPR, with the additional information that this
> +   is a save to a register, i.e. will result in DW_CFA_register.  The insn
> +   or the pattern should be a simple reg-reg move.  */
> +REG_NOTE (CFA_REGISTER)
> +
> +/* Attached to insns that are RTX_FRAME_RELATED_P, with the information
> +   that this is a restore operation, i.e. will result in DW_CFA_restore
> +   or the like.  Either the attached rtx, or the destination of the insn's
> +   first pattern is the register to be restored.  */
> +REG_NOTE (CFA_RESTORE)
> +
>  /* Indicates that REG holds the exception context for the function.
>     This context is shared by inline functions, so the code to acquire
>     the real exception context is delayed until after inlining.  */
> --- gcc/rtl.def	(revision 148000)
> +++ gcc/rtl.def	(local)
> @@ -281,6 +281,10 @@ DEF_RTL_EXPR(CALL, "call", "ee", RTX_EXT
>  
>  DEF_RTL_EXPR(RETURN, "return", "", RTX_EXTRA)
>  
> +/* Special for EH return from subroutine.  */
> +
> +DEF_RTL_EXPR(EH_RETURN, "eh_return", "", RTX_EXTRA)
> +
>  /* Conditional trap.
>     Operand 1 is the condition.
>     Operand 2 is the trap code.
> --- gcc/rtl.h	(revision 148000)
> +++ gcc/rtl.h	(local)
> @@ -2086,6 +2086,7 @@ extern rtx pc_set (const_rtx);
>  extern rtx condjump_label (const_rtx);
>  extern int simplejump_p (const_rtx);
>  extern int returnjump_p (rtx);
> +extern int eh_returnjump_p (rtx);
>  extern int onlyjump_p (const_rtx);
>  extern int only_sets_cc0_p (const_rtx);
>  extern int sets_cc0_p (const_rtx);
> @@ -2198,6 +2199,7 @@ extern int prologue_epilogue_contains (c
>  extern int sibcall_epilogue_contains (const_rtx);
>  extern void mark_temp_addr_taken (rtx);
>  extern void update_temp_slot_address (rtx, rtx);
> +extern void maybe_copy_epilogue_insn (rtx, rtx);
>  
>  /* In stmt.c */
>  extern void expand_null_return (void);


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-05-30 13:26     ` H.J. Lu
  2009-05-30 20:08       ` [PATCH] Fix i?86 eh regressions (PR middle-end/40304) Jakub Jelinek
@ 2009-06-29 23:25       ` H.J. Lu
  2009-12-17 17:47         ` H.J. Lu
  1 sibling, 1 reply; 26+ messages in thread
From: H.J. Lu @ 2009-06-29 23:25 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Joseph S. Myers

On Sat, May 30, 2009 at 6:21 AM, H.J. Lu<hjl.tools@gmail.com> wrote:
> On Fri, May 29, 2009 at 5:42 PM, Richard Henderson <rth@redhat.com> wrote:
>> Joseph S. Myers wrote:
>>>
>>> On Wed, 6 May 2009, Richard Henderson wrote:
>>>
>>>> Since the beginning, -fasynchronous-unwind-tables has not held correct
>>>> information for function epilogues.  This is an attempt at adding that.
>>>
>>> See also Nathan Froyd's patch (doing this for x86_64) described in his
>>> 2006 Summit paper.  I haven't compared the approaches (but fully support
>>> adding this feature).
>>>
>>> http://gcc.gnu.org/ml/gcc-patches/2006-03/msg00426.html
>>> http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01091.html
>>>
>>
>> Thanks for the pointers.  I did incorporate a few of Nathan's ideas into
>> this version of the patch -- primarily, when duplicating the epilogue,
>> make sure the duplicated insns are also recorded as epilogue insns.
>>
>> The other large change from the previous patch is the ability to have
>> the eh_return epilogue from _Unwind_Resume (and friends) marked
>> properly.  This required the addition of an EH_RETURN rtx, so that
>> the middle-end could recognize when epilogue expansion should happen,
>> rather than the ad-hoc unspecs that ports had been using.  As it
>> happens, only i386 and bfin implement eh_return via special epilogues;
>> most ports only need to overwrite one or more registers before using
>> a normal epilogue.
>>
>> Tested on x86_64, i686; committed.
>>
>> r~
>>
>>        * cfgcleanup.c (try_crossjump_to_edge): Only skip past
>>        NOTE_INSN_BASIC_BLOCK.
>>        * cfglayout.c (duplicate_insn_chain): Copy epilogue insn marks.
>>        Duplicate NOTE_INSN_EPILOGUE_BEG notes.
>>        * cfgrtl.c (can_delete_note_p): Allow NOTE_INSN_EPILOGUE_BEG
>>        to be deleted.
>>        * dwarf2out.c (struct cfa_loc): Change indirect field to bitfield,
>>        add in_use field.
>>        (add_cfi): Disable check redefining cfa away from drap.
>>        (lookup_cfa_1): Add remember argument; handle remember/restore.
>>        (lookup_cfa): Pass remember argument.
>>        (cfa_remember): New.
>>        (compute_barrier_args_size_1): Remove sibcall check.
>>        (dwarf2out_frame_debug_def_cfa): New.
>>        (dwarf2out_frame_debug_adjust_cfa): New.
>>        (dwarf2out_frame_debug_cfa_offset): New.
>>        (dwarf2out_frame_debug_cfa_register): New.
>>        (dwarf2out_frame_debug_cfa_restore): New.
>>        (dwarf2out_frame_debug): Handle REG_CFA_* notes.
>>        (dwarf2out_begin_epilogue): New.
>>        (dwarf2out_frame_debug_restore_state): New.
>>        (dw_cfi_oprnd1_desc): Handle DW_CFA_remember_state,
>>        DW_CFA_restore_state.
>>        (output_cfi_directive): Likewise.
>>        (convert_cfa_to_fb_loc_list): Likewise.
>>        (dw_cfi_oprnd1_desc): Handle DW_CFA_restore.
>>        * dwarf2out.h: Update.
>>        * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
>>        (copy_insn_1): Early out for null.
>>        * final.c (final_scan_insn): Call dwarf2out_begin_epilogue
>>        and dwarf2out_frame_debug_restore_state.
>>        * function.c (prologue, epilogue, sibcall_epilogue): Remove.
>>        (prologue_insn_hash, epilogue_insn_hash): New.
>>        (free_after_compilation): Adjust freeing accordingly.
>>        (record_insns): Create hash table if needed; push insns into
>>        hash instead of array.
>>        (maybe_copy_epilogue_insn): New.
>>        (contains): Search hash table instead of array.
>>        (sibcall_epilogue_contains): Remove.
>>        (thread_prologue_and_epilogue_insns): Split eh_return insns
>>        and mark them as epilogues.
>>        (reposition_prologue_and_epilogue_notes): Rewrite epilogue
>>        scanning in terms of basic blocks.
>>        * insn-notes.def (CFA_RESTORE_STATE): New.
>>        * jump.c (returnjump_p_1): Accept EH_RETURN.
>>        (eh_returnjump_p_1, eh_returnjump_p): New.
>>        * reg-notes.def (CFA_DEF_CFA, CFA_ADJUST_CFA, CFA_OFFSET,
>>        CFA_REGISTER, CFA_RESTORE): New.
>>        * rtl.def (EH_RETURN): New.
>>        * rtl.h (eh_returnjump_p, maybe_copy_epilogue_insn): Declare.
>>
>>        * config/bfin/bfin.md (UNSPEC_VOLATILE_EH_RETURN): Remove.
>>        (eh_return_internal): Use eh_return rtx; split w/ epilogue.
>>
>>        * config/i386/i386.c (gen_push): Update cfa state.
>>        (pro_epilogue_adjust_stack): Add set_cfa argument.  When true,
>>        add a CFA_ADJUST_CFA note.
>>        (ix86_dwarf_handle_frame_unspec): Remove.
>>        (ix86_expand_prologue): Update cfa state.
>>        (ix86_emit_restore_reg_using_pop): New.
>>        (ix86_emit_restore_regs_using_pop): New.
>>        (ix86_emit_leave): New.
>>        (ix86_emit_restore_regs_using_mov): Add CFA_RESTORE notes.
>>        (ix86_expand_epilogue): Add notes for unwinding the epilogue.
>>        * config/i386/i386.h (struct machine_cfa_state): New.
>>        (ix86_cfa_state): New.
>>        * config/i386/i386.md (UNSPEC_EH_RETURN): Remove.
>>        (eh_return_internal): Merge from eh_return_<mode>,
>>        use eh_return rtx, split w/ epilogue.
>>
>
> This breaks stack unwind on Linux/ia32:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40304
>
>

This also breaks debug info for local variable on Linux/ia32:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40596


-- 
H.J.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-06-29 23:25       ` unwind info for epilogues H.J. Lu
@ 2009-12-17 17:47         ` H.J. Lu
  2009-12-28 23:15           ` H.J. Lu
  0 siblings, 1 reply; 26+ messages in thread
From: H.J. Lu @ 2009-12-17 17:47 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Joseph S. Myers

On Mon, Jun 29, 2009 at 1:54 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, May 30, 2009 at 6:21 AM, H.J. Lu<hjl.tools@gmail.com> wrote:
>> On Fri, May 29, 2009 at 5:42 PM, Richard Henderson <rth@redhat.com> wrote:
>>> Joseph S. Myers wrote:
>>>>
>>>> On Wed, 6 May 2009, Richard Henderson wrote:
>>>>
>>>>> Since the beginning, -fasynchronous-unwind-tables has not held correct
>>>>> information for function epilogues.  This is an attempt at adding that.
>>>>
>>>> See also Nathan Froyd's patch (doing this for x86_64) described in his
>>>> 2006 Summit paper.  I haven't compared the approaches (but fully support
>>>> adding this feature).
>>>>
>>>> http://gcc.gnu.org/ml/gcc-patches/2006-03/msg00426.html
>>>> http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01091.html
>>>>
>>>
>>> Thanks for the pointers.  I did incorporate a few of Nathan's ideas into
>>> this version of the patch -- primarily, when duplicating the epilogue,
>>> make sure the duplicated insns are also recorded as epilogue insns.
>>>
>>> The other large change from the previous patch is the ability to have
>>> the eh_return epilogue from _Unwind_Resume (and friends) marked
>>> properly.  This required the addition of an EH_RETURN rtx, so that
>>> the middle-end could recognize when epilogue expansion should happen,
>>> rather than the ad-hoc unspecs that ports had been using.  As it
>>> happens, only i386 and bfin implement eh_return via special epilogues;
>>> most ports only need to overwrite one or more registers before using
>>> a normal epilogue.
>>>
>>> Tested on x86_64, i686; committed.
>>>
>>> r~
>>>
>>>        * cfgcleanup.c (try_crossjump_to_edge): Only skip past
>>>        NOTE_INSN_BASIC_BLOCK.
>>>        * cfglayout.c (duplicate_insn_chain): Copy epilogue insn marks.
>>>        Duplicate NOTE_INSN_EPILOGUE_BEG notes.
>>>        * cfgrtl.c (can_delete_note_p): Allow NOTE_INSN_EPILOGUE_BEG
>>>        to be deleted.
>>>        * dwarf2out.c (struct cfa_loc): Change indirect field to bitfield,
>>>        add in_use field.
>>>        (add_cfi): Disable check redefining cfa away from drap.
>>>        (lookup_cfa_1): Add remember argument; handle remember/restore.
>>>        (lookup_cfa): Pass remember argument.
>>>        (cfa_remember): New.
>>>        (compute_barrier_args_size_1): Remove sibcall check.
>>>        (dwarf2out_frame_debug_def_cfa): New.
>>>        (dwarf2out_frame_debug_adjust_cfa): New.
>>>        (dwarf2out_frame_debug_cfa_offset): New.
>>>        (dwarf2out_frame_debug_cfa_register): New.
>>>        (dwarf2out_frame_debug_cfa_restore): New.
>>>        (dwarf2out_frame_debug): Handle REG_CFA_* notes.
>>>        (dwarf2out_begin_epilogue): New.
>>>        (dwarf2out_frame_debug_restore_state): New.
>>>        (dw_cfi_oprnd1_desc): Handle DW_CFA_remember_state,
>>>        DW_CFA_restore_state.
>>>        (output_cfi_directive): Likewise.
>>>        (convert_cfa_to_fb_loc_list): Likewise.
>>>        (dw_cfi_oprnd1_desc): Handle DW_CFA_restore.
>>>        * dwarf2out.h: Update.
>>>        * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
>>>        (copy_insn_1): Early out for null.
>>>        * final.c (final_scan_insn): Call dwarf2out_begin_epilogue
>>>        and dwarf2out_frame_debug_restore_state.
>>>        * function.c (prologue, epilogue, sibcall_epilogue): Remove.
>>>        (prologue_insn_hash, epilogue_insn_hash): New.
>>>        (free_after_compilation): Adjust freeing accordingly.
>>>        (record_insns): Create hash table if needed; push insns into
>>>        hash instead of array.
>>>        (maybe_copy_epilogue_insn): New.
>>>        (contains): Search hash table instead of array.
>>>        (sibcall_epilogue_contains): Remove.
>>>        (thread_prologue_and_epilogue_insns): Split eh_return insns
>>>        and mark them as epilogues.
>>>        (reposition_prologue_and_epilogue_notes): Rewrite epilogue
>>>        scanning in terms of basic blocks.
>>>        * insn-notes.def (CFA_RESTORE_STATE): New.
>>>        * jump.c (returnjump_p_1): Accept EH_RETURN.
>>>        (eh_returnjump_p_1, eh_returnjump_p): New.
>>>        * reg-notes.def (CFA_DEF_CFA, CFA_ADJUST_CFA, CFA_OFFSET,
>>>        CFA_REGISTER, CFA_RESTORE): New.
>>>        * rtl.def (EH_RETURN): New.
>>>        * rtl.h (eh_returnjump_p, maybe_copy_epilogue_insn): Declare.
>>>
>>>        * config/bfin/bfin.md (UNSPEC_VOLATILE_EH_RETURN): Remove.
>>>        (eh_return_internal): Use eh_return rtx; split w/ epilogue.
>>>
>>>        * config/i386/i386.c (gen_push): Update cfa state.
>>>        (pro_epilogue_adjust_stack): Add set_cfa argument.  When true,
>>>        add a CFA_ADJUST_CFA note.
>>>        (ix86_dwarf_handle_frame_unspec): Remove.
>>>        (ix86_expand_prologue): Update cfa state.
>>>        (ix86_emit_restore_reg_using_pop): New.
>>>        (ix86_emit_restore_regs_using_pop): New.
>>>        (ix86_emit_leave): New.
>>>        (ix86_emit_restore_regs_using_mov): Add CFA_RESTORE notes.
>>>        (ix86_expand_epilogue): Add notes for unwinding the epilogue.
>>>        * config/i386/i386.h (struct machine_cfa_state): New.
>>>        (ix86_cfa_state): New.
>>>        * config/i386/i386.md (UNSPEC_EH_RETURN): Remove.
>>>        (eh_return_internal): Merge from eh_return_<mode>,
>>>        use eh_return rtx, split w/ epilogue.
>>>
>>
>> This breaks stack unwind on Linux/ia32:
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40304
>>
>>
>
> This also breaks debug info for local variable on Linux/ia32:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40596
>

This also caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42396

-- 
H.J.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: unwind info for epilogues
  2009-12-17 17:47         ` H.J. Lu
@ 2009-12-28 23:15           ` H.J. Lu
  0 siblings, 0 replies; 26+ messages in thread
From: H.J. Lu @ 2009-12-28 23:15 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Joseph S. Myers

On Thu, Dec 17, 2009 at 8:43 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>
>>>> Thanks for the pointers.  I did incorporate a few of Nathan's ideas into
>>>> this version of the patch -- primarily, when duplicating the epilogue,
>>>> make sure the duplicated insns are also recorded as epilogue insns.
>>>>
>>>> The other large change from the previous patch is the ability to have
>>>> the eh_return epilogue from _Unwind_Resume (and friends) marked
>>>> properly.  This required the addition of an EH_RETURN rtx, so that
>>>> the middle-end could recognize when epilogue expansion should happen,
>>>> rather than the ad-hoc unspecs that ports had been using.  As it
>>>> happens, only i386 and bfin implement eh_return via special epilogues;
>>>> most ports only need to overwrite one or more registers before using
>>>> a normal epilogue.
>>>>
>>>> Tested on x86_64, i686; committed.
>>>>
>>>> r~
>>>>
>>>>        * cfgcleanup.c (try_crossjump_to_edge): Only skip past
>>>>        NOTE_INSN_BASIC_BLOCK.
>>>>        * cfglayout.c (duplicate_insn_chain): Copy epilogue insn marks.
>>>>        Duplicate NOTE_INSN_EPILOGUE_BEG notes.
>>>>        * cfgrtl.c (can_delete_note_p): Allow NOTE_INSN_EPILOGUE_BEG
>>>>        to be deleted.
>>>>        * dwarf2out.c (struct cfa_loc): Change indirect field to bitfield,
>>>>        add in_use field.
>>>>        (add_cfi): Disable check redefining cfa away from drap.
>>>>        (lookup_cfa_1): Add remember argument; handle remember/restore.
>>>>        (lookup_cfa): Pass remember argument.
>>>>        (cfa_remember): New.
>>>>        (compute_barrier_args_size_1): Remove sibcall check.
>>>>        (dwarf2out_frame_debug_def_cfa): New.
>>>>        (dwarf2out_frame_debug_adjust_cfa): New.
>>>>        (dwarf2out_frame_debug_cfa_offset): New.
>>>>        (dwarf2out_frame_debug_cfa_register): New.
>>>>        (dwarf2out_frame_debug_cfa_restore): New.
>>>>        (dwarf2out_frame_debug): Handle REG_CFA_* notes.
>>>>        (dwarf2out_begin_epilogue): New.
>>>>        (dwarf2out_frame_debug_restore_state): New.
>>>>        (dw_cfi_oprnd1_desc): Handle DW_CFA_remember_state,
>>>>        DW_CFA_restore_state.
>>>>        (output_cfi_directive): Likewise.
>>>>        (convert_cfa_to_fb_loc_list): Likewise.
>>>>        (dw_cfi_oprnd1_desc): Handle DW_CFA_restore.
>>>>        * dwarf2out.h: Update.
>>>>        * emit-rtl.c (try_split): Don't split RTX_FRAME_RELATED_P.
>>>>        (copy_insn_1): Early out for null.
>>>>        * final.c (final_scan_insn): Call dwarf2out_begin_epilogue
>>>>        and dwarf2out_frame_debug_restore_state.
>>>>        * function.c (prologue, epilogue, sibcall_epilogue): Remove.
>>>>        (prologue_insn_hash, epilogue_insn_hash): New.
>>>>        (free_after_compilation): Adjust freeing accordingly.
>>>>        (record_insns): Create hash table if needed; push insns into
>>>>        hash instead of array.
>>>>        (maybe_copy_epilogue_insn): New.
>>>>        (contains): Search hash table instead of array.
>>>>        (sibcall_epilogue_contains): Remove.
>>>>        (thread_prologue_and_epilogue_insns): Split eh_return insns
>>>>        and mark them as epilogues.
>>>>        (reposition_prologue_and_epilogue_notes): Rewrite epilogue
>>>>        scanning in terms of basic blocks.
>>>>        * insn-notes.def (CFA_RESTORE_STATE): New.
>>>>        * jump.c (returnjump_p_1): Accept EH_RETURN.
>>>>        (eh_returnjump_p_1, eh_returnjump_p): New.
>>>>        * reg-notes.def (CFA_DEF_CFA, CFA_ADJUST_CFA, CFA_OFFSET,
>>>>        CFA_REGISTER, CFA_RESTORE): New.
>>>>        * rtl.def (EH_RETURN): New.
>>>>        * rtl.h (eh_returnjump_p, maybe_copy_epilogue_insn): Declare.
>>>>
>>>>        * config/bfin/bfin.md (UNSPEC_VOLATILE_EH_RETURN): Remove.
>>>>        (eh_return_internal): Use eh_return rtx; split w/ epilogue.
>>>>
>>>>        * config/i386/i386.c (gen_push): Update cfa state.
>>>>        (pro_epilogue_adjust_stack): Add set_cfa argument.  When true,
>>>>        add a CFA_ADJUST_CFA note.
>>>>        (ix86_dwarf_handle_frame_unspec): Remove.
>>>>        (ix86_expand_prologue): Update cfa state.
>>>>        (ix86_emit_restore_reg_using_pop): New.
>>>>        (ix86_emit_restore_regs_using_pop): New.
>>>>        (ix86_emit_leave): New.
>>>>        (ix86_emit_restore_regs_using_mov): Add CFA_RESTORE notes.
>>>>        (ix86_expand_epilogue): Add notes for unwinding the epilogue.
>>>>        * config/i386/i386.h (struct machine_cfa_state): New.
>>>>        (ix86_cfa_state): New.
>>>>        * config/i386/i386.md (UNSPEC_EH_RETURN): Remove.
>>>>        (eh_return_internal): Merge from eh_return_<mode>,
>>>>        use eh_return rtx, split w/ epilogue.
>>>>
>>>
>>> This breaks stack unwind on Linux/ia32:
>>>
>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40304
>>>
>>>
>>
>> This also breaks debug info for local variable on Linux/ia32:
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40596
>>
>
> This also caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42396
>

This also caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41883
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41889


-- 
H.J.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2009-12-28 20:12 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-06 20:21 unwind info for epilogues Richard Henderson
2009-05-06 20:32 ` Joseph S. Myers
2009-05-30  0:49   ` Richard Henderson
2009-05-30 12:24     ` Jakub Jelinek
2009-05-30 21:24       ` Richard Henderson
2009-06-01 17:10         ` Jakub Jelinek
2009-06-01 18:20           ` Richard Henderson
2009-06-01 18:39             ` Jakub Jelinek
2009-06-01 19:15               ` Richard Henderson
2009-05-30 13:26     ` H.J. Lu
2009-05-30 20:08       ` [PATCH] Fix i?86 eh regressions (PR middle-end/40304) Jakub Jelinek
2009-05-31  0:48         ` Richard Henderson
2009-05-31 10:52           ` Jakub Jelinek
2009-05-31 14:11             ` H.J. Lu
2009-06-01  4:33             ` Ian Lance Taylor
2009-06-01 13:46               ` Jakub Jelinek
2009-06-01 15:53                 ` Richard Henderson
2009-06-01 16:49                 ` Ian Lance Taylor
2009-06-29 23:25       ` unwind info for epilogues H.J. Lu
2009-12-17 17:47         ` H.J. Lu
2009-12-28 23:15           ` H.J. Lu
2009-06-04 20:38     ` Laurent GUERBY
2009-05-20  0:49 ` Ian Lance Taylor
2009-05-30  1:02   ` Richard Henderson
2009-05-30 14:37     ` Jan Hubicka
2009-05-30 20:17       ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).