public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
@ 2015-11-27 14:02 Claudiu Zissulescu
  2015-12-03 21:59 ` Joern Wolfgang Rennecke
  0 siblings, 1 reply; 11+ messages in thread
From: Claudiu Zissulescu @ 2015-11-27 14:02 UTC (permalink / raw)
  To: gcc-patches; +Cc: Claudiu.Zissulescu, gnu, Francois.Bedard, jeremy.bennett

Properly emit DWARF2 related information while expanding epilogue. Remove
the -m[no]-epilogue-cfi option as it is not needed any longer. This patch
solves the dwarf2cfi errors observed while running dejagnu tests.

Ok to commit?
Claudiu

gcc/
2015-11-27  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (frame_move): Set frame related flag.
	(arc_save_restore): Emit epilogue related DWARF2 information.
	(arc_expand_epilogue): Likewise.
	* config/arc/arc.opt (mno-epilogue-cfi): Remove.
	(mepilogue-cfi): Likewise.
	* doc/invoke.texi: Remove -m[no]-epilogue-cfi documentation.
---
 gcc/config/arc/arc.c   | 87 ++++++++++++++++++++++++++++++++++++++++----------
 gcc/config/arc/arc.opt |  8 -----
 gcc/doc/invoke.texi    | 10 +-----
 3 files changed, 71 insertions(+), 34 deletions(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 8bb0969..5200ea5 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -1884,7 +1884,9 @@ frame_insn (rtx x)
 static rtx
 frame_move (rtx dst, rtx src)
 {
-  return frame_insn (gen_rtx_SET (dst, src));
+  rtx tmp = gen_rtx_SET (dst, src);
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  return frame_insn (tmp);
 }
 
 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
@@ -2307,7 +2309,15 @@ arc_save_restore (rtx base_reg,
 	  if (epilogue_p == 2)
 	    sibthunk_insn = insn;
 	  else
-	    frame_insn (insn);
+	    {
+	      insn = frame_insn (insn);
+	      if (epilogue_p)
+		for (r = start_call; r <= end_call; r++)
+		  {
+		    rtx reg = gen_rtx_REG (SImode, r);
+		    add_reg_note (insn, REG_CFA_RESTORE, reg);
+		  }
+	    }
 	  offset += off;
 	}
 
@@ -2317,6 +2327,7 @@ arc_save_restore (rtx base_reg,
 	    {
 	      rtx reg = gen_rtx_REG (SImode, regno);
 	      rtx addr, mem;
+	      int cfa_adjust = *first_offset;
 
 	      if (*first_offset)
 		{
@@ -2332,7 +2343,20 @@ arc_save_restore (rtx base_reg,
 		}
 	      mem = gen_frame_mem (SImode, addr);
 	      if (epilogue_p)
-		frame_move_inc (reg, mem, base_reg, addr);
+		{
+		  rtx insn =
+		    frame_move_inc (reg, mem, base_reg, addr);
+		  add_reg_note (insn, REG_CFA_RESTORE, reg);
+		  if (cfa_adjust)
+		    {
+		      enum reg_note note = REG_CFA_ADJUST_CFA;
+		      add_reg_note (insn, note,
+				    gen_rtx_SET (stack_pointer_rtx,
+						 plus_constant (Pmode,
+								stack_pointer_rtx,
+								cfa_adjust)));
+		    }
+		}
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
@@ -2341,6 +2365,10 @@ arc_save_restore (rtx base_reg,
     }/* if */
   if (sibthunk_insn)
     {
+      int start_call = frame->millicode_start_reg;
+      int end_call = frame->millicode_end_reg;
+      int r;
+
       rtx r12 = gen_rtx_REG (Pmode, 12);
 
       frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
@@ -2350,6 +2378,15 @@ arc_save_restore (rtx base_reg,
 		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
       sibthunk_insn = emit_jump_insn (sibthunk_insn);
       RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
+
+      /* Would be nice if we could do this earlier, when the PARALLEL
+	 is populated, but these need to be attached after the
+	 emit.  */
+      for (r = start_call; r <= end_call; r++)
+	{
+	  rtx reg = gen_rtx_REG (SImode, r);
+	  add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
+	}
     }
 } /* arc_save_restore */
 
@@ -2470,6 +2507,7 @@ arc_expand_epilogue (int sibcall_p)
   int can_trust_sp_p = !cfun->calls_alloca;
   int first_offset = 0;
   int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
+  rtx insn;
 
   size_to_deallocate = size;
 
@@ -2502,11 +2540,18 @@ arc_expand_epilogue (int sibcall_p)
   /* Restore any saved registers.  */
   if (frame_pointer_needed)
     {
-	  rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+      insn = emit_insn (gen_blockage ());
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    plus_constant (SImode, stack_pointer_rtx,
+				   4));
+      RTX_FRAME_RELATED_P (insn) = 1;
 
-	  frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
-			  stack_pointer_rtx, 0);
-	  size_to_deallocate -= UNITS_PER_WORD;
+      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+
+      insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
+			     stack_pointer_rtx, 0);
+      add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
+      size_to_deallocate -= UNITS_PER_WORD;
     }
 
   /* Load blink after the calls to thunk calls in case of optimize size.  */
@@ -2522,7 +2567,7 @@ arc_expand_epilogue (int sibcall_p)
 			    cfun->machine->frame_info.gmask,
 			    1 + sibthunk_p, &first_offset);
 	  if (sibthunk_p)
-	    goto epilogue_done;
+	    return;
     }
   /* If we are to restore registers, and first_offset would require
      a limm to be encoded in a PRE_MODIFY, yet we can add it with a
@@ -2546,6 +2591,7 @@ arc_expand_epilogue (int sibcall_p)
       rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
       int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
       rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
+      HOST_WIDE_INT cfa_adjust = 0;
 
       /* If the load of blink would need a LIMM, but we can add
 	 the offset quickly to sp, do the latter.  */
@@ -2571,15 +2617,29 @@ arc_expand_epilogue (int sibcall_p)
 	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
 	{
 	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
+	  cfa_adjust = ra_offs;
 	  first_offset = 0;
 	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
 	}
       else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
 	{
 	  addr = gen_rtx_POST_INC (Pmode, addr);
+	  cfa_adjust = GET_MODE_SIZE (Pmode);
 	  size_to_deallocate = 0;
 	}
-      frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr);
+
+      insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
+			     stack_pointer_rtx, addr);
+      if (cfa_adjust)
+	{
+	  enum reg_note note = REG_CFA_ADJUST_CFA;
+
+	  add_reg_note (insn, note,
+			gen_rtx_SET (stack_pointer_rtx,
+				     plus_constant (SImode, stack_pointer_rtx,
+						    cfa_adjust)));
+	}
+      add_reg_note (insn, REG_CFA_RESTORE, ra);
     }
 
   if (!millicode_p)
@@ -2603,17 +2663,10 @@ arc_expand_epilogue (int sibcall_p)
 
   if (size > restored)
     frame_stack_add (size - restored);
+
   /* Emit the return instruction.  */
   if (sibcall_p == FALSE)
     emit_jump_insn (gen_simple_return ());
- epilogue_done:
-  if (!TARGET_EPILOGUE_CFI)
-    {
-      rtx_insn *insn;
-
-      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-	RTX_FRAME_RELATED_P (insn) = 0;
-    }
 }
 
 /* Return the offset relative to the stack pointer where the return address
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 0c10c67..55aca14 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -340,14 +340,6 @@ mrtsc
 Target Report
 Enable 64-bit Time-Stamp Counter extension instruction.
 
-mno-epilogue-cfi
-Target Report RejectNegative InverseMask(EPILOGUE_CFI)
-Disable generation of cfi for epilogues.
-
-mepilogue-cfi
-Target RejectNegative Mask(EPILOGUE_CFI)
-Enable generation of cfi for epilogues.
-
 EB
 Target
 Pass -EB option through to linker.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7cef176..3b00e88 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -544,7 +544,7 @@ Objective-C and Objective-C++ Dialects}.
 -mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol
 -mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
 -mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
--mepilogue-cfi -mlong-calls -mmedium-calls -msdata @gol
+-mlong-calls -mmedium-calls -msdata @gol
 -mucb-mcount -mvolatile-cache @gol
 -malign-call -mauto-modify-reg -mbbit-peephole -mno-brcc @gol
 -mcase-vector-pcrel -mcompact-casesi -mno-cond-exec -mearly-cbranchsi @gol
@@ -13121,14 +13121,6 @@ The following options control the semantics of generated code:
 
 @c semantically relevant code generation options
 @table @gcctabopt
-@item -mepilogue-cfi
-@opindex mepilogue-cfi
-Enable generation of call frame information for epilogues.
-
-@item -mno-epilogue-cfi
-@opindex mno-epilogue-cfi
-Disable generation of call frame information for epilogues.
-
 @item -mlong-calls
 @opindex mlong-calls
 Generate call insns as register indirect calls, thus providing access
-- 
1.9.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-11-27 14:02 [PATCH][ARC] Refurbish emitting DWARF2 for epilogue Claudiu Zissulescu
@ 2015-12-03 21:59 ` Joern Wolfgang Rennecke
  2015-12-07  9:19   ` Claudiu Zissulescu
  0 siblings, 1 reply; 11+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-12-03 21:59 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett


On 27/11/15 13:53, Claudiu Zissulescu wrote:
>    
> @@ -2502,11 +2540,18 @@ arc_expand_epilogue (int sibcall_p)
>     /* Restore any saved registers.  */
>     if (frame_pointer_needed)
>       {
> -	  rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
> +      insn = emit_insn (gen_blockage ());

Is this actually part of the patch to fix cfi generation?
It looks to me like it is working around an alias.c issue - namely, that 
alias.c does
not consider stack and frame pointer based addresses to alias.
https://gcc.gnu.org/ml/gcc/2011-07/msg00461.html
https://github.com/adapteva/epiphany-gcc/commit/6d1194a563e05dfa826ab4635514477af1f7a2b0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-03 21:59 ` Joern Wolfgang Rennecke
@ 2015-12-07  9:19   ` Claudiu Zissulescu
  2015-12-09  4:44     ` Joern Wolfgang Rennecke
  0 siblings, 1 reply; 11+ messages in thread
From: Claudiu Zissulescu @ 2015-12-07  9:19 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 708 bytes --]

Hi Joern,

> > +      insn = emit_insn (gen_blockage ());
> 
> Is this actually part of the patch to fix cfi generation?

This instruction prevents the delay branch scheduler to speculatively use epilogue instructions to fill up the delay slots. Hence, generating an assert during dwarf2cfi execution. This behavior is experiment by dg.exp/pr49994-1.s test.

At a closer inspection of the patch, I've refurbish it in a more generic fashion (attached), where the blockage guards the entire expand epilogue process.

It may be questionable if emitting blockage in epilogue is part of the cfi refactoring of the epilogue. However, without it we may still get errors in dwarf2cfi.

Thanks,
Claudiu

[-- Attachment #2: 0001-Refurbish-emitting-DWARF2-related-information-when-e.patch --]
[-- Type: application/octet-stream, Size: 8926 bytes --]

From a9617d9c9b38d39d2fa88bdaabb64ce15c04f6b9 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 27 Nov 2015 14:50:46 +0100
Subject: [PATCH] Refurbish emitting DWARF2 related information when expanding
 prologue.

Properly emit DWARF2 related information while expanding epilogue. Remove
the -m[no]-epilogue-cfi option as it is not needed any longer. This patch
solves the dwarf2cfi errors observed while running dejagnu tests.

gcc/
2015-11-27  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (frame_move): Set frame related flag.
	(arc_save_restore): Emit epilogue related DWARF2 information.
	(arc_expand_epilogue): Likewise.
	* config/arc/arc.opt (mno-epilogue-cfi): Remove.
	(mepilogue-cfi): Likewise.
	* doc/invoke.texi: Remove -m[no]-epilogue-cfi documentation.
---
 gcc/config/arc/arc.c   | 88 ++++++++++++++++++++++++++++++++++++++++----------
 gcc/config/arc/arc.opt |  8 -----
 gcc/doc/invoke.texi    | 10 +-----
 3 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 8bb0969..d8478b8 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -1884,7 +1884,9 @@ frame_insn (rtx x)
 static rtx
 frame_move (rtx dst, rtx src)
 {
-  return frame_insn (gen_rtx_SET (dst, src));
+  rtx tmp = gen_rtx_SET (dst, src);
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  return frame_insn (tmp);
 }
 
 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
@@ -2307,7 +2309,15 @@ arc_save_restore (rtx base_reg,
 	  if (epilogue_p == 2)
 	    sibthunk_insn = insn;
 	  else
-	    frame_insn (insn);
+	    {
+	      insn = frame_insn (insn);
+	      if (epilogue_p)
+		for (r = start_call; r <= end_call; r++)
+		  {
+		    rtx reg = gen_rtx_REG (SImode, r);
+		    add_reg_note (insn, REG_CFA_RESTORE, reg);
+		  }
+	    }
 	  offset += off;
 	}
 
@@ -2317,6 +2327,7 @@ arc_save_restore (rtx base_reg,
 	    {
 	      rtx reg = gen_rtx_REG (SImode, regno);
 	      rtx addr, mem;
+	      int cfa_adjust = *first_offset;
 
 	      if (*first_offset)
 		{
@@ -2332,7 +2343,20 @@ arc_save_restore (rtx base_reg,
 		}
 	      mem = gen_frame_mem (SImode, addr);
 	      if (epilogue_p)
-		frame_move_inc (reg, mem, base_reg, addr);
+		{
+		  rtx insn =
+		    frame_move_inc (reg, mem, base_reg, addr);
+		  add_reg_note (insn, REG_CFA_RESTORE, reg);
+		  if (cfa_adjust)
+		    {
+		      enum reg_note note = REG_CFA_ADJUST_CFA;
+		      add_reg_note (insn, note,
+				    gen_rtx_SET (stack_pointer_rtx,
+						 plus_constant (Pmode,
+								stack_pointer_rtx,
+								cfa_adjust)));
+		    }
+		}
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
@@ -2341,6 +2365,10 @@ arc_save_restore (rtx base_reg,
     }/* if */
   if (sibthunk_insn)
     {
+      int start_call = frame->millicode_start_reg;
+      int end_call = frame->millicode_end_reg;
+      int r;
+
       rtx r12 = gen_rtx_REG (Pmode, 12);
 
       frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
@@ -2350,6 +2378,15 @@ arc_save_restore (rtx base_reg,
 		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
       sibthunk_insn = emit_jump_insn (sibthunk_insn);
       RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
+
+      /* Would be nice if we could do this earlier, when the PARALLEL
+	 is populated, but these need to be attached after the
+	 emit.  */
+      for (r = start_call; r <= end_call; r++)
+	{
+	  rtx reg = gen_rtx_REG (SImode, r);
+	  add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
+	}
     }
 } /* arc_save_restore */
 
@@ -2470,6 +2507,7 @@ arc_expand_epilogue (int sibcall_p)
   int can_trust_sp_p = !cfun->calls_alloca;
   int first_offset = 0;
   int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
+  rtx insn;
 
   size_to_deallocate = size;
 
@@ -2485,6 +2523,10 @@ arc_expand_epilogue (int sibcall_p)
   if (!can_trust_sp_p)
     gcc_assert (frame_pointer_needed);
 
+  /* Avoid epilogue instructions to be scheduled across the basic
+     block.  */
+  emit_insn (gen_blockage ());
+
   /* Restore stack pointer to the beginning of saved register area for
      ARCompact ISA.  */
   if (frame_size)
@@ -2502,11 +2544,15 @@ arc_expand_epilogue (int sibcall_p)
   /* Restore any saved registers.  */
   if (frame_pointer_needed)
     {
-	  rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
 
-	  frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
-			  stack_pointer_rtx, 0);
-	  size_to_deallocate -= UNITS_PER_WORD;
+      insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
+			     stack_pointer_rtx, 0);
+      add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    plus_constant (SImode, stack_pointer_rtx,
+				   4));
+      size_to_deallocate -= UNITS_PER_WORD;
     }
 
   /* Load blink after the calls to thunk calls in case of optimize size.  */
@@ -2522,7 +2568,7 @@ arc_expand_epilogue (int sibcall_p)
 			    cfun->machine->frame_info.gmask,
 			    1 + sibthunk_p, &first_offset);
 	  if (sibthunk_p)
-	    goto epilogue_done;
+	    return;
     }
   /* If we are to restore registers, and first_offset would require
      a limm to be encoded in a PRE_MODIFY, yet we can add it with a
@@ -2546,6 +2592,7 @@ arc_expand_epilogue (int sibcall_p)
       rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
       int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
       rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
+      HOST_WIDE_INT cfa_adjust = 0;
 
       /* If the load of blink would need a LIMM, but we can add
 	 the offset quickly to sp, do the latter.  */
@@ -2571,15 +2618,29 @@ arc_expand_epilogue (int sibcall_p)
 	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
 	{
 	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
+	  cfa_adjust = ra_offs;
 	  first_offset = 0;
 	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
 	}
       else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
 	{
 	  addr = gen_rtx_POST_INC (Pmode, addr);
+	  cfa_adjust = GET_MODE_SIZE (Pmode);
 	  size_to_deallocate = 0;
 	}
-      frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr);
+
+      insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
+			     stack_pointer_rtx, addr);
+      if (cfa_adjust)
+	{
+	  enum reg_note note = REG_CFA_ADJUST_CFA;
+
+	  add_reg_note (insn, note,
+			gen_rtx_SET (stack_pointer_rtx,
+				     plus_constant (SImode, stack_pointer_rtx,
+						    cfa_adjust)));
+	}
+      add_reg_note (insn, REG_CFA_RESTORE, ra);
     }
 
   if (!millicode_p)
@@ -2603,17 +2664,10 @@ arc_expand_epilogue (int sibcall_p)
 
   if (size > restored)
     frame_stack_add (size - restored);
+
   /* Emit the return instruction.  */
   if (sibcall_p == FALSE)
     emit_jump_insn (gen_simple_return ());
- epilogue_done:
-  if (!TARGET_EPILOGUE_CFI)
-    {
-      rtx_insn *insn;
-
-      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-	RTX_FRAME_RELATED_P (insn) = 0;
-    }
 }
 
 /* Return the offset relative to the stack pointer where the return address
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 0c10c67..55aca14 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -340,14 +340,6 @@ mrtsc
 Target Report
 Enable 64-bit Time-Stamp Counter extension instruction.
 
-mno-epilogue-cfi
-Target Report RejectNegative InverseMask(EPILOGUE_CFI)
-Disable generation of cfi for epilogues.
-
-mepilogue-cfi
-Target RejectNegative Mask(EPILOGUE_CFI)
-Enable generation of cfi for epilogues.
-
 EB
 Target
 Pass -EB option through to linker.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7cef176..3b00e88 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -544,7 +544,7 @@ Objective-C and Objective-C++ Dialects}.
 -mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol
 -mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
 -mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
--mepilogue-cfi -mlong-calls -mmedium-calls -msdata @gol
+-mlong-calls -mmedium-calls -msdata @gol
 -mucb-mcount -mvolatile-cache @gol
 -malign-call -mauto-modify-reg -mbbit-peephole -mno-brcc @gol
 -mcase-vector-pcrel -mcompact-casesi -mno-cond-exec -mearly-cbranchsi @gol
@@ -13121,14 +13121,6 @@ The following options control the semantics of generated code:
 
 @c semantically relevant code generation options
 @table @gcctabopt
-@item -mepilogue-cfi
-@opindex mepilogue-cfi
-Enable generation of call frame information for epilogues.
-
-@item -mno-epilogue-cfi
-@opindex mno-epilogue-cfi
-Disable generation of call frame information for epilogues.
-
 @item -mlong-calls
 @opindex mlong-calls
 Generate call insns as register indirect calls, thus providing access
-- 
1.9.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-07  9:19   ` Claudiu Zissulescu
@ 2015-12-09  4:44     ` Joern Wolfgang Rennecke
  2015-12-09  4:52       ` Joern Wolfgang Rennecke
  2015-12-09 15:34       ` Claudiu Zissulescu
  0 siblings, 2 replies; 11+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-12-09  4:44 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett



On 07/12/15 09:19, Claudiu Zissulescu wrote:
> Hi Joern,
>
>>> +      insn = emit_insn (gen_blockage ());
>> Is this actually part of the patch to fix cfi generation?
> This instruction prevents the delay branch scheduler to speculatively use epilogue instructions to fill up the delay slots. Hence, generating an assert during dwarf2cfi execution. This behavior is experiment by dg.exp/pr49994-1.s test.

The main point of having rtl epilogues is to allow such optimizations.
Traditionally, we have said that at -O1, it is OK to curb optimizations 
for the sake of having programs
that are saner to debug, while -O2 and above should just optimize, and 
the debug info generation is
just thrown in afterwards as a best-effort attempt.
Additionally, we also have -Og now.

You can also consider having separate options to control optimizations 
that affect debugging.
If leaving out epilogue cfi is what it takes to allow epilogue 
scheduling without the compiler crashing,
then that is what should be done by default at -O2, but if someone finds 
that particularly vexing,
they might appreciate a -mepilogue-cfi option to change that default, 
and instead disable some
scheduling (delay slot and otherwise).

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-09  4:44     ` Joern Wolfgang Rennecke
@ 2015-12-09  4:52       ` Joern Wolfgang Rennecke
  2015-12-09 15:34       ` Claudiu Zissulescu
  1 sibling, 0 replies; 11+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-12-09  4:52 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett



On 09/12/15 04:43, Joern Wolfgang Rennecke wrote:
>
> You can also consider having separate options to control optimizations 
> that affect debugging.
> If leaving out epilogue cfi is what it takes to allow epilogue 
> scheduling without the compiler crashing,
> then that is what should be done by default at -O2, but if someone 
> finds that particularly vexing,
> they might appreciate a -mepilogue-cfi option to change that default, 
> and instead disable some
> scheduling (delay slot and otherwise).

P.S.: It might also make sense to build some or all libraries with such 
an option, depending on what
the priorities for performance / debuggability are.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-09  4:44     ` Joern Wolfgang Rennecke
  2015-12-09  4:52       ` Joern Wolfgang Rennecke
@ 2015-12-09 15:34       ` Claudiu Zissulescu
  2015-12-09 18:48         ` Joern Wolfgang Rennecke
  1 sibling, 1 reply; 11+ messages in thread
From: Claudiu Zissulescu @ 2015-12-09 15:34 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett

> The main point of having rtl epilogues is to allow such optimizations.
> Traditionally, we have said that at -O1, it is OK to curb optimizations for the
> sake of having programs that are saner to debug, while -O2 and above should
> just optimize, and the debug info generation is just thrown in afterwards as a
> best-effort attempt.
> 
> Additionally, we also have -Og now.

Well, it seems to me that we prefer to disable optimizations when talking about debug related information (see PR target/60598 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@208749 138bc75d-0d04-0410-961f-82ee72b054a4 commit).  

> 
> You can also consider having separate options to control optimizations that
> affect debugging.
> If leaving out epilogue cfi is what it takes to allow epilogue scheduling
> without the compiler crashing, then that is what should be done by default at
> -O2, but if someone finds that particularly vexing, they might appreciate a -
> mepilogue-cfi option to change that default, and instead disable some
> scheduling (delay slot and otherwise).

Unfortunately, dwarf2cfi checks the paths for consistency (dwarf2cfi.c:2284), throwing an error if those paths are not ok. Also, with ARC gcc we focus on Linux type of application, hence, the unwinding info needs to be consistent.  
As far as I can see, having a sort of mno-epilogue-cfi option will just inhibit or not the emitting of the blockage instruction, and the option will be valid only when compiling without emitting dwarf information (i.e., the mno-epilogue-cfi is incompatible with -g). 
Personally, I do not see the benefit of having such an option, as one may lose like 1 cycle per function call (HS/EM cpus) in very particular cases. Running the dg.exp, compile.exp, execute.exp, and builing Linux with some default apps, we found only 4 cases in which the branch scheduler slot needs the blockage mechanism. Also, adding blockage before generating any prologue instruction seems to be a wide spread practice in gcc backends (see SH for example). 

Best,
Claudiu

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-09 15:34       ` Claudiu Zissulescu
@ 2015-12-09 18:48         ` Joern Wolfgang Rennecke
  2015-12-10 14:47           ` Claudiu Zissulescu
  2015-12-11 10:29           ` Claudiu Zissulescu
  0 siblings, 2 replies; 11+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-12-09 18:48 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett


On 09/12/15 15:34, Claudiu Zissulescu wrote:

> Well, it seems to me that we prefer to disable optimizations when talking about debug related information (see PR target/60598 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@208749 138bc75d-0d04-0410-961f-82ee72b054a4 commit).
Actually, unwind information might also be needed for exception 
handling, depending on the target.
(sjlj exception handling or dwarf2)
In which case it becomes a matter of program correctness if any 
exception might be raised that
could lead to incorrect unwinding due to incorrect/incomplete unwind info.

So were both wrong: this is not merely about debugging.

OTOH, the example you give also shows a much more nuanced approach to 
throttling optimization:
the patch doesn't dead all epilogue scheduling, but specifically tests 
for the presence of a frame related
insn at the point where it could cause trouble.
The equivalent would be to reject a frame related insn in the delay slot 
of a conditional branch.
For arc.md, that would likely involve splitting four of the define_delay 
statements into two each.
If that seems a bit much, a compromize would be to patch in_delay_slot, 
thus affecting unconditional branches too.
More ambitious approaches would be to move the note to a (zero-sized, 
pseudo-)placeholder insn, or
finding a way to make the dwarf2cfi cope with speculated frame related 
insns.  Eg.g even if the delay slot
is not anulled, could we mark the note as anulled?

Another approach would be to fix fill_eager_delay_slots not to stuff 
unconditional frame related insns into
non-anulled delay slots of conditional branches.
Or have some target hook to make it not even bother filling delay slots 
speculatively; for targets that can
fully unexpose the delay slot, like SH and ARC >= ARC700, this aspect of 
fill_eager_delay_slots only mucks up
schedules and increases code size.

> Unfortunately, dwarf2cfi checks the paths for consistency (dwarf2cfi.c:2284), throwing an error if those paths are not ok. Also, with ARC gcc we focus on Linux type of application, hence, the unwinding info needs to be consistent.
> As far as I can see, having a sort of mno-epilogue-cfi option will just inhibit or not the emitting of the blockage instruction, and the option will be valid only when compiling without emitting dwarf information (i.e., the mno-epilogue-cfi is incompatible with -g).
> Personally, I do not see the benefit of having such an option, as one may lose like 1 cycle per function call (HS/EM cpus) in very particular cases. Running the dg.exp, compile.exp, execute.exp, and builing Linux with some default apps, we found only 4 cases in which the branch scheduler slot needs the blockage mechanism.

The number of problems you found needs not bear any relation to the 
number of optimizations
suppressed by the blockage instruction.  Consider the case when there 
are high-latency unconditional
instructions just before a lengthy epilogue.  sched2 scheduling some 
epilogue instructions into the scheduling bubbles can hide the latencies.
More relevant ways to get data would be comparing the object files (from 
a whole toolchain library set and/or one or more big application(s)) 
built with/without the blockage insn emitted, or to benchmark it.
>   Also, adding blockage before generating any prologue instruction seems to be a wide spread practice in gcc backends (see SH for example).
It is a quick way to 'fix' (actually, hide) bugs.  At the expense of 
optimization.
It can be justified if the resources to keep a port in working order are 
limited, and so are the forgone
optimization benefits.
But at a minimum, you should add a comment to explain what problem you 
are papering over.
That works best if you actually file a bug report in bugzilla first 
(about the interaction of fill_eager_delay_slots and dwarf2cfi) so that 
you can name the bug.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-09 18:48         ` Joern Wolfgang Rennecke
@ 2015-12-10 14:47           ` Claudiu Zissulescu
  2015-12-11 10:29           ` Claudiu Zissulescu
  1 sibling, 0 replies; 11+ messages in thread
From: Claudiu Zissulescu @ 2015-12-10 14:47 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett


> 
> OTOH, the example you give also shows a much more nuanced approach to
> throttling optimization:
> the patch doesn't dead all epilogue scheduling, but specifically tests for the
> presence of a frame related insn at the point where it could cause trouble.

Actually, in my revised patch I do move the blockage instruction earlier on, before emitting any epilogue related instruction.

> Or have some target hook to make it not even bother filling delay slots
> speculatively; for targets that can fully unexpose the delay slot, like SH and
> ARC >= ARC700, this aspect of fill_eager_delay_slots only mucks up
> schedules and increases code size.
> 
GCC has such a target:

bool TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P (void) [Target Hook]
This predicate controls the use of the eager delay slot filler to disallow speculatively
executed instructions being placed in delay slots. Targets such as certain MIPS architectures
possess both branches with and without delay slots. As the eager delay slot
filler can decrease performance, disabling it is beneficial when ordinary branches are
available. Use of delay slot branches filled using the basic filler is often still desirable
as the delay slot can hide a pipeline bubble.

We can enable this hook for ARC and see what are the numbers. Initially, I thought that we may lose more by enabling this hook. What do u say?

//Claudiu

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-09 18:48         ` Joern Wolfgang Rennecke
  2015-12-10 14:47           ` Claudiu Zissulescu
@ 2015-12-11 10:29           ` Claudiu Zissulescu
  2015-12-12 14:49             ` Joern Wolfgang Rennecke
  1 sibling, 1 reply; 11+ messages in thread
From: Claudiu Zissulescu @ 2015-12-11 10:29 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett

[-- Attachment #1: Type: text/plain, Size: 1577 bytes --]

H Joern,

> Or have some target hook to make it not even bother filling delay slots
> speculatively; for targets that can fully unexpose the delay slot, like SH and
> ARC >= ARC700, this aspect of fill_eager_delay_slots only mucks up
> schedules and increases code size.

I propose to solve the dwarf2 issues during epilogue by using the TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook for ARC processors. Hence, we do not need to emit the blockage instruction during epilogue. So, I have refactor the patch in two patches as follows:
	- The 0001-Refurbish-emitting-DWARF2-related-information-when-e.patch is the initial patch without emitting the blockage instruction during epilogue.
	- The 0002-ARC-Use-TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P-hook.patch adds TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook for ARC.

Both patches are attached.

> 
> More relevant ways to get data would be comparing the object files (from a
> whole toolchain library set and/or one or more big application(s)) built
> with/without the blockage insn emitted, or to benchmark it.

I did some testing here. For size, I used CSiBE testbench, and for speed, I used coremark and dhrystone. Using a blockage or not, doesn't affect the size or speed figures. However, using TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook betters the size figures (not much, just .1%), and improves the coremark by 2% and Dhrystone by 1%.

Hence, in the light of the new figures, I favor the above two patch solution. Both patches are checked using dg.exp and compile.exp. Ok to submit?

Thanks,
Claudiu

[-- Attachment #2: 0001-Refurbish-emitting-DWARF2-related-information-when-e.patch --]
[-- Type: application/octet-stream, Size: 8571 bytes --]

From 3a67a5857e9eda0d55d4b35a3e29cd9af00b58ee Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 27 Nov 2015 14:50:46 +0100
Subject: [PATCH 1/2] Refurbish emitting DWARF2 related information when
 expanding prologue.

Properly emit DWARF2 related information while expanding epilogue. Remove
the -m[no]-epilogue-cfi option as it is not needed any longer. This patch
solves the dwarf2cfi errors observed while running dejagnu tests.

gcc/
2015-11-27  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (frame_move): Set frame related flag.
	(arc_save_restore): Emit epilogue related DWARF2 information.
	(arc_expand_epilogue): Likewise.
	* config/arc/arc.opt (mno-epilogue-cfi): Remove.
	(mepilogue-cfi): Likewise.
	* doc/invoke.texi: Remove -m[no]-epilogue-cfi documentation.
---
 gcc/config/arc/arc.c   | 84 ++++++++++++++++++++++++++++++++++++++++----------
 gcc/config/arc/arc.opt |  8 -----
 gcc/doc/invoke.texi    | 10 +-----
 3 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 5bc2bce..2646c3a 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -1888,7 +1888,9 @@ frame_insn (rtx x)
 static rtx
 frame_move (rtx dst, rtx src)
 {
-  return frame_insn (gen_rtx_SET (dst, src));
+  rtx tmp = gen_rtx_SET (dst, src);
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  return frame_insn (tmp);
 }
 
 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
@@ -2311,7 +2313,15 @@ arc_save_restore (rtx base_reg,
 	  if (epilogue_p == 2)
 	    sibthunk_insn = insn;
 	  else
-	    frame_insn (insn);
+	    {
+	      insn = frame_insn (insn);
+	      if (epilogue_p)
+		for (r = start_call; r <= end_call; r++)
+		  {
+		    rtx reg = gen_rtx_REG (SImode, r);
+		    add_reg_note (insn, REG_CFA_RESTORE, reg);
+		  }
+	    }
 	  offset += off;
 	}
 
@@ -2321,6 +2331,7 @@ arc_save_restore (rtx base_reg,
 	    {
 	      rtx reg = gen_rtx_REG (SImode, regno);
 	      rtx addr, mem;
+	      int cfa_adjust = *first_offset;
 
 	      if (*first_offset)
 		{
@@ -2336,7 +2347,20 @@ arc_save_restore (rtx base_reg,
 		}
 	      mem = gen_frame_mem (SImode, addr);
 	      if (epilogue_p)
-		frame_move_inc (reg, mem, base_reg, addr);
+		{
+		  rtx insn =
+		    frame_move_inc (reg, mem, base_reg, addr);
+		  add_reg_note (insn, REG_CFA_RESTORE, reg);
+		  if (cfa_adjust)
+		    {
+		      enum reg_note note = REG_CFA_ADJUST_CFA;
+		      add_reg_note (insn, note,
+				    gen_rtx_SET (stack_pointer_rtx,
+						 plus_constant (Pmode,
+								stack_pointer_rtx,
+								cfa_adjust)));
+		    }
+		}
 	      else
 		frame_move_inc (mem, reg, base_reg, addr);
 	      offset += UNITS_PER_WORD;
@@ -2345,6 +2369,10 @@ arc_save_restore (rtx base_reg,
     }/* if */
   if (sibthunk_insn)
     {
+      int start_call = frame->millicode_start_reg;
+      int end_call = frame->millicode_end_reg;
+      int r;
+
       rtx r12 = gen_rtx_REG (Pmode, 12);
 
       frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
@@ -2354,6 +2382,15 @@ arc_save_restore (rtx base_reg,
 		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
       sibthunk_insn = emit_jump_insn (sibthunk_insn);
       RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
+
+      /* Would be nice if we could do this earlier, when the PARALLEL
+	 is populated, but these need to be attached after the
+	 emit.  */
+      for (r = start_call; r <= end_call; r++)
+	{
+	  rtx reg = gen_rtx_REG (SImode, r);
+	  add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
+	}
     }
 } /* arc_save_restore */
 
@@ -2474,6 +2511,7 @@ arc_expand_epilogue (int sibcall_p)
   int can_trust_sp_p = !cfun->calls_alloca;
   int first_offset = 0;
   int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
+  rtx insn;
 
   size_to_deallocate = size;
 
@@ -2506,11 +2544,15 @@ arc_expand_epilogue (int sibcall_p)
   /* Restore any saved registers.  */
   if (frame_pointer_needed)
     {
-	  rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
 
-	  frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
-			  stack_pointer_rtx, 0);
-	  size_to_deallocate -= UNITS_PER_WORD;
+      insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
+			     stack_pointer_rtx, 0);
+      add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
+      add_reg_note (insn, REG_CFA_DEF_CFA,
+		    plus_constant (SImode, stack_pointer_rtx,
+				   4));
+      size_to_deallocate -= UNITS_PER_WORD;
     }
 
   /* Load blink after the calls to thunk calls in case of optimize size.  */
@@ -2526,7 +2568,7 @@ arc_expand_epilogue (int sibcall_p)
 			    cfun->machine->frame_info.gmask,
 			    1 + sibthunk_p, &first_offset);
 	  if (sibthunk_p)
-	    goto epilogue_done;
+	    return;
     }
   /* If we are to restore registers, and first_offset would require
      a limm to be encoded in a PRE_MODIFY, yet we can add it with a
@@ -2550,6 +2592,7 @@ arc_expand_epilogue (int sibcall_p)
       rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
       int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
       rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
+      HOST_WIDE_INT cfa_adjust = 0;
 
       /* If the load of blink would need a LIMM, but we can add
 	 the offset quickly to sp, do the latter.  */
@@ -2575,15 +2618,29 @@ arc_expand_epilogue (int sibcall_p)
 	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
 	{
 	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
+	  cfa_adjust = ra_offs;
 	  first_offset = 0;
 	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
 	}
       else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
 	{
 	  addr = gen_rtx_POST_INC (Pmode, addr);
+	  cfa_adjust = GET_MODE_SIZE (Pmode);
 	  size_to_deallocate = 0;
 	}
-      frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr);
+
+      insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
+			     stack_pointer_rtx, addr);
+      if (cfa_adjust)
+	{
+	  enum reg_note note = REG_CFA_ADJUST_CFA;
+
+	  add_reg_note (insn, note,
+			gen_rtx_SET (stack_pointer_rtx,
+				     plus_constant (SImode, stack_pointer_rtx,
+						    cfa_adjust)));
+	}
+      add_reg_note (insn, REG_CFA_RESTORE, ra);
     }
 
   if (!millicode_p)
@@ -2607,17 +2664,10 @@ arc_expand_epilogue (int sibcall_p)
 
   if (size > restored)
     frame_stack_add (size - restored);
+
   /* Emit the return instruction.  */
   if (sibcall_p == FALSE)
     emit_jump_insn (gen_simple_return ());
- epilogue_done:
-  if (!TARGET_EPILOGUE_CFI)
-    {
-      rtx_insn *insn;
-
-      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-	RTX_FRAME_RELATED_P (insn) = 0;
-    }
 }
 
 /* Return the offset relative to the stack pointer where the return address
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index c4d7306..5b81835 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -340,14 +340,6 @@ mrtsc
 Target Report
 Enable 64-bit Time-Stamp Counter extension instruction.
 
-mno-epilogue-cfi
-Target Report RejectNegative InverseMask(EPILOGUE_CFI)
-Disable generation of cfi for epilogues.
-
-mepilogue-cfi
-Target RejectNegative Mask(EPILOGUE_CFI)
-Enable generation of cfi for epilogues.
-
 EB
 Target
 Pass -EB option through to linker.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 3cddf5c..81f8845 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -542,7 +542,7 @@ Objective-C and Objective-C++ Dialects}.
 -mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol
 -mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
 -mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
--mepilogue-cfi -mlong-calls -mmedium-calls -msdata @gol
+-mlong-calls -mmedium-calls -msdata @gol
 -mucb-mcount -mvolatile-cache @gol
 -malign-call -mauto-modify-reg -mbbit-peephole -mno-brcc @gol
 -mcase-vector-pcrel -mcompact-casesi -mno-cond-exec -mearly-cbranchsi @gol
@@ -13133,14 +13133,6 @@ The following options control the semantics of generated code:
 
 @c semantically relevant code generation options
 @table @gcctabopt
-@item -mepilogue-cfi
-@opindex mepilogue-cfi
-Enable generation of call frame information for epilogues.
-
-@item -mno-epilogue-cfi
-@opindex mno-epilogue-cfi
-Disable generation of call frame information for epilogues.
-
 @item -mlong-calls
 @opindex mlong-calls
 Generate call insns as register indirect calls, thus providing access
-- 
1.9.1


[-- Attachment #3: 0002-ARC-Use-TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P-hook.patch --]
[-- Type: application/octet-stream, Size: 1375 bytes --]

From b39311bc66238133f8f867c428357d6a333c8650 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Fri, 11 Dec 2015 11:09:02 +0100
Subject: [PATCH 2/2] [ARC] Use TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook.

gcc/

2015-12-11  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P):
	Provide target hook.
	(arc_no_speculation_in_delay_slots_p): New function.
---
 gcc/config/arc/arc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 2646c3a..513d138 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -558,6 +558,10 @@ static void arc_finalize_pic (void);
 
 #define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters
 
+#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
+#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P	\
+  arc_no_speculation_in_delay_slots_p
+
 #undef TARGET_LRA_P
 #define TARGET_LRA_P arc_lra_p
 #define TARGET_REGISTER_PRIORITY arc_register_priority
@@ -10057,6 +10061,14 @@ arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
   arc_post_atomic_barrier (model);
 }
 
+/* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P.  */
+
+static bool
+arc_no_speculation_in_delay_slots_p ()
+{
+  return true;
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
-- 
1.9.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-11 10:29           ` Claudiu Zissulescu
@ 2015-12-12 14:49             ` Joern Wolfgang Rennecke
  2015-12-14 10:07               ` Claudiu Zissulescu
  0 siblings, 1 reply; 11+ messages in thread
From: Joern Wolfgang Rennecke @ 2015-12-12 14:49 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett



On 11/12/15 10:29, Claudiu Zissulescu wrote:
> I did some testing here. For size, I used CSiBE testbench, and for speed, I used coremark and dhrystone. Using a blockage or not, doesn't affect the size or speed figures. However, using TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook betters the size figures (not much, just .1%), and improves the coremark by 2% and Dhrystone by 1%.
>
> Hence, in the light of the new figures, I favor the above two patch solution. Both patches are checked using dg.exp and compile.exp. Ok to submit?
Both patches are OK.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
  2015-12-12 14:49             ` Joern Wolfgang Rennecke
@ 2015-12-14 10:07               ` Claudiu Zissulescu
  0 siblings, 0 replies; 11+ messages in thread
From: Claudiu Zissulescu @ 2015-12-14 10:07 UTC (permalink / raw)
  To: Joern Wolfgang Rennecke, gcc-patches; +Cc: Francois.Bedard, jeremy.bennett

Patches submitted.

Thanks,
Claudiu

> -----Original Message-----
> From: Joern Wolfgang Rennecke [mailto:gnu@amylaar.uk]
> Sent: Saturday, December 12, 2015 3:49 PM
> To: Claudiu Zissulescu; gcc-patches@gcc.gnu.org
> Cc: Francois.Bedard@synopsys.com; jeremy.bennett@embecosm.com
> Subject: Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.
> 
> 
> 
> On 11/12/15 10:29, Claudiu Zissulescu wrote:
> > I did some testing here. For size, I used CSiBE testbench, and for speed, I
> used coremark and dhrystone. Using a blockage or not, doesn't affect the
> size or speed figures. However, using
> TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook betters the size
> figures (not much, just .1%), and improves the coremark by 2% and
> Dhrystone by 1%.
> >
> > Hence, in the light of the new figures, I favor the above two patch solution.
> Both patches are checked using dg.exp and compile.exp. Ok to submit?
> Both patches are OK.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-12-14 10:07 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-27 14:02 [PATCH][ARC] Refurbish emitting DWARF2 for epilogue Claudiu Zissulescu
2015-12-03 21:59 ` Joern Wolfgang Rennecke
2015-12-07  9:19   ` Claudiu Zissulescu
2015-12-09  4:44     ` Joern Wolfgang Rennecke
2015-12-09  4:52       ` Joern Wolfgang Rennecke
2015-12-09 15:34       ` Claudiu Zissulescu
2015-12-09 18:48         ` Joern Wolfgang Rennecke
2015-12-10 14:47           ` Claudiu Zissulescu
2015-12-11 10:29           ` Claudiu Zissulescu
2015-12-12 14:49             ` Joern Wolfgang Rennecke
2015-12-14 10:07               ` Claudiu Zissulescu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).