public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
@ 2010-07-04 21:06 Kai Tietz
  2010-07-05  6:18 ` Kai Tietz
  0 siblings, 1 reply; 8+ messages in thread
From: Kai Tietz @ 2010-07-04 21:06 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 694 bytes --]

Hello,

this patch prepares for x64 SEH by making the frame-pointer point to the
stack position at the end of the prologue. This is required for x64, because
the SEH unwind information can't express negative stack offsets.

ChangeLog

2010-07-04  Kai Tietz

        * config/i386/i386.c (ix86_compute_frame_layout): Let frame's
        hard_frame_pointer_offset point to end of prologue.
        (ix86_expand_prologue): Handle x64 frame-pointer prologue.
        (ix86_expand_epilogue): Handle x64 frame-pointer epilogue.

Tested on i686-pc-cygwin and x86_64-pc-mingw32. OK to apply?

Regards,
Kai

-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

[-- Attachment #2: x64_pro.diff --]
[-- Type: application/octet-stream, Size: 7173 bytes --]

Index: gcc/gcc/config/i386/i386.c
===================================================================
--- gcc.orig/gcc/config/i386/i386.c	2010-07-03 10:58:48.000000000 +0200
+++ gcc/gcc/config/i386/i386.c	2010-07-04 22:54:54.765066400 +0200
@@ -8347,6 +8347,12 @@ ix86_compute_frame_layout (struct ix86_f
   /* We've reached end of stack frame.  */
   frame->stack_pointer_offset = offset;
 
+  /* Adjust hard_frame_pointer for x64 abi, so that it points
+     to end of frame.  */
+  if (TARGET_64BIT && DEFAULT_ABI == MS_ABI && frame_pointer_needed)
+      frame->hard_frame_pointer_offset = offset;
+
+
   /* Size prologue needs to allocate.  */
   frame->to_allocate =
     (size + frame->padding1 + frame->padding2
@@ -8720,6 +8726,8 @@ ix86_expand_prologue (void)
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   int gen_frame_pointer = frame_pointer_needed;
+  int be_x64frame = TARGET_64BIT && DEFAULT_ABI == MS_ABI
+  		    && frame_pointer_needed;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -8851,11 +8859,14 @@ ix86_expand_prologue (void)
       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
       RTX_FRAME_RELATED_P (insn) = 1;
 
-      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
-      RTX_FRAME_RELATED_P (insn) = 1;
+      if (!be_x64frame)
+	{
+	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+	  RTX_FRAME_RELATED_P (insn) = 1;
 
-      if (ix86_cfa_state->reg == stack_pointer_rtx)
-        ix86_cfa_state->reg = hard_frame_pointer_rtx;
+	  if (ix86_cfa_state->reg == stack_pointer_rtx)
+	    ix86_cfa_state->reg = hard_frame_pointer_rtx;
+	}
     }
 
   if (stack_realign_fp)
@@ -8884,7 +8895,7 @@ ix86_expand_prologue (void)
      a red zone location */
   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
-    ix86_emit_save_regs_using_mov ((frame_pointer_needed
+    ix86_emit_save_regs_using_mov ((frame_pointer_needed && !be_x64frame
 				     && !crtl->stack_realign_needed)
                                    ? hard_frame_pointer_rtx
 				   : stack_pointer_rtx,
@@ -8932,7 +8943,7 @@ ix86_expand_prologue (void)
 
       if (eax_live)
 	{
-	  if (frame_pointer_needed)
+	  if (frame_pointer_needed && !be_x64frame)
 	    t = plus_constant (hard_frame_pointer_rtx,
 			       allocate
 			       - frame.to_allocate
@@ -8947,7 +8958,7 @@ ix86_expand_prologue (void)
       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
     {
-      if (!frame_pointer_needed
+      if (!frame_pointer_needed || be_x64frame
 	  || !(frame.to_allocate + frame.padding0)
 	  || crtl->stack_realign_needed)
         ix86_emit_save_regs_using_mov (stack_pointer_rtx,
@@ -8957,7 +8968,7 @@ ix86_expand_prologue (void)
         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
 				       -frame.nregs * UNITS_PER_WORD);
     }
-  if (!frame_pointer_needed
+  if (!frame_pointer_needed || be_x64frame
       || !(frame.to_allocate + frame.padding0)
       || crtl->stack_realign_needed)
     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
@@ -9011,6 +9022,17 @@ ix86_expand_prologue (void)
   if (crtl->profile && pic_reg_used)
     emit_insn (gen_prologue_use (pic_offset_table_rtx));
 
+  if (be_x64frame)
+    {
+      rtx t;
+
+      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+	ix86_cfa_state->reg = hard_frame_pointer_rtx;
+    }
+
   if (crtl->drap_reg && !crtl->stack_realign_needed)
     {
       /* vDRAP is setup but after reload it turns out stack realign
@@ -9223,19 +9245,29 @@ ix86_expand_epilogue (int style)
   HOST_WIDE_INT offset, red_offset;
   struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
   bool using_drap;
+  int be_x64frame = TARGET_64BIT && DEFAULT_ABI == MS_ABI
+  		    && frame_pointer_needed;
+
 
   ix86_finalize_stack_realign_flags ();
 
  /* When stack is realigned, SP must be valid.  */
-  sp_valid = (!frame_pointer_needed
+  sp_valid = (!frame_pointer_needed || be_x64frame
 	      || current_function_sp_is_unchanging
 	      || stack_realign_fp);
 
   ix86_compute_frame_layout (&frame);
 
+ if (be_x64frame)
+   {
+     pro_epilogue_adjust_stack (stack_pointer_rtx,
+     				hard_frame_pointer_rtx,
+     				const0_rtx, style, 0);
+   }
+
   /* See the comment about red zone and frame
      pointer usage in ix86_expand_prologue.  */
-  if (frame_pointer_needed && frame.red_zone_size)
+  if (frame_pointer_needed && (frame.red_zone_size || be_x64frame))
     emit_insn (gen_memory_blockage ());
 
   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
@@ -9286,7 +9318,8 @@ ix86_expand_epilogue (int style)
       || (frame_pointer_needed && TARGET_USE_LEAVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && (frame.nregs + frame.nsseregs) == 1)
-      || crtl->calls_eh_return)
+      || crtl->calls_eh_return
+      || be_x64frame)
     {
       /* Restore registers.  We can use ebp or esp to address the memory
 	 locations.  If both are available, default to ebp, since offsets
@@ -9297,7 +9330,7 @@ ix86_expand_epilogue (int style)
 	 If we are realigning stack with bp and sp, regs restore can't
 	 be addressed by bp. sp must be used instead.  */
 
-      if (!frame_pointer_needed
+      if (!frame_pointer_needed || be_x64frame
 	  || (sp_valid && !(frame.to_allocate + frame.padding0))
 	  || stack_realign_fp)
 	{
@@ -9338,7 +9371,7 @@ ix86_expand_epilogue (int style)
 	  /* Neither does regparm nested functions.  */
 	  gcc_assert (!ix86_static_chain_on_stack);
 
-	  if (frame_pointer_needed)
+	  if (frame_pointer_needed && !be_x64frame)
 	    {
 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
@@ -9387,6 +9420,16 @@ ix86_expand_epilogue (int style)
 		}
 	    }
 	}
+      else if (be_x64frame)
+        {
+	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				     GEN_INT (frame.to_allocate
+					      + frame.nregs * UNITS_PER_WORD
+					      + frame.nsseregs * 16
+					      + frame.padding0),
+				     style, 0);
+	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
+	}
       else if (!frame_pointer_needed)
 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (frame.to_allocate
@@ -9416,7 +9459,7 @@ ix86_expand_epilogue (int style)
          won't be able to recover via lea $offset(%bp), %sp, because
          there is a padding area between bp and sp for realign.
          "add $to_allocate, %sp" must be used instead.  */
-      if (!sp_valid)
+      if (!sp_valid && !be_x64frame)
 	{
 	  gcc_assert (frame_pointer_needed);
           gcc_assert (!stack_realign_fp);
@@ -9447,7 +9490,7 @@ ix86_expand_epilogue (int style)
 					+ frame.padding0);
       red_offset -= offset;
 
-      if (frame_pointer_needed)
+      if (frame_pointer_needed && !be_x64frame)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
 	     able to grok it fast.  */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-04 21:06 [RFA patch i386]: Prepare x64 prologue using positive offsets for frame-pointer Kai Tietz
@ 2010-07-05  6:18 ` Kai Tietz
  2010-07-14 19:29   ` NightStrike
  0 siblings, 1 reply; 8+ messages in thread
From: Kai Tietz @ 2010-07-05  6:18 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 989 bytes --]

2010/7/4 Kai Tietz <ktietz70@googlemail.com>:
> Hello,
>
> this patch prepares for x64 SEH, that frame-pointer points at end of
> stack-prologue. This is required for x64, because the SEH unwind
> information can't express negative offsets for stack.
>
> ChangeLog
>
> 2010-07-04  Kai Tietz
>
>        * config/i386/i386.c (ix86_compute_frame_layout): Let frame's
>        hard_frame_pointer_offset point to end of prologue.
>        (ix86_expand_prologue): Handle x64 frame-pointer prologue.
>        (ix86_expand_epilogue): Handle x64 frame-pointer epilogue.
>
> Tested for i686-pc-cygwin, x86_64-pc-mingw32. Ok for apply?
>
> Regards,
> Kai

This updated patch avoids using the x64 frame layout for profiling
builds. Using it there would lead to issues, because the profiling
code assumes the old stack-frame layout.

Regards,
Kai


-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

[-- Attachment #2: x64_pro.diff --]
[-- Type: application/octet-stream, Size: 7215 bytes --]

Index: gcc/gcc/config/i386/i386.c
===================================================================
--- gcc.orig/gcc/config/i386/i386.c	2010-07-03 10:58:48.000000000 +0200
+++ gcc/gcc/config/i386/i386.c	2010-07-05 08:12:20.043586300 +0200
@@ -8347,6 +8347,13 @@ ix86_compute_frame_layout (struct ix86_f
   /* We've reached end of stack frame.  */
   frame->stack_pointer_offset = offset;
 
+  /* Adjust hard_frame_pointer for x64 abi, so that it points
+     to end of frame.  */
+  if (TARGET_64BIT && DEFAULT_ABI == MS_ABI && frame_pointer_needed
+      && !profile_flag)
+      frame->hard_frame_pointer_offset = offset;
+
+
   /* Size prologue needs to allocate.  */
   frame->to_allocate =
     (size + frame->padding1 + frame->padding2
@@ -8720,6 +8727,8 @@ ix86_expand_prologue (void)
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   int gen_frame_pointer = frame_pointer_needed;
+  int be_x64frame = TARGET_64BIT && DEFAULT_ABI == MS_ABI
+  		    && frame_pointer_needed && !profile_flag;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -8851,11 +8860,14 @@ ix86_expand_prologue (void)
       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
       RTX_FRAME_RELATED_P (insn) = 1;
 
-      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
-      RTX_FRAME_RELATED_P (insn) = 1;
+      if (!be_x64frame)
+	{
+	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+	  RTX_FRAME_RELATED_P (insn) = 1;
 
-      if (ix86_cfa_state->reg == stack_pointer_rtx)
-        ix86_cfa_state->reg = hard_frame_pointer_rtx;
+	  if (ix86_cfa_state->reg == stack_pointer_rtx)
+	    ix86_cfa_state->reg = hard_frame_pointer_rtx;
+	}
     }
 
   if (stack_realign_fp)
@@ -8884,7 +8896,7 @@ ix86_expand_prologue (void)
      a red zone location */
   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
-    ix86_emit_save_regs_using_mov ((frame_pointer_needed
+    ix86_emit_save_regs_using_mov ((frame_pointer_needed && !be_x64frame
 				     && !crtl->stack_realign_needed)
                                    ? hard_frame_pointer_rtx
 				   : stack_pointer_rtx,
@@ -8932,7 +8944,7 @@ ix86_expand_prologue (void)
 
       if (eax_live)
 	{
-	  if (frame_pointer_needed)
+	  if (frame_pointer_needed && !be_x64frame)
 	    t = plus_constant (hard_frame_pointer_rtx,
 			       allocate
 			       - frame.to_allocate
@@ -8947,7 +8959,7 @@ ix86_expand_prologue (void)
       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
     {
-      if (!frame_pointer_needed
+      if (!frame_pointer_needed || be_x64frame
 	  || !(frame.to_allocate + frame.padding0)
 	  || crtl->stack_realign_needed)
         ix86_emit_save_regs_using_mov (stack_pointer_rtx,
@@ -8957,7 +8969,7 @@ ix86_expand_prologue (void)
         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
 				       -frame.nregs * UNITS_PER_WORD);
     }
-  if (!frame_pointer_needed
+  if (!frame_pointer_needed || be_x64frame
       || !(frame.to_allocate + frame.padding0)
       || crtl->stack_realign_needed)
     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
@@ -9011,6 +9023,15 @@ ix86_expand_prologue (void)
   if (crtl->profile && pic_reg_used)
     emit_insn (gen_prologue_use (pic_offset_table_rtx));
 
+  if (be_x64frame)
+    {
+      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+	ix86_cfa_state->reg = hard_frame_pointer_rtx;
+    }
+
   if (crtl->drap_reg && !crtl->stack_realign_needed)
     {
       /* vDRAP is setup but after reload it turns out stack realign
@@ -9223,19 +9244,29 @@ ix86_expand_epilogue (int style)
   HOST_WIDE_INT offset, red_offset;
   struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
   bool using_drap;
+  int be_x64frame = TARGET_64BIT && DEFAULT_ABI == MS_ABI
+  		    && frame_pointer_needed && !profile_flag;
+
 
   ix86_finalize_stack_realign_flags ();
 
  /* When stack is realigned, SP must be valid.  */
-  sp_valid = (!frame_pointer_needed
+  sp_valid = (!frame_pointer_needed || be_x64frame
 	      || current_function_sp_is_unchanging
 	      || stack_realign_fp);
 
   ix86_compute_frame_layout (&frame);
 
+ if (be_x64frame)
+   {
+     pro_epilogue_adjust_stack (stack_pointer_rtx,
+     				hard_frame_pointer_rtx,
+     				const0_rtx, style, 0);
+   }
+
   /* See the comment about red zone and frame
      pointer usage in ix86_expand_prologue.  */
-  if (frame_pointer_needed && frame.red_zone_size)
+  if (frame_pointer_needed && (frame.red_zone_size || be_x64frame))
     emit_insn (gen_memory_blockage ());
 
   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
@@ -9286,7 +9317,8 @@ ix86_expand_epilogue (int style)
       || (frame_pointer_needed && TARGET_USE_LEAVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && (frame.nregs + frame.nsseregs) == 1)
-      || crtl->calls_eh_return)
+      || crtl->calls_eh_return
+      || be_x64frame)
     {
       /* Restore registers.  We can use ebp or esp to address the memory
 	 locations.  If both are available, default to ebp, since offsets
@@ -9297,7 +9329,7 @@ ix86_expand_epilogue (int style)
 	 If we are realigning stack with bp and sp, regs restore can't
 	 be addressed by bp. sp must be used instead.  */
 
-      if (!frame_pointer_needed
+      if (!frame_pointer_needed || be_x64frame
 	  || (sp_valid && !(frame.to_allocate + frame.padding0))
 	  || stack_realign_fp)
 	{
@@ -9338,7 +9370,7 @@ ix86_expand_epilogue (int style)
 	  /* Neither does regparm nested functions.  */
 	  gcc_assert (!ix86_static_chain_on_stack);
 
-	  if (frame_pointer_needed)
+	  if (frame_pointer_needed && !be_x64frame)
 	    {
 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
@@ -9387,6 +9419,16 @@ ix86_expand_epilogue (int style)
 		}
 	    }
 	}
+      else if (be_x64frame)
+        {
+	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				     GEN_INT (frame.to_allocate
+					      + frame.nregs * UNITS_PER_WORD
+					      + frame.nsseregs * 16
+					      + frame.padding0),
+				     style, 0);
+	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
+	}
       else if (!frame_pointer_needed)
 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (frame.to_allocate
@@ -9416,7 +9458,7 @@ ix86_expand_epilogue (int style)
          won't be able to recover via lea $offset(%bp), %sp, because
          there is a padding area between bp and sp for realign.
          "add $to_allocate, %sp" must be used instead.  */
-      if (!sp_valid)
+      if (!sp_valid && !be_x64frame)
 	{
 	  gcc_assert (frame_pointer_needed);
           gcc_assert (!stack_realign_fp);
@@ -9447,7 +9489,7 @@ ix86_expand_epilogue (int style)
 					+ frame.padding0);
       red_offset -= offset;
 
-      if (frame_pointer_needed)
+      if (frame_pointer_needed && !be_x64frame)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
 	     able to grok it fast.  */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-05  6:18 ` Kai Tietz
@ 2010-07-14 19:29   ` NightStrike
  2010-07-18 10:42     ` Kai Tietz
  0 siblings, 1 reply; 8+ messages in thread
From: NightStrike @ 2010-07-14 19:29 UTC (permalink / raw)
  To: Kai Tietz; +Cc: GCC Patches, Richard Henderson

On Mon, Jul 5, 2010 at 2:17 AM, Kai Tietz <ktietz70@googlemail.com> wrote:
> 2010/7/4 Kai Tietz <ktietz70@googlemail.com>:
>> Hello,
>>
>> this patch prepares for x64 SEH, that frame-pointer points at end of
>> stack-prologue. This is required for x64, because the SEH unwind
>> information can't express negative offsets for stack.
>>
>> ChangeLog
>>
>> 2010-07-04  Kai Tietz
>>
>>        * config/i386/i386.c (ix86_compute_frame_layout): Let frame's
>>        hard_frame_pointer_offset point to end of prologue.
>>        (ix86_expand_prologue): Handle x64 frame-pointer prologue.
>>        (ix86_expand_epilogue): Handle x64 frame-pointer epilogue.
>>
>> Tested for i686-pc-cygwin, x86_64-pc-mingw32. Ok for apply?
>>
>> Regards,
>> Kai
>
> This update patch avoids, that for profiling-built the x64
> frame-layout gets used. It would lead to issues, caused by profiling
> code, which assumes old stack-frame layout.
>
> Regards,
> Kai
>
>
> --
> |  (\_/) This is Bunny. Copy and paste
> | (='.'=) Bunny into your signature to help
> | (")_(") him gain world domination
>

Ping

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-14 19:29   ` NightStrike
@ 2010-07-18 10:42     ` Kai Tietz
  2010-07-20 22:57       ` Richard Henderson
  0 siblings, 1 reply; 8+ messages in thread
From: Kai Tietz @ 2010-07-18 10:42 UTC (permalink / raw)
  To: Richard Henderson; +Cc: GCC Patches, NightStrike

[-- Attachment #1: Type: text/plain, Size: 968 bytes --]

Hello Richard,

here is the follow-up to the x64 frame-pointer patch. It requires
the -mfentry patch to be applied first in order to apply cleanly.
I enabled this option only for amd64 architectures, as I don't see
much reason to support it for x86 at the moment: this call-frame
layout needs -mfentry support, which on x86 is limited to
non-PIC compilation.

ChangeLog

	* config/i386/i386.c (override_options): Add check.
	(ix86_compute_frame_layout): Set hard_frame_pointer_offset
	for -mframe-x64.
	(ix86_expand_prologue): Add emit of -mframe-x64.
	(ix86_expand_epilogue): Likewise.
	* config/i386/i386.opt (-mframe-x64): New.
	* doc/invoke.texi (mframe-x64): Add documentation.

Tested for x86_64-pc-mingw32, i686-pc-mingw32, and i686-pc-cygwin. Ok
to apply after the -mfentry patch got approval?

Regards,
Kai


-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

[-- Attachment #2: x64_pro.diff --]
[-- Type: application/octet-stream, Size: 10062 bytes --]

Index: gcc/gcc/config/i386/i386.c
===================================================================
--- gcc.orig/gcc/config/i386/i386.c	2010-07-18 12:25:33.137270400 +0200
+++ gcc/gcc/config/i386/i386.c	2010-07-18 12:25:34.287336200 +0200
@@ -3691,13 +3691,41 @@ override_options (bool main_args_p)
 #if defined(MCOUNT_NAME) && defined(MCOUNT_NAME_BEFORE_PROLOGUE)
     only_default = 0;
 #endif
+    if (!TARGET_64BIT && flag_framex64)
+      {
+	sorry ("-mframe-x64 isn't supported for 32-bit");
+
+	flag_framex64 = 0;
+      }
+    if (only_default && default_profile_top_flag == 0 && profile_flag
+	&& flag_framex64)
+      {
+	sorry ("-mframe-x64 option requires unsupported -mfentry");
+	flag_framex64 = 0;
+      }
+    else if(!only_default && flag_framex64)
+      {
+	default_profile_top_flag = 1;
+      }
 
     if (flag_fentry == -1)
       flag_fentry = default_profile_top_flag;
+
+    if (flag_framex64 && flag_fentry == 0 && profile_flag)
+      {
+	if (!only_default)
+	  sorry ("-mframe-x64 together with -pg requires -mfentry");
+	else
+	  sorry ("-mframe-x64 together with -pg isn't supported by this"
+	         " target");
+	flag_framex64 = 0;
+      }
+
     if (flag_fentry != 0 && force_default_profile_top_flag)
       {
 	sorry ("-mfentry isn't support for x86 in combination with -fpic");
 	flag_fentry = 0;
+	flag_framex64 = 0;
       }
     else if (flag_fentry != default_profile_top_flag && only_default)
       {
@@ -8473,6 +8501,12 @@ ix86_compute_frame_layout (struct ix86_f
   /* We've reached end of stack frame.  */
   frame->stack_pointer_offset = offset;
 
+  /* Adjust hard_frame_pointer so that it points
+     to end of frame.  */
+  if (frame_pointer_needed && flag_framex64)
+      frame->hard_frame_pointer_offset = offset;
+
+
   /* Size prologue needs to allocate.  */
   frame->to_allocate =
     (size + frame->padding1 + frame->padding2
@@ -9217,6 +9251,7 @@ ix86_expand_prologue (void)
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;
   int gen_frame_pointer = frame_pointer_needed;
+  int be_x64frame = frame_pointer_needed && flag_framex64;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -9348,11 +9383,14 @@ ix86_expand_prologue (void)
       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
       RTX_FRAME_RELATED_P (insn) = 1;
 
-      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
-      RTX_FRAME_RELATED_P (insn) = 1;
+      if (!be_x64frame)
+	{
+	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+	  RTX_FRAME_RELATED_P (insn) = 1;
 
-      if (ix86_cfa_state->reg == stack_pointer_rtx)
-        ix86_cfa_state->reg = hard_frame_pointer_rtx;
+	  if (ix86_cfa_state->reg == stack_pointer_rtx)
+	    ix86_cfa_state->reg = hard_frame_pointer_rtx;
+	}
     }
 
   if (stack_realign_fp)
@@ -9407,7 +9445,7 @@ ix86_expand_prologue (void)
      a red zone location */
   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
-    ix86_emit_save_regs_using_mov ((frame_pointer_needed
+    ix86_emit_save_regs_using_mov ((frame_pointer_needed && !be_x64frame
 				     && !crtl->stack_realign_needed)
                                    ? hard_frame_pointer_rtx
 				   : stack_pointer_rtx,
@@ -9451,7 +9489,7 @@ ix86_expand_prologue (void)
 
       if (eax_live)
 	{
-	  if (frame_pointer_needed)
+	  if (frame_pointer_needed && !be_x64frame)
 	    t = plus_constant (hard_frame_pointer_rtx,
 			       allocate
 			       - frame.to_allocate
@@ -9466,7 +9504,7 @@ ix86_expand_prologue (void)
       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
     {
-      if (!frame_pointer_needed
+      if (!frame_pointer_needed || be_x64frame
 	  || !(frame.to_allocate + frame.padding0)
 	  || crtl->stack_realign_needed)
         ix86_emit_save_regs_using_mov (stack_pointer_rtx,
@@ -9476,7 +9514,7 @@ ix86_expand_prologue (void)
         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
 				       -frame.nregs * UNITS_PER_WORD);
     }
-  if (!frame_pointer_needed
+  if (!frame_pointer_needed || be_x64frame
       || !(frame.to_allocate + frame.padding0)
       || crtl->stack_realign_needed)
     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
@@ -9530,6 +9568,15 @@ ix86_expand_prologue (void)
   if (crtl->profile && !flag_fentry && pic_reg_used)
     emit_insn (gen_prologue_use (pic_offset_table_rtx));
 
+  if (be_x64frame)
+    {
+      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (ix86_cfa_state->reg == stack_pointer_rtx)
+	ix86_cfa_state->reg = hard_frame_pointer_rtx;
+    }
+
   if (crtl->drap_reg && !crtl->stack_realign_needed)
     {
       /* vDRAP is setup but after reload it turns out stack realign
@@ -9742,19 +9789,28 @@ ix86_expand_epilogue (int style)
   HOST_WIDE_INT offset, red_offset;
   struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
   bool using_drap;
+  int be_x64frame = frame_pointer_needed && flag_framex64;
+
 
   ix86_finalize_stack_realign_flags ();
 
  /* When stack is realigned, SP must be valid.  */
-  sp_valid = (!frame_pointer_needed
+  sp_valid = (!frame_pointer_needed || be_x64frame
 	      || current_function_sp_is_unchanging
 	      || stack_realign_fp);
 
   ix86_compute_frame_layout (&frame);
 
+ if (be_x64frame)
+   {
+     pro_epilogue_adjust_stack (stack_pointer_rtx,
+     				hard_frame_pointer_rtx,
+     				const0_rtx, style, 0);
+   }
+
   /* See the comment about red zone and frame
      pointer usage in ix86_expand_prologue.  */
-  if (frame_pointer_needed && frame.red_zone_size)
+  if (frame_pointer_needed && (frame.red_zone_size || be_x64frame))
     emit_insn (gen_memory_blockage ());
 
   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
@@ -9805,7 +9861,8 @@ ix86_expand_epilogue (int style)
       || (frame_pointer_needed && TARGET_USE_LEAVE
 	  && cfun->machine->use_fast_prologue_epilogue
 	  && (frame.nregs + frame.nsseregs) == 1)
-      || crtl->calls_eh_return)
+      || crtl->calls_eh_return
+      || be_x64frame)
     {
       /* Restore registers.  We can use ebp or esp to address the memory
 	 locations.  If both are available, default to ebp, since offsets
@@ -9816,7 +9873,7 @@ ix86_expand_epilogue (int style)
 	 If we are realigning stack with bp and sp, regs restore can't
 	 be addressed by bp. sp must be used instead.  */
 
-      if (!frame_pointer_needed
+      if (!frame_pointer_needed || be_x64frame
 	  || (sp_valid && !(frame.to_allocate + frame.padding0))
 	  || stack_realign_fp)
 	{
@@ -9857,7 +9914,7 @@ ix86_expand_epilogue (int style)
 	  /* Neither does regparm nested functions.  */
 	  gcc_assert (!ix86_static_chain_on_stack);
 
-	  if (frame_pointer_needed)
+	  if (frame_pointer_needed && !be_x64frame)
 	    {
 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
@@ -9906,6 +9963,16 @@ ix86_expand_epilogue (int style)
 		}
 	    }
 	}
+      else if (be_x64frame)
+        {
+	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				     GEN_INT (frame.to_allocate
+					      + frame.nregs * UNITS_PER_WORD
+					      + frame.nsseregs * 16
+					      + frame.padding0),
+				     style, 0);
+	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
+	}
       else if (!frame_pointer_needed)
 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 				   GEN_INT (frame.to_allocate
@@ -9935,7 +10002,7 @@ ix86_expand_epilogue (int style)
          won't be able to recover via lea $offset(%bp), %sp, because
          there is a padding area between bp and sp for realign.
          "add $to_allocate, %sp" must be used instead.  */
-      if (!sp_valid)
+      if (!sp_valid && !be_x64frame)
 	{
 	  gcc_assert (frame_pointer_needed);
           gcc_assert (!stack_realign_fp);
@@ -9966,7 +10033,7 @@ ix86_expand_epilogue (int style)
 					+ frame.padding0);
       red_offset -= offset;
 
-      if (frame_pointer_needed)
+      if (frame_pointer_needed && !be_x64frame)
 	{
 	  /* Leave results in shorter dependency chains on CPUs that are
 	     able to grok it fast.  */
Index: gcc/gcc/config/i386/i386.opt
===================================================================
--- gcc.orig/gcc/config/i386/i386.opt	2010-07-18 12:25:33.140270600 +0200
+++ gcc/gcc/config/i386/i386.opt	2010-07-18 12:25:34.293336600 +0200
@@ -379,3 +379,8 @@ Support F16C built-in functions and code
 mfentry
 Target Report Var(flag_fentry) Init(-1)
 Emit profiling counter call at function entry before prologue.
+
+mframe-x64
+Target Report Var(flag_framex64) Init(0)
+Set the frame-pointer to the stack location at the end of prologue for 64-bit.
+
Index: gcc/gcc/doc/invoke.texi
===================================================================
--- gcc.orig/gcc/doc/invoke.texi	2010-07-18 12:25:33.149271100 +0200
+++ gcc/gcc/doc/invoke.texi	2010-07-18 12:25:34.320338100 +0200
@@ -601,7 +601,7 @@ Objective-C and Objective-C++ Dialects}.
 -momit-leaf-frame-pointer  -mno-red-zone -mno-tls-direct-seg-refs @gol
 -mcmodel=@var{code-model} -mabi=@var{name} @gol
 -m32  -m64 -mlarge-data-threshold=@var{num} @gol
--msse2avx -mfentry}
+-msse2avx -mfentry -mframe-x64}
 
 @emph{IA-64 Options}
 @gccoptlist{-mbig-endian  -mlittle-endian  -mgnu-as  -mgnu-ld  -mno-pic @gol
@@ -12474,6 +12474,14 @@ If profiling is active @option{-pg} put
 counter call before prologue.
 Note: On x86 architectures the attribute @code{ms_hook_prologue}
 isn't possible at the moment for @option{-mfentry} and @option{-pg}.
+
+@item -mframe-x64
+@itemx -mno-frame-x64
+@opindex mframe-x64
+Specifies that the frame-pointer register points to the
+stack-pointer at the end of prologue. For profiling this
+option requires that @option{-mfentry} is active, too.
+Note: On x86 architectures this option is disallowed.
 @end table
 
 These @samp{-m} switches are supported in addition to the above

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-18 10:42     ` Kai Tietz
@ 2010-07-20 22:57       ` Richard Henderson
  2010-07-21  7:43         ` Kai Tietz
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2010-07-20 22:57 UTC (permalink / raw)
  To: Kai Tietz; +Cc: GCC Patches, NightStrike

On 07/18/2010 03:42 AM, Kai Tietz wrote:
> +mframe-x64
> +Target Report Var(flag_framex64) Init(0)
> +Set the frame-pointer to the stack location at the end of prologue for 64-bit.

Modulo testing purposes, a command-line switch makes no sense.  We'll want to
key this off SEH enabled, or at least TARGET_64BIT_MS_ABI.

The state of ix86_expand_prologue/epilogue is... what's a kind word... chaotic?
This isn't your fault, but your patch doesn't help either.  It's extremely
difficult to tell if your patch is correct.  I'm pretty sure it isn't correct
for any case of stack re-alignment, for instance.

I spent an hour or three attempting to tidy up these functions and handle the
frame pointer at the bottom of the frame.  I have some ideas now for how to
clean things up, but I think they'll really need to be staged in in phases.
One big patch would simply be too unwieldy.

I'll work on this cleanup over the next couple of days.



r~

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-20 22:57       ` Richard Henderson
@ 2010-07-21  7:43         ` Kai Tietz
  2010-07-21 17:36           ` Richard Henderson
  0 siblings, 1 reply; 8+ messages in thread
From: Kai Tietz @ 2010-07-21  7:43 UTC (permalink / raw)
  To: Richard Henderson; +Cc: GCC Patches, NightStrike

2010/7/21 Richard Henderson <rth@redhat.com>:
> On 07/18/2010 03:42 AM, Kai Tietz wrote:
>> +mframe-x64
>> +Target Report Var(flag_framex64) Init(0)
>> +Set the frame-pointer to the stack location at the end of prologue for 64-bit.
>
> Modulo testing purposes, a command-line switch makes no sense.  We'll want to
> key this off SEH enabled, or at least TARGET_64BIT_MS_ABI.

Well, I agree here, as this frame-layout is required for -mseh. I
added this as a general option to have a chance to not enable it by
default. Making this layout depend just on TARGET_64BIT_MS_ABI
isn't correct, as this macro checks the function ABI, which is not
suitable for target unwind-emitting (which has to be active for all
calling conventions). A check of (TARGET_64BIT && ix86_abi ==
MS_ABI) fits better IMHO.

> The state of ix86_expand_prologue/epilogue is... what's a kind word... chaotic?
> This isn't your fault, but your patch doesn't help either.  It's extremely
> difficult to tell if your patch is correct.  I'm pretty sure it isn't correct
> for any case of stack re-alignment, for instance.

Well, as the x64 ABI aligns the stack (besides leaf-functions) to 16 bytes,
there is by default nothing to fear. There is one nit about AVX
register stores, which lead to a stack-realignment to 32 bytes; that
would fail. The issue is that the x64 ABI doesn't specify anything about
the new AVX (well, the HW isn't even on the market AFAIK). I see two
possible ways to address this here: a) Sorry the use of AVX and x64 with
SEH unwind-information. Or b) save the initial stack-position in the
stack-frame when the realignment is performed.

> I spent an hour or three attempting to tidy up these functions and handle the
> frame pointer at the bottom of the frame.  I have some ideas now for how to
> clean things up, but I think they'll really need to be staged in in phases.
> One big patch would simply be too unwieldy.

Excellent.

> I'll work on this cleanup over the next couple of days.
>
>
>
> r~
>


Regards,
Kai

-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-21  7:43         ` Kai Tietz
@ 2010-07-21 17:36           ` Richard Henderson
  2010-07-21 18:06             ` Kai Tietz
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2010-07-21 17:36 UTC (permalink / raw)
  To: Kai Tietz; +Cc: GCC Patches, NightStrike

On 07/21/2010 12:43 AM, Kai Tietz wrote:
> Well, as the x64 ABI aligns the stack (besides leaf-functions) to 16 bytes,
> there is by default nothing to fear. There is one nit about AVX
> register stores, which lead to a stack-realignment to 32 bytes; that
> would fail. The issue is that the x64 ABI doesn't specify anything about
> the new AVX (well, the HW isn't even on the market AFAIK). I see two
> possible ways to address this here: a) Sorry the use of AVX and x64 with
> SEH unwind-information. Or b) save the initial stack-position in the
> stack-frame when the realignment is performed.

http://software.intel.com/en-us/forums/showthread.php?t=72496

This says that the MSABI for AVX will consider the high parts
of the YMM registers volatile.  Which means that for the purpose
of the prologue we only need to save the low 16 bytes.

Which means that we can arrange the frame like so:

	[ return addr ]
				<-- entry sp
	[ saved int regs ]
	[ saved xmm regs ]
				<-- fp
	[ stack re-alignment ]
	[ local stack frame ]
				<-- sp

Here, fp is not at the bottom of the stack, but it is below all of the
saved registers, which means that they're all at positive offsets from fp,
which is the only real requirement of SEH.  Also, the local stack frame
is aligned, which allows aligned spilling of YMM registers or other over-
aligned data.


r~

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFA patch i386]: Prepare x64 prologue using positive offsets for  frame-pointer
  2010-07-21 17:36           ` Richard Henderson
@ 2010-07-21 18:06             ` Kai Tietz
  0 siblings, 0 replies; 8+ messages in thread
From: Kai Tietz @ 2010-07-21 18:06 UTC (permalink / raw)
  To: Richard Henderson; +Cc: GCC Patches, NightStrike

2010/7/21 Richard Henderson <rth@redhat.com>:
> On 07/21/2010 12:43 AM, Kai Tietz wrote:
>> Well, as the x64 ABI aligns the stack (besides leaf-functions) to 16 bytes,
>> there is by default nothing to fear. There is one nit about AVX
>> register stores, which lead to a stack-realignment to 32 bytes; that
>> would fail. The issue is that the x64 ABI doesn't specify anything about
>> the new AVX (well, the HW isn't even on the market AFAIK). I see two
>> possible ways to address this here: a) Sorry the use of AVX and x64 with
>> SEH unwind-information. Or b) save the initial stack-position in the
>> stack-frame when the realignment is performed.
>
> http://software.intel.com/en-us/forums/showthread.php?t=72496
>
> This says that the MSABI for AVX will consider the high parts
> of the YMM registers volatile.  Which means that for the purpose
> of the prologue we only need to save the low 16 bytes.
>
> Which means that we can arrange the frame like so:
>
>        [ return addr ]
>                                <-- entry sp
>        [ saved int regs ]
>        [ saved xmm regs ]
>                                <-- fp
>        [ stack re-alignment ]
>        [ local stack frame ]
>                                <-- sp
>
> Here, fp is not at the bottom of the stack, but it is below all of the
> saved registers, which means that they're all at positive offsets from fp,
> which is the only real requirement of SEH.  Also, the local stack frame
> is aligned, which allows aligned spilling of YMM registers or other over-
> aligned data.
>
>
> r~
>


This is indeed good news. So the standard frame layout remains for x64, and
the YMM store in the prologue will happen only for the lower 128 bits. So
there won't be issues with the unwind-information.

Kai
-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-07-21 18:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-04 21:06 [RFA patch i386]: Prepare x64 prologue using positive offsets for frame-pointer Kai Tietz
2010-07-05  6:18 ` Kai Tietz
2010-07-14 19:29   ` NightStrike
2010-07-18 10:42     ` Kai Tietz
2010-07-20 22:57       ` Richard Henderson
2010-07-21  7:43         ` Kai Tietz
2010-07-21 17:36           ` Richard Henderson
2010-07-21 18:06             ` Kai Tietz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).