public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/6]: Ping: Merge from Stack Branch - New data in function.h
@ 2008-04-28 13:25 Ye, Joey
  2008-04-30 15:20 ` Jan Hubicka
  0 siblings, 1 reply; 4+ messages in thread
From: Ye, Joey @ 2008-04-28 13:25 UTC (permalink / raw)
  To: GCC Patches; +Cc: Jan Hubicka, H.J. Lu, Guo, Xuepeng

Jan, 

Can you please review for mainline?

http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01019.html

2008-04-28  Joey Ye  <joey.ye@intel.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* function.h (function): Move  
	stack_realign_needed, preferred_stack_boundary
	to rtl_data.
	(rtl_data): Add new field drap_reg, stack_alignment_estimated,
	stack_realign_really, need_drap, save_param_ptr_reg,
	stack_realign_processed, stack_realign_finalized and 
	stack_realign_used, stack_realign_needed.
	(stack_realign_fp): New macro.
	(stack_realign_drap): Likewise.

	* flags.h (frame_pointer_needed): Removed.
	* final.c (frame_pointer_needed): Likewise.

Index: function.h
===================================================================
--- function.h	(revision 134716)
+++ function.h	(working copy)
@@ -306,6 +306,9 @@ struct rtl_data GTY(())
      needed by inner routines.  */
   rtx x_arg_pointer_save_area;
 
+  /* Dynamic Realign Argument Pointer used for realigning stack.  */
+  rtx drap_reg;
+
   /* Offset to end of allocated area of stack frame.
      If stack grows down, this is the address of the last stack slot
allocated.
      If stack grows up, this is the address for the next slot.  */
@@ -323,12 +326,19 @@ struct rtl_data GTY(())
   /* Current nesting level for temporaries.  */
   int x_temp_slot_level;
 
-  /* The largest alignment of slot allocated on the stack.  */
+  /* The largest alignment needed on the stack, including requirement
+     for outgoing stack alignment.  */
   unsigned int stack_alignment_needed;
 
   /* Preferred alignment of the end of stack frame.  */
   unsigned int preferred_stack_boundary;
 
+  /* The largest alignment of slot allocated on the stack.  */
+  unsigned int stack_alignment_used;
+
+  /* The estimated stack alignment.  */
+  unsigned int stack_alignment_estimated;
+
   /* For reorg.  */
 
   /* If some insns can be deferred to the delay slots of the epilogue,
the
@@ -387,6 +397,38 @@ struct rtl_data GTY(())
 
   /* Nonzero if code to initialize arg_pointer_save_area has been
emitted.  */
   bool arg_pointer_save_area_init;
+
+  /* Nonzero if current function must be given a frame pointer.
+     Set in global.c if anything is allocated on the stack there.  */
+  bool need_frame_pointer;
+
+  /* Nonzero if need_frame_pointer has been set.  */
+  bool need_frame_pointer_set;
+
+  /* Nonzero if, by estimation, current function stack needs
realignment. */
+  bool stack_realign_needed;
+
+  /* Nonzero if function stack realignment is really needed. This flag
+     will be set after reload if by then criteria of stack realignment
+     is still true. Its value may be contridition to
stack_realign_needed
+     since the latter was set before reload. This flag is more accurate
+     than stack_realign_needed so prologue/epilogue should be generated
+     according to both flags  */
+  bool stack_realign_really;
+
+  /* Nonzero if function being compiled needs dynamic realigned
+     argument pointer (drap) if stack needs realigning.  */
+  bool need_drap;
+
+  /* Nonzero if current function needs to save/restore parameter
+     pointer register in prolog, because it is a callee save reg.  */
+  bool save_param_ptr_reg;
+
+  /* Nonzero if function stack realignment estimatoin is done.  */
+  bool stack_realign_processed;
+
+  /* Nonzero if function stack realignment has been finalized.  */
+  bool stack_realign_finalized;
 };
 
 #define return_label (crtl->x_return_label)
@@ -400,6 +442,9 @@ struct rtl_data GTY(())
 #define avail_temp_slots (crtl->x_avail_temp_slots)
 #define temp_slot_level (crtl->x_temp_slot_level)
 #define nonlocal_goto_handler_labels
(crtl->x_nonlocal_goto_handler_labels)
+#define frame_pointer_needed (crtl->need_frame_pointer)
+#define stack_realign_fp (crtl->stack_realign_needed &&
!crtl->need_drap)
+#define stack_realign_drap (crtl->stack_realign_needed &&
crtl->need_drap)
 
 extern GTY(()) struct rtl_data x_rtl;
 
Index: flags.h
===================================================================
--- flags.h	(revision 134716)
+++ flags.h	(working copy)
@@ -223,12 +223,6 @@ extern int flag_dump_rtl_in_asm;
 \f
 /* Other basic status info about current function.  */
 
-/* Nonzero means current function must be given a frame pointer.
-   Set in stmt.c if anything is allocated on the stack there.
-   Set in reload1.c if anything is allocated on the stack there.  */
-
-extern int frame_pointer_needed;
-
 /* Nonzero if subexpressions must be evaluated from left-to-right.  */
 extern int flag_evaluation_order;
 
Index: final.c
===================================================================
--- final.c	(revision 134716)
+++ final.c	(working copy)
@@ -178,12 +178,6 @@ CC_STATUS cc_status;
 CC_STATUS cc_prev_status;
 #endif
 
-/* Nonzero means current function must be given a frame pointer.
-   Initialized in function.c to 0.  Set only in reload1.c as per
-   the needs of the function.  */
-
-int frame_pointer_needed;
-
 /* Number of unmatched NOTE_INSN_BLOCK_BEG notes we have seen.  */
 
 static int block_depth;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/6]: Ping: Merge from Stack Branch - New data in function.h
  2008-04-28 13:25 [PATCH 1/6]: Ping: Merge from Stack Branch - New data in function.h Ye, Joey
@ 2008-04-30 15:20 ` Jan Hubicka
  2008-04-30 15:57   ` H.J. Lu
  2008-04-30 23:14   ` Ye, Joey
  0 siblings, 2 replies; 4+ messages in thread
From: Jan Hubicka @ 2008-04-30 15:20 UTC (permalink / raw)
  To: Ye, Joey; +Cc: GCC Patches, Jan Hubicka, H.J. Lu, Guo, Xuepeng

> Jan, 
> 
> Can you please review for mainline?
> 
> http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01019.html
Hi,
I can not really approve the generic bits of the patch, so RTL
maintainer will have to take a look.
The new placement in rtl_data seems fine to me. Do you have any idea how
often the code gets false positives (ie that stack_realign_needed is set
and stack_realign_really is not).
If I understand correctly, then we end up wasting pointer for argument
pointer (pseudo) register, but we don't actually emit the alignment prologue,
right?

The earlier attempt by Bernd to solve same problem
http://gcc.gnu.org/ml/gcc-patches/2002-05/msg02282.html
got around via reload's ellimination, but as I understand its major
problem was fixing esi and thus failing in combination with asm
statements and stringops...

Honza
> 
> 2008-04-28  Joey Ye  <joey.ye@intel.com>
> 	    H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	* function.h (function): Move  
> 	stack_realign_needed, preferred_stack_boundary
> 	to rtl_data.
> 	(rtl_data): Add new field drap_reg, stack_alignment_estimated,
> 	stack_realign_really, need_drap, save_param_ptr_reg,
> 	stack_realign_processed, stack_realign_finalized and 
> 	stack_realign_used, stack_realign_needed.
> 	(stack_realign_fp): New macro.
> 	(stack_realign_drap): Likewise.
> 
> 	* flags.h (frame_pointer_needed): Removed.
> 	* final.c (frame_pointer_needed): Likewise.
> 
> Index: function.h
> ===================================================================
> --- function.h	(revision 134716)
> +++ function.h	(working copy)
> @@ -306,6 +306,9 @@ struct rtl_data GTY(())
>       needed by inner routines.  */
>    rtx x_arg_pointer_save_area;
>  
> +  /* Dynamic Realign Argument Pointer used for realigning stack.  */
> +  rtx drap_reg;
> +
>    /* Offset to end of allocated area of stack frame.
>       If stack grows down, this is the address of the last stack slot
> allocated.
>       If stack grows up, this is the address for the next slot.  */
> @@ -323,12 +326,19 @@ struct rtl_data GTY(())
>    /* Current nesting level for temporaries.  */
>    int x_temp_slot_level;
>  
> -  /* The largest alignment of slot allocated on the stack.  */
> +  /* The largest alignment needed on the stack, including requirement
> +     for outgoing stack alignment.  */
>    unsigned int stack_alignment_needed;
>  
>    /* Preferred alignment of the end of stack frame.  */
>    unsigned int preferred_stack_boundary;
>  
> +  /* The largest alignment of slot allocated on the stack.  */
> +  unsigned int stack_alignment_used;
> +
> +  /* The estimated stack alignment.  */
> +  unsigned int stack_alignment_estimated;
> +
>    /* For reorg.  */
>  
>    /* If some insns can be deferred to the delay slots of the epilogue,
> the
> @@ -387,6 +397,38 @@ struct rtl_data GTY(())
>  
>    /* Nonzero if code to initialize arg_pointer_save_area has been
> emitted.  */
>    bool arg_pointer_save_area_init;
> +
> +  /* Nonzero if current function must be given a frame pointer.
> +     Set in global.c if anything is allocated on the stack there.  */
> +  bool need_frame_pointer;
> +
> +  /* Nonzero if need_frame_pointer has been set.  */
> +  bool need_frame_pointer_set;
> +
> +  /* Nonzero if, by estimation, current function stack needs
> realignment. */
> +  bool stack_realign_needed;
> +
> +  /* Nonzero if function stack realignment is really needed. This flag
> +     will be set after reload if by then criteria of stack realignment
> +     is still true. Its value may be contridition to
> stack_realign_needed
> +     since the latter was set before reload. This flag is more accurate
> +     than stack_realign_needed so prologue/epilogue should be generated
> +     according to both flags  */
> +  bool stack_realign_really;
> +
> +  /* Nonzero if function being compiled needs dynamic realigned
> +     argument pointer (drap) if stack needs realigning.  */
> +  bool need_drap;
> +
> +  /* Nonzero if current function needs to save/restore parameter
> +     pointer register in prolog, because it is a callee save reg.  */
> +  bool save_param_ptr_reg;
> +
> +  /* Nonzero if function stack realignment estimatoin is done.  */
> +  bool stack_realign_processed;
> +
> +  /* Nonzero if function stack realignment has been finalized.  */
> +  bool stack_realign_finalized;
>  };
>  
>  #define return_label (crtl->x_return_label)
> @@ -400,6 +442,9 @@ struct rtl_data GTY(())
>  #define avail_temp_slots (crtl->x_avail_temp_slots)
>  #define temp_slot_level (crtl->x_temp_slot_level)
>  #define nonlocal_goto_handler_labels
> (crtl->x_nonlocal_goto_handler_labels)
> +#define frame_pointer_needed (crtl->need_frame_pointer)
> +#define stack_realign_fp (crtl->stack_realign_needed &&
> !crtl->need_drap)
> +#define stack_realign_drap (crtl->stack_realign_needed &&
> crtl->need_drap)
>  
>  extern GTY(()) struct rtl_data x_rtl;
>  
> Index: flags.h
> ===================================================================
> --- flags.h	(revision 134716)
> +++ flags.h	(working copy)
> @@ -223,12 +223,6 @@ extern int flag_dump_rtl_in_asm;
>  \f
>  /* Other basic status info about current function.  */
>  
> -/* Nonzero means current function must be given a frame pointer.
> -   Set in stmt.c if anything is allocated on the stack there.
> -   Set in reload1.c if anything is allocated on the stack there.  */
> -
> -extern int frame_pointer_needed;
> -
>  /* Nonzero if subexpressions must be evaluated from left-to-right.  */
>  extern int flag_evaluation_order;
>  
> Index: final.c
> ===================================================================
> --- final.c	(revision 134716)
> +++ final.c	(working copy)
> @@ -178,12 +178,6 @@ CC_STATUS cc_status;
>  CC_STATUS cc_prev_status;
>  #endif
>  
> -/* Nonzero means current function must be given a frame pointer.
> -   Initialized in function.c to 0.  Set only in reload1.c as per
> -   the needs of the function.  */
> -
> -int frame_pointer_needed;
> -
>  /* Number of unmatched NOTE_INSN_BLOCK_BEG notes we have seen.  */
>  
>  static int block_depth;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/6]: Ping: Merge from Stack Branch - New data in function.h
  2008-04-30 15:20 ` Jan Hubicka
@ 2008-04-30 15:57   ` H.J. Lu
  2008-04-30 23:14   ` Ye, Joey
  1 sibling, 0 replies; 4+ messages in thread
From: H.J. Lu @ 2008-04-30 15:57 UTC (permalink / raw)
  To: Jan Hubicka, bernd.schmidt
  Cc: Ye, Joey, GCC Patches, Jan Hubicka, Guo, Xuepeng

On Wed, Apr 30, 2008 at 4:34 AM, Jan Hubicka <jh@suse.cz> wrote:
> > Jan,
>  >
>  > Can you please review for mainline?
>  >
>  > http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01019.html
>  Hi,
>  I can not really approve the generic bits of the patch, so RTL
>  maintainer will have to take a look.
>  The new placement in rtl_data seems fine to me. Do you have any idea how
>  often the code gets false positives (ie that stack_realign_needed is set
>  and stack_realign_really is not).

Joey, do we have such data? We did run SPEC CPU 2000/2006 on ia32
with -mpreferred-stack-boundary=2. Without stack realignment, there
are significant performance drop since double is aligned at 4 byte on
stack. With stack realignment, double is aligned at 8 byte on stack and
there is no performance change, except for 183.equake in SPEC CPU
2000 where there is 5% performance drop due to double is aligned
at 4 byte since it is the first argument of a function and we can't align
it at 8 byte without breaking psABI.

>  If I understand correctly, then we end up wasting pointer for argument
>  pointer (pseudo) register, but we don't actually emit the alignment prologue,
>  right?

I think so. Joey will confirm it.

>
>  The earlier attempt by Bernd to solve same problem
>  http://gcc.gnu.org/ml/gcc-patches/2002-05/msg02282.html
>  got around via reload's ellimination, but as I understand its major
>  problem was fixing esi and thus failing in combination with asm
>  statements and stringops...
>

Bernd, can you take a look at our patches? We changed to

#define PREFERRED_STACK_BOUNDARY_DEFAULT 128
#define STACK_REALIGN_DEFAULT (TARGET_64BIT ? 0 : 1)

in gcc stack branch. That is we realign stack for every function on Linux/ia32,
in both gcc itself and all testcases. The only "make check" failure is one new
testcase on stack branch:

FAIL: gcc.target/i386/stackalign/local-1.c -mno-stackrealign
scan-assembler-not sub[^\\n]*sp

That is we adjust stack unnecessarily.  I believe our solution works for  asm
statements and stringops... If you can show us a testcase, we will make it
to work.

Thanks.

H.J.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH 1/6]: Ping: Merge from Stack Branch - New data in function.h
  2008-04-30 15:20 ` Jan Hubicka
  2008-04-30 15:57   ` H.J. Lu
@ 2008-04-30 23:14   ` Ye, Joey
  1 sibling, 0 replies; 4+ messages in thread
From: Ye, Joey @ 2008-04-30 23:14 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: GCC Patches, Jan Hubicka, H.J. Lu, Guo, Xuepeng

Jan,

> I can not really approve the generic bits of the patch, so RTL
> maintainer will have to take a look.
> The new placement in rtl_data seems fine to me. Do you have any idea
how
> often the code gets false positives (ie that stack_realign_needed is
set
> and stack_realign_really is not).
I don't have a statistic. My estimation is that it happens oftenly when
preferred_stack_boundary=4-byte and function access double or long long
type data.

> If I understand correctly, then we end up wasting pointer for argument
> pointer (pseudo) register, but we don't actually emit the alignment
prologue,
> right? 
Right. Even we won't waste any register when DRAP is not needed. In that
case the only difference will be local frame is addressed by stack
pointer.

Thanks - Joey

-----Original Message-----
From: Jan Hubicka [mailto:jh@suse.cz] 
Sent: Wednesday, April 30, 2008 7:35 PM
To: Ye, Joey
Cc: GCC Patches; Jan Hubicka; H.J. Lu; Guo, Xuepeng
Subject: Re: [PATCH 1/6]: Ping: Merge from Stack Branch - New data in
function.h

> Jan, 
> 
> Can you please review for mainline?
> 
> http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01019.html
Hi,
I can not really approve the generic bits of the patch, so RTL
maintainer will have to take a look.
The new placement in rtl_data seems fine to me. Do you have any idea how
often the code gets false positives (ie that stack_realign_needed is set
and stack_realign_really is not).
If I understand correctly, then we end up wasting pointer for argument
pointer (pseudo) register, but we don't actually emit the alignment
prologue,
right?

The earlier attempt by Bernd to solve same problem
http://gcc.gnu.org/ml/gcc-patches/2002-05/msg02282.html
got around via reload's ellimination, but as I understand its major
problem was fixing esi and thus failing in combination with asm
statements and stringops...

Honza
> 
> 2008-04-28  Joey Ye  <joey.ye@intel.com>
> 	    H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	* function.h (function): Move  
> 	stack_realign_needed, preferred_stack_boundary
> 	to rtl_data.
> 	(rtl_data): Add new field drap_reg, stack_alignment_estimated,
> 	stack_realign_really, need_drap, save_param_ptr_reg,
> 	stack_realign_processed, stack_realign_finalized and 
> 	stack_realign_used, stack_realign_needed.
> 	(stack_realign_fp): New macro.
> 	(stack_realign_drap): Likewise.
> 
> 	* flags.h (frame_pointer_needed): Removed.
> 	* final.c (frame_pointer_needed): Likewise.
> 
> Index: function.h
> ===================================================================
> --- function.h	(revision 134716)
> +++ function.h	(working copy)
> @@ -306,6 +306,9 @@ struct rtl_data GTY(())
>       needed by inner routines.  */
>    rtx x_arg_pointer_save_area;
>  
> +  /* Dynamic Realign Argument Pointer used for realigning stack.  */
> +  rtx drap_reg;
> +
>    /* Offset to end of allocated area of stack frame.
>       If stack grows down, this is the address of the last stack slot
> allocated.
>       If stack grows up, this is the address for the next slot.  */
> @@ -323,12 +326,19 @@ struct rtl_data GTY(())
>    /* Current nesting level for temporaries.  */
>    int x_temp_slot_level;
>  
> -  /* The largest alignment of slot allocated on the stack.  */
> +  /* The largest alignment needed on the stack, including requirement
> +     for outgoing stack alignment.  */
>    unsigned int stack_alignment_needed;
>  
>    /* Preferred alignment of the end of stack frame.  */
>    unsigned int preferred_stack_boundary;
>  
> +  /* The largest alignment of slot allocated on the stack.  */
> +  unsigned int stack_alignment_used;
> +
> +  /* The estimated stack alignment.  */
> +  unsigned int stack_alignment_estimated;
> +
>    /* For reorg.  */
>  
>    /* If some insns can be deferred to the delay slots of the
epilogue,
> the
> @@ -387,6 +397,38 @@ struct rtl_data GTY(())
>  
>    /* Nonzero if code to initialize arg_pointer_save_area has been
> emitted.  */
>    bool arg_pointer_save_area_init;
> +
> +  /* Nonzero if current function must be given a frame pointer.
> +     Set in global.c if anything is allocated on the stack there.  */
> +  bool need_frame_pointer;
> +
> +  /* Nonzero if need_frame_pointer has been set.  */
> +  bool need_frame_pointer_set;
> +
> +  /* Nonzero if, by estimation, current function stack needs
> realignment. */
> +  bool stack_realign_needed;
> +
> +  /* Nonzero if function stack realignment is really needed. This
flag
> +     will be set after reload if by then criteria of stack
realignment
> +     is still true. Its value may be contridition to
> stack_realign_needed
> +     since the latter was set before reload. This flag is more
accurate
> +     than stack_realign_needed so prologue/epilogue should be
generated
> +     according to both flags  */
> +  bool stack_realign_really;
> +
> +  /* Nonzero if function being compiled needs dynamic realigned
> +     argument pointer (drap) if stack needs realigning.  */
> +  bool need_drap;
> +
> +  /* Nonzero if current function needs to save/restore parameter
> +     pointer register in prolog, because it is a callee save reg.  */
> +  bool save_param_ptr_reg;
> +
> +  /* Nonzero if function stack realignment estimatoin is done.  */
> +  bool stack_realign_processed;
> +
> +  /* Nonzero if function stack realignment has been finalized.  */
> +  bool stack_realign_finalized;
>  };
>  
>  #define return_label (crtl->x_return_label)
> @@ -400,6 +442,9 @@ struct rtl_data GTY(())
>  #define avail_temp_slots (crtl->x_avail_temp_slots)
>  #define temp_slot_level (crtl->x_temp_slot_level)
>  #define nonlocal_goto_handler_labels
> (crtl->x_nonlocal_goto_handler_labels)
> +#define frame_pointer_needed (crtl->need_frame_pointer)
> +#define stack_realign_fp (crtl->stack_realign_needed &&
> !crtl->need_drap)
> +#define stack_realign_drap (crtl->stack_realign_needed &&
> crtl->need_drap)
>  
>  extern GTY(()) struct rtl_data x_rtl;
>  
> Index: flags.h
> ===================================================================
> --- flags.h	(revision 134716)
> +++ flags.h	(working copy)
> @@ -223,12 +223,6 @@ extern int flag_dump_rtl_in_asm;
>  \f
>  /* Other basic status info about current function.  */
>  
> -/* Nonzero means current function must be given a frame pointer.
> -   Set in stmt.c if anything is allocated on the stack there.
> -   Set in reload1.c if anything is allocated on the stack there.  */
> -
> -extern int frame_pointer_needed;
> -
>  /* Nonzero if subexpressions must be evaluated from left-to-right.
*/
>  extern int flag_evaluation_order;
>  
> Index: final.c
> ===================================================================
> --- final.c	(revision 134716)
> +++ final.c	(working copy)
> @@ -178,12 +178,6 @@ CC_STATUS cc_status;
>  CC_STATUS cc_prev_status;
>  #endif
>  
> -/* Nonzero means current function must be given a frame pointer.
> -   Initialized in function.c to 0.  Set only in reload1.c as per
> -   the needs of the function.  */
> -
> -int frame_pointer_needed;
> -
>  /* Number of unmatched NOTE_INSN_BLOCK_BEG notes we have seen.  */
>  
>  static int block_depth;

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2008-04-30 19:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-04-28 13:25 [PATCH 1/6]: Ping: Merge from Stack Branch - New data in function.h Ye, Joey
2008-04-30 15:20 ` Jan Hubicka
2008-04-30 15:57   ` H.J. Lu
2008-04-30 23:14   ` Ye, Joey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).