public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Bernd Edlinger <bernd.edlinger@hotmail.de>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Subject: [PATCH] [i386] Recompute the frame layout less often
Date: Sun, 14 May 2017 09:11:00 -0000	[thread overview]
Message-ID: <HE1PR0701MB216949510BB0F8FA80A60952E4E00@HE1PR0701MB2169.eurprd07.prod.outlook.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 452 bytes --]

Hi,


this patch uses the new TARGET_COMPUTE_FRAME_LAYOUT hook in the i386
backend to avoid re-computing the frame layout when not really
necessary.

It simplifies the logic in ix86_compute_frame_layout by removing
the use_fast_prologue_epilogue_nregs, which is no longer necessary,
because the frame layout can no longer change spontaneously.


Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?


Thanks
Bernd.

[-- Attachment #2: changelog-frame-layout-i386.txt --]
[-- Type: text/plain, Size: 804 bytes --]

2017-05-14  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	* config/i386/i386.c (ix86_can_use_return_insn_p): Use the ix86_frame
	data structure directly.
	(ix86_initial_elimination_offset): Likewise.
	(ix86_expand_prologue): Likewise.
	(ix86_expand_epilogue): Likewise.
	(ix86_expand_split_stack_prologue): Likewise.
	(ix86_compute_frame_layout): Remove frame parameter ...
	(TARGET_COMPUTE_FRAME_LAYOUT): ... and export it as a target hook.
	(ix86_finalize_stack_realign_flags): Call ix86_compute_frame_layout
	only if necessary.
	(ix86_init_machine_status): Don't set use_fast_prologue_epilogue_nregs.
	(ix86_frame): Move from here ...
	* config/i386/i386.h (ix86_frame): ... to here.
	(machine_function): Remove use_fast_prologue_epilogue_nregs, cache the
	complete ix86_frame data structure instead.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: patch-frame-layout-i386.diff --]
[-- Type: text/x-patch; name="patch-frame-layout-i386.diff", Size: 9223 bytes --]

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 248005)
+++ gcc/config/i386/i386.c	(working copy)
@@ -2441,53 +2441,6 @@ struct GTY(()) stack_local_entry {
   struct stack_local_entry *next;
 };
 
-/* Structure describing stack frame layout.
-   Stack grows downward:
-
-   [arguments]
-					<- ARG_POINTER
-   saved pc
-
-   saved static chain			if ix86_static_chain_on_stack
-
-   saved frame pointer			if frame_pointer_needed
-					<- HARD_FRAME_POINTER
-   [saved regs]
-					<- regs_save_offset
-   [padding0]
-
-   [saved SSE regs]
-					<- sse_regs_save_offset
-   [padding1]          |
-		       |		<- FRAME_POINTER
-   [va_arg registers]  |
-		       |
-   [frame]	       |
-		       |
-   [padding2]	       | = to_allocate
-					<- STACK_POINTER
-  */
-struct ix86_frame
-{
-  int nsseregs;
-  int nregs;
-  int va_arg_size;
-  int red_zone_size;
-  int outgoing_arguments_size;
-
-  /* The offsets relative to ARG_POINTER.  */
-  HOST_WIDE_INT frame_pointer_offset;
-  HOST_WIDE_INT hard_frame_pointer_offset;
-  HOST_WIDE_INT stack_pointer_offset;
-  HOST_WIDE_INT hfp_save_offset;
-  HOST_WIDE_INT reg_save_offset;
-  HOST_WIDE_INT sse_reg_save_offset;
-
-  /* When save_regs_using_mov is set, emit prologue using
-     move instead of push instructions.  */
-  bool save_regs_using_mov;
-};
-
 /* Which cpu are we scheduling for.  */
 enum attr_cpu ix86_schedule;
 
@@ -2579,7 +2532,7 @@ static unsigned int ix86_function_arg_boundary (ma
 						const_tree);
 static rtx ix86_static_chain (const_tree, bool);
 static int ix86_function_regparm (const_tree, const_tree);
-static void ix86_compute_frame_layout (struct ix86_frame *);
+static void ix86_compute_frame_layout (void);
 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
 						 rtx, rtx, int);
 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
@@ -12007,7 +11960,7 @@ ix86_can_use_return_insn_p (void)
   if (crtl->args.pops_args && crtl->args.size >= 32768)
     return 0;
 
-  ix86_compute_frame_layout (&frame);
+  frame = cfun->machine->frame;
   return (frame.stack_pointer_offset == UNITS_PER_WORD
 	  && (frame.nregs + frame.nsseregs) == 0);
 }
@@ -12493,8 +12446,7 @@ ix86_can_eliminate (const int from, const int to)
 HOST_WIDE_INT
 ix86_initial_elimination_offset (int from, int to)
 {
-  struct ix86_frame frame;
-  ix86_compute_frame_layout (&frame);
+  struct ix86_frame frame = cfun->machine->frame;
 
   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
     return frame.hard_frame_pointer_offset;
@@ -12533,8 +12485,9 @@ ix86_builtin_setjmp_frame_value (void)
 /* Fill structure ix86_frame about frame of currently computed function.  */
 
 static void
-ix86_compute_frame_layout (struct ix86_frame *frame)
+ix86_compute_frame_layout (void)
 {
+  struct ix86_frame *frame = &cfun->machine->frame;
   unsigned HOST_WIDE_INT stack_alignment_needed;
   HOST_WIDE_INT offset;
   unsigned HOST_WIDE_INT preferred_alignment;
@@ -12570,19 +12523,11 @@ static void
      in doing anything except PUSHs.  */
   if (TARGET_SEH)
     cfun->machine->use_fast_prologue_epilogue = false;
-
-  /* During reload iteration the amount of registers saved can change.
-     Recompute the value as needed.  Do not recompute when amount of registers
-     didn't change as reload does multiple calls to the function and does not
-     expect the decision to change within single iteration.  */
-  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
-           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
+  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
     {
       int count = frame->nregs;
       struct cgraph_node *node = cgraph_node::get (current_function_decl);
 
-      cfun->machine->use_fast_prologue_epilogue_nregs = count;
-
       /* The fast prologue uses move instead of push to save registers.  This
          is significantly longer, but also executes faster as modern hardware
          can execute the moves in parallel, but can't do that for push/pop.
@@ -13701,6 +13646,7 @@ ix86_finalize_stack_realign_flags (void)
        < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
 	  ? crtl->max_used_stack_slot_alignment
 	  : crtl->stack_alignment_needed));
+  bool recompute_frame_layout_p = false;
 
   if (crtl->stack_realign_finalized)
     {
@@ -13750,8 +13696,12 @@ ix86_finalize_stack_realign_flags (void)
 		&& requires_stack_frame_p (insn, prologue_used,
 					   set_up_by_prologue))
 	      {
+		if (crtl->stack_realign_needed != stack_realign)
+		  recompute_frame_layout_p = true;
 		crtl->stack_realign_needed = stack_realign;
 		crtl->stack_realign_finalized = true;
+		if (recompute_frame_layout_p)
+		  ix86_compute_frame_layout ();
 		return;
 	      }
 	}
@@ -13782,10 +13732,15 @@ ix86_finalize_stack_realign_flags (void)
       df_scan_blocks ();
       df_compute_regs_ever_live (true);
       df_analyze ();
+      recompute_frame_layout_p = true;
     }
 
+  if (crtl->stack_realign_needed != stack_realign)
+    recompute_frame_layout_p = true;
   crtl->stack_realign_needed = stack_realign;
   crtl->stack_realign_finalized = true;
+  if (recompute_frame_layout_p)
+    ix86_compute_frame_layout ();
 }
 
 /* Delete SET_GOT right after entry block if it is allocated to reg.  */
@@ -13841,7 +13796,7 @@ ix86_expand_prologue (void)
   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
   m->fs.sp_valid = true;
 
-  ix86_compute_frame_layout (&frame);
+  frame = m->frame;
 
   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
     {
@@ -14509,7 +14464,7 @@ ix86_expand_epilogue (int style)
   bool using_drap;
 
   ix86_finalize_stack_realign_flags ();
-  ix86_compute_frame_layout (&frame);
+  frame = m->frame;
 
   m->fs.sp_valid = (!frame_pointer_needed
 		    || (crtl->sp_is_unchanging
@@ -15019,7 +14974,7 @@ ix86_expand_split_stack_prologue (void)
   gcc_assert (flag_split_stack && reload_completed);
 
   ix86_finalize_stack_realign_flags ();
-  ix86_compute_frame_layout (&frame);
+  frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
   /* This is the label we will branch to if we have enough stack
@@ -28715,7 +28670,6 @@ ix86_init_machine_status (void)
   struct machine_function *f;
 
   f = ggc_cleared_alloc<machine_function> ();
-  f->use_fast_prologue_epilogue_nregs = -1;
   f->call_abi = ix86_abi;
 
   return f;
@@ -52040,6 +51994,9 @@ ix86_run_selftests (void)
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
 
+#undef TARGET_COMPUTE_FRAME_LAYOUT
+#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
+
 #undef TARGET_FRAME_POINTER_REQUIRED
 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
 
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h	(revision 248005)
+++ gcc/config/i386/i386.h	(working copy)
@@ -2449,6 +2449,53 @@ enum avx_u128_state
 /* Machine specific frame tracking during prologue/epilogue generation.  */
 
 #ifndef USED_FOR_TARGET
+/* Structure describing stack frame layout.
+   Stack grows downward:
+
+   [arguments]
+					<- ARG_POINTER
+   saved pc
+
+   saved static chain			if ix86_static_chain_on_stack
+
+   saved frame pointer			if frame_pointer_needed
+					<- HARD_FRAME_POINTER
+   [saved regs]
+					<- regs_save_offset
+   [padding0]
+
+   [saved SSE regs]
+					<- sse_regs_save_offset
+   [padding1]          |
+		       |		<- FRAME_POINTER
+   [va_arg registers]  |
+		       |
+   [frame]	       |
+		       |
+   [padding2]	       | = to_allocate
+					<- STACK_POINTER
+  */
+struct GTY(()) ix86_frame
+{
+  int nsseregs;
+  int nregs;
+  int va_arg_size;
+  int red_zone_size;
+  int outgoing_arguments_size;
+
+  /* The offsets relative to ARG_POINTER.  */
+  HOST_WIDE_INT frame_pointer_offset;
+  HOST_WIDE_INT hard_frame_pointer_offset;
+  HOST_WIDE_INT stack_pointer_offset;
+  HOST_WIDE_INT hfp_save_offset;
+  HOST_WIDE_INT reg_save_offset;
+  HOST_WIDE_INT sse_reg_save_offset;
+
+  /* When save_regs_using_mov is set, emit prologue using
+     move instead of push instructions.  */
+  bool save_regs_using_mov;
+};
+
 struct GTY(()) machine_frame_state
 {
   /* This pair tracks the currently active CFA as reg+offset.  When reg
@@ -2507,9 +2554,8 @@ struct GTY(()) machine_function {
   int varargs_fpr_size;
   int optimize_mode_switching[MAX_386_ENTITIES];
 
-  /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE
-     has been computed for.  */
-  int use_fast_prologue_epilogue_nregs;
+  /* Cached initial frame layout for the current function.  */
+  struct ix86_frame frame;
 
   /* For -fsplit-stack support: A stack local which holds a pointer to
      the stack arguments for a function with a variable number of

             reply	other threads:[~2017-05-14  7:42 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-14  9:11 Bernd Edlinger [this message]
2017-05-14  9:58 ` Daniel Santos
2017-05-14 10:27   ` Uros Bizjak
2017-05-14 18:11     ` Bernd Edlinger
2017-05-14 20:37       ` Daniel Santos
2017-05-15  2:23       ` Daniel Santos
2017-05-15 20:41         ` Bernd Edlinger
2017-05-15 23:52           ` Daniel Santos
2017-05-16  5:42             ` Daniel Santos
2017-05-16  8:52               ` Bernd Edlinger
2017-05-16 15:11                 ` Daniel Santos
2017-05-16 17:38               ` Ian Lance Taylor via gcc-patches
2017-05-16 20:10                 ` Bernd Edlinger
2017-05-16 21:05                   ` Bernd Edlinger
2017-05-17  2:12                   ` Daniel Santos
2017-05-17 17:43                     ` Bernd Edlinger
2017-05-18  7:05                       ` Daniel Santos
2017-05-18 13:48                     ` Bernd Edlinger
2017-05-19  3:13                       ` Daniel Santos
2017-05-22 18:32                         ` Bernd Edlinger
2017-05-23 14:34                           ` Bernd Edlinger
2017-05-24  6:49                             ` Daniel Santos
2017-06-01 16:06                             ` [PING][PATCH] " Bernd Edlinger
2017-06-01 18:18                             ` [PATCH] " Uros Bizjak
2017-05-23 23:01                           ` Daniel Santos
2017-05-16 21:35                 ` Daniel Santos
2017-05-17 18:41         ` Bernd Edlinger
2017-05-18  7:14           ` Daniel Santos
2017-05-19  5:12             ` Daniel Santos

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=HE1PR0701MB216949510BB0F8FA80A60952E4E00@HE1PR0701MB2169.eurprd07.prod.outlook.com \
    --to=bernd.edlinger@hotmail.de \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).