public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/6] Stack align: Collect alignment info
@ 2008-04-17 10:24 Ye, Joey
  0 siblings, 0 replies; only message in thread
From: Ye, Joey @ 2008-04-17 10:24 UTC (permalink / raw)
  To: GCC Patches; +Cc: Lu, Hongjiu, Guo, Xuepeng, Ye, Joey

[-- Attachment #1: Type: text/plain, Size: 2444 bytes --]

Related mail thread: 
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01099.html
http://gcc.gnu.org/ml/gcc-patches/2008-04/msg01100.html

2008-04-17  Joey Ye  <joey.ye@intel.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* builtins.c (expand_builtin_setjmp_receiver): Replace
	virtual_incoming_args_rtx with
	crtl->args.internal_arg_pointer.
	(expand_builtin_apply_args_1): Likewise.
	(expand_builtin_longjmp): DRAP will be needed if some builtins
	are called.
	(expand_builtin_apply): Likewise.

	* calls.c (expand_call): Don't calculate preferred stack
	boundary according to incoming stack boundary. Replace 
	virtual_incoming_args_rtx with
	crtl->args.internal_arg_pointer.
	(emit_call_1): DRAP will be needed if return pops.

	* emit-rtl.c (gen_reg_rtx): Estimate stack alignment when
	generating virtual registers.

	* cfgexpand.c (get_decl_align_unit): Estimate stack variable
	alignment and store to stack_alignment_estimated and
	stack_alignment_used.
	(expand_one_var): Likewise.
	(gate_handle_drap): Gate new pass pass_handle_drap.
	(handle_drap): New function.
	(tree_expand_cfg): Call handle_drap at end.

	* defaults.h (MAX_VECTORIZE_STACK_ALIGNMENT): New.

	* function.c (assign_stack_local_1): Estimate stack variable 
	alignment and store to stack_alignment_estimated.
	(instantiate_new_reg): Instantiate virtual incoming args rtx to
	vDRAP if stack realignment and DRAP is needed.
	(assign_parms): Collect parameter/return type alignment and 
	contribute to stack_alignment_estimated.
	(locate_and_pad_parm): Likewise.
	(allocate_struct_function): Init stack_alignment_estimated and
	stack_alignment_used.
	(get_arg_pointer_save_area): Replace virtual_incoming_args_rtx
	with crtl->args.internal_arg_pointer.

	* global.c (compute_regsets): Set need_fp cannot_elim
	wrt stack_realign_needed.

	* stmt.c (expand_nl_goto_receiver): Replace 
	virtual_incoming_args_rtx with
	crtl->args.internal_arg_pointer.

	* tree-vectorizer.c (vect_can_force_dr_alignment_p): Return
	true if alignment of variable on stack is less than or
	equal to MAX_VECTORIZE_STACK_ALIGNMENT.

	* reload1.c (set_label_offsets): Assert that frame pointer must
	be eliminated to stack pointer in case stack realignment is
	estimated to happen without DRAP.
	(elimination_effects): Likewise.
	(eliminate_regs_in_insn): Likewise.
	(mark_not_eliminable): Likewise.
	(update_eliminables): Likewise.

[-- Attachment #2: stack-align-collect-0417.patch --]
[-- Type: application/octet-stream, Size: 25111 bytes --]

Index: defaults.h
===================================================================
--- defaults.h	(.../trunk/gcc)	(revision 134203)
+++ defaults.h	(.../branches/stack/gcc)	(revision 134368)
@@ -940,4 +940,8 @@ along with GCC; see the file COPYING3.  
 #define OUTGOING_REG_PARM_STACK_SPACE 0
 #endif
 
+#ifndef MAX_VECTORIZE_STACK_ALIGNMENT
+#define MAX_VECTORIZE_STACK_ALIGNMENT 0
+#endif
+
 #endif  /* ! GCC_DEFAULTS_H */
Index: builtins.c
===================================================================
--- builtins.c	(.../trunk/gcc)	(revision 134203)
+++ builtins.c	(.../branches/stack/gcc)	(revision 134368)
@@ -740,7 +740,7 @@ expand_builtin_setjmp_receiver (rtx rece
 	{
 	  /* Now restore our arg pointer from the address at which it
 	     was saved in our stack frame.  */
-	  emit_move_insn (virtual_incoming_args_rtx,
+	  emit_move_insn (crtl->args.internal_arg_pointer,
 			  copy_to_reg (get_arg_pointer_save_area ()));
 	}
     }
@@ -775,6 +775,11 @@ expand_builtin_longjmp (rtx buf_addr, rt
   rtx fp, lab, stack, insn, last;
   enum machine_mode sa_mode = STACK_SAVEAREA_MODE (SAVE_NONLOCAL);
 
+  /* DRAP is needed for stack realign if longjmp is expanded to current 
+     function  */
+  if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    cfun->need_drap = true;
+
   if (setjmp_alias_set == -1)
     setjmp_alias_set = new_alias_set ();
 
@@ -1345,7 +1350,7 @@ expand_builtin_apply_args_1 (void)
       }
 
   /* Save the arg pointer to the block.  */
-  tem = copy_to_reg (virtual_incoming_args_rtx);
+  tem = copy_to_reg (crtl->args.internal_arg_pointer);
 #ifdef STACK_GROWS_DOWNWARD
   /* We need the pointer as the caller actually passed them to us, not
      as we might have pretended they were passed.  Make sure it's a valid
@@ -1453,6 +1458,14 @@ expand_builtin_apply (rtx function, rtx 
   /* Allocate a block of memory onto the stack and copy the memory
      arguments to the outgoing arguments address.  */
   allocate_dynamic_stack_space (argsize, 0, BITS_PER_UNIT);
+
+  /* Set DRAP flag to true, even though allocate_dynamic_stack_space
+     may have already set current_function_calls_alloca to true.
+     current_function_calls_alloca won't be set if argsize is zero,
+     so we have to guarantee need_drap is true here.  */
+  if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    cfun->need_drap = true;
+
   dest = virtual_outgoing_args_rtx;
 #ifndef STACK_GROWS_DOWNWARD
   if (GET_CODE (argsize) == CONST_INT)
Index: global.c
===================================================================
--- global.c	(.../trunk/gcc)	(revision 134203)
+++ global.c	(.../branches/stack/gcc)	(revision 134368)
@@ -247,10 +247,17 @@ compute_regsets (HARD_REG_SET *elim_set,
   static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
   size_t i;
 #endif
+
+  /* FIXME: If EXIT_IGNORE_STACK is set, we will not save and restore
+     sp for alloca.  So we can't eliminate the frame pointer in that
+     case.  At some point, we should improve this by emitting the
+     sp-adjusting insns for this case.  */
   int need_fp
     = (! flag_omit_frame_pointer
        || (current_function_calls_alloca && EXIT_IGNORE_STACK)
-       || FRAME_POINTER_REQUIRED);
+       || FRAME_POINTER_REQUIRED
+       || current_function_accesses_prior_frames
+       || cfun->stack_realign_needed);
 
   max_regno = max_reg_num ();
   compact_blocks ();
@@ -271,7 +281,10 @@ compute_regsets (HARD_REG_SET *elim_set,
     {
       bool cannot_elim
 	= (! CAN_ELIMINATE (eliminables[i].from, eliminables[i].to)
-	   || (eliminables[i].to == STACK_POINTER_REGNUM && need_fp));
+	   || (eliminables[i].to == STACK_POINTER_REGNUM
+	       && need_fp 
+	       && (! MAX_VECTORIZE_STACK_ALIGNMENT
+		   || ! stack_realign_fp)));
 
       if (!regs_asm_clobbered[eliminables[i].from])
 	{
Index: function.c
===================================================================
--- function.c	(.../trunk/gcc)	(revision 134203)
+++ function.c	(.../branches/stack/gcc)	(revision 134368)
@@ -342,17 +342,19 @@ assign_stack_local (enum machine_mode mo
 {
   rtx x, addr;
   int bigend_correction = 0;
-  unsigned int alignment;
+  unsigned int alignment, mode_alignment, alignment_in_bits;
   int frame_off, frame_alignment, frame_phase;
 
+  if (mode == BLKmode)
+    mode_alignment = BIGGEST_ALIGNMENT;
+  else
+    mode_alignment = GET_MODE_ALIGNMENT (mode);
+
   if (align == 0)
     {
       tree type;
 
-      if (mode == BLKmode)
-	alignment = BIGGEST_ALIGNMENT;
-      else
-	alignment = GET_MODE_ALIGNMENT (mode);
+      alignment = mode_alignment;
 
       /* Allow the target to (possibly) increase the alignment of this
 	 stack slot.  */
@@ -372,15 +374,48 @@ assign_stack_local (enum machine_mode mo
   else
     alignment = align / BITS_PER_UNIT;
 
+  alignment_in_bits = alignment * BITS_PER_UNIT;
+
   if (FRAME_GROWS_DOWNWARD)
     frame_offset -= size;
 
-  /* Ignore alignment we can't do with expected alignment of the boundary.  */
-  if (alignment * BITS_PER_UNIT > PREFERRED_STACK_BOUNDARY)
-    alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
-
-  if (cfun->stack_alignment_needed < alignment * BITS_PER_UNIT)
-    cfun->stack_alignment_needed = alignment * BITS_PER_UNIT;
+  if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    {
+      if (cfun->stack_alignment_estimated < alignment_in_bits)
+	{
+          if (!cfun->stack_realign_processed)
+            cfun->stack_alignment_estimated = alignment_in_bits;
+          else
+	    {
+	      gcc_assert (!cfun->stack_realign_finalized);
+	      if (!cfun->stack_realign_needed)
+		{
+		  /* It is OK to reduce the alignment as long as the
+		     requested size is 0 or the estimated stack
+		     alignment >= mode alignment.  */
+		  gcc_assert (size == 0
+			      || (cfun->stack_alignment_estimated
+				  >= mode_alignment));
+		  alignment_in_bits = cfun->stack_alignment_estimated;
+		  alignment = alignment_in_bits / BITS_PER_UNIT;
+		}
+	    }
+	}
+    }
+  else
+    {
+      /* Ignore alignment we can't do with expected alignment of the
+	 boundary.  */
+      if (alignment * BITS_PER_UNIT > PREFERRED_STACK_BOUNDARY)
+	{
+	  alignment_in_bits = PREFERRED_STACK_BOUNDARY;
+	  alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+	}
+    }
+  if (cfun->stack_alignment_needed < alignment_in_bits)
+    cfun->stack_alignment_needed = alignment_in_bits;
+  if (cfun->stack_alignment_used < cfun->stack_alignment_needed)
+    cfun->stack_alignment_used = cfun->stack_alignment_needed;
 
   /* Calculate how many bytes the start of local variables is off from
      stack alignment.  */
@@ -433,6 +468,7 @@ assign_stack_local (enum machine_mode mo
     frame_offset += size;
 
   x = gen_rtx_MEM (mode, addr);
+  set_mem_align (x, alignment_in_bits);
   MEM_NOTRAP_P (x) = 1;
 
   stack_slot_list
@@ -1169,7 +1205,17 @@ instantiate_new_reg (rtx x, HOST_WIDE_IN
   HOST_WIDE_INT offset;
 
   if (x == virtual_incoming_args_rtx)
-    new = arg_pointer_rtx, offset = in_arg_offset;
+    {
+      /* Replace virtual_incoming_args_rtx with internal arg pointer here */
+      if (crtl->args.internal_arg_pointer != virtual_incoming_args_rtx)
+        {
+          gcc_assert (stack_realign_drap);
+          new = crtl->args.internal_arg_pointer;
+          offset = 0;
+        }
+      else
+        new = arg_pointer_rtx, offset = in_arg_offset;
+    }
   else if (x == virtual_stack_vars_rtx)
     new = frame_pointer_rtx, offset = var_offset;
   else if (x == virtual_stack_dynamic_rtx)
@@ -2968,6 +3014,20 @@ assign_parms (tree fndecl)
 	  continue;
 	}
 
+      /* Estimate stack alignment from parameter alignment */
+      if (MAX_VECTORIZE_STACK_ALIGNMENT)
+        {
+          unsigned int align = FUNCTION_ARG_BOUNDARY (data.promoted_mode,
+						      data.passed_type);
+	  if (TYPE_ALIGN (data.nominal_type) > align)
+	    align = TYPE_ALIGN (data.passed_type);
+	  if (cfun->stack_alignment_estimated < align)
+	    {
+	      gcc_assert (!cfun->stack_realign_processed);
+	      cfun->stack_alignment_estimated = align;
+	    }
+	}
+	
       if (current_function_stdarg && !TREE_CHAIN (parm))
 	assign_parms_setup_varargs (&all, &data, false);
 
@@ -3005,6 +3065,28 @@ assign_parms (tree fndecl)
      now that all parameters have been copied out of hard registers.  */
   emit_insn (all.first_conversion_insn);
 
+  /* Estimate reload stack alignment from scalar return mode.  */
+  if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    {
+      if (DECL_RESULT (fndecl))
+	{
+	  tree type = TREE_TYPE (DECL_RESULT (fndecl));
+	  enum machine_mode mode = TYPE_MODE (type);
+
+	  if (mode != BLKmode
+	      && mode != VOIDmode
+	      && !AGGREGATE_TYPE_P (type))
+	    {
+	      unsigned int align = GET_MODE_ALIGNMENT (mode);
+	      if (cfun->stack_alignment_estimated < align)
+		{
+		  gcc_assert (!cfun->stack_realign_processed);
+		  cfun->stack_alignment_estimated = align;
+		}
+	    }
+	} 
+    }
+
   /* If we are receiving a struct value address as the first argument, set up
      the RTL for the function result. As this might require code to convert
      the transmitted address to Pmode, we do this here to ensure that possible
@@ -3282,12 +3364,34 @@ locate_and_pad_parm (enum machine_mode p
   locate->where_pad = where_pad;
   locate->boundary = boundary;
 
-  /* Remember if the outgoing parameter requires extra alignment on the
-     calling function side.  */
-  if (boundary > PREFERRED_STACK_BOUNDARY)
-    boundary = PREFERRED_STACK_BOUNDARY;
+  if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    {
+      /* stack_alignment_estimated can't change after stack has been
+	 realigned.  */
+      if (cfun->stack_alignment_estimated < boundary)
+        {
+          if (!cfun->stack_realign_processed)
+	    cfun->stack_alignment_estimated = boundary;
+	  else
+	    {
+	      gcc_assert (!cfun->stack_realign_finalized
+			  && cfun->stack_realign_needed);
+	    }
+	}
+    }
+  else
+    {
+      /* Remember if the outgoing parameter requires extra alignment on
+         the calling function side.  */
+      if (boundary > PREFERRED_STACK_BOUNDARY)
+        boundary = PREFERRED_STACK_BOUNDARY;
+    }
   if (cfun->stack_alignment_needed < boundary)
     cfun->stack_alignment_needed = boundary;
+  if (cfun->stack_alignment_used < cfun->stack_alignment_needed)
+    cfun->stack_alignment_used = cfun->stack_alignment_needed;
+  if (cfun->preferred_stack_boundary < boundary)
+    cfun->preferred_stack_boundary = boundary;
 
 #ifdef ARGS_GROW_DOWNWARD
   locate->slot_offset.constant = -initial_offset_ptr->constant;
@@ -3843,6 +3947,8 @@ allocate_struct_function (tree fndecl, b
   cfun = ggc_alloc_cleared (sizeof (struct function));
 
   cfun->stack_alignment_needed = STACK_BOUNDARY;
+  cfun->stack_alignment_used = STACK_BOUNDARY;
+  cfun->stack_alignment_estimated = STACK_BOUNDARY;
   cfun->preferred_stack_boundary = STACK_BOUNDARY;
 
   current_function_funcdef_no = get_next_funcdef_no ();
@@ -4622,7 +4728,8 @@ get_arg_pointer_save_area (void)
 	 generated stack slot may not be a valid memory address, so we
 	 have to check it and fix it if necessary.  */
       start_sequence ();
-      emit_move_insn (validize_mem (ret), virtual_incoming_args_rtx);
+      emit_move_insn (validize_mem (ret),
+                      crtl->args.internal_arg_pointer);
       seq = get_insns ();
       end_sequence ();
 
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(.../trunk/gcc)	(revision 134203)
+++ tree-vectorizer.c	(.../branches/stack/gcc)	(revision 134368)
@@ -1786,9 +1786,9 @@ vect_can_force_dr_alignment_p (const_tre
 
   if (TREE_STATIC (decl))
     return (alignment <= MAX_OFILE_ALIGNMENT);
+  else if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    return (alignment <= MAX_VECTORIZE_STACK_ALIGNMENT);
   else
-    /* This used to be PREFERRED_STACK_BOUNDARY, however, that is not 100%
-       correct until someone implements forced stack alignment.  */
     return (alignment <= STACK_BOUNDARY); 
 }
 
Index: calls.c
===================================================================
--- calls.c	(.../trunk/gcc)	(revision 134203)
+++ calls.c	(.../branches/stack/gcc)	(revision 134368)
@@ -419,6 +419,10 @@ emit_call_1 (rtx funexp, tree fntree, tr
       rounded_stack_size -= n_popped;
       rounded_stack_size_rtx = GEN_INT (rounded_stack_size);
       stack_pointer_delta -= n_popped;
+
+      /* If popup is needed, stack realign must use DRAP  */
+      if (MAX_VECTORIZE_STACK_ALIGNMENT)
+        cfun->need_drap = true;
     }
 
   if (!ACCUMULATE_OUTGOING_ARGS)
@@ -2091,7 +2095,10 @@ expand_call (tree exp, rtx target, int i
 
   /* Figure out the amount to which the stack should be aligned.  */
   preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
-  if (fndecl)
+
+  /* With automatic stack realignment, we align stack in prologue when
+     needed and there is no need to update preferred_stack_boundary.  */
+  if (!MAX_VECTORIZE_STACK_ALIGNMENT && fndecl)
     {
       struct cgraph_rtl_info *i = cgraph_rtl_info (fndecl);
       if (i && i->preferred_incoming_stack_boundary)
@@ -2392,7 +2399,7 @@ expand_call (tree exp, rtx target, int i
 	 incoming argument block.  */
       if (pass == 0)
 	{
-	  argblock = virtual_incoming_args_rtx;
+	  argblock = crtl->args.internal_arg_pointer;
 	  argblock
 #ifdef STACK_GROWS_DOWNWARD
 	    = plus_constant (argblock, crtl->args.pretend_args_size);
Index: emit-rtl.c
===================================================================
--- emit-rtl.c	(.../trunk/gcc)	(revision 134203)
+++ emit-rtl.c	(.../branches/stack/gcc)	(revision 134368)
@@ -864,9 +864,18 @@ rtx
 gen_reg_rtx (enum machine_mode mode)
 {
   rtx val;
+  unsigned int align = GET_MODE_ALIGNMENT (mode);
 
   gcc_assert (can_create_pseudo_p ());
 
+  /* If a virtual register with bigger mode alignment is generated,
+     increase stack alignment estimation because it might be spilled
+     to stack later.  */
+  if (MAX_VECTORIZE_STACK_ALIGNMENT 
+      && cfun->stack_alignment_estimated < align
+      && !cfun->stack_realign_processed)
+    cfun->stack_alignment_estimated = align;
+
   if (generating_concat_p
       && (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
 	  || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT))
Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(.../trunk/gcc)	(revision 134203)
+++ cfgexpand.c	(.../branches/stack/gcc)	(revision 134368)
@@ -161,10 +161,27 @@ get_decl_align_unit (tree decl)
 
   align = DECL_ALIGN (decl);
   align = LOCAL_ALIGNMENT (TREE_TYPE (decl), align);
-  if (align > PREFERRED_STACK_BOUNDARY)
-    align = PREFERRED_STACK_BOUNDARY;
+
+  if (MAX_VECTORIZE_STACK_ALIGNMENT)
+    {
+      if (cfun->stack_alignment_estimated < align)
+	{
+	  gcc_assert(!cfun->stack_realign_processed);
+          cfun->stack_alignment_estimated = align;
+	}
+    }
+  else
+    {
+      if (align > PREFERRED_STACK_BOUNDARY)
+	align = PREFERRED_STACK_BOUNDARY;
+    }
+
+  /* stack_alignment_needed > PREFERRED_STACK_BOUNDARY is permitted.
+     So here we only make sure stack_alignment_needed >= align.  */
   if (cfun->stack_alignment_needed < align)
     cfun->stack_alignment_needed = align;
+  if (cfun->stack_alignment_used < cfun->stack_alignment_needed)
+    cfun->stack_alignment_used = cfun->stack_alignment_needed;
 
   return align / BITS_PER_UNIT;
 }
@@ -743,6 +760,29 @@ defer_stack_allocation (tree var, bool t
 static HOST_WIDE_INT
 expand_one_var (tree var, bool toplevel, bool really_expand)
 {
+  if (MAX_VECTORIZE_STACK_ALIGNMENT && TREE_CODE (var) == VAR_DECL)
+    {
+      unsigned int align;
+
+      /* Because we don't know if VAR will be in register or on stack,
+	 we conservatively assume it will be on stack even if VAR is
+	 eventually put into register after RA pass.  For non-automatic
+	 variables, which won't be on stack, we collect alignment of
+	 type and ignore user specified alignment.  */
+      if (TREE_STATIC (var) || DECL_EXTERNAL (var))
+	align = TYPE_ALIGN (TREE_TYPE (var));
+      else
+	align = DECL_ALIGN (var);
+
+      if (cfun->stack_alignment_estimated < align)
+        {
+          /* stack_alignment_estimated shouldn't change after stack
+             realign decision made */
+          gcc_assert(!cfun->stack_realign_processed);
+	  cfun->stack_alignment_estimated = align;
+	}
+    }
+
   if (TREE_CODE (var) != VAR_DECL)
     ;
   else if (DECL_EXTERNAL (var))
@@ -1828,6 +1868,44 @@ discover_nonconstant_array_refs (void)
     }
 }
 
+/* This function sets crtl->args.internal_arg_pointer to a virtual
+   register if DRAP is needed.  Local register allocator will replace
+   virtual_incoming_args_rtx with the virtual register.  */
+
+static void
+handle_drap (void)
+{
+  rtx internal_arg_rtx; 
+
+  if (!MAX_VECTORIZE_STACK_ALIGNMENT)
+    return;
+  
+  if (current_function_calls_alloca
+      || cfun->has_nonlocal_label
+      || current_function_has_nonlocal_goto
+      || current_function_calls_eh_return)
+    cfun->need_drap = true;
+
+  /* Call targetm.calls.internal_arg_pointer again.  This time it will
+     return a virtual register if DRAP is needed.  */
+  internal_arg_rtx = targetm.calls.internal_arg_pointer (); 
+
+  /* Assertion to check internal_arg_pointer is set to the right rtx
+     here.  */
+  gcc_assert (crtl->args.internal_arg_pointer == 
+             virtual_incoming_args_rtx);
+
+  /* Do nothing if no need to replace virtual_incoming_args_rtx.  */
+  if (crtl->args.internal_arg_pointer != internal_arg_rtx)
+    {
+      crtl->args.internal_arg_pointer = internal_arg_rtx;
+
+      /* Call fixup_tail_calls to clean up REG_EQUIV note if DRAP is
+         needed. */
+      fixup_tail_calls ();
+    }
+}
+
 /* Translate the intermediate representation contained in the CFG
    from GIMPLE trees to RTL.
 
@@ -1930,6 +2008,9 @@ tree_expand_cfg (void)
   sbitmap_free (blocks);
 
   compact_blocks ();
+
+  handle_drap ();
+
 #ifdef ENABLE_CHECKING
   verify_flow_info ();
 #endif
Index: stmt.c
===================================================================
--- stmt.c	(.../trunk/gcc)	(revision 134203)
+++ stmt.c	(.../branches/stack/gcc)	(revision 134368)
@@ -1819,7 +1819,7 @@ expand_nl_goto_receiver (void)
 	{
 	  /* Now restore our arg pointer from the address at which it
 	     was saved in our stack frame.  */
-	  emit_move_insn (virtual_incoming_args_rtx,
+	  emit_move_insn (crtl->args.internal_arg_pointer,
 			  copy_to_reg (get_arg_pointer_save_area ()));
 	}
     }
Index: reload1.c
===================================================================
--- reload1.c	(.../trunk/gcc)	(revision 134203)
+++ reload1.c	(.../branches/stack/gcc)	(revision 134368)
@@ -2279,7 +2279,13 @@ set_label_offsets (rtx x, rtx insn, int 
 	  if (offsets_at[CODE_LABEL_NUMBER (x) - first_label_num][i]
 	      != (initial_p ? reg_eliminate[i].initial_offset
 		  : reg_eliminate[i].offset))
-	    reg_eliminate[i].can_eliminate = 0;
+            {
+	      /* Must not disable reg eliminate because stack realignment
+	         must eliminate frame pointer to stack pointer.  */
+	      gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+			  || ! stack_realign_fp);
+	      reg_eliminate[i].can_eliminate = 0;
+            }
 
       return;
 
@@ -2358,7 +2364,13 @@ set_label_offsets (rtx x, rtx insn, int 
 	 offset because we are doing a jump to a variable address.  */
       for (p = reg_eliminate; p < &reg_eliminate[NUM_ELIMINABLE_REGS]; p++)
 	if (p->offset != p->initial_offset)
-	  p->can_eliminate = 0;
+	  {
+	    /* Must not disable reg eliminate because stack realignment
+	       must eliminate frame pointer to stack pointer.  */
+	    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+			|| ! stack_realign_fp);
+	    p->can_eliminate = 0;
+	  }
       break;
 
     default:
@@ -2849,7 +2861,13 @@ elimination_effects (rtx x, enum machine
       /* If we modify the source of an elimination rule, disable it.  */
       for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
 	if (ep->from_rtx == XEXP (x, 0))
-	  ep->can_eliminate = 0;
+	  {
+	    /* Must not disable reg eliminate because stack realignment
+	       must eliminate frame pointer to stack pointer.  */
+	    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+			|| ! stack_realign_fp);
+	    ep->can_eliminate = 0;
+	  }
 
       /* If we modify the target of an elimination rule by adding a constant,
 	 update its offset.  If we modify the target in any other way, we'll
@@ -2875,7 +2893,14 @@ elimination_effects (rtx x, enum machine
 		    && CONST_INT_P (XEXP (XEXP (x, 1), 1)))
 		  ep->offset -= INTVAL (XEXP (XEXP (x, 1), 1));
 		else
-		  ep->can_eliminate = 0;
+		  {
+		    /* Must not disable reg eliminate because stack
+		       realignment must eliminate frame pointer to
+		       stack pointer.  */
+		    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+				|| ! stack_realign_fp);
+		    ep->can_eliminate = 0;
+		  }
 	      }
 	  }
 
@@ -2918,7 +2943,13 @@ elimination_effects (rtx x, enum machine
 	 know how this register is used.  */
       for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
 	if (ep->from_rtx == XEXP (x, 0))
-	  ep->can_eliminate = 0;
+	  {
+	    /* Must not disable reg eliminate because stack realignment
+	       must eliminate frame pointer to stack pointer.  */
+	    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+			|| ! stack_realign_fp);
+	    ep->can_eliminate = 0;
+	  }
 
       elimination_effects (XEXP (x, 0), mem_mode);
       return;
@@ -2929,7 +2960,13 @@ elimination_effects (rtx x, enum machine
 	 be performed.  Otherwise, we need not be concerned about it.  */
       for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
 	if (ep->to_rtx == XEXP (x, 0))
-	  ep->can_eliminate = 0;
+	  {
+	    /* Must not disable reg eliminate because stack realignment
+	       must eliminate frame pointer to stack pointer.  */
+	    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+			|| ! stack_realign_fp);
+	    ep->can_eliminate = 0;
+	  }
 
       elimination_effects (XEXP (x, 0), mem_mode);
       return;
@@ -2963,7 +3000,14 @@ elimination_effects (rtx x, enum machine
 		    && GET_CODE (XEXP (src, 1)) == CONST_INT)
 		  ep->offset -= INTVAL (XEXP (src, 1));
 		else
-		  ep->can_eliminate = 0;
+		  {
+		    /* Must not disable reg eliminate because stack
+		       realignment must eliminate frame pointer to
+		       stack pointer.  */
+		    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+				|| ! stack_realign_fp);
+		    ep->can_eliminate = 0;
+		  }
 	      }
 	}
 
@@ -3292,7 +3336,14 @@ eliminate_regs_in_insn (rtx insn, int re
 	      for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
 		   ep++)
 		if (ep->from_rtx == orig_operand[i])
-		  ep->can_eliminate = 0;
+		  {
+		    /* Must not disable reg eliminate because stack
+		       realignment must eliminate frame pointer to
+		       stack pointer.  */
+		    gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+				|| ! stack_realign_fp);
+		    ep->can_eliminate = 0;
+		  }
 	    }
 
 	  /* Companion to the above plus substitution, we can allow
@@ -3422,7 +3473,13 @@ eliminate_regs_in_insn (rtx insn, int re
   for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
     {
       if (ep->previous_offset != ep->offset && ep->ref_outside_mem)
-	ep->can_eliminate = 0;
+	{
+	  /* Must not disable reg eliminate because stack realignment
+	     must eliminate frame pointer to stack pointer.  */
+	  gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+		      || ! stack_realign_fp);
+	  ep->can_eliminate = 0;
+	}
 
       ep->ref_outside_mem = 0;
 
@@ -3498,6 +3555,11 @@ mark_not_eliminable (rtx dest, const_rtx
 	    || XEXP (SET_SRC (x), 0) != dest
 	    || GET_CODE (XEXP (SET_SRC (x), 1)) != CONST_INT))
       {
+	/* Must not disable reg eliminate because stack realignment
+	   must eliminate frame pointer to stack pointer.  */
+	gcc_assert (! MAX_VECTORIZE_STACK_ALIGNMENT
+		    || ! stack_realign_fp);
+
 	reg_eliminate[i].can_eliminate_previous
 	  = reg_eliminate[i].can_eliminate = 0;
 	num_eliminable--;
@@ -3668,8 +3730,11 @@ update_eliminables (HARD_REG_SET *pset)
   frame_pointer_needed = 1;
   for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
     {
-      if (ep->can_eliminate && ep->from == FRAME_POINTER_REGNUM
-	  && ep->to != HARD_FRAME_POINTER_REGNUM)
+      if (ep->can_eliminate
+	  && ep->from == FRAME_POINTER_REGNUM
+	  && ep->to != HARD_FRAME_POINTER_REGNUM
+	  && (! MAX_VECTORIZE_STACK_ALIGNMENT
+	      || ! cfun->stack_realign_needed))
 	frame_pointer_needed = 0;
 
       if (! ep->can_eliminate && ep->can_eliminate_previous)
@@ -3736,7 +3791,10 @@ init_elim_table (void)
       ep->to = ep1->to;
       ep->can_eliminate = ep->can_eliminate_previous
 	= (CAN_ELIMINATE (ep->from, ep->to)
-	   && ! (ep->to == STACK_POINTER_REGNUM && frame_pointer_needed));
+	   && ! (ep->to == STACK_POINTER_REGNUM
+		 && frame_pointer_needed 
+		 && (! MAX_VECTORIZE_STACK_ALIGNMENT
+		     || ! stack_realign_fp)));
     }
 #else
   reg_eliminate[0].from = reg_eliminate_1[0].from;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-04-17  8:24 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-04-17 10:24 [PATCH 2/6] Stack align: Collect alignment info Ye, Joey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).