public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
From: Marc Lehmann <pcg@goof.com>
To: egcs@cygnus.com
Subject: double alignment patch for x86
Date: Sun, 17 Aug 1997 19:41:24 -0000	[thread overview]
Message-ID: <E0x0Aav-0001rY-00.1997-08-17-21-02-33_pgcc_forever_@cerebro> (raw)

Here is my double alignment patch, this time relative to egcs-ss-970814...

It improves performance *much* on the x86...

it adds three new switches

-malign-double
 (no, gcc already had this one)
 it breaks the x86 abi

-mstack-align-double
 aligns local variables
 does NOT break the ABI, defaults to on
 
-marg-align-double
 aligns function arguments, too
 breaks the ABI, even more severely than -malign-double
 does not work because of a bug in gcc

Here it is, any feedback appreciated:

Thu Aug  7 22:30:59 1997  Bernd Schmidt, Marc Lehmann  <pcg@goof.com>

	* i386.c (ix86_sp_offset, ix86_frame_size): New functions.
	(function_prologue, ix86_expand_prologue, ix86_expand_epilogue):
	use ix86_frame_size () instead of get_frame_size ().
	* i386.h (MASK_STACK_ALIGN_DOUBLE, MASK_ARG_ALIGN_DOUBLE,
	TARGET_STACK_ALIGN_DOUBLE, TARGET_ARG_ALIGN_DOUBLE):
	new defines.
	(TARGET_SWITCHES): new switches -m[no-]stack-align-double,
	-m[no-]arg-align-double, -mall-align-double. -mstack-align-double
	enabled by default.
	(FUNCTION_ARG_BOUNDARY): align DFmode and XFmode arguments
	to 64 bit when -marg-align-double.
	(STACK_BOUNDARY, BIGGEST_ALIGNMENT): set to 64 if double-alignment
	requested.
	(INITIAL_ELIMINATION_OFFSET): use ix86_sp_offset () instead
	of get_frame_size ().
	
--- config/i386/i386.c.orig	Sun Aug 17 20:49:15 1997
+++ config/i386/i386.c	Sun Aug 17 20:49:34 1997
@@ -1853,6 +1853,74 @@
     }
 }
 
+/* Return the offset INITIAL_ELIMINATION_OFFSET uses when eliminating the frame or arg pointer to the stack pointer.  */
+int
+ix86_sp_offset (for_arg)
+     int for_arg;	/* Nonzero for the arg pointer -> stack pointer case, zero otherwise.  */
+{
+  int regno;
+  int offset = 0;	/* Bytes counted for registers below; 4 per counted register.  */
+  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+				  || current_function_uses_const_pool);
+  int tsize = get_frame_size ();	/* Frame size before any alignment padding.  */
+  
+  for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
+    if ((regs_ever_live[regno] && ! call_used_regs[regno])
+	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+      offset += 4;	/* Live call-saved register, or the PIC register when it is in use.  */
+
+  if (! TARGET_STACK_ALIGN_DOUBLE
+      || (tsize == 0 && (offset % 8) == 4))
+    return tsize + offset + (for_arg ? 4 : 0);	/* No padding; the extra 4 presumably skips the saved PC.  */
+
+  if (((tsize + offset) % 8) == 0)
+    return tsize + offset + (for_arg ? 8 : 0);	/* Case where ix86_frame_size adds 4 without a frame pointer.  */
+  
+  return tsize + offset + (for_arg ? 12 : 4);	/* Case where ix86_frame_size adds 8 without a frame pointer.  */
+}
+
+/* Return the frame size to use for this function.  This is what
+   get_frame_size reports, enlarged by 4 or 8 bytes in some cases when
+   TARGET_STACK_ALIGN_DOUBLE asks for alignment of doubles.  */
+static int
+ix86_frame_size ()
+{
+  int regno;
+  int offset = 0;	/* Bytes counted for registers below; 4 per counted register.  */
+  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+				  || current_function_uses_const_pool);
+  int limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+  int tsize = get_frame_size ();
+
+  if (! TARGET_STACK_ALIGN_DOUBLE)
+    return tsize;	/* Alignment not requested: size is unchanged.  */
+
+  for (regno = 0; regno < limit; regno++)
+    if ((regs_ever_live[regno] && ! call_used_regs[regno])
+	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+      offset += 4;	/* Live call-saved register, or the PIC register when it is in use.  */
+  
+  /* With a frame pointer, grow a size that is 4 mod 8 by 4.  NOTE(review): OFFSET is not used on this path.  */
+  if (frame_pointer_needed)
+    { 
+      if ((tsize % 8) == 4)
+	return tsize+4;
+      else
+	return tsize;
+    }
+  
+  /* Otherwise, it gets complicated: the result depends on OFFSET as well as TSIZE.  */
+  
+  /* There's one case where we don't have to do anything: no locals and OFFSET is 4 mod 8.  */
+  if (tsize == 0 && (offset % 8) == 4)
+    return tsize;
+  
+  if (((tsize + offset) % 8) == 0)
+    return tsize + 4;	/* ix86_sp_offset treats this same condition as its middle case.  */
+  
+  return tsize + 8;	/* All remaining cases.  */
+}
+
 /* Set up the stack and frame (if desired) for the function.  */
 
 void
@@ -1865,7 +1933,7 @@
   rtx xops[4];
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
-  long tsize = get_frame_size ();
+  long tsize = ix86_frame_size ();
 
   /* pic references don't explicitly mention pic_offset_table_rtx */
   if (TARGET_SCHEDULE_PROLOGUE)
@@ -1948,7 +2016,7 @@
   rtx xops[4];
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
-  long tsize = get_frame_size ();
+  long tsize = ix86_frame_size ();
   rtx insn;
 
   if (!TARGET_SCHEDULE_PROLOGUE)
@@ -2100,7 +2168,7 @@
   rtx xops[3];
   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
 				  || current_function_uses_const_pool);
-  long tsize = get_frame_size ();
+  long tsize = ix86_frame_size ();
 
   /* Compute the number of registers to pop */
 
--- config/i386/i386.h.orig	Sun Aug 17 20:49:19 1997
+++ config/i386/i386.h	Sun Aug 17 20:49:34 1997
@@ -96,6 +96,8 @@
 #define MASK_DEBUG_ARG		000020000000	/* Debug function_arg */   
 #define MASK_SCHEDULE_PROLOGUE  000040000000    /* Emit prologue as rtl */
 #define MASK_STACK_PROBE	000100000000	/* Enable stack probing */
+#define MASK_STACK_ALIGN_DOUBLE	000200000000	/* Align doubles on the stack */
+#define MASK_ARG_ALIGN_DOUBLE	000400000000	/* Align doubles in argument slots */
 
 /* Use the floating point instructions */
 #define TARGET_80387 (target_flags & MASK_80387)
@@ -110,6 +112,16 @@
    faster code on the pentium.  */
 #define TARGET_ALIGN_DOUBLE (target_flags & MASK_ALIGN_DOUBLE)
 
+/* Align doubles to a two word boundary on the stack.  This option by itself
+   does not break binary compatibility.  It is implied by
+   TARGET_ALIGN_DOUBLE.  */
+#define TARGET_STACK_ALIGN_DOUBLE (target_flags & MASK_STACK_ALIGN_DOUBLE)
+
+/* Align doubles to a two word boundary on the stack even if used
+   as arguments.  This option will break the ABI in much the same way
+   TARGET_ALIGN_DOUBLE does.  */
+#define TARGET_ARG_ALIGN_DOUBLE (target_flags & MASK_ARG_ALIGN_DOUBLE)
+
 /* Put uninitialized locals into bss, not data.
    Meaningful only on svr3.  */
 #define TARGET_SVR3_SHLIB (target_flags & MASK_SVR3_SHLIB)
@@ -207,8 +219,15 @@
   { "no-debug-arg",		-MASK_DEBUG_ARG },			\
   { "stack-arg-probe",		 MASK_STACK_PROBE },			\
   { "no-stack-arg-probe",	-MASK_STACK_PROBE },			\
+  { "stack-align-double",	 MASK_STACK_ALIGN_DOUBLE },		\
+  { "no-stack-align-double",	-MASK_STACK_ALIGN_DOUBLE },		\
+  { "arg-align-double",		 MASK_ARG_ALIGN_DOUBLE },		\
+  { "no-arg-align-double",	-MASK_ARG_ALIGN_DOUBLE },		\
+  { "all-align-double",		 MASK_ALIGN_DOUBLE			\
+				 | MASK_STACK_ALIGN_DOUBLE		\
+				 | MASK_ARG_ALIGN_DOUBLE},		\
   SUBTARGET_SWITCHES							\
-  { "", MASK_SCHEDULE_PROLOGUE | TARGET_DEFAULT}}
+  { "", MASK_SCHEDULE_PROLOGUE | MASK_STACK_ALIGN_DOUBLE | TARGET_DEFAULT}}
 
 /* Which processor to schedule for. The cpu attribute defines a list that
    mirrors this list, so changes to i386.md must be made at the same time.  */
@@ -396,8 +415,14 @@
 /* Allocation boundary (in *bits*) for storing arguments in argument list.  */
 #define PARM_BOUNDARY 32
 
+/* Aligning doubles to a two-word boundary is faster on Pentiums
+   and Pentium Pros.  Unfortunately, it breaks the ABI, so
+   do it only when requested via -marg-align-double.  */
+#define FUNCTION_ARG_BOUNDARY(MODE,TYPE) \
+  ((TARGET_ARG_ALIGN_DOUBLE && (MODE == DFmode || MODE == XFmode)) ? 64 : 32)
+
 /* Boundary (in *bits*) on which stack pointer should be aligned.  */
-#define STACK_BOUNDARY 32
+#define STACK_BOUNDARY (TARGET_STACK_ALIGN_DOUBLE || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
 
 /* Allocation boundary (in *bits*) for the code of a function.
    For i486, we get better performance by aligning to a cache
@@ -416,7 +441,11 @@
    The published ABIs say that doubles should be aligned on word
    boundaries, but the Pentium gets better performance with them
    aligned on 64 bit boundaries. */
-#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE || TARGET_STACK_ALIGN_DOUBLE \
+			   || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
+
+/* Biggest alignment any structure field can require in bits.  */
+#define BIGGEST_FIELD_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
 
 /* align DFmode constants and nonaggregates */
 #define ALIGN_DFmode (!TARGET_386)
@@ -1513,19 +1542,10 @@
     (OFFSET) = 8;	/* Skip saved PC and previous frame pointer */	\
   else									\
     {									\
-      int regno;							\
-      int offset = 0;							\
-									\
-      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)		\
-	if ((regs_ever_live[regno] && ! call_used_regs[regno])		\
-	    || (current_function_uses_pic_offset_table			\
-		&& regno == PIC_OFFSET_TABLE_REGNUM))			\
-	  offset += 4;							\
-									\
-      (OFFSET) = offset + get_frame_size ();				\
-									\
       if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM)	\
-	(OFFSET) += 4;	/* Skip saved PC */				\
+        (OFFSET) = ix86_sp_offset (1);					\
+      else								\
+        (OFFSET) = ix86_sp_offset (0);					\
     }									\
 }
 \f
@@ -2699,7 +2719,7 @@
 extern int is_fp_store ();
 extern int agi_dependent ();
 extern int reg_mentioned_in_mem ();
-
+extern int ix86_sp_offset ();
 #ifdef NOTYET
 extern struct rtx_def *copy_all_rtx ();
 extern void rewrite_address ();

             reply	other threads:[~1997-08-17 19:41 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
1997-08-17 19:41 Marc Lehmann [this message]
1997-08-17 19:41 John Carr
1997-08-17 21:48 Jeffrey A Law
1997-08-18 14:53 Monday morning Philippe Laliberte
1997-08-18 15:11 ` double alignment patch for x86 Dave Love
1997-08-18 20:46 coxs
1997-08-18 20:47 meissner
1997-08-19  2:36 2 (small?) problems Ian Lance Taylor
1997-08-19  3:24 ` double alignment patch for x86 Jeffrey A Law
1997-08-19  3:24 meissner
1997-08-19  3:52 ` Jeffrey A Law

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E0x0Aav-0001rY-00.1997-08-17-21-02-33_pgcc_forever_@cerebro \
    --to=pcg@goof.com \
    --cc=egcs@cygnus.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).