From: Marc Lehmann <pcg@goof.com>
To: egcs@cygnus.com
Subject: double alignment patch for x86
Date: Sun, 17 Aug 1997 19:41:24 -0000 [thread overview]
Message-ID: <E0x0Aav-0001rY-00.1997-08-17-21-02-33_pgcc_forever_@cerebro> (raw)
Here is my double alignment patch, this time relative to egcs-ss-970814...
It improves performance *much* on the x86...
it adds three new switches
-malign-double
(no, gcc already had this one)
it breaks the x86 abi
-mstack-align-double
aligns local variables
does NOT break the ABI, defaults to on
-marg-align-double
aligns function arguments, too
breaks the ABI, even more severely than -malign-double
does not work because of a bug in gcc
Here it is, any feedback appreciated:
Thu Aug 7 22:30:59 1997 Bernd Schmidt, Marc Lehmann <pcg@goof.com>
* i386.c (ix86_sp_offset, ix86_frame_size): New functions.
(function_prologue, ix86_expand_prologue, ix86_expand_epilogue):
use ix86_frame_size () instead of get_frame_size ().
* i386.h (MASK_STACK_ALIGN_DOUBLE, MASK_ARG_ALIGN_DOUBLE,
TARGET_STACK_ALIGN_DOUBLE, TARGET_ARG_ALIGN_DOUBLE):
new defines.
(TARGET_SWITCHES): new switches -m[no-]stack-align-double,
-m[no-]arg-align-double, -mall-align-double. -mstack-align-double
enabled by default.
(FUNCTION_ARG_BOUNDARY): align DFmode and XFmode arguments
to 64 bit when -marg-align-double.
(STACK_BOUNDARY, BIGGEST_ALIGNMENT): set to 64 if double-alignment
requested.
(INITIAL_ELIMINATION_OFFSET): use ix86_sp_offset () instead
of get_frame_size ().
--- config/i386/i386.c.orig Sun Aug 17 20:49:15 1997
+++ config/i386/i386.c Sun Aug 17 20:49:34 1997
@@ -1853,6 +1853,74 @@
}
}
+/* Calculate initial elimination offsets for fp/ap pointer elimination.
+
+ FOR_ARG selects which of two offsets is returned; the nonzero variant
+ is always 4 or 8 bytes larger than the zero variant (presumably to
+ step over the saved PC -- confirm against INITIAL_ELIMINATION_OFFSET).
+
+ The result is get_frame_size () plus 4 bytes for every register below
+ STACK_POINTER_REGNUM that is ever live and not call-used (the PIC
+ register is also counted when PIC code uses the offset table or the
+ constant pool), plus an adjustment:
+
+ - TARGET_STACK_ALIGN_DOUBLE off, or an empty frame whose register
+ bytes are 4 mod 8: FOR_ARG ? 4 : 0.
+ - frame size + register bytes is a multiple of 8: FOR_ARG ? 8 : 0.
+ - otherwise: FOR_ARG ? 12 : 4.
+
+ NOTE(review): the three cases mirror the no-frame-pointer cases in
+ ix86_frame_size (); the two functions must be kept in step. */
+int
+ix86_sp_offset (for_arg)
+ int for_arg;
+{
+ int regno;
+ int offset = 0; /* bytes counted for registers, 4 per register */
+ int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool);
+ int tsize = get_frame_size (); /* frame size before any padding */
+
+ for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
+ if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ offset += 4;
+
+ if (! TARGET_STACK_ALIGN_DOUBLE
+ || (tsize == 0 && (offset % 8) == 4))
+ return tsize + offset + (for_arg ? 4 : 0); /* no padding in this case */
+
+ if (((tsize + offset) % 8) == 0)
+ return tsize + offset + (for_arg ? 8 : 0);
+
+ return tsize + offset + (for_arg ? 12 : 4); /* (tsize + offset) % 8 != 0 */
+}
+
+/* Calculate the size of the frame for this function. This may be larger
+ than what get_frame_size reports if we need to ensure alignment of
+ doubles.
+
+ Without TARGET_STACK_ALIGN_DOUBLE the value of get_frame_size () is
+ returned unchanged. With a frame pointer, a size that is 4 mod 8 is
+ increased by 4 and any other size is returned as is. Without a frame
+ pointer, 4 bytes are counted for every register below
+ STACK_POINTER_REGNUM that is ever live and not call-used (plus the PIC
+ register when PIC code uses the offset table or the constant pool), and
+ the size is then padded by 0, 4 or 8 bytes depending on that count.
+
+ NOTE(review): the register loop runs before the frame_pointer_needed
+ test, but its result is not used on the frame-pointer path. */
+static int
+ix86_frame_size ()
+{
+ int regno;
+ int offset = 0; /* bytes counted for registers, 4 per register */
+ int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool);
+ int limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+ int tsize = get_frame_size (); /* frame size before any padding */
+
+ if (! TARGET_STACK_ALIGN_DOUBLE)
+ return tsize;
+
+ for (regno = 0; regno < limit; regno++)
+ if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ offset += 4;
+
+ /* If we need a frame pointer, only the frame size itself is adjusted:
+ a size that is 4 mod 8 grows by 4, anything else is left alone. */
+ if (frame_pointer_needed)
+ {
+ if ((tsize % 8) == 4)
+ return tsize+4;
+ else
+ return tsize;
+ }
+
+ /* Otherwise, it gets complicated: the padding depends on the register
+ bytes counted above as well as on the frame size. */
+
+ /* There's one case where we don't have to do anything: an empty frame
+ whose register bytes are 4 mod 8. */
+ if (tsize == 0 && (offset % 8) == 4)
+ return tsize;
+
+ if (((tsize + offset) % 8) == 0)
+ return tsize + 4;
+
+ return tsize + 8; /* (tsize + offset) % 8 != 0 */
+}
+
/* Set up the stack and frame (if desired) for the function. */
void
@@ -1865,7 +1933,7 @@
rtx xops[4];
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
- long tsize = get_frame_size ();
+ long tsize = ix86_frame_size ();
/* pic references don't explicitly mention pic_offset_table_rtx */
if (TARGET_SCHEDULE_PROLOGUE)
@@ -1948,7 +2016,7 @@
rtx xops[4];
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
- long tsize = get_frame_size ();
+ long tsize = ix86_frame_size ();
rtx insn;
if (!TARGET_SCHEDULE_PROLOGUE)
@@ -2100,7 +2168,7 @@
rtx xops[3];
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
- long tsize = get_frame_size ();
+ long tsize = ix86_frame_size ();
/* Compute the number of registers to pop */
--- config/i386/i386.h.orig Sun Aug 17 20:49:19 1997
+++ config/i386/i386.h Sun Aug 17 20:49:34 1997
@@ -96,6 +96,8 @@
#define MASK_DEBUG_ARG 000020000000 /* Debug function_arg */
#define MASK_SCHEDULE_PROLOGUE 000040000000 /* Emit prologue as rtl */
#define MASK_STACK_PROBE 000100000000 /* Enable stack probing */
+#define MASK_STACK_ALIGN_DOUBLE 000200000000 /* Align doubles on the stack */
+#define MASK_ARG_ALIGN_DOUBLE 000400000000 /* Align doubles in argument slots */
/* Use the floating point instructions */
#define TARGET_80387 (target_flags & MASK_80387)
@@ -110,6 +112,16 @@
faster code on the pentium. */
#define TARGET_ALIGN_DOUBLE (target_flags & MASK_ALIGN_DOUBLE)
+/* Align doubles to a two word boundary on the stack. This option by itself
+ does not break binary compatibility. It is implied by
+ TARGET_ALIGN_DOUBLE. */
+#define TARGET_STACK_ALIGN_DOUBLE (target_flags & MASK_STACK_ALIGN_DOUBLE)
+
+/* Align doubles to a two word boundary on the stack even if used
+ as arguments. This option by itself will break the ABI in much the same
+ way TARGET_ALIGN_DOUBLE does. */
+#define TARGET_ARG_ALIGN_DOUBLE (target_flags & MASK_ARG_ALIGN_DOUBLE)
+
/* Put uninitialized locals into bss, not data.
Meaningful only on svr3. */
#define TARGET_SVR3_SHLIB (target_flags & MASK_SVR3_SHLIB)
@@ -207,8 +219,15 @@
{ "no-debug-arg", -MASK_DEBUG_ARG }, \
{ "stack-arg-probe", MASK_STACK_PROBE }, \
{ "no-stack-arg-probe", -MASK_STACK_PROBE }, \
+ { "stack-align-double", MASK_STACK_ALIGN_DOUBLE }, \
+ { "no-stack-align-double", -MASK_STACK_ALIGN_DOUBLE }, \
+ { "arg-align-double", MASK_ARG_ALIGN_DOUBLE }, \
+ { "no-arg-align-double", -MASK_ARG_ALIGN_DOUBLE }, \
+ { "all-align-double", MASK_ALIGN_DOUBLE \
+ | MASK_STACK_ALIGN_DOUBLE \
+ | MASK_ARG_ALIGN_DOUBLE}, \
SUBTARGET_SWITCHES \
- { "", MASK_SCHEDULE_PROLOGUE | TARGET_DEFAULT}}
+ { "", MASK_SCHEDULE_PROLOGUE | MASK_STACK_ALIGN_DOUBLE | TARGET_DEFAULT}}
/* Which processor to schedule for. The cpu attribute defines a list that
mirrors this list, so changes to i386.md must be made at the same time. */
@@ -396,8 +415,14 @@
/* Allocation boundary (in *bits*) for storing arguments in argument list. */
#define PARM_BOUNDARY 32
+/* Aligning doubles to a two-word boundary is faster on Pentiums
+ and Pentium Pros. Unfortunately, it breaks the ABI, so
+ do it only when requested via -marg-align-double. */
+#define FUNCTION_ARG_BOUNDARY(MODE,TYPE) \
+ ((TARGET_ARG_ALIGN_DOUBLE && (MODE == DFmode || MODE == XFmode)) ? 64 : 32)
+
/* Boundary (in *bits*) on which stack pointer should be aligned. */
-#define STACK_BOUNDARY 32
+#define STACK_BOUNDARY (TARGET_STACK_ALIGN_DOUBLE || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
/* Allocation boundary (in *bits*) for the code of a function.
For i486, we get better performance by aligning to a cache
@@ -416,7 +441,11 @@
The published ABIs say that doubles should be aligned on word
boundaries, but the Pentium gets better performance with them
aligned on 64 bit boundaries. */
-#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE || TARGET_STACK_ALIGN_DOUBLE \
+ || TARGET_ARG_ALIGN_DOUBLE ? 64 : 32)
+
+/* Biggest alignment any structure field can require in bits. */
+#define BIGGEST_FIELD_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
/* align DFmode constants and nonaggregates */
#define ALIGN_DFmode (!TARGET_386)
@@ -1513,19 +1542,10 @@
(OFFSET) = 8; /* Skip saved PC and previous frame pointer */ \
else \
{ \
- int regno; \
- int offset = 0; \
- \
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) \
- if ((regs_ever_live[regno] && ! call_used_regs[regno]) \
- || (current_function_uses_pic_offset_table \
- && regno == PIC_OFFSET_TABLE_REGNUM)) \
- offset += 4; \
- \
- (OFFSET) = offset + get_frame_size (); \
- \
if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \
- (OFFSET) += 4; /* Skip saved PC */ \
+ (OFFSET) = ix86_sp_offset (1); \
+ else \
+ (OFFSET) = ix86_sp_offset (0); \
} \
}
\f
@@ -2699,7 +2719,7 @@
extern int is_fp_store ();
extern int agi_dependent ();
extern int reg_mentioned_in_mem ();
-
+extern int ix86_sp_offset ();
#ifdef NOTYET
extern struct rtx_def *copy_all_rtx ();
extern void rewrite_address ();
next reply other threads:[~1997-08-17 19:41 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
1997-08-17 19:41 Marc Lehmann [this message]
1997-08-17 19:41 John Carr
1997-08-17 21:48 Jeffrey A Law
1997-08-18 14:53 Monday morning Philippe Laliberte
1997-08-18 15:11 ` double alignment patch for x86 Dave Love
1997-08-18 20:46 coxs
1997-08-18 20:47 meissner
1997-08-19 2:36 2 (small?) problems Ian Lance Taylor
1997-08-19 3:24 ` double alignment patch for x86 Jeffrey A Law
1997-08-19 3:24 meissner
1997-08-19 3:52 ` Jeffrey A Law
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=E0x0Aav-0001rY-00.1997-08-17-21-02-33_pgcc_forever_@cerebro \
--to=pcg@goof.com \
--cc=egcs@cygnus.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).