From: Daniel Santos <daniel.santos@pobox.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: gcc-patches <gcc-patches@gcc.gnu.org>, Jan Hubicka <hubicka@ucw.cz>
Subject: [PATCH 11/12 rev1] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation
Date: Thu, 04 May 2017 22:11:00 -0000 [thread overview]
Message-ID: <20170504220320.29790-1-daniel.santos@pobox.com> (raw)
In-Reply-To: <20170427080932.11703-11-daniel.santos@pobox.com>
Now generates RTL with appropriate stack restore and leave patterns. Slightly
cleaned up code that calculates the number of vector elements for clarity.
Tests are good when rebased onto gcc-7_1_0-release as HEAD currently fails to
bootstrap.
Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
gcc/config/i386/i386.c | 287 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 278 insertions(+), 9 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f2772b2d10e..e43dc819f9a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14148,6 +14148,78 @@ ix86_elim_entry_set_got (rtx reg)
}
}
+/* Build a SET rtx that moves REG to or from the frame slot addressed by
+   FRAME_REG + OFFSET.  When STORE is true the memory slot is the
+   destination (REG is saved); otherwise REG is the destination (REG is
+   reloaded).  The slot is accessed in REG's own mode.  */
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  /* Start from the bare base register so the address is always
+     initialized; it is only wrapped in a PLUS for a non-zero offset.  */
+  rtx addr = frame_reg;
+  rtx mem;
+
+  if (offset)
+    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
+  mem = gen_frame_mem (GET_MODE (reg), addr);
+  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
+}
+
+/* Return a SET that loads REG from the frame slot at FRAME_REG + OFFSET
+ (gen_frame_set with STORE false). */
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+ return gen_frame_set (reg, frame_reg, offset, false);
+}
+
+/* Return a SET that stores REG into the frame slot at FRAME_REG + OFFSET
+ (gen_frame_set with STORE true). */
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+ return gen_frame_set (reg, frame_reg, offset, true);
+}
+
+/* Emit the prologue insns that save registers through the out-of-line
+   ms2sysv save stub.
+
+   FRAME is the frame layout of the current function.  The sequence
+   emitted is:
+     1. a SET of RAX to the stub's base address,
+     2. a stack adjustment moving the stack pointer down to
+        FRAME.stack_pointer_offset, and
+     3. a frame-related PARALLEL holding a USE of the stub symbol followed
+        by one store per stub-managed register, addressed off RAX.  */
+static void
+ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+                          + m->call_ms2sysv_extra_regs;
+  /* One element per saved register plus the leading USE of the stub.  */
+  rtvec v = rtvec_alloc (ncregs + 1);
+  unsigned int align, i, vi = 0;
+  rtx_insn *insn;
+  rtx sym, addr;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset () + m->fs.sp_offset;
+  HOST_WIDE_INT stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset;
+  HOST_WIDE_INT stack_align_off_in = xlogue.get_stack_align_off_in ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert (stack_align_off_in == (m->fs.sp_offset & UNITS_PER_WORD));
+
+  /* Get the stub symbol.  */
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+                                                  : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+
+  /* Setup RAX as the stub's base pointer.  The base address must be
+     aligned enough for the V4SFmode stores emitted below.  */
+  align = GET_MODE_ALIGNMENT (V4SFmode);
+  addr = choose_baseaddr (rax_offset, &align);
+  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+  emit_insn (gen_rtx_SET (rax, addr));
+
+  /* Allocate the frame before the stores are emitted; it must be at least
+     as large as the area the stub layout uses.  */
+  gcc_assert (stack_alloc_size >= xlogue.get_stack_space_used ());
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                             GEN_INT (-stack_alloc_size), -1,
+                             m->fs.cfa_reg == stack_pointer_rtx);
+
+  /* Describe each register save as a store at RAX - offset.  SSE registers
+     are saved in V4SFmode, all others in word_mode.  */
+  for (i = 0; i < ncregs; ++i)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
+                             r.regno);
+      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
+    }
+
+  /* Every slot of the vector must have been filled.  */
+  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = true;
+}
+
/* Expand the prologue into a bunch of separate insns. */
void
@@ -14395,7 +14467,7 @@ ix86_expand_prologue (void)
performing the actual alignment. Otherwise we cannot guarantee
that there's enough storage above the realignment point. */
allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
- if (allocate)
+ if (allocate && !m->call_ms2sysv)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-allocate), -1, false);
@@ -14403,7 +14475,6 @@ ix86_expand_prologue (void)
insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
stack_pointer_rtx,
GEN_INT (-align_bytes)));
-
/* For the purposes of register save area addressing, the stack
pointer can no longer be used to access anything in the frame
below m->fs.sp_realigned_offset and the frame pointer cannot be
@@ -14420,6 +14491,9 @@ ix86_expand_prologue (void)
m->fs.sp_valid = false;
}
+ if (m->call_ms2sysv)
+ ix86_emit_outlined_ms2sysv_save (frame);
+
allocate = frame.stack_pointer_offset - m->fs.sp_offset;
if (flag_stack_usage_info)
@@ -14740,17 +14814,19 @@ ix86_emit_restore_regs_using_pop (void)
unsigned int regno;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
-/* Emit code and notes for the LEAVE instruction. */
+/* Emit code and notes for the LEAVE instruction. If insn is non-null,
+ omits the emit and only attaches the notes. */
static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
{
struct machine_function *m = cfun->machine;
- rtx_insn *insn = emit_insn (ix86_gen_leave ());
+ if (!insn)
+ insn = emit_insn (ix86_gen_leave ());
ix86_add_queued_cfa_restore_notes (insn);
@@ -14844,6 +14920,164 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
}
}
+/* Emit the epilogue insns that restore registers through the out-of-line
+ ms2sysv restore stub.
+
+ FRAME is the current function's frame layout. When USE_CALL is true
+ the non-tail stub variant is selected, the PARALLEL is emitted as a
+ plain insn, and the stack is adjusted afterwards if the stack pointer
+ is valid. When USE_CALL is false the PARALLEL also contains the
+ return and the stack pointer restore, and is emitted as a jump insn.
+ STYLE is passed through to pro_epilogue_adjust_stack. */
+static void
+ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
+ bool use_call, int style)
+{
+ struct machine_function *m = cfun->machine;
+ const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+ + m->call_ms2sysv_extra_regs;
+ rtvec v;
+ unsigned int elems_needed, align, i, vi = 0;
+ rtx_insn *insn;
+ rtx sym, tmp;
+ rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+ rtx r10 = NULL_RTX;
+ const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+ HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+ HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
+ rtx rsi_frame_load = NULL_RTX;
+ /* -1 is a sentinel meaning the RSI slot has not been seen yet; it is
+ asserted to have been replaced after the register loop. */
+ HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
+ enum xlogue_stub stub;
+
+ gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+
+ /* If using a realigned stack, we should never start with padding. */
+ gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
+
+ /* Setup RSI as the stub's base pointer. */
+ align = GET_MODE_ALIGNMENT (V4SFmode);
+ tmp = choose_baseaddr (rsi_offset, &align);
+ gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
+ emit_insn (gen_rtx_SET (rsi, tmp));
+
+ /* Get a symbol for the stub. */
+ if (frame_pointer_needed)
+ stub = use_call ? XLOGUE_STUB_RESTORE_HFP
+ : XLOGUE_STUB_RESTORE_HFP_TAIL;
+ else
+ stub = use_call ? XLOGUE_STUB_RESTORE
+ : XLOGUE_STUB_RESTORE_TAIL;
+ sym = xlogue.get_stub_rtx (stub);
+
+ /* Size the PARALLEL: one load per register, plus either just the USE
+ (call form), or for the tail form the return, the USE and the stack
+ pointer restore -- which takes three elements with a hard frame
+ pointer (SP set, RBP set, memory clobber) and one without (SP set
+ from R10). */
+ elems_needed = ncregs;
+ if (use_call)
+ elems_needed += 1;
+ else
+ elems_needed += frame_pointer_needed ? 5 : 3;
+ v = rtvec_alloc (elems_needed);
+
+ /* We call the epilogue stub when we need to pop incoming args or we are
+ doing a sibling call as the tail. Otherwise, we will emit a jmp to the
+ epilogue stub and it is the tail-call. */
+ if (use_call)
+ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+ else
+ {
+ RTVEC_ELT (v, vi++) = ret_rtx;
+ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
+ if (frame_pointer_needed)
+ {
+ rtx rbp = gen_rtx_REG (DImode, BP_REG);
+ gcc_assert (m->fs.fp_valid);
+ gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+ tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
+ RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
+ RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
+ tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+ RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
+ }
+ else
+ {
+ /* If no hard frame pointer, we set R10 to the SP restore value. */
+ gcc_assert (!m->fs.fp_valid);
+ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+ gcc_assert (m->fs.sp_valid);
+
+ r10 = gen_rtx_REG (DImode, R10_REG);
+ tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
+ emit_insn (gen_rtx_SET (r10, tmp));
+
+ RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
+ }
+ }
+
+ /* Generate frame load insns and restore notes. */
+ for (i = 0; i < ncregs; ++i)
+ {
+ const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
+ enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+ rtx reg, frame_load;
+
+ reg = gen_rtx_REG (mode, r.regno);
+ frame_load = gen_frame_load (reg, rsi, r.offset);
+
+ /* Save RSI frame load insn & note to add last. */
+ if (r.regno == SI_REG)
+ {
+ gcc_assert (!rsi_frame_load);
+ rsi_frame_load = frame_load;
+ rsi_restore_offset = r.offset;
+ }
+ else
+ {
+ RTVEC_ELT (v, vi++) = frame_load;
+ ix86_add_cfa_restore_note (NULL, reg, r.offset);
+ }
+ }
+
+ /* Add RSI frame load & restore note at the end. */
+ gcc_assert (rsi_frame_load);
+ gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
+ RTVEC_ELT (v, vi++) = rsi_frame_load;
+ ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+ rsi_restore_offset);
+
+ /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
+ if (!use_call && !frame_pointer_needed)
+ {
+ gcc_assert (m->fs.sp_valid);
+ gcc_assert (!m->fs.sp_realigned);
+
+ /* At this point, R10 should point to frame.stack_realign_offset. */
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
+ m->fs.sp_offset = frame.stack_realign_offset;
+ }
+
+ /* Every slot of the vector must have been filled. */
+ gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
+ tmp = gen_rtx_PARALLEL (VOIDmode, v);
+ if (use_call)
+ insn = emit_insn (tmp);
+ else
+ {
+ insn = emit_jump_insn (tmp);
+ JUMP_LABEL (insn) = ret_rtx;
+
+ /* Passing the already-emitted insn makes ix86_emit_leave skip
+ emitting a separate leave insn. */
+ if (frame_pointer_needed)
+ ix86_emit_leave (insn);
+ else
+ {
+ /* Need CFA adjust note. */
+ tmp = gen_rtx_SET (stack_pointer_rtx, r10);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
+ }
+ }
+
+ RTX_FRAME_RELATED_P (insn) = true;
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ /* If we're not doing a tail-call, we need to adjust the stack. */
+ if (use_call && m->fs.sp_valid)
+ {
+ HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (dealloc), style,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ }
+}
+
/* Restore function stack, frame, and registers. */
void
@@ -14854,6 +15088,7 @@ ix86_expand_epilogue (int style)
struct ix86_frame frame;
bool restore_regs_via_mov;
bool using_drap;
+ bool restore_stub_is_tail = false;
ix86_finalize_stack_realign_flags ();
ix86_compute_frame_layout (&frame);
@@ -14956,7 +15191,37 @@ ix86_expand_epilogue (int style)
ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
style == 2);
- if (restore_regs_via_mov)
+ if (m->call_ms2sysv)
+ {
+ int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
+
+ /* We cannot use a tail-call for the stub if:
+ 1. We have to pop incoming args,
+ 2. We have additional int regs to restore, or
+ 3. A sibling call will be the tail-call, or
+ 4. We are emitting an eh_return_internal epilogue.
+
+ TODO: Item 4 has not yet been tested!
+
+ If any of the above are true, we will call the stub rather than
+ jump to it. */
+ restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
+ ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
+ }
+
+ /* If using out-of-line stub that is a tail-call, then...*/
+ if (m->call_ms2sysv && restore_stub_is_tail)
+ {
+ /* TODO: paranoid tests. (remove eventually) */
+ gcc_assert (m->fs.sp_valid);
+ gcc_assert (!m->fs.sp_realigned);
+ gcc_assert (!m->fs.fp_valid);
+ gcc_assert (!m->fs.realigned);
+ gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
+ gcc_assert (!crtl->drap_reg);
+ gcc_assert (!frame.nregs);
+ }
+ else if (restore_regs_via_mov)
{
rtx t;
@@ -15087,7 +15352,7 @@ ix86_expand_epilogue (int style)
else if (TARGET_USE_LEAVE
|| optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
|| !cfun->machine->use_fast_prologue_epilogue)
- ix86_emit_leave ();
+ ix86_emit_leave (NULL);
else
{
pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -15198,7 +15463,7 @@ ix86_expand_epilogue (int style)
else
emit_jump_insn (gen_simple_return_pop_internal (popc));
}
- else
+ else if (!m->call_ms2sysv || !restore_stub_is_tail)
emit_jump_insn (gen_simple_return_internal ());
/* Restore the state back to the state from the prologue,
@@ -28927,6 +29192,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
clobber_reg (&use, gen_rtx_REG (mode, regno));
}
+
+ /* Set here, but it may get cleared later. */
+ if (TARGET_CALL_MS2SYSV_XLOGUES)
+ cfun->machine->call_ms2sysv = true;
}
if (vec_len > 1)
--
2.11.0
next prev parent reply other threads:[~2017-05-04 21:58 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-27 8:05 [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
2017-04-27 8:05 ` [PATCH 12/12] [i386,testsuite] Test program for ms to sysv abi function calls Daniel Santos
2017-05-17 9:52 ` Thomas Preudhomme
2017-04-27 8:05 ` [PATCH 11/12] [i386] Add remainder of -mcall-ms2sysv-xlogues implementation Daniel Santos
2017-05-04 22:11 ` Daniel Santos [this message]
2017-04-27 8:05 ` [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
2017-05-01 11:18 ` Uros Bizjak
2017-05-02 22:19 ` Daniel Santos
2017-05-03 6:17 ` Uros Bizjak
2017-05-03 7:38 ` Daniel Santos
2017-05-03 8:38 ` Uros Bizjak
2017-05-04 21:35 ` [PATCH 09/12 rev1] [i386] Add patterns and predicates mcall-ms2sysv-xlogues Daniel Santos
2017-04-27 8:05 ` [PATCH 03/12] [i386] Use re-aligned stack pointer for aligned SSE movs Daniel Santos
2017-04-27 8:05 ` [PATCH 05/12] [i386] Add option -mcall-ms2sysv-xlogues Daniel Santos
2017-04-28 6:00 ` Sandra Loosemore
2017-04-28 7:37 ` [PATCH 05/12 rev 1] " Daniel Santos
2017-04-27 8:05 ` [PATCH 02/12] [i386] Keep stack pointer valid after after re-alignment Daniel Santos
2017-04-27 8:05 ` [PATCH 10/12] [i386] Add ms2sysv pro/epilogue stubs to libgcc Daniel Santos
2017-04-27 8:05 ` [PATCH 08/12] [i386] Modify ix86_compute_frame_layout for -mcall-ms2sysv-xlogues Daniel Santos
2017-04-27 8:05 ` [PATCH 01/12] [i386] Re-align stack frame prior to SSE saves Daniel Santos
2017-04-27 8:23 ` [PATCH 04/12] [i386] Minor refactoring Daniel Santos
2017-04-27 8:44 ` [PATCH 07/12] [i386] Modify ix86_save_reg to optionally omit stub-managed registers Daniel Santos
2017-04-27 8:51 ` [PATCH 06/12] [i386] Add class xlogue_layout and new fields to struct machine_function Daniel Santos
2017-04-27 18:32 ` [PATCH v4 0/12 GCC8] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues Daniel Santos
2017-05-01 11:31 ` [PATCH v4 0/12] " Uros Bizjak
2017-05-02 10:25 ` JonY
2017-05-02 10:45 ` Kai Tietz
2017-05-03 6:01 ` Daniel Santos
2017-05-05 9:05 ` Daniel Santos
2017-05-06 20:41 ` Daniel Santos
2017-05-08 20:07 ` Daniel Santos
2017-05-03 4:32 ` Daniel Santos
2017-05-13 0:01 ` [PING] " Daniel Santos
2017-05-13 18:29 ` Uros Bizjak
2017-05-13 23:43 ` Daniel Santos
2017-05-14 10:25 ` Uros Bizjak
2017-07-26 19:03 ` H.J. Lu
2017-07-27 0:36 ` Daniel Santos
2017-07-28 13:51 ` Daniel Santos
2017-07-28 14:41 ` H.J. Lu
2017-07-31 10:25 ` Daniel Santos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170504220320.29790-1-daniel.santos@pobox.com \
--to=daniel.santos@pobox.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hubicka@ucw.cz \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).