From: Daniel Santos <daniel.santos@pobox.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Daniel Santos <daniel.santos@pobox.com>
Subject: [PATCH 9/9] Add remainder of moutline-msabi-xlogues implementation
Date: Wed, 23 Nov 2016 05:16:00 -0000 [thread overview]
Message-ID: <20161123051918.22517-9-daniel.santos@pobox.com> (raw)
In-Reply-To: <25abd41b-923b-2fea-dfc3-9051af632f44@pobox.com>
Adds functions emit_msabi_outlined_save and emit_msabi_outlined_restore,
which are called from ix86_expand_prologue and ix86_expand_epilogue,
respectively. Also adds the code to ix86_expand_call that enables the
optimization (setting the machine_function's outline_ms_sysv field).
---
gcc/config/i386/i386.c | 298 +++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 279 insertions(+), 19 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1dc244e..6345c61 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13882,6 +13882,103 @@ ix86_elim_entry_set_got (rtx reg)
}
}
+/* Build a SET rtx that moves REG to or from the frame memory slot at
+   FRAME_REG + OFFSET.  When STORE is true the SET writes REG into the
+   slot; otherwise it loads REG from the slot.  The memory reference is
+   created with gen_frame_mem in REG's mode.  */
+
+static rtx
+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
+{
+  rtx addr = frame_reg;
+  rtx mem;
+
+  /* With a zero offset the slot is addressed through FRAME_REG itself
+     rather than through a (plus FRAME_REG 0) expression.  */
+  if (offset)
+    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
+
+  mem = gen_frame_mem (GET_MODE (reg), addr);
+  return store ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
+}
+
+/* Return a SET rtx that loads REG from the frame slot at
+   FRAME_REG + OFFSET.  Thin wrapper around gen_frame_set.  */
+
+static inline rtx
+gen_frame_load (rtx reg, rtx frame_reg, int offset)
+{
+  const bool is_store = false;
+
+  return gen_frame_set (reg, frame_reg, offset, is_store);
+}
+
+/* Return a SET rtx that stores REG into the frame slot at
+   FRAME_REG + OFFSET.  Thin wrapper around gen_frame_set.  */
+
+static inline rtx
+gen_frame_store (rtx reg, rtx frame_reg, int offset)
+{
+  const bool is_store = true;
+
+  return gen_frame_set (reg, frame_reg, offset, is_store);
+}
+
+/* Emit prologue code that saves registers through an out-of-line save
+   stub, using the frame layout in FRAME.
+
+   The emitted sequence is, in order: an optional realignment of the stack
+   pointer (when crtl->stack_realign_needed), a SET of RAX to
+   SP - <stub pointer offset>, a stack allocation, and finally a single
+   frame-related PARALLEL insn.  The PARALLEL holds a USE of the stub
+   symbol, a constant (1 if the stack was realigned here, 0 otherwise) and
+   one store relative to RAX for each of the
+   NUM_X86_64_MS_CLOBBERED_REGS + outline_ms_sysv_extra_regs registers
+   described by the xlogue layout.  */
+
+static void
+emit_msabi_outlined_save (const struct ix86_frame &frame)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+                          + m->outline_ms_sysv_extra_regs;
+  /* Vector layout: USE of the stub symbol, realignment marker, then one
+     store per stub-managed register.  */
+  rtvec v = rtvec_alloc (ncregs + 2);
+  rtx insn, sym, tmp;
+  rtx rax = gen_rtx_REG (word_mode, AX_REG);
+  unsigned i = 0;
+  unsigned j;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stack_used = xlogue.get_stack_space_used ();
+  HOST_WIDE_INT stack_alloc_size = stack_used;
+  HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset ();
+
+  /* Verify that the incoming stack 16-byte alignment offset matches the
+     layout we're using.  */
+  gcc_assert ((m->fs.sp_offset & 15) == xlogue.get_stack_align_off_in ());
+
+  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
+                                                  : XLOGUE_STUB_SAVE);
+  RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
+
+  /* Combine as many other allocations as possible.  By default only the
+     space the stub itself uses is allocated here.  */
+  if (frame.nregs == 0)
+    {
+      if (frame.nsseregs == 0)
+        /* If no other GP or SSE regs, we allocate the whole stack frame.  */
+        stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset;
+      else
+        stack_alloc_size = frame.reg_save_offset - m->fs.sp_offset;
+
+      gcc_assert (stack_alloc_size >= stack_used);
+    }
+
+  /* Realign the stack first if required, and record in the PARALLEL
+     whether that happened.  */
+  if (crtl->stack_realign_needed)
+    {
+      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+
+      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
+      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, stack_pointer_rtx,
+                                        GEN_INT (-align_bytes)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+      RTVEC_ELT (v, i++) = const1_rtx;
+    }
+  else
+    RTVEC_ELT (v, i++) = const0_rtx;
+
+  /* Point RAX at the base address the register stores below are
+     expressed against.  The returned insn is not needed.  */
+  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-rax_offset));
+  emit_insn (gen_rtx_SET (rax, tmp));
+
+  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                             GEN_INT (-stack_alloc_size), -1,
+                             m->fs.cfa_reg == stack_pointer_rtx);
+
+  /* Describe one store per register; SSE registers are saved in V4SFmode
+     and all others in word_mode.  */
+  for (j = 0; j < ncregs; ++j)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (j);
+      rtx store;
+      rtx reg;
+
+      reg = gen_rtx_REG (SSE_REGNO_P (r.regno) ? V4SFmode : word_mode,
+                         r.regno);
+      store = gen_frame_store (reg, rax, -r.offset);
+      RTVEC_ELT (v, i++) = store;
+    }
+
+  /* Every slot of the vector must have been filled.  */
+  gcc_assert (i == (unsigned) GET_NUM_ELEM (v));
+
+  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
+  RTX_FRAME_RELATED_P (insn) = true;
+}
+
/* Expand the prologue into a bunch of separate insns. */
void
@@ -14095,6 +14192,11 @@ ix86_expand_prologue (void)
}
}
+ /* Call to outlining stub occurs after pushing frame pointer (if it was
+ needed). */
+ if (m->outline_ms_sysv)
+ emit_msabi_outlined_save (frame);
+
if (!int_registers_saved)
{
/* If saving registers via PUSH, do so now. */
@@ -14123,20 +14225,24 @@ ix86_expand_prologue (void)
int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
- /* The computation of the size of the re-aligned stack frame means
- that we must allocate the size of the register save area before
- performing the actual alignment. Otherwise we cannot guarantee
- that there's enough storage above the realignment point. */
- if (m->fs.sp_offset != frame.sse_reg_save_offset)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (m->fs.sp_offset
- - frame.sse_reg_save_offset),
- -1, false);
+ /* If using stub, stack will have already been aligned. */
+ if (!m->outline_ms_sysv)
+ {
+ /* The computation of the size of the re-aligned stack frame means
+ that we must allocate the size of the register save area before
+ performing the actual alignment. Otherwise we cannot guarantee
+ that there's enough storage above the realignment point. */
+ if (m->fs.sp_offset != frame.sse_reg_save_offset)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset
+ - frame.sse_reg_save_offset),
+ -1, false);
- /* Align the stack. */
- insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (-align_bytes)));
+ /* Align the stack. */
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
+ }
/* For the purposes of register save area addressing, the stack
pointer is no longer valid. As for the value of sp_offset,
@@ -14466,17 +14572,19 @@ ix86_emit_restore_regs_using_pop (void)
unsigned int regno;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
-/* Emit code and notes for the LEAVE instruction. */
+/* Emit code and notes for the LEAVE instruction. If insn is non-null,
+ omits the emit and only attaches the notes. */
static void
-ix86_emit_leave (void)
+ix86_emit_leave (rtx_insn *insn)
{
struct machine_function *m = cfun->machine;
- rtx_insn *insn = emit_insn (ix86_gen_leave ());
+ if (!insn)
+ insn = emit_insn (ix86_gen_leave ());
ix86_add_queued_cfa_restore_notes (insn);
@@ -14568,6 +14676,140 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
}
}
+/* Emit epilogue code that restores registers through an out-of-line
+   restore stub, using the frame layout in FRAME.
+
+   USE_CALL selects how the stub is used.  When true, the PARALLEL is
+   emitted as an ordinary insn and this function then adjusts the stack
+   pointer by the restore offset itself.  When false, the PARALLEL is
+   emitted as a jump insn that also contains a return; with a hard frame
+   pointer the LEAVE notes are attached to it, otherwise it sets SP from
+   R10 and carries a REG_CFA_ADJUST_CFA note.
+
+   STYLE is passed through to pro_epilogue_adjust_stack.  */
+
+static void
+emit_msabi_outlined_restore (const struct ix86_frame &frame, bool use_call,
+                             int style)
+{
+  struct machine_function *m = cfun->machine;
+  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
+                          + m->outline_ms_sysv_extra_regs;
+  /* Call form: one USE of the stub symbol plus one load per register.
+     The jump form adds more elements below.  */
+  unsigned elems_needed = ncregs + 1;
+  rtvec v;
+  rtx_insn *insn;
+  rtx sym, tmp;
+  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
+  rtx r10 = NULL_RTX;
+  rtx cfa_adjust_note = NULL_RTX;
+  unsigned i = 0;
+  unsigned j;
+  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
+  HOST_WIDE_INT stack_restore_offset;
+  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
+  HOST_WIDE_INT rsi_offset;
+  rtx rsi_frame_load = NULL_RTX;
+  /* Placeholder only; overwritten when the RSI entry is found in the
+     loop below, which the assert after the loop requires.  */
+  HOST_WIDE_INT rsi_restore_offset = 0x7fffffff;
+  enum xlogue_stub stub;
+
+  stack_restore_offset = m->fs.sp_offset - frame.hard_frame_pointer_offset;
+  rsi_offset = stack_restore_offset - stub_ptr_offset;
+  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
+
+  /* Point RSI at the base address the register loads below are expressed
+     against.  The returned insn is not needed.  */
+  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (rsi_offset));
+  emit_insn (gen_rtx_SET (rsi, tmp));
+
+  if (frame_pointer_needed)
+    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
+                    : XLOGUE_STUB_RESTORE_HFP_TAIL;
+  else
+    stub = use_call ? XLOGUE_STUB_RESTORE
+                    : XLOGUE_STUB_RESTORE_TAIL;
+
+  sym = xlogue.get_stub_rtx (stub);
+
+  /* The jump form additionally holds the return and a marker constant,
+     plus the final SET of SP when there is no hard frame pointer.  */
+  if (!use_call)
+    elems_needed += frame_pointer_needed ? 2 : 3;
+  v = rtvec_alloc (elems_needed);
+
+  /* If we need to pop incoming args or a sibling call will follow, then
+     we want to call the epilogue stub instead of jumping to it.  */
+  if (use_call)
+    RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
+  else
+    {
+      RTVEC_ELT (v, i++) = ret_rtx;
+      RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
+      if (!frame_pointer_needed)
+        {
+          gcc_assert (!m->fs.fp_valid);
+          gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+          gcc_assert (m->fs.sp_valid);
+
+          /* R10 = RSI + stub pointer offset; SP is set from R10 by the
+             last element of the PARALLEL.  */
+          tmp = GEN_INT (stub_ptr_offset);
+          tmp = gen_rtx_PLUS (Pmode, rsi, tmp);
+          r10 = gen_rtx_REG (DImode, R10_REG);
+          emit_insn (gen_rtx_SET (r10, tmp));
+          RTVEC_ELT (v, i++) = const0_rtx;
+        }
+      else
+        {
+          gcc_assert (m->fs.fp_valid);
+          gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
+
+          RTVEC_ELT (v, i++) = const1_rtx;
+        }
+    }
+
+  /* Describe one load per register; SSE registers are restored in
+     V4SFmode and all others in word_mode.  */
+  for (j = 0; j < ncregs; ++j)
+    {
+      const xlogue_layout::reginfo &r = xlogue.get_reginfo (j);
+      enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
+      rtx reg, restore_note;
+
+      reg = gen_rtx_REG (mode, r.regno);
+      restore_note = gen_frame_load (reg, rsi, r.offset);
+
+      /* Save RSI frame load insn & note to add later.  RSI is the base
+         register of every load here, presumably why its own load is
+         deferred to the end.  */
+      if (r.regno == SI_REG)
+        {
+          gcc_assert (!rsi_frame_load);
+          rsi_frame_load = restore_note;
+          rsi_restore_offset = r.offset;
+        }
+      else
+        {
+          RTVEC_ELT (v, i++) = restore_note;
+          ix86_add_cfa_restore_note (NULL, reg, r.offset);
+        }
+    }
+
+  /* Add RSI frame load & restore note at the end.  */
+  gcc_assert (rsi_frame_load);
+  RTVEC_ELT (v, i++) = rsi_frame_load;
+  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
+                             rsi_restore_offset);
+
+  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
+  if (!use_call && !frame_pointer_needed)
+    {
+      cfa_adjust_note = gen_rtx_SET (stack_pointer_rtx, r10);
+      RTVEC_ELT (v, i++) = cfa_adjust_note;
+      m->fs.cfa_offset -= stack_restore_offset;
+      m->fs.sp_offset -= stack_restore_offset;
+    }
+
+  /* Every slot of the vector must have been filled.  */
+  gcc_assert (i == (unsigned) GET_NUM_ELEM (v));
+  tmp = gen_rtx_PARALLEL (VOIDmode, v);
+  if (use_call)
+    insn = emit_insn (tmp);
+  else
+    {
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = ret_rtx;
+
+      if (frame_pointer_needed)
+        /* Attach the LEAVE notes to the jump insn instead of emitting a
+           separate LEAVE.  */
+        ix86_emit_leave (insn);
+      else
+        add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa_adjust_note);
+    }
+
+  RTX_FRAME_RELATED_P (insn) = true;
+  ix86_add_queued_cfa_restore_notes (insn);
+
+  /* In the call form the stack pointer is restored here, after the stub
+     insn.  */
+  if (use_call)
+    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+                               GEN_INT (stack_restore_offset), style,
+                               m->fs.cfa_reg == stack_pointer_rtx);
+}
+
/* Restore function stack, frame, and registers. */
void
@@ -14578,6 +14820,7 @@ ix86_expand_epilogue (int style)
struct ix86_frame frame;
bool restore_regs_via_mov;
bool using_drap;
+ bool restore_stub_uses_call = false;
ix86_finalize_stack_realign_flags ();
ix86_compute_frame_layout (&frame);
@@ -14782,6 +15025,10 @@ ix86_expand_epilogue (int style)
- frame.reg_save_offset),
style, false);
}
+ /* If using an out-of-line stub and there are no int regs to restore
+ inline, then we want to let the stub handle the stack restore. */
+ else if (m->outline_ms_sysv && !frame.nregs)
+ ;
else if (m->fs.sp_offset != frame.reg_save_offset)
{
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
@@ -14794,6 +15041,15 @@ ix86_expand_epilogue (int style)
ix86_emit_restore_regs_using_pop ();
}
+ if (m->outline_ms_sysv)
+ {
+ int popc = crtl->args.pops_args && crtl->args.size ? crtl->args.size : 0;
+
+ restore_stub_uses_call = popc || style == 0 || (m->fs.fp_valid
+ && !crtl->stack_realign_needed);
+ emit_msabi_outlined_restore (frame, restore_stub_uses_call, style);
+ }
+
/* If we used a stack pointer and haven't already got rid of it,
then do so now. */
if (m->fs.fp_valid)
@@ -14807,7 +15063,7 @@ ix86_expand_epilogue (int style)
else if (TARGET_USE_LEAVE
|| optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
|| !cfun->machine->use_fast_prologue_epilogue)
- ix86_emit_leave ();
+ ix86_emit_leave (NULL);
else
{
pro_epilogue_adjust_stack (stack_pointer_rtx,
@@ -14917,7 +15173,7 @@ ix86_expand_epilogue (int style)
else
emit_jump_insn (gen_simple_return_pop_internal (popc));
}
- else
+ else if (!m->outline_ms_sysv || restore_stub_uses_call)
emit_jump_insn (gen_simple_return_internal ());
/* Restore the state back to the state from the prologue,
@@ -28568,6 +28824,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
clobber_reg (&use, gen_rtx_REG (mode, regno));
}
+
+ /* Set here, but it may get cleared later. */
+ if (TARGET_OUTLINE_MSABI_XLOGUES)
+ cfun->machine->outline_ms_sysv = true;
}
if (vec_len > 1)
--
2.9.0
next prev parent reply other threads:[~2016-11-23 5:16 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-23 5:11 [PATCH v2 0/9] Add optimization -moutline-msabi-xlougues (for Wine 64) Daniel Santos
2016-11-23 5:16 ` [PATCH 7/9] Add patterns and predicates foutline-msabi-xlouges Daniel Santos
2016-11-23 5:16 ` [PATCH 2/9] Minor refactor in ix86_compute_frame_layout Daniel Santos
2016-11-23 5:16 ` Daniel Santos [this message]
2016-11-23 5:16 ` [PATCH 8/9] Add msabi pro/epilogue stubs to libgcc Daniel Santos
2016-11-23 5:16 ` [PATCH 3/9] Add option -moutline-msabi-xlogues Daniel Santos
2016-11-25 23:51 ` Sandra Loosemore
2016-11-26 1:27 ` Daniel Santos
2016-11-23 5:16 ` [PATCH 6/9] Modify ix86_compute_frame_layout for foutline-msabi-xlogues Daniel Santos
2016-11-23 5:16 ` [PATCH 1/9] Change type of x86_64_ms_sysv_extra_clobbered_registers Daniel Santos
2016-11-23 5:16 ` [PATCH 4/9] Adds class xlouge_layout and new fields to struct machine_function Daniel Santos
2016-11-23 5:16 ` [PATCH 5/9] Modify ix86_save_reg to optionally omit stub-managed registers Daniel Santos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161123051918.22517-9-daniel.santos@pobox.com \
--to=daniel.santos@pobox.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).