From: madvenka@linux.microsoft.com
To: libffi-discuss@sourceware.org
Cc: green@moxielogic.com, fweimer@redhat.com, dj@redhat.com,
madvenka@linux.microsoft.com
Subject: [RFC PATCH v3 3/5] i386: Support for Static Trampolines
Date: Fri, 15 Jan 2021 12:46:51 -0600 [thread overview]
Message-ID: <20210115184653.124913-4-madvenka@linux.microsoft.com> (raw)
In-Reply-To: <20210115184653.124913-1-madvenka@linux.microsoft.com>
From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>
- Define the arch-specific initialization function ffi_tramp_arch ()
that returns trampoline size information to common code.
- Define the trampoline code table statically. Define two tables,
actually, one with CET and one without.
- Introduce a tiny prolog for each ABI handling function. The ABI
handlers addressed are:
- ffi_closure_i386
- ffi_closure_STDCALL
- ffi_closure_REGISTER
The prolog functions are called:
- ffi_closure_i386_alt
- ffi_closure_STDCALL_alt
- ffi_closure_REGISTER_alt
The legacy trampoline jumps to the ABI handler. The static
trampoline jumps to the prolog function. The prolog function uses
the information provided by the static trampoline, sets things up
for the ABI handler and then jumps to the ABI handler.
- Call ffi_closure_tramp_init () in ffi_prep_closure_loc () to
initialize static trampoline parameters.
Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
src/x86/ffi.c | 35 ++++++++++++++
src/x86/internal.h | 11 +++++
src/x86/sysv.S | 116 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 162 insertions(+)
diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 5f7fd81..dced0c8 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -409,8 +409,11 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
/** private members **/
void FFI_HIDDEN ffi_closure_i386(void);
+void FFI_HIDDEN ffi_closure_i386_alt(void);
void FFI_HIDDEN ffi_closure_STDCALL(void);
+void FFI_HIDDEN ffi_closure_STDCALL_alt(void);
void FFI_HIDDEN ffi_closure_REGISTER(void);
+void FFI_HIDDEN ffi_closure_REGISTER_alt(void);
struct closure_frame
{
@@ -537,6 +540,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
{
char *tramp = closure->tramp;
void (*dest)(void);
+ void (*dest_alt)(void);
int op = 0xb8; /* movl imm, %eax */
switch (cif->abi)
@@ -546,19 +550,30 @@ ffi_prep_closure_loc (ffi_closure* closure,
case FFI_FASTCALL:
case FFI_MS_CDECL:
dest = ffi_closure_i386;
+ dest_alt = ffi_closure_i386_alt;
break;
case FFI_STDCALL:
case FFI_PASCAL:
dest = ffi_closure_STDCALL;
+ dest_alt = ffi_closure_STDCALL_alt;
break;
case FFI_REGISTER:
dest = ffi_closure_REGISTER;
+ dest_alt = ffi_closure_REGISTER_alt;
op = 0x68; /* pushl imm */
break;
default:
return FFI_BAD_ABI;
}
+ if (ffi_tramp_is_present(closure))
+ {
+ /* Initialize the static trampoline's parameters. */
+ ffi_tramp_set_parms (closure->ftramp, dest_alt, closure);
+ goto out;
+ }
+
+ /* Initialize the dynamic trampoline. */
/* endbr32. */
*(UINT32 *) tramp = 0xfb1e0ff3;
@@ -570,6 +585,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
tramp[9] = 0xe9;
*(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14);
+out:
closure->cif = cif;
closure->fun = fun;
closure->user_data = user_data;
@@ -767,4 +783,23 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
ffi_call_i386 (frame, stack);
}
#endif /* !FFI_NO_RAW_API */
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void * /* Returns the address of the trampoline code table to use. */
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+ extern void *trampoline_code_table_cet; /* Table with _CET_ENDBR (sysv.S) */
+ extern void *trampoline_code_table; /* Table without _CET_ENDBR (sysv.S) */
+ extern int ffi_cet_present; /* Defined at the end of sysv.S */
+
+ *map_size = X86_TRAMP_MAP_SIZE; /* Same mapping size for both tables */
+ if (ffi_cet_present) {
+ *tramp_size = X86_TRAMP_SIZE_CET; /* Size of one CET trampoline */
+ return &trampoline_code_table_cet;
+ }
+ *tramp_size = X86_TRAMP_SIZE; /* Size of one non-CET trampoline */
+ return &trampoline_code_table;
+}
+#endif
+
#endif /* __i386__ */
diff --git a/src/x86/internal.h b/src/x86/internal.h
index 09771ba..f648623 100644
--- a/src/x86/internal.h
+++ b/src/x86/internal.h
@@ -27,3 +27,14 @@
#else
# define HAVE_FASTCALL 1
#endif
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline code table mapping, a mapping size of 4K (base page size)
+ * is chosen.
+ */
+#define X86_TRAMP_MAP_SHIFT 12
+#define X86_TRAMP_MAP_SIZE (1 << X86_TRAMP_MAP_SHIFT)
+#define X86_TRAMP_SIZE_CET 44 /* Size of one trampoline with _CET_ENDBR */
+#define X86_TRAMP_SIZE 40 /* Size of one trampoline without _CET_ENDBR */
+#endif
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index d8ab4b0..09e9c4f 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -93,6 +93,7 @@ ffi_call_i386:
L(UW0):
# cfi_startproc
_CET_ENDBR
+L(endbr):
#if !HAVE_FASTCALL
movl 4(%esp), %ecx
movl 8(%esp), %edx
@@ -421,6 +422,16 @@ L(UW20):
# cfi_endproc
ENDF(C(ffi_closure_i386))
+ .balign 16
+ .globl C(ffi_closure_i386_alt)
+ FFI_HIDDEN(C(ffi_closure_i386_alt))
+C(ffi_closure_i386_alt): /* Static-trampoline prolog for ffi_closure_i386 */
+ _CET_ENDBR
+ movl 4(%esp), %eax /* Load the data word stored by the trampoline */
+ add $8, %esp /* Drop both stack slots the trampoline used */
+ jmp C(ffi_closure_i386) /* Continue in the ABI handler */
+ENDF(C(ffi_closure_i386_alt))
+
.balign 16
.globl C(ffi_go_closure_STDCALL)
FFI_HIDDEN(C(ffi_go_closure_STDCALL))
@@ -466,6 +477,16 @@ L(UW26):
# cfi_endproc
ENDF(C(ffi_closure_REGISTER))
+ .balign 16
+ .globl C(ffi_closure_REGISTER_alt)
+ FFI_HIDDEN(C(ffi_closure_REGISTER_alt))
+C(ffi_closure_REGISTER_alt): /* Static-trampoline prolog for ffi_closure_REGISTER */
+ _CET_ENDBR
+ movl (%esp), %eax /* Reload the %eax value saved by the trampoline */
+ add $4, %esp /* Pop that slot only; the data word is now at (%esp) */
+ jmp C(ffi_closure_REGISTER) /* Continue in the ABI handler */
+ENDF(C(ffi_closure_REGISTER_alt))
+
/* For STDCALL (and others), we need to pop N bytes of arguments off
the stack following the closure. The amount needing to be popped
is returned to us from ffi_closure_inner. */
@@ -573,6 +594,93 @@ L(UW31):
# cfi_endproc
ENDF(C(ffi_closure_STDCALL))
+ .balign 16
+ .globl C(ffi_closure_STDCALL_alt)
+ FFI_HIDDEN(C(ffi_closure_STDCALL_alt))
+C(ffi_closure_STDCALL_alt): /* Static-trampoline prolog for ffi_closure_STDCALL */
+ _CET_ENDBR
+ movl 4(%esp), %eax /* Load the data word stored by the trampoline */
+ add $8, %esp /* Drop both stack slots the trampoline used */
+ jmp C(ffi_closure_STDCALL) /* Continue in the ABI handler */
+ENDF(C(ffi_closure_STDCALL_alt))
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * Below is the definition of the trampoline code table. Each element in
+ * the code table is a trampoline.
+ *
+ * Because we jump to the trampoline, we place a _CET_ENDBR at the
+ * beginning of the trampoline to mark it as a valid branch target. This is
+ * part of the Intel CET (Control Flow Enforcement Technology).
+ *
+ * If CET is present, _CET_ENDBR is defined as the endbr32 instruction. Else,
+ * _CET_ENDBR is defined empty. Consequently, the size of the trampoline and
+ * the PC-relative offsets in the trampoline code also differ. So, two versions
+ * of the code table have been defined - one with the endbr32 instruction and
+ * one without. ffi_tramp_arch() figures out which version of the code table
+ * should be used by looking at ffi_cet_present (defined at the end of this
+ * file).
+ */
+/*
+ * The trampoline uses register eax. It saves the original value of eax on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of eax
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+ .align X86_TRAMP_MAP_SIZE
+ .globl C(trampoline_code_table_cet)
+ FFI_HIDDEN(C(trampoline_code_table_cet))
+C(trampoline_code_table_cet):
+ .rept X86_TRAMP_MAP_SIZE / X86_TRAMP_SIZE_CET
+ _CET_ENDBR
+ sub $8, %esp /* Reserve two stack slots */
+ movl %eax, (%esp) /* Save %eax on stack */
+ call 1f /* Get next PC into %eax */
+ movl 4081(%eax), %eax /* Copy data into %eax */
+ movl %eax, 4(%esp) /* Save data on stack */
+ call 1f /* Get next PC into %eax */
+ movl 4070(%eax), %eax /* Copy code address into %eax */
+ jmp *%eax /* Jump to code */
+1:
+ mov (%esp), %eax /* Return address is the caller's next PC */
+ ret
+ nop /* Pad to 4 byte boundary */
+ nop
+ .endr
+ENDF(C(trampoline_code_table_cet))
+
+ .align X86_TRAMP_MAP_SIZE
+ .globl C(trampoline_code_table)
+ FFI_HIDDEN(C(trampoline_code_table))
+C(trampoline_code_table):
+ .rept X86_TRAMP_MAP_SIZE / X86_TRAMP_SIZE
+ sub $8, %esp /* Reserve two stack slots */
+ movl %eax, (%esp) /* Save %eax on stack */
+ call 1f /* Get next PC into %eax */
+ movl 4085(%eax), %eax /* Copy data into %eax */
+ movl %eax, 4(%esp) /* Save data on stack */
+ call 1f /* Get next PC into %eax */
+ movl 4074(%eax), %eax /* Copy code address into %eax */
+ jmp *%eax /* Jump to code */
+1:
+ mov (%esp), %eax /* Return address is the caller's next PC */
+ ret
+ nop /* Pad to 4 byte boundary */
+ nop
+ .endr
+ENDF(C(trampoline_code_table))
+ .align X86_TRAMP_MAP_SIZE
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
#if !FFI_NO_RAW_API
#define raw_closure_S_FS (16+16+12)
@@ -1131,6 +1239,14 @@ L(EFDE9):
#endif /* __APPLE__ */
#endif /* ifndef _MSC_VER */
+
+ .section .rodata
+ .align 8
+ .globl ffi_cet_present
+ffi_cet_present: /* Read by ffi_tramp_arch () to select the code table */
+ .set L10,L(endbr)-L(UW0) /* Size of _CET_ENDBR in ffi_call_i386; 0 if it expands to nothing */
+ .int L10
+
#endif /* ifdef __i386__ */
#if defined __ELF__ && defined __linux__
--
2.25.1
next prev parent reply other threads:[~2021-01-15 18:47 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1ef5c7e1c9a6ebb140a476ba555ec955681f4fba>
2021-01-15 18:46 ` [RFC PATCH v3 0/5] Libffi " madvenka
2021-01-15 18:46 ` [RFC PATCH v3 1/5] " madvenka
2021-01-27 3:31 ` DJ Delorie
2021-01-27 21:51 ` Madhavan T. Venkataraman
2021-01-27 22:15 ` DJ Delorie
2021-01-27 22:43 ` Madhavan T. Venkataraman
2021-01-15 18:46 ` [RFC PATCH v3 2/5] x86: Support for " madvenka
2021-01-27 3:31 ` DJ Delorie
2021-01-28 21:59 ` Madhavan T. Venkataraman
2021-01-28 22:17 ` DJ Delorie
2021-01-28 23:25 ` Madhavan T. Venkataraman
2021-01-29 2:09 ` DJ Delorie
2021-01-29 2:38 ` Madhavan T. Venkataraman
2021-01-29 2:48 ` DJ Delorie
2021-01-29 3:24 ` Madhavan T. Venkataraman
2021-01-29 6:07 ` DJ Delorie
2021-02-01 19:46 ` DJ Delorie
2021-01-15 18:46 ` madvenka [this message]
2021-01-15 18:46 ` [RFC PATCH v3 4/5] arm64: " madvenka
2021-01-15 18:46 ` [RFC PATCH v3 5/5] arm: " madvenka
2021-01-26 23:41 ` [RFC PATCH v3 0/5] Libffi " Anthony Green
2021-01-27 17:20 ` Madhavan T. Venkataraman
2021-01-27 18:00 ` Anthony Green
2021-01-27 19:45 ` Madhavan T. Venkataraman
2021-01-28 14:21 ` Anthony Green
2021-01-28 17:01 ` Madhavan T. Venkataraman
2021-02-05 18:20 ` Madhavan T. Venkataraman
2021-02-05 18:46 ` Anthony Green
2021-02-05 19:38 ` Madhavan T. Venkataraman
2021-02-07 16:09 ` Madhavan T. Venkataraman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210115184653.124913-4-madvenka@linux.microsoft.com \
--to=madvenka@linux.microsoft.com \
--cc=dj@redhat.com \
--cc=fweimer@redhat.com \
--cc=green@moxielogic.com \
--cc=libffi-discuss@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).