public inbox for libffi-discuss@sourceware.org
 help / color / mirror / Atom feed
From: madvenka@linux.microsoft.com
To: libffi-discuss@sourceware.org
Cc: fw@deneb.enyo.de, dj@redhat.com, madvenka@linux.microsoft.com
Subject: [RFC PATCH v1 2/4] x86: Support for Static Trampolines
Date: Tue, 24 Nov 2020 13:32:04 -0600	[thread overview]
Message-ID: <20201124193206.10289-3-madvenka@linux.microsoft.com> (raw)
In-Reply-To: <20201124193206.10289-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

	- Define the arch-specific initialization function ffi_tramp_arch ()
	  that returns trampoline size information to common code.

	- Define the trampoline code and data mapping sizes.

	- Introduce a tiny amount of code at the beginning of each ABI
	  handler to retrieve the information saved by the trampoline on
	  the stack.

	- Define the trampoline code table statically.

	- Call ffi_closure_tramp_set_parms () from ffi_prep_closure_loc ()
	  to set the static trampoline parameters.

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 src/x86/ffi.c        | 17 +++++++++++++++
 src/x86/ffi64.c      | 19 ++++++++++++++--
 src/x86/ffiw64.c     |  8 ++++---
 src/x86/internal.h   | 10 +++++++++
 src/x86/internal64.h | 10 +++++++++
 src/x86/sysv.S       | 52 ++++++++++++++++++++++++++++++++++++++++++++
 src/x86/unix64.S     | 48 ++++++++++++++++++++++++++++++++++++++++
 src/x86/win64.S      |  4 ++++
 8 files changed, 163 insertions(+), 5 deletions(-)

diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 5f7fd81..3407fb8 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -559,6 +559,9 @@ ffi_prep_closure_loc (ffi_closure* closure,
       return FFI_BAD_ABI;
     }
 
+  if (ffi_closure_tramp_set_parms (closure, dest))
+    goto out;
+
   /* endbr32.  */
   *(UINT32 *) tramp = 0xfb1e0ff3;
 
@@ -570,6 +573,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
   tramp[9] = 0xe9;
   *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14);
 
+out:
   closure->cif = cif;
   closure->fun = fun;
   closure->user_data = user_data;
@@ -767,4 +771,17 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
   ffi_call_i386 (frame, stack);
 }
 #endif /* !FFI_NO_RAW_API */
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+  extern void *trampoline_code_table;
+
+  *tramp_size = X86_TRAMP_SIZE;
+  *map_size = X86_TRAMP_MAP_SIZE;
+  return &trampoline_code_table;
+}
+#endif
+
 #endif /* __i386__ */
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index 39f9598..fdef9b0 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -756,8 +756,11 @@ ffi_prep_closure_loc (ffi_closure* closure,
   else
     dest = ffi_closure_unix64;
 
-  memcpy (tramp, trampoline, sizeof(trampoline));
-  *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;
+  if (!ffi_closure_tramp_set_parms (closure, dest))
+    {
+      memcpy (tramp, trampoline, sizeof(trampoline));
+      *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;
+    }
 
   closure->cif = cif;
   closure->fun = fun;
@@ -892,4 +895,16 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
 
 #endif /* FFI_GO_CLOSURES */
 
+#if defined(FFI_EXEC_STATIC_TRAMP)
+void *
+ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
+{
+  extern void *trampoline_code_table;
+
+  *tramp_size = UNIX64_TRAMP_SIZE;
+  *map_size = UNIX64_TRAMP_MAP_SIZE;
+  return &trampoline_code_table;
+}
+#endif
+
 #endif /* __x86_64__ */
diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
index a43a9eb..74cd092 100644
--- a/src/x86/ffiw64.c
+++ b/src/x86/ffiw64.c
@@ -185,7 +185,6 @@ EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue,
   ffi_call_int (cif, fn, rvalue, avalue, closure);
 }
 
-
 extern void ffi_closure_win64(void) FFI_HIDDEN;
 
 #ifdef FFI_GO_CLOSURES
@@ -220,8 +219,11 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
       return FFI_BAD_ABI;
     }
 
-  memcpy (tramp, trampoline, sizeof(trampoline));
-  *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
+  if (!ffi_closure_tramp_set_parms (closure, ffi_closure_win64))
+    {
+      memcpy (tramp, trampoline, sizeof(trampoline));
+      *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
+    }
 
   closure->cif = cif;
   closure->fun = fun;
diff --git a/src/x86/internal.h b/src/x86/internal.h
index 09771ba..f782aad 100644
--- a/src/x86/internal.h
+++ b/src/x86/internal.h
@@ -27,3 +27,13 @@
 #else
 # define HAVE_FASTCALL 1
 #endif
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline code table mapping, a mapping size of 4K (base page size)
+ * is chosen.
+ */
+#define X86_TRAMP_MAP_SHIFT	12
+#define X86_TRAMP_MAP_SIZE	(1 << X86_TRAMP_MAP_SHIFT)
+#define X86_TRAMP_SIZE		44
+#endif
diff --git a/src/x86/internal64.h b/src/x86/internal64.h
index 512e955..272b914 100644
--- a/src/x86/internal64.h
+++ b/src/x86/internal64.h
@@ -20,3 +20,13 @@
 #define UNIX64_FLAG_RET_IN_MEM	(1 << 10)
 #define UNIX64_FLAG_XMM_ARGS	(1 << 11)
 #define UNIX64_SIZE_SHIFT	12
+
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * For the trampoline code table mapping, a mapping size of 4K (base page size)
+ * is chosen.
+ */
+#define UNIX64_TRAMP_MAP_SHIFT	12
+#define UNIX64_TRAMP_MAP_SIZE	(1 << UNIX64_TRAMP_MAP_SHIFT)
+#define UNIX64_TRAMP_SIZE	40
+#endif
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index d8ab4b0..a1d499d 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -344,6 +344,10 @@ C(ffi_closure_i386):
 L(UW12):
 	# cfi_startproc
 	_CET_ENDBR
+#ifdef FFI_EXEC_STATIC_TRAMP
+	movl	4(%esp), %eax
+	add	$8, %esp
+#endif
 	subl	$closure_FS, %esp
 L(UW13):
 	# cfi_def_cfa_offset(closure_FS + 4)
@@ -454,6 +458,10 @@ L(UW24):
 	# cfi_def_cfa(%esp, 8)
 	# cfi_offset(%eip, -8)
 	_CET_ENDBR
+#ifdef FFI_EXEC_STATIC_TRAMP
+	movl	(%esp), %eax
+	add	$4, %esp
+#endif
 	subl	$closure_FS-4, %esp
 L(UW25):
 	# cfi_def_cfa_offset(closure_FS + 4)
@@ -477,6 +485,10 @@ C(ffi_closure_STDCALL):
 L(UW27):
 	# cfi_startproc
 	_CET_ENDBR
+#ifdef FFI_EXEC_STATIC_TRAMP
+	movl	4(%esp), %eax
+	add	$8, %esp
+#endif
 	subl	$closure_FS, %esp
 L(UW28):
 	# cfi_def_cfa_offset(closure_FS + 4)
@@ -573,6 +585,46 @@ L(UW31):
 	# cfi_endproc
 ENDF(C(ffi_closure_STDCALL))
 
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * The trampoline uses register eax.  It saves the original value of eax on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of eax
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+	.align	X86_TRAMP_MAP_SIZE
+	.globl	C(trampoline_code_table)
+	FFI_HIDDEN(C(trampoline_code_table))
+C(trampoline_code_table):
+	.rept	X86_TRAMP_MAP_SIZE / X86_TRAMP_SIZE
+	endbr32
+	sub	$8, %esp		/* Make space on the stack */
+	movl	%eax, (%esp)		/* Save %eax on stack */
+	call	1f			/* Get next PC into %eax */
+	movl	4081(%eax), %eax	/* Copy data into %eax */
+	movl	%eax, 4(%esp)		/* Save data on stack */
+	call	1f			/* Get next PC into %eax */
+	movl	4070(%eax), %eax	/* Copy code into %eax */
+	jmp	*%eax			/* Jump to code */
+1:
+	mov	(%esp), %eax		/* Return address is the caller's next PC */
+	ret
+	nop				/* Pad to 4 byte boundary */
+	nop
+	.endr
+ENDF(C(trampoline_code_table))
+	.align	X86_TRAMP_MAP_SIZE
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
 #if !FFI_NO_RAW_API
 
 #define raw_closure_S_FS	(16+16+12)
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 89d7db1..c340315 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -253,6 +253,10 @@ ENDF(C(ffi_call_unix64))
 C(ffi_closure_unix64_sse):
 L(UW5):
 	_CET_ENDBR
+#ifdef FFI_EXEC_STATIC_TRAMP
+	movq	8(%rsp), %r10
+	addq	$16, %rsp
+#endif
 	subq	$ffi_closure_FS, %rsp
 L(UW6):
 	/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -277,6 +281,10 @@ ENDF(C(ffi_closure_unix64_sse))
 C(ffi_closure_unix64):
 L(UW8):
 	_CET_ENDBR
+#ifdef FFI_EXEC_STATIC_TRAMP
+	movq	8(%rsp), %r10
+	addq	$16, %rsp
+#endif
 	subq	$ffi_closure_FS, %rsp
 L(UW9):
 	/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -456,6 +464,46 @@ L(sse_entry2):
 L(UW17):
 ENDF(C(ffi_go_closure_unix64))
 
+#if defined(FFI_EXEC_STATIC_TRAMP)
+/*
+ * The trampoline uses register r10. It saves the original value of r10 on
+ * the stack.
+ *
+ * The trampoline has two parameters - target code to jump to and data for
+ * the target code. The trampoline extracts the parameters from its parameter
+ * block (see tramp_table_map()). The trampoline saves the data address on
+ * the stack. Finally, it jumps to the target code.
+ *
+ * The target code can choose to:
+ *
+ * - restore the value of r10
+ * - load the data address in a register
+ * - restore the stack pointer to what it was when the trampoline was invoked.
+ */
+	.align	UNIX64_TRAMP_MAP_SIZE
+	.globl	C(trampoline_code_table)
+	FFI_HIDDEN(C(trampoline_code_table))
+
+C(trampoline_code_table):
+	.rept	UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE
+	endbr64
+	subq	$16, %rsp		/* Make space on the stack */
+	movq	%r10, (%rsp)		/* Save %r10 on stack */
+	movq	4077(%rip), %r10	/* Copy data into %r10 */
+	movq	%r10, 8(%rsp)		/* Save data on stack */
+	movq	4073(%rip), %r10	/* Copy code into %r10 */
+	jmp	*%r10			/* Jump to code */
+	nop				/* Pad to 8 byte boundary */
+	nop
+	nop
+	nop
+	nop
+	nop
+	.endr
+ENDF(C(trampoline_code_table))
+	.align	UNIX64_TRAMP_MAP_SIZE
+#endif /* FFI_EXEC_STATIC_TRAMP */
+
 /* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */
 
 #ifdef __APPLE__
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 8315e8b..5db607c 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -200,6 +200,10 @@ C(ffi_go_closure_win64):
 C(ffi_closure_win64):
 	cfi_startproc
 	_CET_ENDBR
+#ifdef FFI_EXEC_STATIC_TRAMP
+	movq	8(%rsp), %r10
+	addq	$16, %rsp
+#endif
 	/* Save all integer arguments into the incoming reg stack space.  */
 	movq	%rcx, 8(%rsp)
 	movq	%rdx, 16(%rsp)
-- 
2.25.1


  parent reply	other threads:[~2020-11-24 19:32 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <9bd94fd78a3c8f638b8a0d2269258da99d58e70f>
2020-11-24 19:32 ` [RFC PATCH v1 0/4] Libffi " madvenka
2020-11-24 19:32   ` [RFC PATCH v1 1/4] " madvenka
2020-11-24 19:49     ` Anthony Green
2020-11-24 20:02       ` Madhavan T. Venkataraman
2020-12-02 16:49       ` Madhavan T. Venkataraman
2020-12-02 18:14         ` Anthony Green
2020-12-02 21:33           ` Madhavan T. Venkataraman
2020-12-03 18:45             ` Madhavan T. Venkataraman
2020-12-05  2:38               ` [RFC PATCH v1 1/4] Static Trampolines - Quick question Madhavan T. Venkataraman
2020-11-24 19:32   ` madvenka [this message]
2020-11-24 19:32   ` [RFC PATCH v1 3/4] aarch64: Support for Static Trampolines madvenka
2020-11-24 19:32   ` [RFC PATCH v1 4/4] arm: " madvenka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201124193206.10289-3-madvenka@linux.microsoft.com \
    --to=madvenka@linux.microsoft.com \
    --cc=dj@redhat.com \
    --cc=fw@deneb.enyo.de \
    --cc=libffi-discuss@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).