From: DJ Delorie <dj@redhat.com>
To: madvenka@linux.microsoft.com
Cc: libffi-discuss@sourceware.org
Subject: Re: [RFC PATCH v3 2/5] x86: Support for Static Trampolines
Date: Tue, 26 Jan 2021 22:31:24 -0500 [thread overview]
Message-ID: <xn4kj3hx83.fsf@greed.delorie.com> (raw)
In-Reply-To: <20210115184653.124913-3-madvenka@linux.microsoft.com>
madvenka@linux.microsoft.com writes:
> diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
> index 39f9598..2a5cf5a 100644
> --- a/src/x86/ffi64.c
> +++ b/src/x86/ffi64.c
> @@ -713,7 +713,9 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
> #endif /* FFI_GO_CLOSURES */
>
> extern void ffi_closure_unix64(void) FFI_HIDDEN;
> +extern void ffi_closure_unix64_alt(void) FFI_HIDDEN;
> extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
> +extern void ffi_closure_unix64_sse_alt(void) FFI_HIDDEN;
Extern, but local to this port, yes?
> @@ -742,6 +744,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
> 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
> };
> void (*dest)(void);
> + void (*dest_alt)(void);
> char *tramp = closure->tramp;
Ok
> @@ -752,13 +755,28 @@ ffi_prep_closure_loc (ffi_closure* closure,
> return FFI_BAD_ABI;
>
> if (cif->flags & UNIX64_FLAG_XMM_ARGS)
> - dest = ffi_closure_unix64_sse;
> + {
> + dest = ffi_closure_unix64_sse;
> + dest_alt = ffi_closure_unix64_sse_alt;
> + }
> else
> - dest = ffi_closure_unix64;
> + {
> + dest = ffi_closure_unix64;
> + dest_alt = ffi_closure_unix64_alt;
> + }
>
> + if (ffi_tramp_is_present(closure))
> + {
> + /* Initialize the static trampoline's parameters. */
> + ffi_tramp_set_parms (closure->ftramp, dest_alt, closure);
> + goto out;
> + }
> +
> + /* Initialize the dynamic trampoline. */
Should these new APIs be inside #if FFI_EXEC_STATIC_TRAMP ?
> +#if defined(FFI_EXEC_STATIC_TRAMP)
> +void *
> +ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
> +{
> + extern void *trampoline_code_table_cet;
> + extern void *trampoline_code_table;
> + extern int ffi_cet_present;
> +
> + *map_size = UNIX64_TRAMP_MAP_SIZE;
> + if (ffi_cet_present) {
> + *tramp_size = UNIX64_TRAMP_SIZE_CET;
> + return &trampoline_code_table_cet;
> + }
> + *tramp_size = UNIX64_TRAMP_SIZE;
> + return &trampoline_code_table;
> +}
> +#endif
Ok.
> diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
> index a43a9eb..df81d66 100644
> --- a/src/x86/ffiw64.c
> +++ b/src/x86/ffiw64.c
> @@ -187,6 +187,7 @@ EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue,
>
>
> extern void ffi_closure_win64(void) FFI_HIDDEN;
> +extern void ffi_closure_win64_alt(void) FFI_HIDDEN;
>
> #ifdef FFI_GO_CLOSURES
> extern void ffi_go_closure_win64(void) FFI_HIDDEN;
> @@ -220,9 +221,18 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
> return FFI_BAD_ABI;
> }
>
> + if (ffi_tramp_is_present(closure))
> + {
> + /* Initialize the static trampoline's parameters. */
> + ffi_tramp_set_parms (closure->ftramp, ffi_closure_win64_alt, closure);
> + goto out;
> + }
> +
> + /* Initialize the dynamic trampoline. */
> memcpy (tramp, trampoline, sizeof(trampoline));
> *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
>
> +out:
> closure->cif = cif;
> closure->fun = fun;
> closure->user_data = user_data;
Ok.
> diff --git a/src/x86/internal64.h b/src/x86/internal64.h
> index 512e955..410bdf2 100644
> --- a/src/x86/internal64.h
> +++ b/src/x86/internal64.h
> @@ -20,3 +20,14 @@
> #define UNIX64_FLAG_RET_IN_MEM (1 << 10)
> #define UNIX64_FLAG_XMM_ARGS (1 << 11)
> #define UNIX64_SIZE_SHIFT 12
> +
> +#if defined(FFI_EXEC_STATIC_TRAMP)
> +/*
> + * For the trampoline code table mapping, a mapping size of 4K (base page size)
> + * is chosen.
> + */
> +#define UNIX64_TRAMP_MAP_SHIFT 12
> +#define UNIX64_TRAMP_MAP_SIZE (1 << UNIX64_TRAMP_MAP_SHIFT)
> +#define UNIX64_TRAMP_SIZE_CET 40
> +#define UNIX64_TRAMP_SIZE 32
> +#endif
Ok.
> diff --git a/src/x86/unix64.S b/src/x86/unix64.S
> index 89d7db1..e26ea2c 100644
> --- a/src/x86/unix64.S
> +++ b/src/x86/unix64.S
> @@ -63,6 +63,7 @@
> C(ffi_call_unix64):
> L(UW0):
> _CET_ENDBR
> +L(endbr):
This hack to detect CET should be replaced by the logic in ffitarget.h,
or by adding a #define CET_ENABLED to ffitarget.h.
> @@ -270,6 +271,17 @@ L(UW6):
> L(UW7):
> ENDF(C(ffi_closure_unix64_sse))
>
> + .balign 2
> + .globl C(ffi_closure_unix64_sse_alt)
> + FFI_HIDDEN(C(ffi_closure_unix64_sse_alt))
> +
> +C(ffi_closure_unix64_sse_alt):
> + _CET_ENDBR
> + movq 8(%rsp), %r10
> + addq $16, %rsp
Copies first argument to %r10, discards return address and arg - closure
will return to whoever called its caller. I'm not sure how this works,
which means *at least* a comment needs to be here ;-)
> + jmp C(ffi_closure_unix64_sse)
> +ENDF(C(ffi_closure_unix64_sse_alt))
> +
> .balign 2
> .globl C(ffi_closure_unix64)
> FFI_HIDDEN(C(ffi_closure_unix64))
> @@ -400,6 +412,17 @@ L(la): call PLT(C(abort))
> L(UW11):
> ENDF(C(ffi_closure_unix64))
>
> + .balign 8
> + .globl C(ffi_closure_unix64_alt)
> + FFI_HIDDEN(C(ffi_closure_unix64_alt))
> +
> +C(ffi_closure_unix64_alt):
> + _CET_ENDBR
> + movq 8(%rsp), %r10
> + addq $16, %rsp
> + jmp C(ffi_closure_unix64)
> + ENDF(C(ffi_closure_unix64_alt))
> +
> .balign 2
> .globl C(ffi_go_closure_unix64_sse)
> FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
Likewise.
> +/*
> + * The trampoline uses register r10. It saves the original value of r10 on
> + * the stack.
> + *
> + * The trampoline has two parameters - target code to jump to and data for
> + * the target code. The trampoline extracts the parameters from its parameter
> + * block (see tramp_table_map()). The trampoline saves the data address on
> + * the stack. Finally, it jumps to the target code.
> + *
> + * The target code can choose to:
> + *
> + * - restore the value of r10
> + * - load the data address in a register
> + * - restore the stack pointer to what it was when the trampoline was invoked.
> + */
> +
> + .align UNIX64_TRAMP_MAP_SIZE
> + .globl trampoline_code_table_cet
> + FFI_HIDDEN(C(trampoline_code_table_cet))
> +
> +C(trampoline_code_table_cet):
> + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE_CET
> + _CET_ENDBR
> + subq $16, %rsp /* Make space on the stack */
> + movq %r10, (%rsp) /* Save %r10 on stack */
> + movq 4077(%rip), %r10 /* Copy data into %r10 */
> + movq %r10, 8(%rsp) /* Save data on stack */
> + movq 4073(%rip), %r10 /* Copy code into %r10 */
> + jmp *%r10 /* Jump to code */
> + nop
> + nop
> + nop
> + nop
> + nop
> + nop
> + .endr
> +ENDF(C(trampoline_code_table_cet))
> +
> + .align UNIX64_TRAMP_MAP_SIZE
> + .globl trampoline_code_table
> + FFI_HIDDEN(C(trampoline_code_table))
> +
> +C(trampoline_code_table):
> + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE
> + subq $16, %rsp /* Make space on the stack */
> + movq %r10, (%rsp) /* Save %r10 on stack */
> + movq 4081(%rip), %r10 /* Copy data into %r10 */
> + movq %r10, 8(%rsp) /* Save data on stack */
> + movq 4077(%rip), %r10 /* Copy code into %r10 */
> + jmp *%r10 /* Jump to code */
> + nop
> + nop
> + .endr
> +ENDF(C(trampoline_code_table))
> + .align UNIX64_TRAMP_MAP_SIZE
> +#endif /* FFI_EXEC_STATIC_TRAMP */
Why does the longer trampoline (with endbr) have *more* nops? Is it for
8-byte alignment? If so, comment ;-)
> @@ -615,6 +712,13 @@ L(EFDE5):
> .quad 0
> #endif
>
> + .section .rodata
> + .align 8
> + .globl ffi_cet_present
> +ffi_cet_present:
> + .set L6,L(endbr)-L(UW0)
> + .int L6
> +
Again, there are preprocessor directives that do this better.
> diff --git a/src/x86/win64.S b/src/x86/win64.S
> index 8315e8b..6ca3068 100644
> --- a/src/x86/win64.S
> +++ b/src/x86/win64.S
> @@ -234,6 +234,18 @@ C(ffi_closure_win64):
>
> cfi_endproc
> SEH(.seh_endproc)
> +
> + .align 8
> + .globl C(ffi_closure_win64_alt)
> + FFI_HIDDEN(C(ffi_closure_win64_alt))
> +
> + SEH(.seh_proc ffi_closure_win64_alt)
> +C(ffi_closure_win64_alt):
> + _CET_ENDBR
> + movq 8(%rsp), %r10
> + addq $16, %rsp
> + jmp C(ffi_closure_win64)
> + SEH(.seh_endproc)
> #endif /* __x86_64__ */
Ok.
next prev parent reply other threads:[~2021-01-27 3:31 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1ef5c7e1c9a6ebb140a476ba555ec955681f4fba>
2021-01-15 18:46 ` [RFC PATCH v3 0/5] Libffi " madvenka
2021-01-15 18:46 ` [RFC PATCH v3 1/5] " madvenka
2021-01-27 3:31 ` DJ Delorie
2021-01-27 21:51 ` Madhavan T. Venkataraman
2021-01-27 22:15 ` DJ Delorie
2021-01-27 22:43 ` Madhavan T. Venkataraman
2021-01-15 18:46 ` [RFC PATCH v3 2/5] x86: Support for " madvenka
2021-01-27 3:31 ` DJ Delorie [this message]
2021-01-28 21:59 ` Madhavan T. Venkataraman
2021-01-28 22:17 ` DJ Delorie
2021-01-28 23:25 ` Madhavan T. Venkataraman
2021-01-29 2:09 ` DJ Delorie
2021-01-29 2:38 ` Madhavan T. Venkataraman
2021-01-29 2:48 ` DJ Delorie
2021-01-29 3:24 ` Madhavan T. Venkataraman
2021-01-29 6:07 ` DJ Delorie
2021-02-01 19:46 ` DJ Delorie
2021-01-15 18:46 ` [RFC PATCH v3 3/5] i386: " madvenka
2021-01-15 18:46 ` [RFC PATCH v3 4/5] arm64: " madvenka
2021-01-15 18:46 ` [RFC PATCH v3 5/5] arm: " madvenka
2021-01-26 23:41 ` [RFC PATCH v3 0/5] Libffi " Anthony Green
2021-01-27 17:20 ` Madhavan T. Venkataraman
2021-01-27 18:00 ` Anthony Green
2021-01-27 19:45 ` Madhavan T. Venkataraman
2021-01-28 14:21 ` Anthony Green
2021-01-28 17:01 ` Madhavan T. Venkataraman
2021-02-05 18:20 ` Madhavan T. Venkataraman
2021-02-05 18:46 ` Anthony Green
2021-02-05 19:38 ` Madhavan T. Venkataraman
2021-02-07 16:09 ` Madhavan T. Venkataraman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=xn4kj3hx83.fsf@greed.delorie.com \
--to=dj@redhat.com \
--cc=libffi-discuss@sourceware.org \
--cc=madvenka@linux.microsoft.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).