public inbox for libffi-discuss@sourceware.org
 help / color / mirror / Atom feed
From: DJ Delorie <dj@redhat.com>
To: madvenka@linux.microsoft.com
Cc: libffi-discuss@sourceware.org
Subject: Re: [RFC PATCH v3 2/5] x86: Support for Static Trampolines
Date: Tue, 26 Jan 2021 22:31:24 -0500	[thread overview]
Message-ID: <xn4kj3hx83.fsf@greed.delorie.com> (raw)
In-Reply-To: <20210115184653.124913-3-madvenka@linux.microsoft.com>


madvenka@linux.microsoft.com writes:

> diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
> index 39f9598..2a5cf5a 100644
> --- a/src/x86/ffi64.c
> +++ b/src/x86/ffi64.c
> @@ -713,7 +713,9 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
>  #endif /* FFI_GO_CLOSURES */
>  
>  extern void ffi_closure_unix64(void) FFI_HIDDEN;
> +extern void ffi_closure_unix64_alt(void) FFI_HIDDEN;
>  extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
> +extern void ffi_closure_unix64_sse_alt(void) FFI_HIDDEN;

Extern, but local to this port, yes?

> @@ -742,6 +744,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
>      0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
>    };
>    void (*dest)(void);
> +  void (*dest_alt)(void);
>    char *tramp = closure->tramp;

Ok

> @@ -752,13 +755,28 @@ ffi_prep_closure_loc (ffi_closure* closure,
>      return FFI_BAD_ABI;
>  
>    if (cif->flags & UNIX64_FLAG_XMM_ARGS)
> -    dest = ffi_closure_unix64_sse;
> +    {
> +      dest = ffi_closure_unix64_sse;
> +      dest_alt = ffi_closure_unix64_sse_alt;
> +    }
>    else
> -    dest = ffi_closure_unix64;
> +    {
> +      dest = ffi_closure_unix64;
> +      dest_alt = ffi_closure_unix64_alt;
> +    }
>  
> +  if (ffi_tramp_is_present(closure))
> +    {
> +      /* Initialize the static trampoline's parameters. */
> +      ffi_tramp_set_parms (closure->ftramp, dest_alt, closure);
> +      goto out;
> +    }
> +
> +  /* Initialize the dynamic trampoline. */

Should these new APIs be inside #if FFI_EXEC_STATIC_TRAMP ?

> +#if defined(FFI_EXEC_STATIC_TRAMP)
> +void *
> +ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
> +{
> +  extern void *trampoline_code_table_cet;
> +  extern void *trampoline_code_table;
> +  extern int ffi_cet_present;
> +
> +  *map_size = UNIX64_TRAMP_MAP_SIZE;
> +  if (ffi_cet_present) {
> +    *tramp_size = UNIX64_TRAMP_SIZE_CET;
> +    return &trampoline_code_table_cet;
> +  }
> +  *tramp_size = UNIX64_TRAMP_SIZE;
> +  return &trampoline_code_table;
> +}
> +#endif

Ok.

> diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
> index a43a9eb..df81d66 100644
> --- a/src/x86/ffiw64.c
> +++ b/src/x86/ffiw64.c
> @@ -187,6 +187,7 @@ EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue,
>  
>  
>  extern void ffi_closure_win64(void) FFI_HIDDEN;
> +extern void ffi_closure_win64_alt(void) FFI_HIDDEN;
>  
>  #ifdef FFI_GO_CLOSURES
>  extern void ffi_go_closure_win64(void) FFI_HIDDEN;
> @@ -220,9 +221,18 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
>        return FFI_BAD_ABI;
>      }
>  
> +  if (ffi_tramp_is_present(closure))
> +    {
> +      /* Initialize the static trampoline's parameters. */
> +      ffi_tramp_set_parms (closure->ftramp, ffi_closure_win64_alt, closure);
> +      goto out;
> +    }
> +
> +  /* Initialize the dynamic trampoline. */
>    memcpy (tramp, trampoline, sizeof(trampoline));
>    *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
>  
> +out:
>    closure->cif = cif;
>    closure->fun = fun;
>    closure->user_data = user_data;

Ok.

> diff --git a/src/x86/internal64.h b/src/x86/internal64.h
> index 512e955..410bdf2 100644
> --- a/src/x86/internal64.h
> +++ b/src/x86/internal64.h
> @@ -20,3 +20,14 @@
>  #define UNIX64_FLAG_RET_IN_MEM	(1 << 10)
>  #define UNIX64_FLAG_XMM_ARGS	(1 << 11)
>  #define UNIX64_SIZE_SHIFT	12
> +
> +#if defined(FFI_EXEC_STATIC_TRAMP)
> +/*
> + * For the trampoline code table mapping, a mapping size of 4K (base page size)
> + * is chosen.
> + */
> +#define UNIX64_TRAMP_MAP_SHIFT	12
> +#define UNIX64_TRAMP_MAP_SIZE	(1 << UNIX64_TRAMP_MAP_SHIFT)
> +#define UNIX64_TRAMP_SIZE_CET	40
> +#define UNIX64_TRAMP_SIZE	32
> +#endif

Ok.

> diff --git a/src/x86/unix64.S b/src/x86/unix64.S
> index 89d7db1..e26ea2c 100644
> --- a/src/x86/unix64.S
> +++ b/src/x86/unix64.S
> @@ -63,6 +63,7 @@
>  C(ffi_call_unix64):
>  L(UW0):
>  	_CET_ENDBR
> +L(endbr):

This hack to detect CET should be replaced by the logic in ffitarget.h,
or add a #define CET_ENABLED to ffitarget.h

> @@ -270,6 +271,17 @@ L(UW6):
>  L(UW7):
>  ENDF(C(ffi_closure_unix64_sse))
>  
> +	.balign	2
> +	.globl	C(ffi_closure_unix64_sse_alt)
> +	FFI_HIDDEN(C(ffi_closure_unix64_sse_alt))
> +
> +C(ffi_closure_unix64_sse_alt):
> +	_CET_ENDBR
> +	movq	8(%rsp), %r10
> +	addq	$16, %rsp

Copies first argument to %r10, discards return address and arg - closure
will return to whoever called its caller.  I'm not sure how this works,
which means *at least* a comment needs to be here ;-)

> +	jmp	C(ffi_closure_unix64_sse)
> +ENDF(C(ffi_closure_unix64_sse_alt))
> +
>  	.balign	2
>  	.globl	C(ffi_closure_unix64)
>  	FFI_HIDDEN(C(ffi_closure_unix64))
> @@ -400,6 +412,17 @@ L(la):	call	PLT(C(abort))
>  L(UW11):
>  ENDF(C(ffi_closure_unix64))
>  
> +	.balign	8
> +	.globl	C(ffi_closure_unix64_alt)
> +	FFI_HIDDEN(C(ffi_closure_unix64_alt))
> +
> +C(ffi_closure_unix64_alt):
> +	_CET_ENDBR
> +	movq	8(%rsp), %r10
> +	addq	$16, %rsp
> +	jmp	C(ffi_closure_unix64)
> +	ENDF(C(ffi_closure_unix64_alt))
> +
>  	.balign	2
>  	.globl	C(ffi_go_closure_unix64_sse)
>  	FFI_HIDDEN(C(ffi_go_closure_unix64_sse))

Likewise.

> +/*
> + * The trampoline uses register r10. It saves the original value of r10 on
> + * the stack.
> + *
> + * The trampoline has two parameters - target code to jump to and data for
> + * the target code. The trampoline extracts the parameters from its parameter
> + * block (see tramp_table_map()). The trampoline saves the data address on
> + * the stack. Finally, it jumps to the target code.
> + *
> + * The target code can choose to:
> + *
> + * - restore the value of r10
> + * - load the data address in a register
> + * - restore the stack pointer to what it was when the trampoline was invoked.
> + */
> +
> +	.align	UNIX64_TRAMP_MAP_SIZE
> +	.globl	trampoline_code_table_cet
> +	FFI_HIDDEN(C(trampoline_code_table_cet))
> +
> +C(trampoline_code_table_cet):
> +	.rept	UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE_CET
> +	_CET_ENDBR
> +	subq	$16, %rsp		/* Make space on the stack */
> +	movq	%r10, (%rsp)		/* Save %r10 on stack */
> +	movq	4077(%rip), %r10	/* Copy data into %r10 */
> +	movq	%r10, 8(%rsp)		/* Save data on stack */
> +	movq	4073(%rip), %r10	/* Copy code into %r10 */
> +	jmp	*%r10			/* Jump to code */
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	.endr
> +ENDF(C(trampoline_code_table_cet))
> +
> +	.align	UNIX64_TRAMP_MAP_SIZE
> +	.globl	trampoline_code_table
> +	FFI_HIDDEN(C(trampoline_code_table))
> +
> +C(trampoline_code_table):
> +	.rept	UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE
> +	subq	$16, %rsp		/* Make space on the stack */
> +	movq	%r10, (%rsp)		/* Save %r10 on stack */
> +	movq	4081(%rip), %r10	/* Copy data into %r10 */
> +	movq	%r10, 8(%rsp)		/* Save data on stack */
> +	movq	4077(%rip), %r10	/* Copy code into %r10 */
> +	jmp	*%r10			/* Jump to code */
> +	nop
> +	nop
> +	.endr
> +ENDF(C(trampoline_code_table))
> +	.align	UNIX64_TRAMP_MAP_SIZE
> +#endif /* FFI_EXEC_STATIC_TRAMP */

Why does the longer trampoline (with endbr) have *more* nops?  Is it for
8-byte alignment?  If so, comment ;-)

> @@ -615,6 +712,13 @@ L(EFDE5):
>  	.quad    0
>  #endif
>  
> +	.section .rodata
> +	.align 8
> +	.globl ffi_cet_present
> +ffi_cet_present:
> +	.set	L6,L(endbr)-L(UW0)
> +	.int	L6
> +

Again, there are preprocessor directives that do this better.

> diff --git a/src/x86/win64.S b/src/x86/win64.S
> index 8315e8b..6ca3068 100644
> --- a/src/x86/win64.S
> +++ b/src/x86/win64.S
> @@ -234,6 +234,18 @@ C(ffi_closure_win64):
>  
>  	cfi_endproc
>  	SEH(.seh_endproc)
> +
> +	.align	8
> +	.globl	C(ffi_closure_win64_alt)
> +	FFI_HIDDEN(C(ffi_closure_win64_alt))
> +
> +	SEH(.seh_proc ffi_closure_win64_alt)
> +C(ffi_closure_win64_alt):
> +	_CET_ENDBR
> +	movq	8(%rsp), %r10
> +	addq	$16, %rsp
> +	jmp	C(ffi_closure_win64)
> +	SEH(.seh_endproc)
>  #endif /* __x86_64__ */

Ok.


  reply	other threads:[~2021-01-27  3:31 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1ef5c7e1c9a6ebb140a476ba555ec955681f4fba>
2021-01-15 18:46 ` [RFC PATCH v3 0/5] Libffi " madvenka
2021-01-15 18:46   ` [RFC PATCH v3 1/5] " madvenka
2021-01-27  3:31     ` DJ Delorie
2021-01-27 21:51       ` Madhavan T. Venkataraman
2021-01-27 22:15         ` DJ Delorie
2021-01-27 22:43           ` Madhavan T. Venkataraman
2021-01-15 18:46   ` [RFC PATCH v3 2/5] x86: Support for " madvenka
2021-01-27  3:31     ` DJ Delorie [this message]
2021-01-28 21:59       ` Madhavan T. Venkataraman
2021-01-28 22:17         ` DJ Delorie
2021-01-28 23:25           ` Madhavan T. Venkataraman
2021-01-29  2:09             ` DJ Delorie
2021-01-29  2:38               ` Madhavan T. Venkataraman
2021-01-29  2:48                 ` DJ Delorie
2021-01-29  3:24                   ` Madhavan T. Venkataraman
2021-01-29  6:07                     ` DJ Delorie
2021-02-01 19:46                 ` DJ Delorie
2021-01-15 18:46   ` [RFC PATCH v3 3/5] i386: " madvenka
2021-01-15 18:46   ` [RFC PATCH v3 4/5] arm64: " madvenka
2021-01-15 18:46   ` [RFC PATCH v3 5/5] arm: " madvenka
2021-01-26 23:41   ` [RFC PATCH v3 0/5] Libffi " Anthony Green
2021-01-27 17:20     ` Madhavan T. Venkataraman
2021-01-27 18:00       ` Anthony Green
2021-01-27 19:45         ` Madhavan T. Venkataraman
2021-01-28 14:21           ` Anthony Green
2021-01-28 17:01             ` Madhavan T. Venkataraman
2021-02-05 18:20               ` Madhavan T. Venkataraman
2021-02-05 18:46                 ` Anthony Green
2021-02-05 19:38                   ` Madhavan T. Venkataraman
2021-02-07 16:09                     ` Madhavan T. Venkataraman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xn4kj3hx83.fsf@greed.delorie.com \
    --to=dj@redhat.com \
    --cc=libffi-discuss@sourceware.org \
    --cc=madvenka@linux.microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).