From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [63.128.21.124]) by sourceware.org (Postfix) with ESMTP id 4C2DB384601D for ; Wed, 27 Jan 2021 03:31:28 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 4C2DB384601D Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-280-3gICXtzvPrmEdh_ynvolLw-1; Tue, 26 Jan 2021 22:31:26 -0500 X-MC-Unique: 3gICXtzvPrmEdh_ynvolLw-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id 23A4A8017FB; Wed, 27 Jan 2021 03:31:25 +0000 (UTC) Received: from greed.delorie.com (ovpn-114-77.rdu2.redhat.com [10.10.114.77]) by smtp.corp.redhat.com (Postfix) with ESMTPS id E82A760C05; Wed, 27 Jan 2021 03:31:24 +0000 (UTC) Received: from greed.delorie.com.redhat.com (localhost [127.0.0.1]) by greed.delorie.com (8.14.7/8.14.7) with ESMTP id 10R3VOLO005891; Tue, 26 Jan 2021 22:31:24 -0500 From: DJ Delorie To: madvenka@linux.microsoft.com Cc: libffi-discuss@sourceware.org Subject: Re: [RFC PATCH v3 2/5] x86: Support for Static Trampolines In-Reply-To: <20210115184653.124913-3-madvenka@linux.microsoft.com> Date: Tue, 26 Jan 2021 22:31:24 -0500 Message-ID: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.12 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain X-Spam-Status: No, score=-13.0 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, RCVD_IN_DNSWL_LOW, RCVD_IN_MSPIKE_H4, RCVD_IN_MSPIKE_WL, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libffi-discuss@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libffi-discuss mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 27 Jan 2021 03:31:30 -0000 madvenka@linux.microsoft.com writes: > diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c > index 39f9598..2a5cf5a 100644 > --- a/src/x86/ffi64.c > +++ b/src/x86/ffi64.c > @@ -713,7 +713,9 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, > #endif /* FFI_GO_CLOSURES */ > > extern void ffi_closure_unix64(void) FFI_HIDDEN; > +extern void ffi_closure_unix64_alt(void) FFI_HIDDEN; > extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; > +extern void ffi_closure_unix64_sse_alt(void) FFI_HIDDEN; Extern, but local to this port, yes? > @@ -742,6 +744,7 @@ ffi_prep_closure_loc (ffi_closure* closure, > 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 > }; > void (*dest)(void); > + void (*dest_alt)(void); > char *tramp = closure->tramp; Ok > @@ -752,13 +755,28 @@ ffi_prep_closure_loc (ffi_closure* closure, > return FFI_BAD_ABI; > > if (cif->flags & UNIX64_FLAG_XMM_ARGS) > - dest = ffi_closure_unix64_sse; > + { > + dest = ffi_closure_unix64_sse; > + dest_alt = ffi_closure_unix64_sse_alt; > + } > else > - dest = ffi_closure_unix64; > + { > + dest = ffi_closure_unix64; > + dest_alt = ffi_closure_unix64_alt; > + } > > + if (ffi_tramp_is_present(closure)) > + { > + /* Initialize the static trampoline's parameters. */ > + ffi_tramp_set_parms (closure->ftramp, dest_alt, closure); > + goto out; > + } > + > + /* Initialize the dynamic trampoline. */ Should these new APIs be inside #if FFI_EXEC_STATIC_TRAMP ? > +#if defined(FFI_EXEC_STATIC_TRAMP) > +void * > +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) > +{ > + extern void *trampoline_code_table_cet; > + extern void *trampoline_code_table; > + extern int ffi_cet_present; > + > + *map_size = UNIX64_TRAMP_MAP_SIZE; > + if (ffi_cet_present) { > + *tramp_size = UNIX64_TRAMP_SIZE_CET; > + return &trampoline_code_table_cet; > + } > + *tramp_size = UNIX64_TRAMP_SIZE; > + return &trampoline_code_table; > +} > +#endif Ok. > diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c > index a43a9eb..df81d66 100644 > --- a/src/x86/ffiw64.c > +++ b/src/x86/ffiw64.c > @@ -187,6 +187,7 @@ EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue, > > > extern void ffi_closure_win64(void) FFI_HIDDEN; > +extern void ffi_closure_win64_alt(void) FFI_HIDDEN; > > #ifdef FFI_GO_CLOSURES > extern void ffi_go_closure_win64(void) FFI_HIDDEN; > @@ -220,9 +221,18 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure, > return FFI_BAD_ABI; > } > > + if (ffi_tramp_is_present(closure)) > + { > + /* Initialize the static trampoline's parameters. */ > + ffi_tramp_set_parms (closure->ftramp, ffi_closure_win64_alt, closure); > + goto out; > + } > + > + /* Initialize the dynamic trampoline. */ > memcpy (tramp, trampoline, sizeof(trampoline)); > *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64; > > +out: > closure->cif = cif; > closure->fun = fun; > closure->user_data = user_data; Ok. > diff --git a/src/x86/internal64.h b/src/x86/internal64.h > index 512e955..410bdf2 100644 > --- a/src/x86/internal64.h > +++ b/src/x86/internal64.h > @@ -20,3 +20,14 @@ > #define UNIX64_FLAG_RET_IN_MEM (1 << 10) > #define UNIX64_FLAG_XMM_ARGS (1 << 11) > #define UNIX64_SIZE_SHIFT 12 > + > +#if defined(FFI_EXEC_STATIC_TRAMP) > +/* > + * For the trampoline code table mapping, a mapping size of 4K (base page size) > + * is chosen. > + */ > +#define UNIX64_TRAMP_MAP_SHIFT 12 > +#define UNIX64_TRAMP_MAP_SIZE (1 << UNIX64_TRAMP_MAP_SHIFT) > +#define UNIX64_TRAMP_SIZE_CET 40 > +#define UNIX64_TRAMP_SIZE 32 > +#endif Ok. > diff --git a/src/x86/unix64.S b/src/x86/unix64.S > index 89d7db1..e26ea2c 100644 > --- a/src/x86/unix64.S > +++ b/src/x86/unix64.S > @@ -63,6 +63,7 @@ > C(ffi_call_unix64): > L(UW0): > _CET_ENDBR > +L(endbr): This hack to detect CET should be replaced by the logic in ffitarget.h, or add a #define CET_ENABLED to ffitarget.h > @@ -270,6 +271,17 @@ L(UW6): > L(UW7): > ENDF(C(ffi_closure_unix64_sse)) > > + .balign 2 > + .globl C(ffi_closure_unix64_sse_alt) > + FFI_HIDDEN(C(ffi_closure_unix64_sse_alt)) > + > +C(ffi_closure_unix64_sse_alt): > + _CET_ENDBR > + movq 8(%rsp), %r10 > + addq $16, %rsp Copies first argument to %r10, discards return address and arg - closure will return to whoever called it's caller. I'm not sure how this works, which means *at least* a comment needs to be here ;-) > + jmp C(ffi_closure_unix64_sse) > +ENDF(C(ffi_closure_unix64_sse_alt)) > + > .balign 2 > .globl C(ffi_closure_unix64) > FFI_HIDDEN(C(ffi_closure_unix64)) > @@ -400,6 +412,17 @@ L(la): call PLT(C(abort)) > L(UW11): > ENDF(C(ffi_closure_unix64)) > > + .balign 8 > + .globl C(ffi_closure_unix64_alt) > + FFI_HIDDEN(C(ffi_closure_unix64_alt)) > + > +C(ffi_closure_unix64_alt): > + _CET_ENDBR > + movq 8(%rsp), %r10 > + addq $16, %rsp > + jmp C(ffi_closure_unix64) > + ENDF(C(ffi_closure_unix64_alt)) > + > .balign 2 > .globl C(ffi_go_closure_unix64_sse) > FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) Likewise. > +/* > + * The trampoline uses register r10. It saves the original value of r10 on > + * the stack. > + * > + * The trampoline has two parameters - target code to jump to and data for > + * the target code. The trampoline extracts the parameters from its parameter > + * block (see tramp_table_map()). The trampoline saves the data address on > + * the stack. Finally, it jumps to the target code. > + * > + * The target code can choose to: > + * > + * - restore the value of r10 > + * - load the data address in a register > + * - restore the stack pointer to what it was when the trampoline was invoked. > + */ > + > + .align UNIX64_TRAMP_MAP_SIZE > + .globl trampoline_code_table_cet > + FFI_HIDDEN(C(trampoline_code_table_cet)) > + > +C(trampoline_code_table_cet): > + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE_CET > + _CET_ENDBR > + subq $16, %rsp /* Make space on the stack */ > + movq %r10, (%rsp) /* Save %r10 on stack */ > + movq 4077(%rip), %r10 /* Copy data into %r10 */ > + movq %r10, 8(%rsp) /* Save data on stack */ > + movq 4073(%rip), %r10 /* Copy code into %r10 */ > + jmp *%r10 /* Jump to code */ > + nop > + nop > + nop > + nop > + nop > + nop > + .endr > +ENDF(C(trampoline_code_table_cet)) > + > + .align UNIX64_TRAMP_MAP_SIZE > + .globl trampoline_code_table > + FFI_HIDDEN(C(trampoline_code_table)) > + > +C(trampoline_code_table): > + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE > + subq $16, %rsp /* Make space on the stack */ > + movq %r10, (%rsp) /* Save %r10 on stack */ > + movq 4081(%rip), %r10 /* Copy data into %r10 */ > + movq %r10, 8(%rsp) /* Save data on stack */ > + movq 4077(%rip), %r10 /* Copy code into %r10 */ > + jmp *%r10 /* Jump to code */ > + nop > + nop > + .endr > +ENDF(C(trampoline_code_table)) > + .align UNIX64_TRAMP_MAP_SIZE > +#endif /* FFI_EXEC_STATIC_TRAMP */ Why does the longer trampoline (with endbr) have *more* nops? Is it for 8-byte alignment? If so, comment ;-) > @@ -615,6 +712,13 @@ L(EFDE5): > .quad 0 > #endif > > + .section .rodata > + .align 8 > + .globl ffi_cet_present > +ffi_cet_present: > + .set L6,L(endbr)-L(UW0) > + .int L6 > + Again, there are preprocessor directives that do this better. > diff --git a/src/x86/win64.S b/src/x86/win64.S > index 8315e8b..6ca3068 100644 > --- a/src/x86/win64.S > +++ b/src/x86/win64.S > @@ -234,6 +234,18 @@ C(ffi_closure_win64): > > cfi_endproc > SEH(.seh_endproc) > + > + .align 8 > + .globl C(ffi_closure_win64_alt) > + FFI_HIDDEN(C(ffi_closure_win64_alt)) > + > + SEH(.seh_proc ffi_closure_win64_alt) > +C(ffi_closure_win64_alt): > + _CET_ENDBR > + movq 8(%rsp), %r10 > + addq $16, %rsp > + jmp C(ffi_closure_win64) > + SEH(.seh_endproc) > #endif /* __x86_64__ */ Ok.