From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by sourceware.org (Postfix) with ESMTP id EC01C3987961 for ; Fri, 15 Jan 2021 18:47:05 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org EC01C3987961 Received: from x64host.home (unknown [47.187.219.45]) by linux.microsoft.com (Postfix) with ESMTPSA id 144EB20B6C41; Fri, 15 Jan 2021 10:47:05 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 144EB20B6C41 From: madvenka@linux.microsoft.com To: libffi-discuss@sourceware.org Cc: green@moxielogic.com, fweimer@redhat.com, dj@redhat.com, madvenka@linux.microsoft.com Subject: [RFC PATCH v3 2/5] x86: Support for Static Trampolines Date: Fri, 15 Jan 2021 12:46:50 -0600 Message-Id: <20210115184653.124913-3-madvenka@linux.microsoft.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20210115184653.124913-1-madvenka@linux.microsoft.com> References: <1ef5c7e1c9a6ebb140a476ba555ec955681f4fba> <20210115184653.124913-1-madvenka@linux.microsoft.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-27.1 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, ENV_AND_HDR_SPF_MATCH, GIT_PATCH_0, SPF_HELO_PASS, SPF_PASS, TXREP, USER_IN_DEF_DKIM_WL, USER_IN_DEF_SPF_WL autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libffi-discuss@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libffi-discuss mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 15 Jan 2021 18:47:07 -0000 From: "Madhavan T. Venkataraman" - Define the arch-specific initialization function ffi_tramp_arch () that returns trampoline size information to common code. - Define the trampoline code mapping and data mapping sizes. - Define the trampoline code table statically. 
Define two tables, actually, one with CET and one without. - Introduce a tiny prolog for each ABI handling function. The ABI handlers addressed are: - ffi_closure_unix64 - ffi_closure_unix64_sse - ffi_closure_win64 The prolog functions are called: - ffi_closure_unix64_alt - ffi_closure_unix64_sse_alt - ffi_closure_win64_alt The legacy trampoline jumps to the ABI handler. The static trampoline jumps to the prolog function. The prolog function uses the information provided by the static trampoline, sets things up for the ABI handler and then jumps to the ABI handler. - Call ffi_closure_tramp_init () in ffi_prep_closure_loc () to initialize static trampoline parameters. Signed-off-by: Madhavan T. Venkataraman --- src/x86/ffi64.c | 40 ++++++++++++++++- src/x86/ffiw64.c | 10 +++++ src/x86/internal64.h | 11 +++++ src/x86/unix64.S | 104 +++++++++++++++++++++++++++++++++++++++++++ src/x86/win64.S | 12 +++++ 5 files changed, 175 insertions(+), 2 deletions(-) diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c index 39f9598..2a5cf5a 100644 --- a/src/x86/ffi64.c +++ b/src/x86/ffi64.c @@ -713,7 +713,9 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, #endif /* FFI_GO_CLOSURES */ extern void ffi_closure_unix64(void) FFI_HIDDEN; +extern void ffi_closure_unix64_alt(void) FFI_HIDDEN; extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; +extern void ffi_closure_unix64_sse_alt(void) FFI_HIDDEN; #ifndef __ILP32__ extern ffi_status @@ -742,6 +744,7 @@ ffi_prep_closure_loc (ffi_closure* closure, 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 }; void (*dest)(void); + void (*dest_alt)(void); char *tramp = closure->tramp; #ifndef __ILP32__ @@ -752,13 +755,28 @@ ffi_prep_closure_loc (ffi_closure* closure, return FFI_BAD_ABI; if (cif->flags & UNIX64_FLAG_XMM_ARGS) - dest = ffi_closure_unix64_sse; + { + dest = ffi_closure_unix64_sse; + dest_alt = ffi_closure_unix64_sse_alt; + } else - dest = ffi_closure_unix64; + { + dest = ffi_closure_unix64; + dest_alt = ffi_closure_unix64_alt; + } + if 
(ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. */ + ffi_tramp_set_parms (closure->ftramp, dest_alt, closure); + goto out; + } + + /* Initialize the dynamic trampoline. */ memcpy (tramp, trampoline, sizeof(trampoline)); *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest; +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; @@ -892,4 +910,22 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, #endif /* FFI_GO_CLOSURES */ +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table_cet; + extern void *trampoline_code_table; + extern int ffi_cet_present; + + *map_size = UNIX64_TRAMP_MAP_SIZE; + if (ffi_cet_present) { + *tramp_size = UNIX64_TRAMP_SIZE_CET; + return &trampoline_code_table_cet; + } + *tramp_size = UNIX64_TRAMP_SIZE; + return &trampoline_code_table; +} +#endif + #endif /* __x86_64__ */ diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c index a43a9eb..df81d66 100644 --- a/src/x86/ffiw64.c +++ b/src/x86/ffiw64.c @@ -187,6 +187,7 @@ EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue, extern void ffi_closure_win64(void) FFI_HIDDEN; +extern void ffi_closure_win64_alt(void) FFI_HIDDEN; #ifdef FFI_GO_CLOSURES extern void ffi_go_closure_win64(void) FFI_HIDDEN; @@ -220,9 +221,18 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure, return FFI_BAD_ABI; } + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. */ + ffi_tramp_set_parms (closure->ftramp, ffi_closure_win64_alt, closure); + goto out; + } + + /* Initialize the dynamic trampoline. 
*/ memcpy (tramp, trampoline, sizeof(trampoline)); *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64; +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; diff --git a/src/x86/internal64.h b/src/x86/internal64.h index 512e955..410bdf2 100644 --- a/src/x86/internal64.h +++ b/src/x86/internal64.h @@ -20,3 +20,14 @@ #define UNIX64_FLAG_RET_IN_MEM (1 << 10) #define UNIX64_FLAG_XMM_ARGS (1 << 11) #define UNIX64_SIZE_SHIFT 12 + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * For the trampoline code table mapping, a mapping size of 4K (base page size) + * is chosen. + */ +#define UNIX64_TRAMP_MAP_SHIFT 12 +#define UNIX64_TRAMP_MAP_SIZE (1 << UNIX64_TRAMP_MAP_SHIFT) +#define UNIX64_TRAMP_SIZE_CET 40 +#define UNIX64_TRAMP_SIZE 32 +#endif diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 89d7db1..e26ea2c 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -63,6 +63,7 @@ C(ffi_call_unix64): L(UW0): _CET_ENDBR +L(endbr): movq (%rsp), %r10 /* Load return address. */ leaq (%rdi, %rsi), %rax /* Find local stack base. */ movq %rdx, (%rax) /* Save flags. 
*/ @@ -270,6 +271,17 @@ L(UW6): L(UW7): ENDF(C(ffi_closure_unix64_sse)) + .balign 2 + .globl C(ffi_closure_unix64_sse_alt) + FFI_HIDDEN(C(ffi_closure_unix64_sse_alt)) + +C(ffi_closure_unix64_sse_alt): + _CET_ENDBR + movq 8(%rsp), %r10 + addq $16, %rsp + jmp C(ffi_closure_unix64_sse) +ENDF(C(ffi_closure_unix64_sse_alt)) + .balign 2 .globl C(ffi_closure_unix64) FFI_HIDDEN(C(ffi_closure_unix64)) @@ -400,6 +412,17 @@ L(la): call PLT(C(abort)) L(UW11): ENDF(C(ffi_closure_unix64)) + .balign 8 + .globl C(ffi_closure_unix64_alt) + FFI_HIDDEN(C(ffi_closure_unix64_alt)) + +C(ffi_closure_unix64_alt): + _CET_ENDBR + movq 8(%rsp), %r10 + addq $16, %rsp + jmp C(ffi_closure_unix64) + ENDF(C(ffi_closure_unix64_alt)) + .balign 2 .globl C(ffi_go_closure_unix64_sse) FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) @@ -456,6 +479,80 @@ L(sse_entry2): L(UW17): ENDF(C(ffi_go_closure_unix64)) +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * Below is the definition of the trampoline code table. Each element in + * the code table is a trampoline. + * + * Because we jump to the trampoline, we place a _CET_ENDBR at the + * beginning of the trampoline to mark it as a valid branch target. This is + * part of the Intel CET (Control Flow Enforcement Technology). + * + * If CET is present, _CET_ENDBR is defined as the endbr64 instruction. Else, + * _CET_ENDBR is defined empty. Consequently, the size of the trampoline and + * the PC-relative offsets in the trampoline code also differ. So, two versions + * of the code table have been defined - one with the endbr64 instruction and + * one without. ffi_tramp_arch() figures out which version of the code table + * should be used by looking at ffi_cet_present (defined at the end of this + * file). + */ +/* + * The trampoline uses register r10. It saves the original value of r10 on + * the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. 
The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. + * + * The target code can choose to: + * + * - restore the value of r10 + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. + */ + + .align UNIX64_TRAMP_MAP_SIZE + .globl trampoline_code_table_cet + FFI_HIDDEN(C(trampoline_code_table_cet)) + +C(trampoline_code_table_cet): + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE_CET + _CET_ENDBR + subq $16, %rsp /* Make space on the stack */ + movq %r10, (%rsp) /* Save %r10 on stack */ + movq 4077(%rip), %r10 /* Copy data into %r10 */ + movq %r10, 8(%rsp) /* Save data on stack */ + movq 4073(%rip), %r10 /* Copy code into %r10 */ + jmp *%r10 /* Jump to code */ + nop + nop + nop + nop + nop + nop + .endr +ENDF(C(trampoline_code_table_cet)) + + .align UNIX64_TRAMP_MAP_SIZE + .globl trampoline_code_table + FFI_HIDDEN(C(trampoline_code_table)) + +C(trampoline_code_table): + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE + subq $16, %rsp /* Make space on the stack */ + movq %r10, (%rsp) /* Save %r10 on stack */ + movq 4081(%rip), %r10 /* Copy data into %r10 */ + movq %r10, 8(%rsp) /* Save data on stack */ + movq 4077(%rip), %r10 /* Copy code into %r10 */ + jmp *%r10 /* Jump to code */ + nop + nop + .endr +ENDF(C(trampoline_code_table)) + .align UNIX64_TRAMP_MAP_SIZE +#endif /* FFI_EXEC_STATIC_TRAMP */ + /* Sadly, OSX cctools-as doesn't understand .cfi directives at all. 
*/ #ifdef __APPLE__ @@ -615,6 +712,13 @@ L(EFDE5): .quad 0 #endif + .section .rodata + .align 8 + .globl ffi_cet_present +ffi_cet_present: + .set L6,L(endbr)-L(UW0) + .int L6 + #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits diff --git a/src/x86/win64.S b/src/x86/win64.S index 8315e8b..6ca3068 100644 --- a/src/x86/win64.S +++ b/src/x86/win64.S @@ -234,6 +234,18 @@ C(ffi_closure_win64): cfi_endproc SEH(.seh_endproc) + + .align 8 + .globl C(ffi_closure_win64_alt) + FFI_HIDDEN(C(ffi_closure_win64_alt)) + + SEH(.seh_proc ffi_closure_win64_alt) +C(ffi_closure_win64_alt): + _CET_ENDBR + movq 8(%rsp), %r10 + addq $16, %rsp + jmp C(ffi_closure_win64) + SEH(.seh_endproc) #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ -- 2.25.1