From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by sourceware.org (Postfix) with ESMTP id EB90D3836C62 for ; Wed, 16 Dec 2020 19:40:18 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org EB90D3836C62 Received: from microsoft-linux.home (unknown [47.187.219.45]) by linux.microsoft.com (Postfix) with ESMTPSA id 1EF7C20B7187; Wed, 16 Dec 2020 11:40:18 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 1EF7C20B7187 From: madvenka@linux.microsoft.com To: libffi-discuss@sourceware.org Cc: green@moxielogic.com, fw@deneb.enyo.de, dj@redhat.com, madvenka@linux.microsoft.com Subject: [RFC PATCH v2 3/5] i386: Support for Static Trampolines Date: Wed, 16 Dec 2020 13:40:07 -0600 Message-Id: <20201216194009.8268-4-madvenka@linux.microsoft.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20201216194009.8268-1-madvenka@linux.microsoft.com> References: <20201216194009.8268-1-madvenka@linux.microsoft.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-26.9 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, ENV_AND_HDR_SPF_MATCH, GIT_PATCH_0, SPF_HELO_PASS, SPF_PASS, TXREP, USER_IN_DEF_DKIM_WL, USER_IN_DEF_SPF_WL autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libffi-discuss@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libffi-discuss mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 16 Dec 2020 19:40:20 -0000 From: "Madhavan T. Venkataraman" - Define the arch-specific initialization function ffi_tramp_arch () that returns trampoline size information to common code. - Define the trampoline code mapping and data mapping sizes. - Define the trampoline code table statically. - Introduce a tiny prolog for each ABI handling function. 
The ABI handlers addressed are: - ffi_closure_i386 - ffi_closure_STDCALL - ffi_closure_REGISTER Their prolog functions are called: - ffi_closure_i386_alt - ffi_closure_STDCALL_alt - ffi_closure_REGISTER_alt The legacy trampoline jumps to the ABI handler. The static trampoline jumps to the prolog function. The prolog function uses the information provided by the static trampoline, sets things up for the ABI handler and then jumps to the ABI handler. - Call ffi_closure_tramp_init () in ffi_prep_closure_loc () to initialize static trampoline parameters. Signed-off-by: Madhavan T. Venkataraman --- src/x86/ffi.c | 29 +++++++++++++++++++ src/x86/internal.h | 10 +++++++ src/x86/sysv.S | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 5f7fd81..12ae791 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -409,8 +409,11 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, /** private members **/ void FFI_HIDDEN ffi_closure_i386(void); +void FFI_HIDDEN ffi_closure_i386_alt(void); void FFI_HIDDEN ffi_closure_STDCALL(void); +void FFI_HIDDEN ffi_closure_STDCALL_alt(void); void FFI_HIDDEN ffi_closure_REGISTER(void); +void FFI_HIDDEN ffi_closure_REGISTER_alt(void); struct closure_frame { @@ -537,6 +540,7 @@ ffi_prep_closure_loc (ffi_closure* closure, { char *tramp = closure->tramp; void (*dest)(void); + void (*dest_alt)(void); int op = 0xb8; /* movl imm, %eax */ switch (cif->abi) @@ -546,19 +550,30 @@ ffi_prep_closure_loc (ffi_closure* closure, case FFI_FASTCALL: case FFI_MS_CDECL: dest = ffi_closure_i386; + dest_alt = ffi_closure_i386_alt; break; case FFI_STDCALL: case FFI_PASCAL: dest = ffi_closure_STDCALL; + dest_alt = ffi_closure_STDCALL_alt; break; case FFI_REGISTER: dest = ffi_closure_REGISTER; + dest_alt = ffi_closure_REGISTER_alt; op = 0x68; /* pushl imm */ break; default: return FFI_BAD_ABI; } + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's 
parameters. */ + ffi_tramp_set_parms (closure->ftramp, dest_alt, closure); + goto out; + } + + /* Initialize the dynamic trampoline. */ /* endbr32. */ *(UINT32 *) tramp = 0xfb1e0ff3; @@ -570,6 +585,7 @@ ffi_prep_closure_loc (ffi_closure* closure, tramp[9] = 0xe9; *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14); +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; @@ -767,4 +783,17 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) ffi_call_i386 (frame, stack); } #endif /* !FFI_NO_RAW_API */ + +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table; + + *tramp_size = X86_TRAMP_SIZE; + *map_size = X86_TRAMP_MAP_SIZE; + return &trampoline_code_table; +} +#endif + #endif /* __i386__ */ diff --git a/src/x86/internal.h b/src/x86/internal.h index 09771ba..f782aad 100644 --- a/src/x86/internal.h +++ b/src/x86/internal.h @@ -27,3 +27,13 @@ #else # define HAVE_FASTCALL 1 #endif + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * For the trampoline code table mapping, a mapping size of 4K (base page size) + * is chosen. 
+ */ +#define X86_TRAMP_MAP_SHIFT 12 +#define X86_TRAMP_MAP_SIZE (1 << X86_TRAMP_MAP_SHIFT) +#define X86_TRAMP_SIZE 44 +#endif diff --git a/src/x86/sysv.S b/src/x86/sysv.S index d8ab4b0..a83b36d 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -421,6 +421,16 @@ L(UW20): # cfi_endproc ENDF(C(ffi_closure_i386)) + .balign 16 + .globl C(ffi_closure_i386_alt) + FFI_HIDDEN(C(ffi_closure_i386_alt)) +C(ffi_closure_i386_alt): + _CET_ENDBR + movl 4(%esp), %eax + add $8, %esp + jmp C(ffi_closure_i386) +ENDF(C(ffi_closure_i386_alt)) + .balign 16 .globl C(ffi_go_closure_STDCALL) FFI_HIDDEN(C(ffi_go_closure_STDCALL)) @@ -466,6 +476,16 @@ L(UW26): # cfi_endproc ENDF(C(ffi_closure_REGISTER)) + .balign 16 + .globl C(ffi_closure_REGISTER_alt) + FFI_HIDDEN(C(ffi_closure_REGISTER_alt)) +C(ffi_closure_REGISTER_alt): + _CET_ENDBR + movl (%esp), %eax + add $4, %esp + jmp C(ffi_closure_REGISTER) +ENDF(C(ffi_closure_REGISTER_alt)) + /* For STDCALL (and others), we need to pop N bytes of arguments off the stack following the closure. The amount needing to be popped is returned to us from ffi_closure_inner. */ @@ -573,6 +593,56 @@ L(UW31): # cfi_endproc ENDF(C(ffi_closure_STDCALL)) + .balign 16 + .globl C(ffi_closure_STDCALL_alt) + FFI_HIDDEN(C(ffi_closure_STDCALL_alt)) +C(ffi_closure_STDCALL_alt): + _CET_ENDBR + movl 4(%esp), %eax + add $8, %esp + jmp C(ffi_closure_STDCALL) +ENDF(C(ffi_closure_STDCALL_alt)) + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * The trampoline uses register eax. It saves the original value of eax on + * the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. 
+ * + * The target code can choose to: + * + * - restore the value of eax + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. + */ + .align X86_TRAMP_MAP_SIZE + .globl C(trampoline_code_table) + FFI_HIDDEN(C(trampoline_code_table)) +C(trampoline_code_table): + .rept X86_TRAMP_MAP_SIZE / X86_TRAMP_SIZE + endbr32 + sub $8, %esp + movl %eax, (%esp) /* Save %eax on stack */ + call 1f /* Get next PC into %eax */ + movl 4081(%eax), %eax /* Copy data into %eax */ + movl %eax, 4(%esp) /* Save data on stack */ + call 1f /* Get next PC into %eax */ + movl 4070(%eax), %eax /* Copy code into %eax */ + jmp *%eax /* Jump to code */ +1: + mov (%esp), %eax + ret + nop /* Pad to 4 byte boundary */ + nop + .endr +ENDF(C(trampoline_code_table)) + .align X86_TRAMP_MAP_SIZE +#endif /* FFI_EXEC_STATIC_TRAMP */ + #if !FFI_NO_RAW_API #define raw_closure_S_FS (16+16+12) -- 2.27.0