2010-01-12 Daniel Witte * src/x86/win32.S: Port assembly routines to MSVC and #ifdef. * src/x86/ffi.c: Tweak function declaration and remove excess parens. * include/ffi.h.in: Add __declspec(align(8)) to typedef struct ffi_closure. diff --git a/libffi/ChangeLog b/libffi/ChangeLog --- a/libffi/ChangeLog +++ b/libffi/ChangeLog @@ -1,8 +1,13 @@ +2010-01-12 Daniel Witte + * src/x86/win32.S: Port assembly routines to MSVC and #ifdef. + * src/x86/ffi.c: Tweak function declaration and remove excess parens. + * include/ffi.h.in: Add __declspec(align(8)) to typedef struct ffi_closure. + 2010-01-12 Daniel Witte * src/x86/ffi.c: Merge ffi_call_SYSV and ffi_call_STDCALL into new function ffi_call_win32 on X86_WIN32. * src/x86/win32.S (ffi_call_SYSV): Rename to ffi_call_win32. (ffi_call_STDCALL): Remove. 2010-01-12 Daniel Witte * src/prep_cif.c (ffi_prep_cif): Move stack space allocation code to diff --git a/libffi/include/ffi.h.in b/libffi/include/ffi.h.in --- a/libffi/include/ffi.h.in +++ b/libffi/include/ffi.h.in @@ -246,16 +246,19 @@ void ffi_java_raw_call (ffi_cif *cif, void ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw); void ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args); size_t ffi_java_raw_size (ffi_cif *cif); /* ---- Definitions for closures ----------------------------------------- */ #if FFI_CLOSURES +#ifdef _MSC_VER +__declspec(align(8)) +#endif typedef struct { char tramp[FFI_TRAMPOLINE_SIZE]; ffi_cif *cif; void (*fun)(ffi_cif*,void*,void**,void*); void *user_data; #ifdef __GNUC__ } ffi_closure __attribute__((aligned (8))); #else diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c --- a/libffi/src/x86/ffi.c +++ b/libffi/src/x86/ffi.c @@ -329,16 +329,18 @@ void ffi_call(ffi_cif *cif, void (*fn)(v FFI_ASSERT(0); break; } } /** private members **/ +/* The following __attribute__((regparm(1))) decorations will have no effect + on MSVC - standard cdecl convention applies. */ static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, void** args, ffi_cif* cif); void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) __attribute__ ((regparm(1))); unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) __attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) __attribute__ ((regparm(1))); @@ -377,21 +379,18 @@ ffi_closure_win64_inner (ffi_closure *cl result types except for floats; we have to 'mov xmm0, rax' in the caller to correct this. TODO: structure sizes of 3 5 6 7 are returned by reference, too!!! */ return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp; } #else -unsigned int FFI_HIDDEN -ffi_closure_SYSV_inner (closure, respp, args) - ffi_closure *closure; - void **respp; - void *args; +unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) +ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) { /* our various things... */ ffi_cif *cif; void **arg_area; cif = closure->cif; arg_area = (void**) alloca (cif->nargs * sizeof (void*)); @@ -492,39 +491,39 @@ ffi_prep_incoming_args_SYSV(char *stack, *(unsigned char *) &__tramp[26] = 0x41; \ *(unsigned char *) &__tramp[27] = 0xff; \ *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \ } /* How to make a trampoline. Derived from gcc/config/i386/i386.c. */ #define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ -({ unsigned char *__tramp = (unsigned char*)(TRAMP); \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ unsigned int __fun = (unsigned int)(FUN); \ unsigned int __ctx = (unsigned int)(CTX); \ unsigned int __dis = __fun - (__ctx + 10); \ *(unsigned char*) &__tramp[0] = 0xb8; \ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ *(unsigned char *) &__tramp[5] = 0xe9; \ *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ - }) + } #define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ -({ unsigned char *__tramp = (unsigned char*)(TRAMP); \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ unsigned int __fun = (unsigned int)(FUN); \ unsigned int __ctx = (unsigned int)(CTX); \ unsigned int __dis = __fun - (__ctx + 10); \ unsigned short __size = (unsigned short)(SIZE); \ *(unsigned char*) &__tramp[0] = 0xb8; \ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ *(unsigned char *) &__tramp[5] = 0xe8; \ *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ *(unsigned char *) &__tramp[10] = 0xc2; \ *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ - }) + } /* the cif must already be prep'ed */ ffi_status ffi_prep_closure_loc (ffi_closure* closure, ffi_cif* cif, void (*fun)(ffi_cif*,void*,void**,void*), void *user_data, diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S --- a/libffi/src/x86/win32.S +++ b/libffi/src/x86/win32.S @@ -1,12 +1,13 @@ /* ----------------------------------------------------------------------- win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. Copyright (c) 2001 John Beniton Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2009 Daniel Witte X86 Foreign Function Interface Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ``Software''), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, @@ -26,17 +27,374 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ #define LIBFFI_ASM #include #include - + +#ifdef _MSC_VER + +.386 +.MODEL FLAT, C + +EXTRN ffi_closure_SYSV_inner:NEAR + +_TEXT SEGMENT + +ffi_call_win32 PROC NEAR, + ffi_prep_args : NEAR PTR DWORD, + ecif : NEAR PTR DWORD, + cif_bytes : DWORD, + cif_flags : DWORD, + rvalue : NEAR PTR DWORD, + fn : NEAR PTR DWORD + + ;; Make room for all of the new args. + mov ecx, cif_bytes + sub esp, ecx + + mov eax, esp + + ;; Place all of the ffi_prep_args in position + push ecif + push eax + call ffi_prep_args + + ;; Return stack to previous state and call the function + add esp, 8 + + call fn + + ;; cdecl: we restore esp in the epilogue, so there's no need to + ;; remove the space we pushed for the args. + ;; stdcall: the callee has already cleaned the stack. + + ;; Load ecx with the return type code + mov ecx, cif_flags + + ;; If the return value pointer is NULL, assume no return value. + cmp rvalue, 0 + jne ca_jumptable + + ;; Even if there is no space for the return value, we are + ;; obliged to handle floating-point values. + cmp ecx, FFI_TYPE_FLOAT + jne ca_epilogue + fstp st(0) + + jmp ca_epilogue + +ca_jumptable: + jmp [ca_jumpdata + 4 * ecx] +ca_jumpdata: + ;; Do not insert anything here between label and jump table. + dd offset ca_epilogue ;; FFI_TYPE_VOID + dd offset ca_retint ;; FFI_TYPE_INT + dd offset ca_retfloat ;; FFI_TYPE_FLOAT + dd offset ca_retdouble ;; FFI_TYPE_DOUBLE + dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset ca_retint8 ;; FFI_TYPE_UINT8 + dd offset ca_retint8 ;; FFI_TYPE_SINT8 + dd offset ca_retint16 ;; FFI_TYPE_UINT16 + dd offset ca_retint16 ;; FFI_TYPE_SINT16 + dd offset ca_retint ;; FFI_TYPE_UINT32 + dd offset ca_retint ;; FFI_TYPE_SINT32 + dd offset ca_retint64 ;; FFI_TYPE_UINT64 + dd offset ca_retint64 ;; FFI_TYPE_SINT64 + dd offset ca_epilogue ;; FFI_TYPE_STRUCT + dd offset ca_retint ;; FFI_TYPE_POINTER + dd offset ca_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset ca_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +ca_retint8: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], al + jmp ca_epilogue + +ca_retint16: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], ax + jmp ca_epilogue + +ca_retint: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], eax + jmp ca_epilogue + +ca_retint64: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], eax + mov [ecx + 4], edx + jmp ca_epilogue + +ca_retfloat: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp DWORD PTR [ecx] + jmp ca_epilogue + +ca_retdouble: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp QWORD PTR [ecx] + jmp ca_epilogue + +ca_retlongdouble: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp TBYTE PTR [ecx] + jmp ca_epilogue + +ca_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_call_win32 ENDP + +ffi_closure_SYSV PROC NEAR FORCEFRAME + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + lea edx, [ebp - 24] + mov [ebp - 12], edx ;; resp + lea edx, [ebp + 8] + mov [esp + 8], edx ;; args + lea edx, [ebp - 12] + mov [esp + 4], edx ;; &resp + mov [esp], eax ;; closure + call ffi_closure_SYSV_inner + mov ecx, [ebp - 12] + +cs_jumptable: + jmp [cs_jumpdata + 4 * eax] +cs_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cs_epilogue ;; FFI_TYPE_VOID + dd offset cs_retint ;; FFI_TYPE_INT + dd offset cs_retfloat ;; FFI_TYPE_FLOAT + dd offset cs_retdouble ;; FFI_TYPE_DOUBLE + dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cs_retint8 ;; FFI_TYPE_UINT8 + dd offset cs_retint8 ;; FFI_TYPE_SINT8 + dd offset cs_retint16 ;; FFI_TYPE_UINT16 + dd offset cs_retint16 ;; FFI_TYPE_SINT16 + dd offset cs_retint ;; FFI_TYPE_UINT32 + dd offset cs_retint ;; FFI_TYPE_SINT32 + dd offset cs_retint64 ;; FFI_TYPE_UINT64 + dd offset cs_retint64 ;; FFI_TYPE_SINT64 + dd offset cs_retstruct ;; FFI_TYPE_STRUCT + dd offset cs_retint ;; FFI_TYPE_POINTER + dd offset cs_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cs_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cs_retint8: + mov al, [ecx] + jmp cs_epilogue + +cs_retint16: + mov ax, [ecx] + jmp cs_epilogue + +cs_retint: + mov eax, [ecx] + jmp cs_epilogue + +cs_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cs_epilogue + +cs_retfloat: + fld DWORD PTR [ecx] + jmp cs_epilogue + +cs_retdouble: + fld QWORD PTR [ecx] + jmp cs_epilogue + +cs_retlongdouble: + fld TBYTE PTR [ecx] + jmp cs_epilogue + +cs_retstruct: + ;; Caller expects us to pop struct return value pointer hidden arg. + ;; Epilogue code is autogenerated. + ret 4 + +cs_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_SYSV ENDP + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + +ffi_closure_raw_SYSV PROC NEAR USES esi + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif + mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data + mov [esp + 12], edx ;; user_data + lea edx, [ebp + 8] + mov [esp + 8], edx ;; raw_args + lea edx, [ebp - 24] + mov [esp + 4], edx ;; &res + mov [esp], esi ;; cif + call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET] ;; closure->fun + mov eax, [esi + CIF_FLAGS_OFFSET] ;; cif->flags + lea ecx, [ebp - 24] + +cr_jumptable: + jmp [cr_jumpdata + 4 * eax] +cr_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cr_epilogue ;; FFI_TYPE_VOID + dd offset cr_retint ;; FFI_TYPE_INT + dd offset cr_retfloat ;; FFI_TYPE_FLOAT + dd offset cr_retdouble ;; FFI_TYPE_DOUBLE + dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cr_retint8 ;; FFI_TYPE_UINT8 + dd offset cr_retint8 ;; FFI_TYPE_SINT8 + dd offset cr_retint16 ;; FFI_TYPE_UINT16 + dd offset cr_retint16 ;; FFI_TYPE_SINT16 + dd offset cr_retint ;; FFI_TYPE_UINT32 + dd offset cr_retint ;; FFI_TYPE_SINT32 + dd offset cr_retint64 ;; FFI_TYPE_UINT64 + dd offset cr_retint64 ;; FFI_TYPE_SINT64 + dd offset cr_epilogue ;; FFI_TYPE_STRUCT + dd offset cr_retint ;; FFI_TYPE_POINTER + dd offset cr_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cr_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cr_retint8: + mov al, [ecx] + jmp cr_epilogue + +cr_retint16: + mov ax, [ecx] + jmp cr_epilogue + +cr_retint: + mov eax, [ecx] + jmp cr_epilogue + +cr_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cr_epilogue + +cr_retfloat: + fld DWORD PTR [ecx] + jmp cr_epilogue + +cr_retdouble: + fld QWORD PTR [ecx] + jmp cr_epilogue + +cr_retlongdouble: + fld TBYTE PTR [ecx] + jmp cr_epilogue + +cr_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_raw_SYSV ENDP + +#endif /* !FFI_NO_RAW_API */ + +ffi_closure_STDCALL PROC NEAR FORCEFRAME + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + lea edx, [ebp - 24] + mov [ebp - 12], edx ;; resp + lea edx, [ebp + 12] ;; account for stub return address on stack + mov [esp + 8], edx ;; args + lea edx, [ebp - 12] + mov [esp + 4], edx ;; &resp + mov [esp], eax ;; closure + call ffi_closure_SYSV_inner + mov ecx, [ebp - 12] + +cd_jumptable: + jmp [cd_jumpdata + 4 * eax] +cd_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cd_epilogue ;; FFI_TYPE_VOID + dd offset cd_retint ;; FFI_TYPE_INT + dd offset cd_retfloat ;; FFI_TYPE_FLOAT + dd offset cd_retdouble ;; FFI_TYPE_DOUBLE + dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cd_retint8 ;; FFI_TYPE_UINT8 + dd offset cd_retint8 ;; FFI_TYPE_SINT8 + dd offset cd_retint16 ;; FFI_TYPE_UINT16 + dd offset cd_retint16 ;; FFI_TYPE_SINT16 + dd offset cd_retint ;; FFI_TYPE_UINT32 + dd offset cd_retint ;; FFI_TYPE_SINT32 + dd offset cd_retint64 ;; FFI_TYPE_UINT64 + dd offset cd_retint64 ;; FFI_TYPE_SINT64 + dd offset cd_epilogue ;; FFI_TYPE_STRUCT + dd offset cd_retint ;; FFI_TYPE_POINTER + dd offset cd_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cd_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cd_retint8: + mov al, [ecx] + jmp cd_epilogue + +cd_retint16: + mov ax, [ecx] + jmp cd_epilogue + +cd_retint: + mov eax, [ecx] + jmp cd_epilogue + +cd_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cd_epilogue + +cd_retfloat: + fld DWORD PTR [ecx] + jmp cd_epilogue + +cd_retdouble: + fld QWORD PTR [ecx] + jmp cd_epilogue + +cd_retlongdouble: + fld TBYTE PTR [ecx] + jmp cd_epilogue + +cd_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_STDCALL ENDP + +_TEXT ENDS +END + +#else + .text # This assumes we are using gas. .balign 16 .globl _ffi_call_win32 .def _ffi_call_win32; .scl 2; .type 32; .endef _ffi_call_win32: .LFB1: @@ -687,8 +1045,11 @@ 1: .byte 0x4 /* DW_CFA_advance_loc4 */ .long .LCFI10-.LCFI9 .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ .byte 0x5 /* .uleb128 0x5 */ /* End of DW_CFA_xxx CFI instructions. */ .align 4 .LEFDE5: + +#endif /* !_MSC_VER */ +