* [PATCH 0/8] Go closures for x86_64
@ 2014-10-28 18:32 Richard Henderson
2014-10-28 18:32 ` [PATCH 5/8] win64: Remove support from ffi.c Richard Henderson
` (7 more replies)
0 siblings, 8 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
This is the first in a series of patch sets to implement the Go interfaces
I proposed a week or so ago, but mostly split into smaller pieces.
These patches will also include support for complex types as I go,
since Go needs them, and performing both updates at the same time
will make things easier.
This first set includes x86_64-linux and x86_64-cygwin.
It leaves x86_64-darwin in a broken state, but I'm unsure why it
has a completely different assembly file, rather than merely using
the preprocessor to handle the __USER_LABEL_PREFIX__. I'm hoping
that someone who can test darwin can do this update.
The existing win64 support was poor, having a significant number
of failures (and some xfails to cover that up). IMO the biggest
problem there was trying to combine it with the 32-bit targets.
The ABI is significantly different, and the amount of ifdefs
needed to force it in was ... ugly.
The win64 rewrite leaves the Visual Studio build broken, in that I totally
rewrote all of the win64.S assembly, but did not keep the Microsoft
assembly. Hopefully someone who cares about VS can handle that.
This patch set is available at
git://github.com/rth7680/libffi.git go/x86
r~
Richard Henderson (8):
Add entry points for interacting with Go
Add ffi_cfi.h
x86-64: Support go closures
win64: Rewrite
win64: Remove support from ffi.c
x86_64: Fixups for x32
x86_64: Decouple return types from FFI_TYPE constants
x86_64: Add support for complex types
Makefile.am | 4 +-
include/ffi.h.in | 16 +
include/ffi_cfi.h | 53 ++
src/x86/ffi.c | 212 +------
src/x86/ffi64.c | 327 +++++++---
src/x86/ffitarget.h | 29 +-
src/x86/ffiw64.c | 281 +++++++++
src/x86/internal64.h | 22 +
src/x86/unix64.S | 552 ++++++++--------
src/x86/win64.S | 693 ++++++---------------
testsuite/libffi.call/call.exp | 13 +-
testsuite/libffi.call/cls_align_longdouble_split.c | 2 -
.../libffi.call/cls_align_longdouble_split2.c | 2 -
testsuite/libffi.call/cls_longdouble.c | 2 -
testsuite/libffi.call/float2.c | 3 -
testsuite/libffi.call/huge_struct.c | 2 -
testsuite/libffi.call/return_ldl.c | 1 -
17 files changed, 1088 insertions(+), 1126 deletions(-)
create mode 100644 include/ffi_cfi.h
create mode 100644 src/x86/ffiw64.c
create mode 100644 src/x86/internal64.h
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/8] Add entry points for interacting with Go
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
` (6 preceding siblings ...)
2014-10-28 18:32 ` [PATCH 7/8] x86_64: Decouple return types from FFI_TYPE constants Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
An "ffi_go_closure" is intended to be compatible with the
function descriptors used by Go, and ffi_call_go sets up
the static chain parameter for calling a Go function.
The entry points are disabled when a backend has not been
updated, much like we do for "normal" closures.
---
include/ffi.h.in | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/ffi.h.in b/include/ffi.h.in
index f403ae0..c43d52f 100644
--- a/include/ffi.h.in
+++ b/include/ffi.h.in
@@ -428,6 +428,22 @@ ffi_prep_java_raw_closure_loc (ffi_java_raw_closure*,
#endif /* FFI_CLOSURES */
+#if FFI_GO_CLOSURES
+
+typedef struct {
+ void *tramp;
+ ffi_cif *cif;
+ void (*fun)(ffi_cif*,void*,void**,void*);
+} ffi_go_closure;
+
+ffi_status ffi_prep_go_closure (ffi_go_closure*, ffi_cif *,
+ void (*fun)(ffi_cif*,void*,void**,void*));
+
+void ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure);
+
+#endif /* FFI_GO_CLOSURES */
+
/* ---- Public interface definition -------------------------------------- */
ffi_status ffi_prep_cif(ffi_cif *cif,
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/8] x86-64: Support go closures
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
` (4 preceding siblings ...)
2014-10-28 18:32 ` [PATCH 2/8] Add ffi_cfi.h Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 7/8] x86_64: Decouple return types from FFI_TYPE constants Richard Henderson
2014-10-28 18:32 ` [PATCH 1/8] Add entry points for interacting with Go Richard Henderson
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
Drop all of the hand-coded unwind info in favor of gas-generated info. Move
jump table data into .rodata. Adjust ffi_call_unix64 to load the static
chain. Split out the sse portions of ffi_closure_unix64 into
ffi_closure_unix64_sse rather than testing cif->flags at runtime.
---
src/x86/ffi64.c | 103 ++++++++++++-----
src/x86/ffitarget.h | 2 +
src/x86/unix64.S | 323 +++++++++++++++++++++++++---------------------------
3 files changed, 230 insertions(+), 198 deletions(-)
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index 5a5e043..384a93a 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -32,6 +32,7 @@
#include <stdlib.h>
#include <stdarg.h>
+#include <stdint.h>
#ifdef __x86_64__
@@ -62,10 +63,12 @@ struct register_args
/* Registers for argument passing. */
UINT64 gpr[MAX_GPR_REGS];
union big_int_union sse[MAX_SSE_REGS];
+ UINT64 rax; /* ssecount */
+ UINT64 r10; /* static chain */
};
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
- void *raddr, void (*fnaddr)(void), unsigned ssecount);
+ void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
/* All reference to register classes here is identical to the code in
gcc/config/i386/i386.c. Do *not* change one without the other. */
@@ -358,6 +361,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
enum x86_64_reg_class classes[MAX_CLASSES];
size_t bytes, n;
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
+
gprcount = ssecount = 0;
flags = cif->rtype->type;
@@ -419,8 +425,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
return FFI_OK;
}
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
{
enum x86_64_reg_class classes[MAX_CLASSES];
char *stack, *argp;
@@ -445,6 +452,8 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
reg_args = (struct register_args *) stack;
argp = stack + sizeof (struct register_args);
+ reg_args->r10 = (uintptr_t) closure;
+
gprcount = ssecount = 0;
/* If the return value is passed in memory, add the pointer as the
@@ -521,13 +530,27 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
}
}
+ reg_args->rax = ssecount;
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
- cif->flags, rvalue, fn, ssecount);
+ cif->flags, rvalue, fn);
}
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
-extern void ffi_closure_unix64(void);
+extern void ffi_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
@@ -536,29 +559,26 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- volatile unsigned short *tramp;
-
- /* Sanity check on the cif ABI. */
- {
- int abi = cif->abi;
- if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
- return FFI_BAD_ABI;
- }
-
- tramp = (volatile unsigned short *) &closure->tramp[0];
+ static const unsigned char trampoline[16] = {
+ /* leaq -0x7(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+ /* jmpq *0x3(%rip) # 0x10 */
+ 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+ /* nopl (%rax) */
+ 0x0f, 0x1f, 0x00
+ };
+ void (*dest)(void);
- tramp[0] = 0xbb49; /* mov <code>, %r11 */
- *((unsigned long long * volatile) &tramp[1])
- = (unsigned long) ffi_closure_unix64;
- tramp[5] = 0xba49; /* mov <data>, %r10 */
- *((unsigned long long * volatile) &tramp[6])
- = (unsigned long) codeloc;
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
- /* Set the carry bit iff the function uses any sse registers.
- This is clc or stc, together with the first byte of the jmp. */
- tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+ if (cif->flags & (1 << 11))
+ dest = ffi_closure_unix64_sse;
+ else
+ dest = ffi_closure_unix64;
- tramp[11] = 0xe3ff; /* jmp *%r11 */
+ memcpy (closure->tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(closure->tramp + 16) = (uintptr_t)dest;
closure->cif = cif;
closure->fun = fun;
@@ -567,18 +587,20 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_OK;
}
-int
-ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
- struct register_args *reg_args, char *argp)
+int FFI_HIDDEN
+ffi_closure_unix64_inner(ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *rvalue,
+ struct register_args *reg_args,
+ char *argp)
{
- ffi_cif *cif;
void **avalue;
ffi_type **arg_types;
long i, avn;
int gprcount, ssecount, ngpr, nsse;
int ret;
- cif = closure->cif;
avalue = alloca(cif->nargs * sizeof(void *));
gprcount = ssecount = 0;
@@ -667,10 +689,29 @@ ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
}
/* Invoke the closure. */
- closure->fun (cif, rvalue, avalue, closure->user_data);
+ fun (cif, rvalue, avalue, user_data);
/* Tell assembly how to perform return type promotions. */
return ret;
}
+extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*))
+{
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
+
+ closure->tramp = (cif->flags & (1 << 11)
+ ? ffi_go_closure_unix64_sse
+ : ffi_go_closure_unix64);
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
#endif /* __x86_64__ */
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index a236677..0d295e0 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -121,6 +121,7 @@ typedef enum ffi_abi {
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
+
#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
@@ -129,6 +130,7 @@ typedef enum ffi_abi {
#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
#define FFI_TRAMPOLINE_SIZE 24
#define FFI_NATIVE_RAW_API 0
+#define FFI_GO_CLOSURES 1
#else
#ifdef X86_WIN32
#define FFI_TRAMPOLINE_SIZE 52
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index dcd6bc7..134cb3d 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -30,6 +30,7 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include <ffi_cfi.h>
.text
@@ -43,9 +44,10 @@
.align 2
.globl ffi_call_unix64
.type ffi_call_unix64,@function
+ FFI_HIDDEN(ffi_call_unix64)
ffi_call_unix64:
-.LUW0:
+ cfi_startproc
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
@@ -53,24 +55,36 @@ ffi_call_unix64:
movq %rbp, 16(%rax) /* Save old frame pointer. */
movq %r10, 24(%rax) /* Relocate return address. */
movq %rax, %rbp /* Finalize local stack frame. */
-.LUW1:
+
+ /* New stack frame based off rbp. This is a itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-8, so from the
+ perspective of the unwind info, it hasn't moved. */
+ cfi_def_cfa(%rbp, 32)
+ cfi_rel_offset(%rbp, 16)
+
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
movl %r9d, %eax /* Set number of SSE registers. */
/* Load up all argument registers. */
movq (%r10), %rdi
- movq 8(%r10), %rsi
- movq 16(%r10), %rdx
- movq 24(%r10), %rcx
- movq 32(%r10), %r8
- movq 40(%r10), %r9
+ movq 0x08(%r10), %rsi
+ movq 0x10(%r10), %rdx
+ movq 0x18(%r10), %rcx
+ movq 0x20(%r10), %r8
+ movq 0x28(%r10), %r9
+ movl 0xb0(%r10), %eax
testl %eax, %eax
jnz .Lload_sse
.Lret_from_load_sse:
- /* Deallocate the reg arg area. */
- leaq 176(%r10), %rsp
+ /* Deallocate the reg arg area, except for r10, then load via pop. */
+ leaq 0xb8(%r10), %rsp
+ popq %r10
/* Call the user function. */
call *%r11
@@ -81,7 +95,9 @@ ffi_call_unix64:
movq 0(%rbp), %rcx /* Reload flags. */
movq 8(%rbp), %rdi /* Reload raddr. */
movq 16(%rbp), %rbp /* Reload old frame pointer. */
-.LUW2:
+ cfi_remember_state
+ cfi_def_cfa(%rsp, 8)
+ cfi_restore(%rbp)
/* The first byte of the flags contains the FFI_TYPE. */
movzbl %cl, %r10d
@@ -90,6 +106,8 @@ ffi_call_unix64:
addq %r11, %r10
jmp *%r10
+ .section .rodata
+ .align 2
.Lstore_table:
.long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
.long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
@@ -106,6 +124,7 @@ ffi_call_unix64:
.long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
.long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
.long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */
+ .previous
.align 2
.Lst_void:
@@ -188,49 +207,83 @@ ffi_call_unix64:
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
.align 2
-.LUW3:
+ cfi_restore_state
.Lload_sse:
- movdqa 48(%r10), %xmm0
- movdqa 64(%r10), %xmm1
- movdqa 80(%r10), %xmm2
- movdqa 96(%r10), %xmm3
- movdqa 112(%r10), %xmm4
- movdqa 128(%r10), %xmm5
- movdqa 144(%r10), %xmm6
- movdqa 160(%r10), %xmm7
+ movdqa 0x30(%r10), %xmm0
+ movdqa 0x40(%r10), %xmm1
+ movdqa 0x50(%r10), %xmm2
+ movdqa 0x60(%r10), %xmm3
+ movdqa 0x70(%r10), %xmm4
+ movdqa 0x80(%r10), %xmm5
+ movdqa 0x90(%r10), %xmm6
+ movdqa 0xa0(%r10), %xmm7
jmp .Lret_from_load_sse
-.LUW4:
+ cfi_endproc
.size ffi_call_unix64,.-ffi_call_unix64
+/* 6 general registers, 8 vector registers,
+ 16 bytes of rvalue, 8 bytes of alignment. */
+#define ffi_closure_OFS_G 0
+#define ffi_closure_OFS_V (6*8)
+#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
+#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 16 + 8)
+
+/* The location of rvalue within the red zone after deallocating the frame. */
+#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
+
+ .align 2
+ .globl ffi_closure_unix64_sse
+ .type ffi_closure_unix64_sse,@function
+ FFI_HIDDEN(ffi_closure_unix64_sse)
+
+ffi_closure_unix64_sse:
+ cfi_startproc
+ subq $ffi_closure_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_closure_FS)
+
+ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
+ movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
+ movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp)
+ movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp)
+ movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp)
+ movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
+ movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
+ movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
+ jmp 0f
+
+ cfi_endproc
+ .size ffi_closure_unix64_sse,.-ffi_closure_unix64_sse
+
.align 2
- .globl ffi_closure_unix64
+ .globl ffi_closure_unix64
.type ffi_closure_unix64,@function
+ FFI_HIDDEN(ffi_closure_unix64)
ffi_closure_unix64:
-.LUW5:
- /* The carry flag is set by the trampoline iff SSE registers
- are used. Don't clobber it before the branch instruction. */
- leaq -200(%rsp), %rsp
-.LUW6:
- movq %rdi, (%rsp)
- movq %rsi, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rcx, 24(%rsp)
- movq %r8, 32(%rsp)
- movq %r9, 40(%rsp)
- jc .Lsave_sse
-.Lret_from_save_sse:
-
- movq %r10, %rdi
- leaq 176(%rsp), %rsi
- movq %rsp, %rdx
- leaq 208(%rsp), %rcx
- call ffi_closure_unix64_inner@PLT
+ cfi_startproc
+ subq $ffi_closure_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_closure_FS)
+0:
+ movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
+ movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
+ movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
+ movq %rcx, ffi_closure_OFS_G+0x18(%rsp)
+ movq %r8, ffi_closure_OFS_G+0x20(%rsp)
+ movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+
+ movq 24(%r10), %rdi /* Load cif */
+ movq 32(%r10), %rsi /* Load fun */
+ movq 40(%r10), %rdx /* Load user_data */
+.Ldo_closure:
+ leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
+ movq %rsp, %r8 /* Load reg_args */
+ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
+ call ffi_closure_unix64_inner
/* Deallocate stack frame early; return value is now in redzone. */
- addq $200, %rsp
-.LUW7:
+ addq $ffi_closure_FS, %rsp
+ cfi_adjust_cfa_offset(-ffi_closure_FS)
/* The first byte of the return value contains the FFI_TYPE. */
movzbl %al, %r10d
@@ -239,6 +292,8 @@ ffi_closure_unix64:
addq %r11, %r10
jmp *%r10
+ .section .rodata
+ .align 2
.Lload_table:
.long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
.long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
@@ -255,6 +310,7 @@ ffi_closure_unix64:
.long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
.long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
.long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */
+ .previous
.align 2
.Lld_void:
@@ -262,32 +318,32 @@ ffi_closure_unix64:
.align 2
.Lld_int8:
- movzbl -24(%rsp), %eax
+ movzbl ffi_closure_RED_RVALUE(%rsp), %eax
ret
.align 2
.Lld_int16:
- movzwl -24(%rsp), %eax
+ movzwl ffi_closure_RED_RVALUE(%rsp), %eax
ret
.align 2
.Lld_int32:
- movl -24(%rsp), %eax
+ movl ffi_closure_RED_RVALUE(%rsp), %eax
ret
.align 2
.Lld_int64:
- movq -24(%rsp), %rax
+ movq ffi_closure_RED_RVALUE(%rsp), %rax
ret
.align 2
.Lld_float:
- movss -24(%rsp), %xmm0
+ movss ffi_closure_RED_RVALUE(%rsp), %xmm0
ret
.align 2
.Lld_double:
- movsd -24(%rsp), %xmm0
+ movsd ffi_closure_RED_RVALUE(%rsp), %xmm0
ret
.align 2
.Lld_ldouble:
- fldt -24(%rsp)
+ fldt ffi_closure_RED_RVALUE(%rsp)
ret
.align 2
@@ -297,136 +353,69 @@ ffi_closure_unix64:
both rdx and xmm1 with the second word. For the remaining,
bit 8 set means xmm0 gets the second word, and bit 9 means
that rax gets the second word. */
- movq -24(%rsp), %rcx
- movq -16(%rsp), %rdx
- movq -16(%rsp), %xmm1
+ movq ffi_closure_RED_RVALUE(%rsp), %rcx
+ movq ffi_closure_RED_RVALUE+8(%rsp), %rdx
+ movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1
testl $0x100, %eax
cmovnz %rdx, %rcx
movd %rcx, %xmm0
testl $0x200, %eax
- movq -24(%rsp), %rax
+ movq ffi_closure_RED_RVALUE(%rsp), %rax
cmovnz %rdx, %rax
ret
- /* See the comment above .Lload_sse; the same logic applies here. */
- .align 2
-.LUW8:
-.Lsave_sse:
- movdqa %xmm0, 48(%rsp)
- movdqa %xmm1, 64(%rsp)
- movdqa %xmm2, 80(%rsp)
- movdqa %xmm3, 96(%rsp)
- movdqa %xmm4, 112(%rsp)
- movdqa %xmm5, 128(%rsp)
- movdqa %xmm6, 144(%rsp)
- movdqa %xmm7, 160(%rsp)
- jmp .Lret_from_save_sse
-
-.LUW9:
+ cfi_endproc
.size ffi_closure_unix64,.-ffi_closure_unix64
-#ifdef __GNUC__
-/* Only emit DWARF unwind info when building with the GNU toolchain. */
-
-#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
- .section .eh_frame,"a",@unwind
-#else
- .section .eh_frame,"a",@progbits
-#endif
-.Lframe1:
- .long .LECIE1-.LSCIE1 /* CIE Length */
-.LSCIE1:
- .long 0 /* CIE Identifier Tag */
- .byte 1 /* CIE Version */
- .ascii "zR\0" /* CIE Augmentation */
- .uleb128 1 /* CIE Code Alignment Factor */
- .sleb128 -8 /* CIE Data Alignment Factor */
- .byte 0x10 /* CIE RA Column */
- .uleb128 1 /* Augmentation size */
- .byte 0x1b /* FDE Encoding (pcrel sdata4) */
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .uleb128 7
- .uleb128 8
- .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */
- .uleb128 1
- .align 8
-.LECIE1:
-.LSFDE1:
- .long .LEFDE1-.LASFDE1 /* FDE Length */
-.LASFDE1:
- .long .LASFDE1-.Lframe1 /* FDE CIE offset */
-#if HAVE_AS_X86_PCREL
- .long .LUW0-. /* FDE initial location */
-#else
- .long .LUW0@rel
-#endif
- .long .LUW4-.LUW0 /* FDE address range */
- .uleb128 0x0 /* Augmentation size */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW1-.LUW0
-
- /* New stack frame based off rbp. This is a itty bit of unwind
- trickery in that the CFA *has* changed. There is no easy way
- to describe it correctly on entry to the function. Fortunately,
- it doesn't matter too much since at all points we can correctly
- unwind back to ffi_call. Note that the location to which we
- moved the return address is (the new) CFA-8, so from the
- perspective of the unwind info, it hasn't moved. */
- .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
- .uleb128 6
- .uleb128 32
- .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
- .uleb128 2
- .byte 0xa /* DW_CFA_remember_state */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW2-.LUW1
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .uleb128 7
- .uleb128 8
- .byte 0xc0+6 /* DW_CFA_restore, %rbp */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW3-.LUW2
- .byte 0xb /* DW_CFA_restore_state */
-
- .align 8
-.LEFDE1:
-.LSFDE3:
- .long .LEFDE3-.LASFDE3 /* FDE Length */
-.LASFDE3:
- .long .LASFDE3-.Lframe1 /* FDE CIE offset */
-#if HAVE_AS_X86_PCREL
- .long .LUW5-. /* FDE initial location */
-#else
- .long .LUW5@rel
-#endif
- .long .LUW9-.LUW5 /* FDE address range */
- .uleb128 0x0 /* Augmentation size */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW6-.LUW5
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .uleb128 208
- .byte 0xa /* DW_CFA_remember_state */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW7-.LUW6
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .uleb128 8
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .long .LUW8-.LUW7
- .byte 0xb /* DW_CFA_restore_state */
+ .align 2
+ .globl ffi_go_closure_unix64_sse
+ .type ffi_go_closure_unix64_sse,@function
+ FFI_HIDDEN(ffi_go_closure_unix64_sse)
+
+ffi_go_closure_unix64_sse:
+ cfi_startproc
+ subq $ffi_closure_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_closure_FS)
+
+ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
+ movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
+ movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp)
+ movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp)
+ movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp)
+ movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
+ movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
+ movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
+ jmp 0f
+
+ cfi_endproc
+ .size ffi_go_closure_unix64_sse,.-ffi_go_closure_unix64_sse
- .align 8
-.LEFDE3:
+ .align 2
+ .globl ffi_go_closure_unix64
+ .type ffi_go_closure_unix64,@function
+ FFI_HIDDEN(ffi_go_closure_unix64)
+
+ffi_go_closure_unix64:
+ cfi_startproc
+ subq $ffi_closure_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_closure_FS)
+0:
+ movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
+ movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
+ movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
+ movq %rcx, ffi_closure_OFS_G+0x18(%rsp)
+ movq %r8, ffi_closure_OFS_G+0x20(%rsp)
+ movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+
+ movq 8(%r10), %rdi /* Load cif */
+ movq 16(%r10), %rsi /* Load fun */
+ movq %r10, %rdx /* Load closure (user_data) */
+ jmp .Ldo_closure
+
+ cfi_endproc
+ .size ffi_go_closure_unix64,.-ffi_go_closure_unix64
-#endif /* __GNUC__ */
-
#endif /* __x86_64__ */
-
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
#endif
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/8] Add ffi_cfi.h
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
` (3 preceding siblings ...)
2014-10-28 18:32 ` [PATCH 4/8] win64: Rewrite Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 3/8] x86-64: Support go closures Richard Henderson
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
Have one copy of the HAVE_AS_CFI_PSEUDO_OP code
to share between all backends.
---
include/ffi_cfi.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
create mode 100644 include/ffi_cfi.h
diff --git a/include/ffi_cfi.h b/include/ffi_cfi.h
new file mode 100644
index 0000000..6cca20c
--- /dev/null
+++ b/include/ffi_cfi.h
@@ -0,0 +1,53 @@
+/* -----------------------------------------------------------------------
+ ffi_cfi.h - Copyright (c) 2014 Red Hat, Inc.
+
+ Conditionally assemble cfi directives. Only necessary for building libffi.
+ ----------------------------------------------------------------------- */
+
+#ifndef FFI_CFI_H
+#define FFI_CFI_H
+
+#ifdef HAVE_AS_CFI_PSEUDO_OP
+
+# define cfi_startproc .cfi_startproc
+# define cfi_endproc .cfi_endproc
+# define cfi_def_cfa(reg, off) .cfi_def_cfa reg, off
+# define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
+# define cfi_def_cfa_offset(off) .cfi_def_cfa_offset off
+# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
+# define cfi_offset(reg, off) .cfi_offset reg, off
+# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
+# define cfi_register(r1, r2) .cfi_register r1, r2
+# define cfi_return_column(reg) .cfi_return_column reg
+# define cfi_restore(reg) .cfi_restore reg
+# define cfi_same_value(reg) .cfi_same_value reg
+# define cfi_undefined(reg) .cfi_undefined reg
+# define cfi_remember_state .cfi_remember_state
+# define cfi_restore_state .cfi_restore_state
+# define cfi_window_save .cfi_window_save
+# define cfi_personality(enc, exp) .cfi_personality enc, exp
+# define cfi_lsda(enc, exp) .cfi_lsda enc, exp
+
+#else
+
+# define cfi_startproc
+# define cfi_endproc
+# define cfi_def_cfa(reg, off)
+# define cfi_def_cfa_register(reg)
+# define cfi_def_cfa_offset(off)
+# define cfi_adjust_cfa_offset(off)
+# define cfi_offset(reg, off)
+# define cfi_rel_offset(reg, off)
+# define cfi_register(r1, r2)
+# define cfi_return_column(reg)
+# define cfi_restore(reg)
+# define cfi_same_value(reg)
+# define cfi_undefined(reg)
+# define cfi_remember_state
+# define cfi_restore_state
+# define cfi_window_save
+# define cfi_personality(enc, exp)
+# define cfi_lsda(enc, exp)
+
+#endif /* HAVE_AS_CFI_PSEUDO_OP */
+#endif /* FFI_CFI_H */
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/8] win64: Rewrite
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
` (2 preceding siblings ...)
2014-10-28 18:32 ` [PATCH 6/8] x86_64: Fixups for x32 Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 2/8] Add ffi_cfi.h Richard Henderson
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss; +Cc: Kai Tietz
It's way too different from the 32-bit ABIs with which it is
currently associated, as seen from all of the existing XFAILs.
Cc: Kai Tietz <ktietz@redhat.com>
---
Makefile.am | 4 +-
src/x86/ffitarget.h | 29 +-
src/x86/ffiw64.c | 281 +++++++++
src/x86/win64.S | 693 ++++++---------------
testsuite/libffi.call/call.exp | 13 +-
testsuite/libffi.call/cls_align_longdouble_split.c | 2 -
.../libffi.call/cls_align_longdouble_split2.c | 2 -
testsuite/libffi.call/cls_longdouble.c | 2 -
testsuite/libffi.call/float2.c | 3 -
testsuite/libffi.call/huge_struct.c | 2 -
testsuite/libffi.call/return_ldl.c | 1 -
11 files changed, 496 insertions(+), 536 deletions(-)
create mode 100644 src/x86/ffiw64.c
diff --git a/Makefile.am b/Makefile.am
index 0e40451..3d1ecae 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -37,7 +37,7 @@ EXTRA_DIST = LICENSE ChangeLog.v1 ChangeLog.libgcj \
src/sh64/sysv.S src/sh64/ffitarget.h src/sparc/v8.S \
src/sparc/v9.S src/sparc/ffitarget.h src/sparc/ffi.c \
src/x86/darwin64.S src/x86/ffi.c src/x86/sysv.S \
- src/x86/win32.S src/x86/darwin.S src/x86/win64.S \
+ src/x86/win32.S src/x86/darwin.S src/x86/ffiw64.c src/x86/win64.S \
src/x86/freebsd.S src/x86/ffi64.c src/x86/unix64.S \
src/x86/ffitarget.h src/pa/ffitarget.h src/pa/ffi.c \
src/pa/linux.S src/pa/hpux32.S src/frv/ffi.c src/bfin/ffi.c \
@@ -135,7 +135,7 @@ if X86_WIN32
nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/win32.S
endif
if X86_WIN64
-nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/win64.S
+nodist_libffi_la_SOURCES += src/x86/ffiw64.c src/x86/win64.S
endif
if X86_DARWIN
nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/darwin.S src/x86/ffi64.c src/x86/darwin64.S
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index 0d295e0..8c52573 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -127,25 +127,18 @@ typedef enum ffi_abi {
#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
-#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
-#define FFI_TRAMPOLINE_SIZE 24
-#define FFI_NATIVE_RAW_API 0
-#define FFI_GO_CLOSURES 1
+#if defined (X86_64) || defined(X86_WIN64) \
+ || (defined (__x86_64__) && defined (X86_DARWIN))
+# define FFI_TRAMPOLINE_SIZE 24
+# define FFI_NATIVE_RAW_API 0
+# define FFI_GO_CLOSURES 1
#else
-#ifdef X86_WIN32
-#define FFI_TRAMPOLINE_SIZE 52
-#else
-#ifdef X86_WIN64
-#define FFI_TRAMPOLINE_SIZE 29
-#define FFI_NATIVE_RAW_API 0
-#define FFI_NO_RAW_API 1
-#else
-#define FFI_TRAMPOLINE_SIZE 10
-#endif
-#endif
-#ifndef X86_WIN64
-#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
-#endif
+# ifdef X86_WIN32
+# define FFI_TRAMPOLINE_SIZE 52
+# else
+# define FFI_TRAMPOLINE_SIZE 10
+# endif
+# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
#endif
diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
new file mode 100644
index 0000000..316f544
--- /dev/null
+++ b/src/x86/ffiw64.c
@@ -0,0 +1,281 @@
+/* -----------------------------------------------------------------------
+ ffiw64.c - Copyright (c) 2014 Red Hat, Inc.
+
+ x86 win64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef X86_WIN64
+
+struct win64_call_frame
+{
+ UINT64 rbp; /* 0: caller's %rbp, stored here by ffi_call_win64 */
+ UINT64 retaddr; /* 8: return address copied from (%rsp) by ffi_call_win64 */
+ UINT64 fn; /* 16: target function, invoked via call *16(%rbp) */
+ UINT64 flags; /* 24: return-type code that indexes the result-store table */
+ UINT64 rvalue; /* 32: address the result is stored through */
+};
+
+extern void ffi_call_win64 (void *stack, struct win64_call_frame *,
+ void *closure) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ int flags, n;
+ /* Derive cif->flags (return-type code) and cif->bytes (argument area). */
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI; /* this file only implements FFI_WIN64 */
+ /* Start from the declared return type and refine it below. */
+ flags = cif->rtype->type;
+ switch (flags)
+ {
+ default:
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = FFI_TYPE_STRUCT; /* always treated as a memory (struct) return */
+ break;
+ case FFI_TYPE_COMPLEX:
+ flags = FFI_TYPE_STRUCT;
+ /* FALLTHRU */
+ case FFI_TYPE_STRUCT:
+ switch (cif->rtype->size)
+ {
+ case 8:
+ flags = FFI_TYPE_UINT64;
+ break;
+ case 4:
+ flags = FFI_TYPE_SMALL_STRUCT_4B;
+ break;
+ case 2:
+ flags = FFI_TYPE_SMALL_STRUCT_2B;
+ break;
+ case 1:
+ flags = FFI_TYPE_SMALL_STRUCT_1B;
+ break;
+ } /* no default: every other size keeps FFI_TYPE_STRUCT */
+ break;
+ }
+ cif->flags = flags;
+
+ /* Each argument either fits in a register, an 8 byte slot, or is
+ passed by reference with the pointer in the 8 byte slot. */
+ n = cif->nargs;
+ n += (flags == FFI_TYPE_STRUCT); /* one more slot for the result pointer */
+ if (n < 4)
+ n = 4; /* ffi_call_win64 always loads the first four slots */
+ cif->bytes = n * 8;
+
+ return FFI_OK;
+}
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ int i, j, n, flags;
+ UINT64 *stack;
+ size_t rsize;
+ struct win64_call_frame *frame;
+ /* Common worker for ffi_call and ffi_call_go: marshal AVALUE, call FN. */
+ FFI_ASSERT(cif->abi == FFI_WIN64);
+
+ flags = cif->flags;
+ rsize = 0;
+
+ /* If we have no return value for a structure, we need to create one.
+ Otherwise we can ignore the return type entirely. */
+ if (rvalue == NULL)
+ {
+ if (flags == FFI_TYPE_STRUCT)
+ rsize = cif->rtype->size;
+ else
+ flags = FFI_TYPE_VOID;
+ }
+ /* One alloca holds the argument slots, then the frame, then any temp. */
+ stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize);
+ frame = (struct win64_call_frame *)((char *)stack + cif->bytes);
+ if (rsize)
+ rvalue = frame + 1; /* scratch result lives just past the frame */
+
+ frame->fn = (uintptr_t)fn;
+ frame->flags = flags;
+ frame->rvalue = (uintptr_t)rvalue;
+
+ j = 0;
+ if (flags == FFI_TYPE_STRUCT)
+ {
+ stack[0] = (uintptr_t)rvalue; /* result pointer takes the first slot */
+ j = 1;
+ }
+ /* Fill one 8 byte slot per argument, zero-extending narrower values. */
+ for (i = 0, n = cif->nargs; i < n; ++i, ++j)
+ {
+ switch (cif->arg_types[i]->size)
+ {
+ case 8:
+ stack[j] = *(UINT64 *)avalue[i];
+ break;
+ case 4:
+ stack[j] = *(UINT32 *)avalue[i];
+ break;
+ case 2:
+ stack[j] = *(UINT16 *)avalue[i];
+ break;
+ case 1:
+ stack[j] = *(UINT8 *)avalue[i];
+ break;
+ default:
+ stack[j] = (uintptr_t)avalue[i]; /* by reference; no copy is made */
+ break;
+ }
+ }
+ /* Hand off to the assembly stub, which loads the slots and calls FN. */
+ ffi_call_win64 (stack, frame, closure);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL); /* plain call: no closure */
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure); /* CLOSURE ends up in %r10 */
+}
+
+
+extern void ffi_closure_win64(void) FFI_HIDDEN;
+extern void ffi_go_closure_win64(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+ static const unsigned char trampoline[16] = {
+ /* leaq -0x7(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+ /* jmpq *0x3(%rip) # 0x10 */
+ 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+ /* nopl (%rax) */
+ 0x0f, 0x1f, 0x00
+ };
+ char *tramp = (char *)closure->tramp; /* char *: ISO C has no void * arithmetic */
+ /* Fill in CLOSURE; CODELOC is unused because the code is %rip-relative. */
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+ /* The jmpq above reads its target from the 8 bytes after the code. */
+ memcpy (tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(tramp + sizeof(trampoline)) = (uintptr_t)ffi_closure_win64;
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*))
+{
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+ /* No code is generated: tramp just holds the shared assembly entry. */
+ closure->tramp = ffi_go_closure_win64;
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+struct win64_closure_frame
+{
+ UINT64 rvalue[2]; /* result buffer; the stub reloads it into %rax and %xmm0 */
+ UINT64 fargs[4]; /* %xmm0-%xmm3 as saved by the assembly stub */
+ UINT64 retaddr; /* return address of the closure's caller */
+ UINT64 args[]; /* register args spilled by the stub, then later slots */
+};
+
+int FFI_HIDDEN
+ffi_closure_win64_inner(ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ struct win64_closure_frame *frame)
+{
+ void **avalue;
+ void *rvalue;
+ int i, n, nreg, flags;
+ /* Build avalue[] from FRAME, run FUN, and return cif->flags. */
+ avalue = alloca(cif->nargs * sizeof(void *));
+ rvalue = frame->rvalue;
+ nreg = 0;
+
+ /* When returning a structure, the address is in the first argument.
+ That same address must also come back in rax, so copy it into the
+ frame's result slot, which the assembly stub reloads into rax. */
+ flags = cif->flags;
+ if (flags == FFI_TYPE_STRUCT)
+ {
+ rvalue = (void *)(uintptr_t)frame->args[0];
+ frame->rvalue[0] = frame->args[0];
+ nreg = 1;
+ }
+
+ for (i = 0, n = cif->nargs; i < n; ++i, ++nreg)
+ {
+ size_t size = cif->arg_types[i]->size;
+ size_t type = cif->arg_types[i]->type;
+ void *a;
+ /* Pick where this argument's value lives within the saved frame. */
+ if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT)
+ {
+ if (nreg < 4)
+ a = &frame->fargs[nreg]; /* one of the four saved xmm slots */
+ else
+ a = &frame->args[nreg]; /* past the fourth slot: in args[] */
+ }
+ else if (size == 1 || size == 2 || size == 4 || size == 8)
+ a = &frame->args[nreg]; /* value sits directly in its slot */
+ else
+ a = (void *)(uintptr_t)frame->args[nreg]; /* slot holds a pointer */
+
+ avalue[i] = a;
+ }
+
+ /* Invoke the closure. */
+ fun (cif, rvalue, avalue, user_data);
+ return flags;
+}
+
+#endif /* X86_WIN64 */
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 687f97c..a5a20b6 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -1,264 +1,16 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include <ffi_cfi.h>
-/* Constants for ffi_call_win64 */
-#define STACK 0
-#define PREP_ARGS_FN 32
-#define ECIF 40
-#define CIF_BYTES 48
-#define CIF_FLAGS 56
-#define RVALUE 64
-#define FN 72
-
-/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
- extended_cif *ecif, unsigned bytes, unsigned flags,
- unsigned *rvalue, void (*fn)());
- */
-
-#ifdef _MSC_VER
-PUBLIC ffi_call_win64
-
-EXTRN __chkstk:NEAR
-EXTRN ffi_closure_win64_inner:NEAR
-
-_TEXT SEGMENT
-
-;;; ffi_closure_win64 will be called with these registers set:
-;;; rax points to 'closure'
-;;; r11 contains a bit mask that specifies which of the
-;;; first four parameters are float or double
-;;;
-;;; It must move the parameters passed in registers to their stack location,
-;;; call ffi_closure_win64_inner for the actual work, then return the result.
-;;;
-ffi_closure_win64 PROC FRAME
- ;; copy register arguments onto stack
- test r11, 1
- jne first_is_float
- mov QWORD PTR [rsp+8], rcx
- jmp second
-first_is_float:
- movlpd QWORD PTR [rsp+8], xmm0
-
-second:
- test r11, 2
- jne second_is_float
- mov QWORD PTR [rsp+16], rdx
- jmp third
-second_is_float:
- movlpd QWORD PTR [rsp+16], xmm1
-
-third:
- test r11, 4
- jne third_is_float
- mov QWORD PTR [rsp+24], r8
- jmp fourth
-third_is_float:
- movlpd QWORD PTR [rsp+24], xmm2
-
-fourth:
- test r11, 8
- jne fourth_is_float
- mov QWORD PTR [rsp+32], r9
- jmp done
-fourth_is_float:
- movlpd QWORD PTR [rsp+32], xmm3
-
-done:
- .ALLOCSTACK 40
- sub rsp, 40
- .ENDPROLOG
- mov rcx, rax ; context is first parameter
- mov rdx, rsp ; stack is second parameter
- add rdx, 48 ; point to start of arguments
- mov rax, ffi_closure_win64_inner
- call rax ; call the real closure function
- add rsp, 40
- movd xmm0, rax ; If the closure returned a float,
- ; ffi_closure_win64_inner wrote it to rax
- ret 0
-ffi_closure_win64 ENDP
-
-ffi_call_win64 PROC FRAME
- ;; copy registers onto stack
- mov QWORD PTR [rsp+32], r9
- mov QWORD PTR [rsp+24], r8
- mov QWORD PTR [rsp+16], rdx
- mov QWORD PTR [rsp+8], rcx
- .PUSHREG rbp
- push rbp
- .ALLOCSTACK 48
- sub rsp, 48 ; 00000030H
- .SETFRAME rbp, 32
- lea rbp, QWORD PTR [rsp+32]
- .ENDPROLOG
-
- mov eax, DWORD PTR CIF_BYTES[rbp]
- add rax, 15
- and rax, -16
- call __chkstk
- sub rsp, rax
- lea rax, QWORD PTR [rsp+32]
- mov QWORD PTR STACK[rbp], rax
-
- mov rdx, QWORD PTR ECIF[rbp]
- mov rcx, QWORD PTR STACK[rbp]
- call QWORD PTR PREP_ARGS_FN[rbp]
-
- mov rsp, QWORD PTR STACK[rbp]
-
- movlpd xmm3, QWORD PTR [rsp+24]
- movd r9, xmm3
-
- movlpd xmm2, QWORD PTR [rsp+16]
- movd r8, xmm2
-
- movlpd xmm1, QWORD PTR [rsp+8]
- movd rdx, xmm1
-
- movlpd xmm0, QWORD PTR [rsp]
- movd rcx, xmm0
-
- call QWORD PTR FN[rbp]
-ret_struct4b$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
- jne ret_struct2b$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov DWORD PTR [rcx], eax
- jmp ret_void$
-
-ret_struct2b$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
- jne ret_struct1b$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov WORD PTR [rcx], ax
- jmp ret_void$
-
-ret_struct1b$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
- jne ret_uint8$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov BYTE PTR [rcx], al
- jmp ret_void$
-
-ret_uint8$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
- jne ret_sint8$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movzx rax, al
- mov QWORD PTR [rcx], rax
- jmp ret_void$
-
-ret_sint8$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
- jne ret_uint16$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movsx rax, al
- mov QWORD PTR [rcx], rax
- jmp ret_void$
-
-ret_uint16$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
- jne ret_sint16$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movzx rax, ax
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_sint16$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
- jne ret_uint32$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- movsx rax, ax
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_uint32$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
- jne ret_sint32$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov eax, eax
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_sint32$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
- jne ret_float$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- cdqe
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_float$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
- jne SHORT ret_double$
-
- mov rax, QWORD PTR RVALUE[rbp]
- movss DWORD PTR [rax], xmm0
- jmp SHORT ret_void$
-
-ret_double$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
- jne SHORT ret_uint64$
-
- mov rax, QWORD PTR RVALUE[rbp]
- movlpd QWORD PTR [rax], xmm0
- jmp SHORT ret_void$
-
-ret_uint64$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT64
- jne SHORT ret_sint64$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_sint64$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
- jne SHORT ret_pointer$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_pointer$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_POINTER
- jne SHORT ret_int$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_int$:
- cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_INT
- jne SHORT ret_void$
-
- mov rcx, QWORD PTR RVALUE[rbp]
- cdqe
- mov QWORD PTR [rcx], rax
- jmp SHORT ret_void$
-
-ret_void$:
- xor rax, rax
-
- lea rsp, QWORD PTR [rbp+16]
- pop rbp
- ret 0
-ffi_call_win64 ENDP
-_TEXT ENDS
-END
+#if defined(HAVE_AS_CFI_PSEUDO_OP)
+ .cfi_sections .debug_frame
+#endif
-#else
+#define arg0 %rcx
+#define arg1 %rdx
+#define arg2 %r8
+#define arg3 %r9
#ifdef SYMBOL_UNDERSCORE
#define SYMBOL_NAME(name) _##name
@@ -266,255 +18,202 @@ END
#define SYMBOL_NAME(name) name
#endif
-.text
-
-.extern SYMBOL_NAME(ffi_closure_win64_inner)
-
-# ffi_closure_win64 will be called with these registers set:
-# rax points to 'closure'
-# r11 contains a bit mask that specifies which of the
-# first four parameters are float or double
-#
-# It must move the parameters passed in registers to their stack location,
-# call ffi_closure_win64_inner for the actual work, then return the result.
-#
- .balign 16
- .globl SYMBOL_NAME(ffi_closure_win64)
- .seh_proc SYMBOL_NAME(ffi_closure_win64)
-SYMBOL_NAME(ffi_closure_win64):
- # copy register arguments onto stack
- test $1,%r11
- jne .Lfirst_is_float
- mov %rcx, 8(%rsp)
- jmp .Lsecond
-.Lfirst_is_float:
- movlpd %xmm0, 8(%rsp)
-
-.Lsecond:
- test $2, %r11
- jne .Lsecond_is_float
- mov %rdx, 16(%rsp)
- jmp .Lthird
-.Lsecond_is_float:
- movlpd %xmm1, 16(%rsp)
-
-.Lthird:
- test $4, %r11
- jne .Lthird_is_float
- mov %r8,24(%rsp)
- jmp .Lfourth
-.Lthird_is_float:
- movlpd %xmm2, 24(%rsp)
-
-.Lfourth:
- test $8, %r11
- jne .Lfourth_is_float
- mov %r9, 32(%rsp)
- jmp .Ldone
-.Lfourth_is_float:
- movlpd %xmm3, 32(%rsp)
-
-.Ldone:
- .seh_stackalloc 40
- sub $40, %rsp
+.macro E which
+ .align 8
+ .org 0b + \which * 8
+.endm
+
+ .text
+
+/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
+
+ Bit o trickiness here -- FRAME is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
+
+ .align 8
+ .globl ffi_call_win64
+
+ .seh_proc ffi_call_win64
+ffi_call_win64:
+ cfi_startproc
+ /* Set up the local stack frame and install it in rbp/rsp. */
+ movq (%rsp), %rax
+ movq %rbp, (arg1)
+ movq %rax, 8(arg1)
+ movq arg1, %rbp
+ cfi_def_cfa(%rbp, 16)
+ cfi_rel_offset(%rbp, 0)
+ .seh_pushreg %rbp
+ .seh_setframe %rbp, 0
.seh_endprologue
- mov %rax, %rcx # context is first parameter
- mov %rsp, %rdx # stack is second parameter
- add $48, %rdx # point to start of arguments
- leaq SYMBOL_NAME(ffi_closure_win64_inner)(%rip), %rax
- callq *%rax # call the real closure function
- add $40, %rsp
- movq %rax, %xmm0 # If the closure returned a float,
- # ffi_closure_win64_inner wrote it to rax
- retq
+ movq arg0, %rsp
+
+ movq arg2, %r10
+
+ /* Load all slots into both general and xmm registers. */
+ movq (%rsp), %rcx
+ movsd (%rsp), %xmm0
+ movq 8(%rsp), %rdx
+ movsd 8(%rsp), %xmm1
+ movq 16(%rsp), %r8
+ movsd 16(%rsp), %xmm2
+ movq 24(%rsp), %r9
+ movsd 24(%rsp), %xmm3
+
+ call *16(%rbp)
+
+ movl 24(%rbp), %ecx
+ movq 32(%rbp), %r8
+ leaq 0f(%rip), %r10
+ cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
+ leaq (%r10, %rcx, 8), %r10
+ ja 99f
+ jmp *%r10
+
+/* Below, we're space constrained most of the time. Thus we eschew the
+ modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
+.macro epilogue
+ leaveq
+ cfi_remember_state
+ cfi_def_cfa(%rsp, 8)
+ cfi_restore(%rbp)
+ ret
+ cfi_restore_state
+.endm
+
+ .align 8
+0:
+E FFI_TYPE_VOID
+ epilogue
+E FFI_TYPE_INT
+ movslq %eax, %rax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_FLOAT
+ movss %xmm0, (%r8)
+ epilogue
+E FFI_TYPE_DOUBLE
+ movsd %xmm0, (%r8)
+ epilogue
+E FFI_TYPE_LONGDOUBLE
+ call abort
+E FFI_TYPE_UINT8
+ movzbl %al, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT8
+ movsbq %al, %rax
+ jmp 98f
+E FFI_TYPE_UINT16
+ movzwl %ax, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT16
+ movswq %ax, %rax
+ jmp 98f
+E FFI_TYPE_UINT32
+ movl %eax, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT32
+ movslq %eax, %rax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_UINT64
+98: movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT64
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_STRUCT
+ epilogue
+E FFI_TYPE_POINTER
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_COMPLEX
+ call abort
+E FFI_TYPE_SMALL_STRUCT_1B
+ movb %al, (%r8)
+ epilogue
+E FFI_TYPE_SMALL_STRUCT_2B
+ movw %ax, (%r8)
+ epilogue
+E FFI_TYPE_SMALL_STRUCT_4B
+ movl %eax, (%r8)
+ epilogue
+
+ .align 8
+99: call abort
+
+.purgem epilogue
+
+ cfi_endproc
.seh_endproc
- .balign 16
- .globl SYMBOL_NAME(ffi_call_win64)
- .seh_proc SYMBOL_NAME(ffi_call_win64)
-SYMBOL_NAME(ffi_call_win64):
- # copy registers onto stack
- mov %r9,32(%rsp)
- mov %r8,24(%rsp)
- mov %rdx,16(%rsp)
- mov %rcx,8(%rsp)
- .seh_pushreg rbp
- push %rbp
- .seh_stackalloc 48
- sub $48,%rsp
- .seh_setframe rbp, 32
- lea 32(%rsp),%rbp
- .seh_endprologue
-
- mov CIF_BYTES(%rbp),%eax
- add $15, %rax
- and $-16, %rax
- cmpq $0x1000, %rax
- jb Lch_done
-Lch_probe:
- subq $0x1000,%rsp
- orl $0x0, (%rsp)
- subq $0x1000,%rax
- cmpq $0x1000,%rax
- ja Lch_probe
-Lch_done:
- subq %rax, %rsp
- orl $0x0, (%rsp)
- lea 32(%rsp), %rax
- mov %rax, STACK(%rbp)
-
- mov ECIF(%rbp), %rdx
- mov STACK(%rbp), %rcx
- callq *PREP_ARGS_FN(%rbp)
-
- mov STACK(%rbp), %rsp
-
- movlpd 24(%rsp), %xmm3
- movd %xmm3, %r9
-
- movlpd 16(%rsp), %xmm2
- movd %xmm2, %r8
-
- movlpd 8(%rsp), %xmm1
- movd %xmm1, %rdx
-
- movlpd (%rsp), %xmm0
- movd %xmm0, %rcx
-
- callq *FN(%rbp)
-.Lret_struct4b:
- cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
- jne .Lret_struct2b
-
- mov RVALUE(%rbp), %rcx
- mov %eax, (%rcx)
- jmp .Lret_void
-
-.Lret_struct2b:
- cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
- jne .Lret_struct1b
-
- mov RVALUE(%rbp), %rcx
- mov %ax, (%rcx)
- jmp .Lret_void
-
-.Lret_struct1b:
- cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
- jne .Lret_uint8
- mov RVALUE(%rbp), %rcx
- mov %al, (%rcx)
- jmp .Lret_void
-
-.Lret_uint8:
- cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
- jne .Lret_sint8
-
- mov RVALUE(%rbp), %rcx
- movzbq %al, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint8:
- cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
- jne .Lret_uint16
-
- mov RVALUE(%rbp), %rcx
- movsbq %al, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_uint16:
- cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
- jne .Lret_sint16
-
- mov RVALUE(%rbp), %rcx
- movzwq %ax, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint16:
- cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
- jne .Lret_uint32
-
- mov RVALUE(%rbp), %rcx
- movswq %ax, %rax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_uint32:
- cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
- jne .Lret_sint32
-
- mov RVALUE(%rbp), %rcx
- movl %eax, %eax
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint32:
- cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
- jne .Lret_float
-
- mov RVALUE(%rbp), %rcx
- cltq
- movq %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_float:
- cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
- jne .Lret_double
-
- mov RVALUE(%rbp), %rax
- movss %xmm0, (%rax)
- jmp .Lret_void
-
-.Lret_double:
- cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
- jne .Lret_uint64
-
- mov RVALUE(%rbp), %rax
- movlpd %xmm0, (%rax)
- jmp .Lret_void
-
-.Lret_uint64:
- cmpl $FFI_TYPE_UINT64, CIF_FLAGS(%rbp)
- jne .Lret_sint64
-
- mov RVALUE(%rbp), %rcx
- mov %rax, (%rcx)
- jmp .Lret_void
-
-.Lret_sint64:
- cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
- jne .Lret_pointer
-
- mov RVALUE(%rbp), %rcx
- mov %rax, (%rcx)
- jmp .Lret_void
+/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
+ 16 bytes of result, 32 bytes of xmm registers. */
+#define ffi_clo_FS (32+8+16+32)
+#define ffi_clo_OFF_R (32+8)
+#define ffi_clo_OFF_X (32+8+16)
+
+ .align 8
+ .globl ffi_go_closure_win64
+
+ .seh_proc ffi_go_closure_win64
+ffi_go_closure_win64:
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ movq arg0, 8(%rsp)
+ movq arg1, 16(%rsp)
+ movq arg2, 24(%rsp)
+ movq arg3, 32(%rsp)
+
+ movq 8(%r10), arg0 /* load cif */
+ movq 16(%r10), arg1 /* load fun */
+ movq %r10, arg2 /* closure is user_data */
+ jmp 0f
+ cfi_endproc
+ .seh_endproc
-.Lret_pointer:
- cmpl $FFI_TYPE_POINTER, CIF_FLAGS(%rbp)
- jne .Lret_int
+ .align 8
+ .globl ffi_closure_win64
+
+ .seh_proc ffi_closure_win64
+ffi_closure_win64:
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ movq arg0, 8(%rsp)
+ movq arg1, 16(%rsp)
+ movq arg2, 24(%rsp)
+ movq arg3, 32(%rsp)
+
+ movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */
+0:
+ subq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_clo_FS)
+ .seh_stackalloc ffi_clo_FS
+ .seh_endprologue
- mov RVALUE(%rbp), %rcx
- mov %rax, (%rcx)
- jmp .Lret_void
+ /* Save all sse arguments into the stack frame. */
+ movsd %xmm0, ffi_clo_OFF_X(%rsp)
+ movsd %xmm1, ffi_clo_OFF_X+8(%rsp)
+ movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
+ movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
-.Lret_int:
- cmpl $FFI_TYPE_INT, CIF_FLAGS(%rbp)
- jne .Lret_void
+ leaq ffi_clo_OFF_R(%rsp), arg3
+ call ffi_closure_win64_inner
- mov RVALUE(%rbp), %rcx
- cltq
- movq %rax, (%rcx)
- jmp .Lret_void
+ /* Load the result into both possible result registers. */
+ movq ffi_clo_OFF_R(%rsp), %rax
+ movsd ffi_clo_OFF_R(%rsp), %xmm0
-.Lret_void:
- xor %rax, %rax
+ addq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(-ffi_clo_FS)
+ ret
- lea 16(%rbp), %rsp
- pop %rbp
- retq
+ cfi_endproc
.seh_endproc
-#endif /* !_MSC_VER */
-
diff --git a/testsuite/libffi.call/call.exp b/testsuite/libffi.call/call.exp
index 5177f07..55de25c 100644
--- a/testsuite/libffi.call/call.exp
+++ b/testsuite/libffi.call/call.exp
@@ -24,16 +24,15 @@ set ctlist [lsearch -inline -all -glob [lsort [glob -nocomplain -- $srcdir/$subd
run-many-tests $tlist ""
-if { ![istarget s390*] } {
-
+# ??? We really should preprocess ffi.h and grep
+# for FFI_TARGET_HAS_COMPLEX_TYPE.
+if { [istarget s390*]
+ || [istarget x86_64*] } {
+ run-many-tests $ctlist ""
+} else {
foreach test $ctlist {
unsupported "$test"
}
-
-} else {
-
- run-many-tests $ctlist ""
-
}
dg-finish
diff --git a/testsuite/libffi.call/cls_align_longdouble_split.c b/testsuite/libffi.call/cls_align_longdouble_split.c
index 15f9365..cc1c43b 100644
--- a/testsuite/libffi.call/cls_align_longdouble_split.c
+++ b/testsuite/libffi.call/cls_align_longdouble_split.c
@@ -4,10 +4,8 @@
PR: none.
Originator: <hos@tamanegi.org> 20031203 */
-/* { dg-excess-errors "no long double format" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
/* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-/* { dg-output "" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
#include "ffitest.h"
diff --git a/testsuite/libffi.call/cls_align_longdouble_split2.c b/testsuite/libffi.call/cls_align_longdouble_split2.c
index ca1c356..5d3bec0 100644
--- a/testsuite/libffi.call/cls_align_longdouble_split2.c
+++ b/testsuite/libffi.call/cls_align_longdouble_split2.c
@@ -5,10 +5,8 @@
Originator: Blake Chaffin 6/18/2007
*/
-/* { dg-excess-errors "no long double format" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
/* { dg-do run { xfail strongarm*-*-* } } */
/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-/* { dg-output "" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
#include "ffitest.h"
diff --git a/testsuite/libffi.call/cls_longdouble.c b/testsuite/libffi.call/cls_longdouble.c
index 5dc9ac7..d24e72e 100644
--- a/testsuite/libffi.call/cls_longdouble.c
+++ b/testsuite/libffi.call/cls_longdouble.c
@@ -4,12 +4,10 @@
PR: none.
Originator: Blake Chaffin */
-/* { dg-excess-errors "no long double format" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
/* This test is known to PASS on armv7l-unknown-linux-gnueabihf, so I have
remove the xfail for arm*-*-* below, until we know more. */
/* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-/* { dg-output "" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
#include "ffitest.h"
diff --git a/testsuite/libffi.call/float2.c b/testsuite/libffi.call/float2.c
index a0b296c..aae1abf 100644
--- a/testsuite/libffi.call/float2.c
+++ b/testsuite/libffi.call/float2.c
@@ -4,9 +4,6 @@
PR: none.
Originator: From the original ffitest.c */
-/* { dg-excess-errors "fails" { target x86_64-*-mingw* x86_64-*-cygwin* } } */
-/* { dg-do run { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
-
#include "ffitest.h"
#include "float.h"
diff --git a/testsuite/libffi.call/huge_struct.c b/testsuite/libffi.call/huge_struct.c
index 657fe54..187c42c 100644
--- a/testsuite/libffi.call/huge_struct.c
+++ b/testsuite/libffi.call/huge_struct.c
@@ -5,11 +5,9 @@
Originator: Blake Chaffin 6/18/2007
*/
-/* { dg-excess-errors "" { target x86_64-*-mingw* x86_64-*-cygwin* } } */
/* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
/* { dg-options -Wformat=0 { target moxie*-*-elf } } */
-/* { dg-output "" { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
#include "ffitest.h"
diff --git a/testsuite/libffi.call/return_ldl.c b/testsuite/libffi.call/return_ldl.c
index 5c2fe65..520e710 100644
--- a/testsuite/libffi.call/return_ldl.c
+++ b/testsuite/libffi.call/return_ldl.c
@@ -4,7 +4,6 @@
PR: none.
Originator: <andreast@gcc.gnu.org> 20071113 */
-/* { dg-do run { xfail x86_64-*-mingw* x86_64-*-cygwin* } } */
#include "ffitest.h"
static long double return_ldl(long double ldl)
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 8/8] x86_64: Add support for complex types
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
2014-10-28 18:32 ` [PATCH 5/8] win64: Remove support from ffi.c Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 6/8] x86_64: Fixups for x32 Richard Henderson
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
---
src/x86/ffi64.c | 97 +++++++++++++++++++++++++++++++++++++++++++++-------
src/x86/internal64.h | 6 ++--
src/x86/unix64.S | 63 ++++++++++++++++++----------------
3 files changed, 122 insertions(+), 44 deletions(-)
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index a03061b..650f7bb 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -171,6 +171,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
+ do_integer:
{
size_t size = byte_offset + type->size;
@@ -301,11 +302,42 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}
return words;
}
-
- default:
- FFI_ASSERT(0);
+ case FFI_TYPE_COMPLEX:
+ {
+ ffi_type *inner = type->elements[0];
+ switch (inner->type)
+ {
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ goto do_integer;
+
+ case FFI_TYPE_FLOAT:
+ classes[0] = X86_64_SSE_CLASS;
+ if (byte_offset % 8)
+ {
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+#endif
+ }
+ }
}
- return 0; /* Never reached. */
+ abort();
}
/* Examine the argument and return set number of register required in each
@@ -360,7 +392,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
{
int gprcount, ssecount, i, avn, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t bytes, n;
+ size_t bytes, n, rtype_size;
ffi_type *rtype;
if (cif->abi != FFI_UNIX64)
@@ -369,6 +401,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
gprcount = ssecount = 0;
rtype = cif->rtype;
+ rtype_size = rtype->size;
switch (rtype->type)
{
case FFI_TYPE_VOID:
@@ -421,16 +454,54 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
else
{
- /* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
- if (sse0)
- flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
- else
- flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
- /* Mark the true size of the structure. */
- flags |= rtype->size << UNIX64_SIZE_SHIFT;
+ if (rtype_size == 4 && sse0)
+ flags = UNIX64_RET_XMM32;
+ else if (rtype_size == 8)
+ flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+ else
+ {
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && sse1)
+ flags = UNIX64_RET_ST_XMM0_XMM1;
+ else if (sse0)
+ flags = UNIX64_RET_ST_XMM0_RAX;
+ else if (sse1)
+ flags = UNIX64_RET_ST_RAX_XMM0;
+ else
+ flags = UNIX64_RET_ST_RAX_RDX;
+ flags |= rtype_size << UNIX64_SIZE_SHIFT;
+ }
+ }
+ break;
+ case FFI_TYPE_COMPLEX:
+ switch (rtype->elements[0]->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+ break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87_2;
+ break;
+#endif
+ default:
+ return FFI_BAD_TYPEDEF;
}
break;
default:
diff --git a/src/x86/internal64.h b/src/x86/internal64.h
index 07b1b10..512e955 100644
--- a/src/x86/internal64.h
+++ b/src/x86/internal64.h
@@ -9,11 +9,13 @@
#define UNIX64_RET_XMM32 8
#define UNIX64_RET_XMM64 9
#define UNIX64_RET_X87 10
-#define UNIX64_RET_ST_RAX_RDX 11
+#define UNIX64_RET_X87_2 11
#define UNIX64_RET_ST_XMM0_RAX 12
#define UNIX64_RET_ST_RAX_XMM0 13
#define UNIX64_RET_ST_XMM0_XMM1 14
-#define UNIX64_RET_LAST 14
+#define UNIX64_RET_ST_RAX_RDX 15
+
+#define UNIX64_RET_LAST 15
#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
#define UNIX64_FLAG_XMM_ARGS (1 << 11)
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 0151229..6066bbf 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -156,9 +156,10 @@ E UNIX64_RET_XMM64
E UNIX64_RET_X87
fstpt (%rdi)
ret
-E UNIX64_RET_ST_RAX_RDX
- movq %rdx, 8(%rsi)
- jmp 2f
+E UNIX64_RET_X87_2
+ fstpt (%rdi)
+ fstpt 16(%rdi)
+ ret
E UNIX64_RET_ST_XMM0_RAX
movq %rax, 8(%rsi)
jmp 3f
@@ -167,14 +168,15 @@ E UNIX64_RET_ST_RAX_XMM0
jmp 2f
E UNIX64_RET_ST_XMM0_XMM1
movq %xmm1, 8(%rsi)
-
- .align 8
-3: movq %xmm0, (%rsi)
+ jmp 3f
+E UNIX64_RET_ST_RAX_RDX
+ movq %rdx, 8(%rsi)
+2: movq %rax, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
.align 8
-2: movq %rax, (%rsi)
+3: movq %xmm0, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
@@ -201,11 +203,11 @@ E UNIX64_RET_ST_XMM0_XMM1
.size ffi_call_unix64,.-ffi_call_unix64
/* 6 general registers, 8 vector registers,
- 16 bytes of rvalue, 8 bytes of alignment. */
+ 32 bytes of rvalue, 8 bytes of alignment. */
#define ffi_closure_OFS_G 0
#define ffi_closure_OFS_V (6*8)
#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
-#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 16 + 8)
+#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8)
/* The location of rvalue within the red zone after deallocating the frame. */
#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
@@ -275,6 +277,7 @@ ffi_closure_unix64:
leaq 0f(%rip), %r11
ja 9f
leaq (%r11, %r10, 8), %r10
+ leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
.align 8
@@ -282,52 +285,54 @@ ffi_closure_unix64:
E UNIX64_RET_VOID
ret
E UNIX64_RET_UINT8
- movzbl ffi_closure_RED_RVALUE(%rsp), %eax
+ movzbl (%rsi), %eax
ret
E UNIX64_RET_UINT16
- movzwl ffi_closure_RED_RVALUE(%rsp), %eax
+ movzwl (%rsi), %eax
ret
E UNIX64_RET_UINT32
- movl ffi_closure_RED_RVALUE(%rsp), %eax
+ movl (%rsi), %eax
ret
E UNIX64_RET_SINT8
- movsbl ffi_closure_RED_RVALUE(%rsp), %eax
+ movsbl (%rsi), %eax
ret
E UNIX64_RET_SINT16
- movswl ffi_closure_RED_RVALUE(%rsp), %eax
+ movswl (%rsi), %eax
ret
E UNIX64_RET_SINT32
- movl ffi_closure_RED_RVALUE(%rsp), %eax
+ movl (%rsi), %eax
ret
E UNIX64_RET_INT64
- movq ffi_closure_RED_RVALUE(%rsp), %rax
+ movq (%rsi), %rax
ret
E UNIX64_RET_XMM32
- movd ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movd (%rsi), %xmm0
ret
E UNIX64_RET_XMM64
- movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movq (%rsi), %xmm0
ret
E UNIX64_RET_X87
- fldt ffi_closure_RED_RVALUE(%rsp)
+ fldt (%rsi)
+ ret
+E UNIX64_RET_X87_2
+ fldt 16(%rsi)
+ fldt (%rsi)
ret
-E UNIX64_RET_ST_RAX_RDX
- movq ffi_closure_RED_RVALUE+8(%rsp), %rdx
- jmp 2f
E UNIX64_RET_ST_XMM0_RAX
- movq ffi_closure_RED_RVALUE+8(%rsp), %rax
+ movq 8(%rsi), %rax
jmp 3f
E UNIX64_RET_ST_RAX_XMM0
- movq ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+ movq 8(%rsi), %xmm0
jmp 2f
E UNIX64_RET_ST_XMM0_XMM1
- movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1
-
- .align 8
-3: movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movq 8(%rsi), %xmm1
+ jmp 3f
+E UNIX64_RET_ST_RAX_RDX
+ movq 8(%rsi), %rdx
+2: movq (%rsi), %rax
ret
.align 8
-2: movq ffi_closure_RED_RVALUE(%rsp), %rax
+3: movq (%rsi), %xmm0
ret
9: call abort@PLT
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 7/8] x86_64: Decouple return types from FFI_TYPE constants
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
` (5 preceding siblings ...)
2014-10-28 18:32 ` [PATCH 3/8] x86-64: Support go closures Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 1/8] Add entry points for interacting with Go Richard Henderson
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
This lets us better support structure returns, and serves as prep for
complex types.
---
src/x86/ffi64.c | 142 ++++++++++++++++++-------------
src/x86/internal64.h | 20 +++++
src/x86/unix64.S | 236 +++++++++++++++++++++------------------------------
3 files changed, 202 insertions(+), 196 deletions(-)
create mode 100644 src/x86/internal64.h
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index 65fb595..a03061b 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -33,6 +33,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <stdint.h>
+#include "internal64.h"
#ifdef __x86_64__
@@ -191,7 +192,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}
else if (size <= 16)
{
- classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
}
else
@@ -360,15 +361,55 @@ ffi_prep_cif_machdep (ffi_cif *cif)
int gprcount, ssecount, i, avn, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
size_t bytes, n;
+ ffi_type *rtype;
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
gprcount = ssecount = 0;
- flags = cif->rtype->type;
- if (flags != FFI_TYPE_VOID)
+ rtype = cif->rtype;
+ switch (rtype->type)
{
+ case FFI_TYPE_VOID:
+ flags = UNIX64_RET_VOID;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = UNIX64_RET_UINT8;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = UNIX64_RET_SINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = UNIX64_RET_UINT16;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = UNIX64_RET_SINT16;
+ break;
+ case FFI_TYPE_UINT32:
+ flags = UNIX64_RET_UINT32;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ flags = UNIX64_RET_SINT32;
+ break;
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM32;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87;
+ break;
+ case FFI_TYPE_STRUCT:
n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
@@ -376,22 +417,24 @@ ffi_prep_cif_machdep (ffi_cif *cif)
memory is the first argument. Allocate a register for it. */
gprcount++;
/* We don't have to do anything in asm for the return. */
- flags = FFI_TYPE_VOID;
+ flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
}
- else if (flags == FFI_TYPE_STRUCT)
+ else
{
/* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
_Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
- if (sse0 && !sse1)
- flags |= 1 << 8;
- else if (!sse0 && sse1)
- flags |= 1 << 9;
- else if (sse0 && sse1)
- flags |= 1 << 10;
+ if (sse0)
+ flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
+ else
+ flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
+
/* Mark the true size of the structure. */
- flags |= cif->rtype->size << 12;
+ flags |= rtype->size << UNIX64_SIZE_SHIFT;
}
+ break;
+ default:
+ return FFI_BAD_TYPEDEF;
}
/* Go over all arguments and determine the way they should be passed.
@@ -418,9 +461,10 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
}
if (ssecount)
- flags |= 1 << 11;
+ flags |= UNIX64_FLAG_XMM_ARGS;
+
cif->flags = flags;
- cif->bytes = (unsigned)ALIGN (bytes, 8);
+ cif->bytes = ALIGN (bytes, 8);
return FFI_OK;
}
@@ -432,20 +476,22 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
enum x86_64_reg_class classes[MAX_CLASSES];
char *stack, *argp;
ffi_type **arg_types;
- int gprcount, ssecount, ngpr, nsse, i, avn;
- _Bool ret_in_memory;
+ int gprcount, ssecount, ngpr, nsse, i, avn, flags;
struct register_args *reg_args;
/* Can't call 32-bit mode from 64-bit mode. */
FFI_ASSERT (cif->abi == FFI_UNIX64);
/* If the return value is a struct and we don't have a return value
- address then we need to make one. Note the setting of flags to
- VOID above in ffi_prep_cif_machdep. */
- ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
- && (cif->flags & 0xff) == FFI_TYPE_VOID);
- if (rvalue == NULL && ret_in_memory)
- rvalue = alloca (cif->rtype->size);
+ address then we need to make one. Otherwise we can ignore it. */
+ flags = cif->flags;
+ if (rvalue == NULL)
+ {
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
+ rvalue = alloca (cif->rtype->size);
+ else
+ flags = UNIX64_RET_VOID;
+ }
/* Allocate the space for the arguments, plus 4 words of temp space. */
stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
@@ -458,7 +504,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
/* If the return value is passed in memory, add the pointer as the
first integer argument. */
- if (ret_in_memory)
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
reg_args->gpr[gprcount++] = (unsigned long) rvalue;
avn = cif->nargs;
@@ -503,17 +549,17 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
switch (arg_types[i]->type)
{
case FFI_TYPE_SINT8:
- *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+ reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
break;
case FFI_TYPE_SINT16:
- *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+ reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
break;
case FFI_TYPE_SINT32:
- *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+ reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
break;
default:
reg_args->gpr[gprcount] = 0;
- memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ memcpy (&reg_args->gpr[gprcount], a, size);
}
gprcount++;
break;
@@ -533,7 +579,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
reg_args->rax = ssecount;
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
- cif->flags, rvalue, fn);
+ flags, rvalue, fn);
}
void
@@ -573,7 +619,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
- if (cif->flags & (1 << 11))
+ if (cif->flags & UNIX64_FLAG_XMM_ARGS)
dest = ffi_closure_unix64_sse;
else
dest = ffi_closure_unix64;
@@ -600,39 +646,17 @@ ffi_closure_unix64_inner(ffi_cif *cif,
ffi_type **arg_types;
long i, avn;
int gprcount, ssecount, ngpr, nsse;
- int ret;
+ int flags;
- avalue = alloca(cif->nargs * sizeof(void *));
+ avn = cif->nargs;
+ flags = cif->flags;
+ avalue = alloca(avn * sizeof(void *));
gprcount = ssecount = 0;
- ret = cif->rtype->type;
- if (ret != FFI_TYPE_VOID)
- {
- enum x86_64_reg_class classes[MAX_CLASSES];
- size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
- if (n == 0)
- {
- /* The return value goes in memory. Arrange for the closure
- return value to go directly back to the original caller. */
- rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
- /* We don't have to do anything in asm for the return. */
- ret = FFI_TYPE_VOID;
- }
- else if (ret == FFI_TYPE_STRUCT && n == 2)
- {
- /* Mark which register the second word of the structure goes in. */
- _Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = SSE_CLASS_P (classes[1]);
- if (!sse0 && sse1)
- ret |= 1 << 8;
- else if (sse0 && !sse1)
- ret |= 1 << 9;
- }
- }
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
+ rvalue = (void *)(uintptr_t)reg_args->gpr[gprcount++];
- avn = cif->nargs;
arg_types = cif->arg_types;
-
for (i = 0; i < avn; ++i)
{
enum x86_64_reg_class classes[MAX_CLASSES];
@@ -693,7 +717,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
fun (cif, rvalue, avalue, user_data);
/* Tell assembly how to perform return type promotions. */
- return ret;
+ return flags;
}
extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
@@ -706,7 +730,7 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
- closure->tramp = (cif->flags & (1 << 11)
+ closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
? ffi_go_closure_unix64_sse
: ffi_go_closure_unix64);
closure->cif = cif;
diff --git a/src/x86/internal64.h b/src/x86/internal64.h
new file mode 100644
index 0000000..07b1b10
--- /dev/null
+++ b/src/x86/internal64.h
@@ -0,0 +1,20 @@
+#define UNIX64_RET_VOID 0
+#define UNIX64_RET_UINT8 1
+#define UNIX64_RET_UINT16 2
+#define UNIX64_RET_UINT32 3
+#define UNIX64_RET_SINT8 4
+#define UNIX64_RET_SINT16 5
+#define UNIX64_RET_SINT32 6
+#define UNIX64_RET_INT64 7
+#define UNIX64_RET_XMM32 8
+#define UNIX64_RET_XMM64 9
+#define UNIX64_RET_X87 10
+#define UNIX64_RET_ST_RAX_RDX 11
+#define UNIX64_RET_ST_XMM0_RAX 12
+#define UNIX64_RET_ST_RAX_XMM0 13
+#define UNIX64_RET_ST_XMM0_XMM1 14
+#define UNIX64_RET_LAST 14
+
+#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
+#define UNIX64_FLAG_XMM_ARGS (1 << 11)
+#define UNIX64_SIZE_SHIFT 12
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 797b9d9..0151229 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -31,9 +31,15 @@
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
+#include "internal64.h"
.text
+.macro E index
+ .align 8
+ .org 0b + \index * 8, 0x90
+.endm
+
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void));
@@ -41,7 +47,7 @@
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
- .align 2
+ .align 8
.globl ffi_call_unix64
.type ffi_call_unix64,@function
FFI_HIDDEN(ffi_call_unix64)
@@ -100,109 +106,81 @@ ffi_call_unix64:
cfi_restore(%rbp)
/* The first byte of the flags contains the FFI_TYPE. */
+ cmpb $UNIX64_RET_LAST, %cl
movzbl %cl, %r10d
- leaq .Lstore_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
- jmp *%r10
+ leaq 0f(%rip), %r11
+ ja 9f
+ leaq (%r11, %r10, 8), %r10
- .section .rodata
- .align 2
-.Lstore_table:
- .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
- .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
- .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */
- .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */
- .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */
- .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */
- .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */
- .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */
- .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */
- .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */
- .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */
- .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
- .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
- .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */
- .previous
+ /* Prep for the structure cases: scratch area in redzone. */
+ leaq -20(%rsp), %rsi
+ jmp *%r10
- .align 2
-.Lst_void:
+ .align 8
+0:
+E UNIX64_RET_VOID
ret
- .align 2
-
-.Lst_uint8:
- movzbq %al, %rax
+E UNIX64_RET_UINT8
+ movzbl %al, %eax
movq %rax, (%rdi)
ret
- .align 2
-.Lst_sint8:
- movsbq %al, %rax
+E UNIX64_RET_UINT16
+ movzwl %ax, %eax
movq %rax, (%rdi)
ret
- .align 2
-.Lst_uint16:
- movzwq %ax, %rax
+E UNIX64_RET_UINT32
+ movl %eax, %eax
movq %rax, (%rdi)
- .align 2
-.Lst_sint16:
- movswq %ax, %rax
+ ret
+E UNIX64_RET_SINT8
+ movsbq %al, %rax
movq %rax, (%rdi)
ret
- .align 2
-.Lst_uint32:
- movl %eax, %eax
+E UNIX64_RET_SINT16
+ movswq %ax, %rax
movq %rax, (%rdi)
- .align 2
-.Lst_sint32:
+ ret
+E UNIX64_RET_SINT32
cltq
movq %rax, (%rdi)
ret
- .align 2
-.Lst_int64:
+E UNIX64_RET_INT64
movq %rax, (%rdi)
ret
-
- .align 2
-.Lst_float:
- movss %xmm0, (%rdi)
+E UNIX64_RET_XMM32
+ movd %xmm0, (%rdi)
ret
- .align 2
-.Lst_double:
- movsd %xmm0, (%rdi)
+E UNIX64_RET_XMM64
+ movq %xmm0, (%rdi)
ret
-.Lst_ldouble:
+E UNIX64_RET_X87
fstpt (%rdi)
ret
-
- .align 2
-.Lst_struct:
- leaq -20(%rsp), %rsi /* Scratch area in redzone. */
-
- /* We have to locate the values now, and since we don't want to
- write too much data into the user's return value, we spill the
- value to a 16 byte scratch area first. Bits 8, 9, and 10
- control where the values are located. Only one of the three
- bits will be set; see ffi_prep_cif_machdep for the pattern. */
- movd %xmm0, %r10
- movd %xmm1, %r11
- testl $0x100, %ecx
- cmovnz %rax, %rdx
- cmovnz %r10, %rax
- testl $0x200, %ecx
- cmovnz %r10, %rdx
- testl $0x400, %ecx
- cmovnz %r10, %rax
- cmovnz %r11, %rdx
- movq %rax, (%rsi)
+E UNIX64_RET_ST_RAX_RDX
movq %rdx, 8(%rsi)
-
- /* Bits 12-31 contain the true size of the structure. Copy from
- the scratch area to the true destination. */
- shrl $12, %ecx
+ jmp 2f
+E UNIX64_RET_ST_XMM0_RAX
+ movq %rax, 8(%rsi)
+ jmp 3f
+E UNIX64_RET_ST_RAX_XMM0
+ movq %xmm0, 8(%rsi)
+ jmp 2f
+E UNIX64_RET_ST_XMM0_XMM1
+ movq %xmm1, 8(%rsi)
+
+ .align 8
+3: movq %xmm0, (%rsi)
+ shrl $UNIX64_SIZE_SHIFT, %ecx
+ rep movsb
+ ret
+ .align 8
+2: movq %rax, (%rsi)
+ shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
+9: call abort@PLT
+
/* Many times we can avoid loading any SSE registers at all.
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
@@ -292,84 +270,68 @@ ffi_closure_unix64:
cfi_adjust_cfa_offset(-ffi_closure_FS)
/* The first byte of the return value contains the FFI_TYPE. */
+ cmpb $UNIX64_RET_LAST, %al
movzbl %al, %r10d
- leaq .Lload_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
+ leaq 0f(%rip), %r11
+ ja 9f
+ leaq (%r11, %r10, 8), %r10
jmp *%r10
- .section .rodata
- .align 2
-.Lload_table:
- .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
- .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
- .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */
- .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */
- .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */
- .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */
- .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */
- .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */
- .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */
- .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */
- .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */
- .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */
- .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
- .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
- .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */
- .previous
-
- .align 2
-.Lld_void:
+ .align 8
+0:
+E UNIX64_RET_VOID
ret
-
- .align 2
-.Lld_int8:
+E UNIX64_RET_UINT8
movzbl ffi_closure_RED_RVALUE(%rsp), %eax
ret
- .align 2
-.Lld_int16:
+E UNIX64_RET_UINT16
movzwl ffi_closure_RED_RVALUE(%rsp), %eax
ret
- .align 2
-.Lld_int32:
+E UNIX64_RET_UINT32
movl ffi_closure_RED_RVALUE(%rsp), %eax
ret
- .align 2
-.Lld_int64:
+E UNIX64_RET_SINT8
+ movsbl ffi_closure_RED_RVALUE(%rsp), %eax
+ ret
+E UNIX64_RET_SINT16
+ movswl ffi_closure_RED_RVALUE(%rsp), %eax
+ ret
+E UNIX64_RET_SINT32
+ movl ffi_closure_RED_RVALUE(%rsp), %eax
+ ret
+E UNIX64_RET_INT64
movq ffi_closure_RED_RVALUE(%rsp), %rax
ret
-
- .align 2
-.Lld_float:
- movss ffi_closure_RED_RVALUE(%rsp), %xmm0
+E UNIX64_RET_XMM32
+ movd ffi_closure_RED_RVALUE(%rsp), %xmm0
ret
- .align 2
-.Lld_double:
- movsd ffi_closure_RED_RVALUE(%rsp), %xmm0
+E UNIX64_RET_XMM64
+ movq ffi_closure_RED_RVALUE(%rsp), %xmm0
ret
- .align 2
-.Lld_ldouble:
+E UNIX64_RET_X87
fldt ffi_closure_RED_RVALUE(%rsp)
ret
-
- .align 2
-.Lld_struct:
- /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
- %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
- both rdx and xmm1 with the second word. For the remaining,
- bit 8 set means xmm0 gets the second word, and bit 9 means
- that rax gets the second word. */
- movq ffi_closure_RED_RVALUE(%rsp), %rcx
+E UNIX64_RET_ST_RAX_RDX
movq ffi_closure_RED_RVALUE+8(%rsp), %rdx
+ jmp 2f
+E UNIX64_RET_ST_XMM0_RAX
+ movq ffi_closure_RED_RVALUE+8(%rsp), %rax
+ jmp 3f
+E UNIX64_RET_ST_RAX_XMM0
+ movq ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+ jmp 2f
+E UNIX64_RET_ST_XMM0_XMM1
movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1
- testl $0x100, %eax
- cmovnz %rdx, %rcx
- movd %rcx, %xmm0
- testl $0x200, %eax
- movq ffi_closure_RED_RVALUE(%rsp), %rax
- cmovnz %rdx, %rax
+
+ .align 8
+3: movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ ret
+ .align 8
+2: movq ffi_closure_RED_RVALUE(%rsp), %rax
ret
+9: call abort@PLT
+
cfi_endproc
.size ffi_closure_unix64,.-ffi_closure_unix64
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 5/8] win64: Remove support from ffi.c
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 8/8] x86_64: Add support for complex types Richard Henderson
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss; +Cc: Kai Tietz
This file is now 32-bit only.
Cc: Kai Tietz <ktietz@redhat.com>
---
src/x86/ffi.c | 212 +++-------------------------------------------------------
1 file changed, 8 insertions(+), 204 deletions(-)
diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 006c95d..c387fb5 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -28,18 +28,12 @@
DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-#if !defined(__x86_64__) || defined(_WIN64) || defined(__CYGWIN__)
-
-#ifdef _WIN64
-#include <windows.h>
-#endif
+#ifndef __x86_64__
#include <ffi.h>
#include <ffi_common.h>
-
#include <stdlib.h>
-
/* ffi_prep_args is called by the assembly routine once stack space
has been allocated for the function's arguments */
@@ -50,26 +44,17 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
-#ifndef X86_WIN64
const int cabi = ecif->cif->abi;
const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
unsigned int stack_args_count = 0;
void *p_stack_data[3];
char *argp2 = stack;
-#else
- #define dir 1
-#endif
argp = stack;
if ((ecif->cif->flags == FFI_TYPE_STRUCT
- || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
-#ifdef X86_WIN64
- && ((ecif->cif->rtype->size & (1 | 2 | 4 | 8)) == 0)
-#endif
- )
+ || ecif->cif->flags == FFI_TYPE_MS_STRUCT))
{
-#ifndef X86_WIN64
/* For fastcall/thiscall/register this is first register-passed
argument. */
if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL || cabi == FFI_REGISTER)
@@ -77,7 +62,6 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
p_stack_data[stack_args_count] = argp;
++stack_args_count;
}
-#endif
*(void **) argp = ecif->rvalue;
argp += sizeof(void*);
@@ -105,24 +89,6 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
size_t z = (*p_arg)->size;
-#ifdef X86_WIN64
- if (z > FFI_SIZEOF_ARG
- || ((*p_arg)->type == FFI_TYPE_STRUCT
- && (z & (1 | 2 | 4 | 8)) == 0)
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
- || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
-#endif
- )
- {
- z = FFI_SIZEOF_ARG;
- *(void **)argp = *p_argv;
- }
- else if ((*p_arg)->type == FFI_TYPE_FLOAT)
- {
- memcpy(argp, *p_argv, z);
- }
- else
-#endif
if (z < FFI_SIZEOF_ARG)
{
z = FFI_SIZEOF_ARG;
@@ -165,7 +131,6 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
memcpy(argp, *p_argv, z);
}
-#ifndef X86_WIN64
/* For thiscall/fastcall/register convention register-passed arguments
are the first two none-floating-point arguments with a size
smaller or equal to sizeof (void*). */
@@ -188,18 +153,13 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
p_stack_data[stack_args_count] = argp;
++stack_args_count;
}
-#endif
-#ifdef X86_WIN64
- argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-#else
argp += z;
-#endif
}
-#ifndef X86_WIN64
- /* We need to move the register-passed arguments for thiscall/fastcall/register
- on top of stack, so that those can be moved to registers by call-handler. */
+ /* We need to move the register-passed arguments for thiscall,
+ fastcall, register on top of stack, so that those can be moved
+ to registers by call-handler. */
if (stack_args_count > 0)
{
if (dir < 0 && stack_args_count > 1)
@@ -225,7 +185,6 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
}
return stack_args_count;
-#endif
return 0;
}
@@ -243,25 +202,16 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
case FFI_TYPE_UINT16:
case FFI_TYPE_SINT8:
case FFI_TYPE_SINT16:
-#ifdef X86_WIN64
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
-#endif
case FFI_TYPE_SINT64:
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
-#ifndef X86_WIN64
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
case FFI_TYPE_LONGDOUBLE:
#endif
-#endif
cif->flags = (unsigned) cif->rtype->type;
break;
case FFI_TYPE_UINT64:
-#ifdef X86_WIN64
- case FFI_TYPE_POINTER:
-#endif
cif->flags = FFI_TYPE_SINT64;
break;
@@ -277,11 +227,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
}
else if (cif->rtype->size == 4)
{
-#ifdef X86_WIN64
- cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
-#else
cif->flags = FFI_TYPE_INT; /* same as int type */
-#endif
}
else if (cif->rtype->size == 8)
{
@@ -302,14 +248,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
break;
default:
-#ifdef X86_WIN64
- cif->flags = FFI_TYPE_SINT64;
- break;
- case FFI_TYPE_INT:
- cif->flags = FFI_TYPE_SINT32;
-#else
cif->flags = FFI_TYPE_INT;
-#endif
break;
}
@@ -320,32 +259,19 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
cif->bytes += (unsigned)ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
}
-#ifdef X86_WIN64
- /* ensure space for storing four registers */
- cif->bytes += 4 * FFI_SIZEOF_ARG;
-#endif
-
#ifndef X86_WIN32
-#ifndef X86_WIN64
if (cif->abi == FFI_SYSV || cif->abi == FFI_UNIX64)
-#endif
cif->bytes = (cif->bytes + 15) & ~0xF;
#endif
return FFI_OK;
}
-#ifdef X86_WIN64
-extern int
-ffi_call_win64(unsigned int (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
-#else
extern void
ffi_call_win32(unsigned int (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned *, void (*fn)(void));
-#endif
void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
@@ -357,33 +283,18 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
/* If the return value is a struct and we don't have a return */
/* value address then we need to make one */
-#ifdef X86_WIN64
- if (rvalue == NULL
- && cif->flags == FFI_TYPE_STRUCT
- && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0))
- {
- ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
- }
-#else
if (rvalue == NULL
&& (cif->flags == FFI_TYPE_STRUCT
|| cif->flags == FFI_TYPE_MS_STRUCT))
{
ecif.rvalue = alloca(cif->rtype->size);
}
-#endif
else
ecif.rvalue = rvalue;
switch (cif->abi)
{
-#ifdef X86_WIN64
- case FFI_WIN64:
- ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, ecif.rvalue, fn);
- break;
-#else
#ifndef X86_WIN32
case FFI_SYSV:
ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
@@ -401,7 +312,6 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
-#endif
default:
FFI_ASSERT(0);
break;
@@ -427,47 +337,13 @@ void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
__attribute__ ((regparm(1)));
#endif
-#ifndef X86_WIN64
void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *);
void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *);
void FFI_HIDDEN ffi_closure_FASTCALL (ffi_closure *);
void FFI_HIDDEN ffi_closure_REGISTER (ffi_closure *);
-#else
-void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
-#endif
/* This function is jumped to by the trampoline */
-#ifdef X86_WIN64
-void * FFI_HIDDEN
-ffi_closure_win64_inner (ffi_closure *closure, void *args) {
- ffi_cif *cif;
- void **arg_area;
- void *result;
- void *resp = &result;
-
- cif = closure->cif;
- arg_area = (void**) alloca (cif->nargs * sizeof (void*));
-
- /* this call will initialize ARG_AREA, such that each
- * element in that array points to the corresponding
- * value on the stack; and if the function returns
- * a structure, it will change RESP to point to the
- * structure return address. */
-
- ffi_prep_incoming_args(args, &resp, arg_area, cif);
-
- (closure->fun) (cif, resp, arg_area, closure->user_data);
-
- /* The result is returned in rax. This does the right thing for
- result types except for floats; we have to 'mov xmm0, rax' in the
- caller to correct this.
- TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!
- */
- return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp;
-}
-
-#else
unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
{
@@ -514,7 +390,6 @@ ffi_closure_WIN32_inner (ffi_closure *closure, void **respp, void *args)
return ret;
}
-#endif /* !X86_WIN64 */
static unsigned int
ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
@@ -524,7 +399,6 @@ ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
-#ifndef X86_WIN64
const int cabi = cif->abi;
const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
const unsigned int max_stack_count = (cabi == FFI_THISCALL) ? 1
@@ -533,37 +407,25 @@ ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
: 0;
unsigned int passed_regs = 0;
void *p_stack_data[3] = { stack - 1 };
-#else
- #define dir 1
-#endif
argp = stack;
-#ifndef X86_WIN64
argp += max_stack_count * FFI_SIZEOF_ARG;
-#endif
if ((cif->flags == FFI_TYPE_STRUCT
- || cif->flags == FFI_TYPE_MS_STRUCT)
-#ifdef X86_WIN64
- && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0)
-#endif
- )
+ || cif->flags == FFI_TYPE_MS_STRUCT))
{
-#ifndef X86_WIN64
if (passed_regs < max_stack_count)
{
*rvalue = *(void**) (stack + (passed_regs*FFI_SIZEOF_ARG));
++passed_regs;
}
else
-#endif
{
*rvalue = *(void **) argp;
argp += sizeof(void *);
}
}
-#ifndef X86_WIN64
/* Do register arguments first */
for (i = 0, p_arg = cif->arg_types;
i < cif->nargs && passed_regs < max_stack_count;
@@ -581,7 +443,6 @@ ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
avalue[i] = stack + (passed_regs*FFI_SIZEOF_ARG);
++passed_regs;
}
-#endif
p_arg = cif->arg_types;
p_argv = avalue;
@@ -605,20 +466,6 @@ ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
size_t z = (*p_arg)->size;
-#ifdef X86_WIN64
- if (z > FFI_SIZEOF_ARG
- || ((*p_arg)->type == FFI_TYPE_STRUCT
- && (z & (1 | 2 | 4 | 8)) == 0)
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
- || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
-#endif
- )
- {
- z = FFI_SIZEOF_ARG;
- *p_argv = *(void **)argp;
- }
- else
-#else
if (passed_regs > 0
&& z <= FFI_SIZEOF_ARG
&& (p_argv == p_stack_data[0]
@@ -629,40 +476,17 @@ ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
continue;
}
else
-#endif
{
/* because we're little endian, this is what it turns into. */
*p_argv = (void*) argp;
}
-#ifdef X86_WIN64
- argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-#else
argp += z;
-#endif
}
return (size_t)argp - (size_t)stack;
}
-#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
-{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
- void* __fun = (void*)(FUN); \
- void* __ctx = (void*)(CTX); \
- *(unsigned char*) &__tramp[0] = 0x41; \
- *(unsigned char*) &__tramp[1] = 0xbb; \
- *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \
- *(unsigned char*) &__tramp[6] = 0x48; \
- *(unsigned char*) &__tramp[7] = 0xb8; \
- *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \
- *(unsigned char *) &__tramp[16] = 0x49; \
- *(unsigned char *) &__tramp[17] = 0xba; \
- *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \
- *(unsigned char *) &__tramp[26] = 0x41; \
- *(unsigned char *) &__tramp[27] = 0xff; \
- *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \
- }
-
/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */
#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
@@ -723,18 +547,6 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
-#ifdef X86_WIN64
-#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
-#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
- if (cif->abi == FFI_WIN64)
- {
- int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3);
- FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
- &ffi_closure_win64,
- codeloc, mask);
- /* make sure we can execute here */
- }
-#else
if (cif->abi == FFI_SYSV)
{
FFI_INIT_TRAMPOLINE (&closure->tramp[0],
@@ -773,7 +585,6 @@ ffi_prep_closure_loc (ffi_closure* closure,
(void*)codeloc);
}
#endif /* X86_WIN32 */
-#endif /* !X86_WIN64 */
else
{
return FFI_BAD_ABI;
@@ -843,7 +654,6 @@ ffi_prep_args_raw(char *stack, extended_cif *ecif)
const ffi_cif *cif = ecif->cif;
unsigned int i, passed_regs = 0;
-#ifndef X86_WIN64
const unsigned int abi = cif->abi;
const unsigned int max_regs = (abi == FFI_THISCALL) ? 1
: (abi == FFI_FASTCALL) ? 2
@@ -865,7 +675,6 @@ ffi_prep_args_raw(char *stack, extended_cif *ecif)
++passed_regs;
}
-#endif
memcpy (stack, ecif->avalue, cif->bytes);
return passed_regs;
@@ -909,7 +718,6 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
case FFI_SYSV:
case FFI_MS_CDECL:
#endif
-#ifndef X86_WIN64
case FFI_STDCALL:
case FFI_THISCALL:
case FFI_FASTCALL:
@@ -918,14 +726,10 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
-#endif
default:
FFI_ASSERT(0);
break;
}
}
-
-#endif
-
-#endif /* !__x86_64__ || X86_WIN64 */
-
+#endif /* !FFI_NO_RAW_API */
+#endif /* !__x86_64__ */
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 6/8] x86_64: Fixups for x32
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
2014-10-28 18:32 ` [PATCH 5/8] win64: Remove support from ffi.c Richard Henderson
2014-10-28 18:32 ` [PATCH 8/8] x86_64: Add support for complex types Richard Henderson
@ 2014-10-28 18:32 ` Richard Henderson
2014-10-28 18:32 ` [PATCH 4/8] win64: Rewrite Richard Henderson
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2014-10-28 18:32 UTC (permalink / raw)
To: libffi-discuss
---
src/x86/ffi64.c | 5 +++--
src/x86/unix64.S | 20 ++++++++++++++++----
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index 384a93a..65fb595 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -568,6 +568,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
0x0f, 0x1f, 0x00
};
void (*dest)(void);
+ char *tramp = closure->tramp;
if (cif->abi != FFI_UNIX64)
return FFI_BAD_ABI;
@@ -577,8 +578,8 @@ ffi_prep_closure_loc (ffi_closure* closure,
else
dest = ffi_closure_unix64;
- memcpy (closure->tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(closure->tramp + 16) = (uintptr_t)dest;
+ memcpy (tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 134cb3d..797b9d9 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -32,7 +32,7 @@
#include <ffi.h>
#include <ffi_cfi.h>
-.text
+ .text
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void));
@@ -272,9 +272,15 @@ ffi_closure_unix64:
movq %r8, ffi_closure_OFS_G+0x20(%rsp)
movq %r9, ffi_closure_OFS_G+0x28(%rsp)
- movq 24(%r10), %rdi /* Load cif */
- movq 32(%r10), %rsi /* Load fun */
- movq 40(%r10), %rdx /* Load user_data */
+#ifdef __ILP32__
+ movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */
+ movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */
+ movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */
+#else
+ movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */
+#endif
.Ldo_closure:
leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
movq %rsp, %r8 /* Load reg_args */
@@ -407,9 +413,15 @@ ffi_go_closure_unix64:
movq %r8, ffi_closure_OFS_G+0x20(%rsp)
movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+#ifdef __ILP32__
+ movl 4(%r10), %edi /* Load cif */
+ movl 8(%r10), %esi /* Load fun */
+ movl %r10d, %edx /* Load closure (user_data) */
+#else
movq 8(%r10), %rdi /* Load cif */
movq 16(%r10), %rsi /* Load fun */
movq %r10, %rdx /* Load closure (user_data) */
+#endif
jmp .Ldo_closure
cfi_endproc
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2014-10-28 18:32 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-28 18:32 [PATCH 0/8] Go closures for x86_64 Richard Henderson
2014-10-28 18:32 ` [PATCH 5/8] win64: Remove support from ffi.c Richard Henderson
2014-10-28 18:32 ` [PATCH 8/8] x86_64: Add support for complex types Richard Henderson
2014-10-28 18:32 ` [PATCH 6/8] x86_64: Fixups for x32 Richard Henderson
2014-10-28 18:32 ` [PATCH 4/8] win64: Rewrite Richard Henderson
2014-10-28 18:32 ` [PATCH 2/8] Add ffi_cfi.h Richard Henderson
2014-10-28 18:32 ` [PATCH 3/8] x86-64: Support go closures Richard Henderson
2014-10-28 18:32 ` [PATCH 7/8] x86_64: Decouple return types from FFI_TYPE constants Richard Henderson
2014-10-28 18:32 ` [PATCH 1/8] Add entry points for interacting with Go Richard Henderson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the read-only IMAP folder(s) and NNTP newsgroup(s).