From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 9609 invoked by alias); 28 Oct 2014 18:32:31 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 9252 invoked by uid 89); 28 Oct 2014 18:32:27 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.3 required=5.0 tests=AWL,BAYES_00,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS,UNSUBSCRIBE_BODY autolearn=no version=3.3.2 X-HELO: mail-qg0-f45.google.com Received: from mail-qg0-f45.google.com (HELO mail-qg0-f45.google.com) (209.85.192.45) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 28 Oct 2014 18:32:20 +0000 Received: by mail-qg0-f45.google.com with SMTP id z107so994008qgd.4 for ; Tue, 28 Oct 2014 11:32:18 -0700 (PDT) X-Received: by 10.229.248.5 with SMTP id me5mr7917591qcb.2.1414521137898; Tue, 28 Oct 2014 11:32:17 -0700 (PDT) Received: from anchor.com (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id j1sm1948207qao.38.2014.10.28.11.32.16 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 28 Oct 2014 11:32:17 -0700 (PDT) From: Richard Henderson To: libffi-discuss@sourceware.org Subject: [PATCH 7/8] x86_64: Decouple return types from FFI_TYPE constants Date: Tue, 28 Oct 2014 18:32:00 -0000 Message-Id: <1414521094-18403-8-git-send-email-rth@twiddle.net> In-Reply-To: <1414521094-18403-1-git-send-email-rth@twiddle.net> References: <1414521094-18403-1-git-send-email-rth@twiddle.net> X-SW-Source: 2014/txt/msg00131.txt.bz2 This lets us better support structure returns, and serves as prep for complex types. 
--- src/x86/ffi64.c | 142 ++++++++++++++++++------------- src/x86/internal64.h | 20 +++++ src/x86/unix64.S | 236 +++++++++++++++++++++------------------------------ 3 files changed, 202 insertions(+), 196 deletions(-) create mode 100644 src/x86/internal64.h diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c index 65fb595..a03061b 100644 --- a/src/x86/ffi64.c +++ b/src/x86/ffi64.c @@ -33,6 +33,7 @@ #include #include #include +#include "internal64.h" #ifdef __x86_64__ @@ -191,7 +192,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], } else if (size <= 16) { - classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; + classes[0] = classes[1] = X86_64_INTEGER_CLASS; return 2; } else @@ -360,15 +361,55 @@ ffi_prep_cif_machdep (ffi_cif *cif) int gprcount, ssecount, i, avn, ngpr, nsse, flags; enum x86_64_reg_class classes[MAX_CLASSES]; size_t bytes, n; + ffi_type *rtype; if (cif->abi != FFI_UNIX64) return FFI_BAD_ABI; gprcount = ssecount = 0; - flags = cif->rtype->type; - if (flags != FFI_TYPE_VOID) + rtype = cif->rtype; + switch (rtype->type) { + case FFI_TYPE_VOID: + flags = UNIX64_RET_VOID; + break; + case FFI_TYPE_UINT8: + flags = UNIX64_RET_UINT8; + break; + case FFI_TYPE_SINT8: + flags = UNIX64_RET_SINT8; + break; + case FFI_TYPE_UINT16: + flags = UNIX64_RET_UINT16; + break; + case FFI_TYPE_SINT16: + flags = UNIX64_RET_SINT16; + break; + case FFI_TYPE_UINT32: + flags = UNIX64_RET_UINT32; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + flags = UNIX64_RET_SINT32; + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_INT64; + break; + case FFI_TYPE_POINTER: + flags = (sizeof(void *) == 4 ? 
UNIX64_RET_UINT32 : UNIX64_RET_INT64); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM32; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87; + break; + case FFI_TYPE_STRUCT: n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); if (n == 0) { @@ -376,22 +417,24 @@ ffi_prep_cif_machdep (ffi_cif *cif) memory is the first argument. Allocate a register for it. */ gprcount++; /* We don't have to do anything in asm for the return. */ - flags = FFI_TYPE_VOID; + flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM; } - else if (flags == FFI_TYPE_STRUCT) + else { /* Mark which registers the result appears in. */ _Bool sse0 = SSE_CLASS_P (classes[0]); _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); - if (sse0 && !sse1) - flags |= 1 << 8; - else if (!sse0 && sse1) - flags |= 1 << 9; - else if (sse0 && sse1) - flags |= 1 << 10; + if (sse0) + flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX); + else + flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX); + /* Mark the true size of the structure. */ - flags |= cif->rtype->size << 12; + flags |= rtype->size << UNIX64_SIZE_SHIFT; } + break; + default: + return FFI_BAD_TYPEDEF; } /* Go over all arguments and determine the way they should be passed. @@ -418,9 +461,10 @@ ffi_prep_cif_machdep (ffi_cif *cif) } } if (ssecount) - flags |= 1 << 11; + flags |= UNIX64_FLAG_XMM_ARGS; + cif->flags = flags; - cif->bytes = (unsigned)ALIGN (bytes, 8); + cif->bytes = ALIGN (bytes, 8); return FFI_OK; } @@ -432,20 +476,22 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, enum x86_64_reg_class classes[MAX_CLASSES]; char *stack, *argp; ffi_type **arg_types; - int gprcount, ssecount, ngpr, nsse, i, avn; - _Bool ret_in_memory; + int gprcount, ssecount, ngpr, nsse, i, avn, flags; struct register_args *reg_args; /* Can't call 32-bit mode from 64-bit mode. 
*/ FFI_ASSERT (cif->abi == FFI_UNIX64); /* If the return value is a struct and we don't have a return value - address then we need to make one. Note the setting of flags to - VOID above in ffi_prep_cif_machdep. */ - ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT - && (cif->flags & 0xff) == FFI_TYPE_VOID); - if (rvalue == NULL && ret_in_memory) - rvalue = alloca (cif->rtype->size); + address then we need to make one. Otherwise we can ignore it. */ + flags = cif->flags; + if (rvalue == NULL) + { + if (flags & UNIX64_FLAG_RET_IN_MEM) + rvalue = alloca (cif->rtype->size); + else + flags = UNIX64_RET_VOID; + } /* Allocate the space for the arguments, plus 4 words of temp space. */ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); @@ -458,7 +504,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, /* If the return value is passed in memory, add the pointer as the first integer argument. */ - if (ret_in_memory) + if (flags & UNIX64_FLAG_RET_IN_MEM) reg_args->gpr[gprcount++] = (unsigned long) rvalue; avn = cif->nargs; @@ -503,17 +549,17 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, switch (arg_types[i]->type) { case FFI_TYPE_SINT8: - *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); + reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); break; case FFI_TYPE_SINT16: - *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); + reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); break; case FFI_TYPE_SINT32: - *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); + reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); break; default: reg_args->gpr[gprcount] = 0; - memcpy (&reg_args->gpr[gprcount], a, size < 8 ? 
size : 8); + memcpy (&reg_args->gpr[gprcount], a, size); } gprcount++; break; @@ -533,7 +579,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, reg_args->rax = ssecount; ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), - cif->flags, rvalue, fn); + flags, rvalue, fn); } void @@ -573,7 +619,7 @@ ffi_prep_closure_loc (ffi_closure* closure, if (cif->abi != FFI_UNIX64) return FFI_BAD_ABI; - if (cif->flags & (1 << 11)) + if (cif->flags & UNIX64_FLAG_XMM_ARGS) dest = ffi_closure_unix64_sse; else dest = ffi_closure_unix64; @@ -600,39 +646,17 @@ ffi_closure_unix64_inner(ffi_cif *cif, ffi_type **arg_types; long i, avn; int gprcount, ssecount, ngpr, nsse; - int ret; + int flags; - avalue = alloca(cif->nargs * sizeof(void *)); + avn = cif->nargs; + flags = cif->flags; + avalue = alloca(avn * sizeof(void *)); gprcount = ssecount = 0; - ret = cif->rtype->type; - if (ret != FFI_TYPE_VOID) - { - enum x86_64_reg_class classes[MAX_CLASSES]; - size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); - if (n == 0) - { - /* The return value goes in memory. Arrange for the closure - return value to go directly back to the original caller. */ - rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++]; - /* We don't have to do anything in asm for the return. */ - ret = FFI_TYPE_VOID; - } - else if (ret == FFI_TYPE_STRUCT && n == 2) - { - /* Mark which register the second word of the structure goes in. */ - _Bool sse0 = SSE_CLASS_P (classes[0]); - _Bool sse1 = SSE_CLASS_P (classes[1]); - if (!sse0 && sse1) - ret |= 1 << 8; - else if (sse0 && !sse1) - ret |= 1 << 9; - } - } + if (flags & UNIX64_FLAG_RET_IN_MEM) + rvalue = (void *)(uintptr_t)reg_args->gpr[gprcount++]; - avn = cif->nargs; arg_types = cif->arg_types; - for (i = 0; i < avn; ++i) { enum x86_64_reg_class classes[MAX_CLASSES]; @@ -693,7 +717,7 @@ ffi_closure_unix64_inner(ffi_cif *cif, fun (cif, rvalue, avalue, user_data); /* Tell assembly how to perform return type promotions. 
*/ - return ret; + return flags; } extern void ffi_go_closure_unix64(void) FFI_HIDDEN; @@ -706,7 +730,7 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, if (cif->abi != FFI_UNIX64) return FFI_BAD_ABI; - closure->tramp = (cif->flags & (1 << 11) + closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS ? ffi_go_closure_unix64_sse : ffi_go_closure_unix64); closure->cif = cif; diff --git a/src/x86/internal64.h b/src/x86/internal64.h new file mode 100644 index 0000000..07b1b10 --- /dev/null +++ b/src/x86/internal64.h @@ -0,0 +1,20 @@ +#define UNIX64_RET_VOID 0 +#define UNIX64_RET_UINT8 1 +#define UNIX64_RET_UINT16 2 +#define UNIX64_RET_UINT32 3 +#define UNIX64_RET_SINT8 4 +#define UNIX64_RET_SINT16 5 +#define UNIX64_RET_SINT32 6 +#define UNIX64_RET_INT64 7 +#define UNIX64_RET_XMM32 8 +#define UNIX64_RET_XMM64 9 +#define UNIX64_RET_X87 10 +#define UNIX64_RET_ST_RAX_RDX 11 +#define UNIX64_RET_ST_XMM0_RAX 12 +#define UNIX64_RET_ST_RAX_XMM0 13 +#define UNIX64_RET_ST_XMM0_XMM1 14 +#define UNIX64_RET_LAST 14 + +#define UNIX64_FLAG_RET_IN_MEM (1 << 10) +#define UNIX64_FLAG_XMM_ARGS (1 << 11) +#define UNIX64_SIZE_SHIFT 12 diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 797b9d9..0151229 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -31,9 +31,15 @@ #include #include #include +#include "internal64.h" .text +.macro E index + .align 8 + .org 0b + \index * 8, 0x90 +.endm + /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, void *raddr, void (*fnaddr)(void)); @@ -41,7 +47,7 @@ for this function. This has been allocated by ffi_call. We also deallocate some of the stack that has been alloca'd. */ - .align 2 + .align 8 .globl ffi_call_unix64 .type ffi_call_unix64,@function FFI_HIDDEN(ffi_call_unix64) @@ -100,109 +106,81 @@ ffi_call_unix64: cfi_restore(%rbp) /* The first byte of the flags contains the FFI_TYPE. 
*/ + cmpb $UNIX64_RET_LAST, %cl movzbl %cl, %r10d - leaq .Lstore_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 - jmp *%r10 + leaq 0f(%rip), %r11 + ja 9f + leaq (%r11, %r10, 8), %r10 - .section .rodata - .align 2 -.Lstore_table: - .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */ - .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */ - .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */ - .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */ - .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */ - .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */ - .previous + /* Prep for the structure cases: scratch area in redzone. 
*/ + leaq -20(%rsp), %rsi + jmp *%r10 - .align 2 -.Lst_void: + .align 8 +0: +E UNIX64_RET_VOID ret - .align 2 - -.Lst_uint8: - movzbq %al, %rax +E UNIX64_RET_UINT8 + movzbl %al, %eax movq %rax, (%rdi) ret - .align 2 -.Lst_sint8: - movsbq %al, %rax +E UNIX64_RET_UINT16 + movzwl %ax, %eax movq %rax, (%rdi) ret - .align 2 -.Lst_uint16: - movzwq %ax, %rax +E UNIX64_RET_UINT32 + movl %eax, %eax movq %rax, (%rdi) - .align 2 -.Lst_sint16: - movswq %ax, %rax + ret +E UNIX64_RET_SINT8 + movsbq %al, %rax movq %rax, (%rdi) ret - .align 2 -.Lst_uint32: - movl %eax, %eax +E UNIX64_RET_SINT16 + movswq %ax, %rax movq %rax, (%rdi) - .align 2 -.Lst_sint32: + ret +E UNIX64_RET_SINT32 cltq movq %rax, (%rdi) ret - .align 2 -.Lst_int64: +E UNIX64_RET_INT64 movq %rax, (%rdi) ret - - .align 2 -.Lst_float: - movss %xmm0, (%rdi) +E UNIX64_RET_XMM32 + movd %xmm0, (%rdi) ret - .align 2 -.Lst_double: - movsd %xmm0, (%rdi) +E UNIX64_RET_XMM64 + movq %xmm0, (%rdi) ret -.Lst_ldouble: +E UNIX64_RET_X87 fstpt (%rdi) ret - - .align 2 -.Lst_struct: - leaq -20(%rsp), %rsi /* Scratch area in redzone. */ - - /* We have to locate the values now, and since we don't want to - write too much data into the user's return value, we spill the - value to a 16 byte scratch area first. Bits 8, 9, and 10 - control where the values are located. Only one of the three - bits will be set; see ffi_prep_cif_machdep for the pattern. */ - movd %xmm0, %r10 - movd %xmm1, %r11 - testl $0x100, %ecx - cmovnz %rax, %rdx - cmovnz %r10, %rax - testl $0x200, %ecx - cmovnz %r10, %rdx - testl $0x400, %ecx - cmovnz %r10, %rax - cmovnz %r11, %rdx - movq %rax, (%rsi) +E UNIX64_RET_ST_RAX_RDX movq %rdx, 8(%rsi) - - /* Bits 12-31 contain the true size of the structure. Copy from - the scratch area to the true destination. 
*/ - shrl $12, %ecx + jmp 2f +E UNIX64_RET_ST_XMM0_RAX + movq %rax, 8(%rsi) + jmp 3f +E UNIX64_RET_ST_RAX_XMM0 + movq %xmm0, 8(%rsi) + jmp 2f +E UNIX64_RET_ST_XMM0_XMM1 + movq %xmm1, 8(%rsi) + + .align 8 +3: movq %xmm0, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx + rep movsb + ret + .align 8 +2: movq %rax, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret +9: call abort@PLT + /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of SSE registers needed; zero or all is a good compromise. */ @@ -292,84 +270,68 @@ ffi_closure_unix64: cfi_adjust_cfa_offset(-ffi_closure_FS) /* The first byte of the return value contains the FFI_TYPE. */ + cmpb $UNIX64_RET_LAST, %al movzbl %al, %r10d - leaq .Lload_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 + leaq 0f(%rip), %r11 + ja 9f + leaq (%r11, %r10, 8), %r10 jmp *%r10 - .section .rodata - .align 2 -.Lload_table: - .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */ - .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */ - .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */ - .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */ - .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */ - .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */ - .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */ - .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */ - .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */ - .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */ - .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */ - .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */ - .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */ - .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */ - .previous - - .align 2 -.Lld_void: + .align 8 +0: +E UNIX64_RET_VOID ret - - .align 2 -.Lld_int8: +E UNIX64_RET_UINT8 movzbl ffi_closure_RED_RVALUE(%rsp), %eax ret - .align 2 -.Lld_int16: +E UNIX64_RET_UINT16 movzwl ffi_closure_RED_RVALUE(%rsp), 
%eax ret - .align 2 -.Lld_int32: +E UNIX64_RET_UINT32 movl ffi_closure_RED_RVALUE(%rsp), %eax ret - .align 2 -.Lld_int64: +E UNIX64_RET_SINT8 + movsbl ffi_closure_RED_RVALUE(%rsp), %eax + ret +E UNIX64_RET_SINT16 + movswl ffi_closure_RED_RVALUE(%rsp), %eax + ret +E UNIX64_RET_SINT32 + movl ffi_closure_RED_RVALUE(%rsp), %eax + ret +E UNIX64_RET_INT64 movq ffi_closure_RED_RVALUE(%rsp), %rax ret - - .align 2 -.Lld_float: - movss ffi_closure_RED_RVALUE(%rsp), %xmm0 +E UNIX64_RET_XMM32 + movd ffi_closure_RED_RVALUE(%rsp), %xmm0 ret - .align 2 -.Lld_double: - movsd ffi_closure_RED_RVALUE(%rsp), %xmm0 +E UNIX64_RET_XMM64 + movq ffi_closure_RED_RVALUE(%rsp), %xmm0 ret - .align 2 -.Lld_ldouble: +E UNIX64_RET_X87 fldt ffi_closure_RED_RVALUE(%rsp) ret - - .align 2 -.Lld_struct: - /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, - %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading - both rdx and xmm1 with the second word. For the remaining, - bit 8 set means xmm0 gets the second word, and bit 9 means - that rax gets the second word. */ - movq ffi_closure_RED_RVALUE(%rsp), %rcx +E UNIX64_RET_ST_RAX_RDX movq ffi_closure_RED_RVALUE+8(%rsp), %rdx + jmp 2f +E UNIX64_RET_ST_XMM0_RAX + movq ffi_closure_RED_RVALUE+8(%rsp), %rax + jmp 3f +E UNIX64_RET_ST_RAX_XMM0 + movq ffi_closure_RED_RVALUE+8(%rsp), %xmm0 + jmp 2f +E UNIX64_RET_ST_XMM0_XMM1 movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1 - testl $0x100, %eax - cmovnz %rdx, %rcx - movd %rcx, %xmm0 - testl $0x200, %eax - movq ffi_closure_RED_RVALUE(%rsp), %rax - cmovnz %rdx, %rax + + .align 8 +3: movq ffi_closure_RED_RVALUE(%rsp), %xmm0 + ret + .align 8 +2: movq ffi_closure_RED_RVALUE(%rsp), %rax ret +9: call abort@PLT + cfi_endproc .size ffi_closure_unix64,.-ffi_closure_unix64 -- 1.9.3