From: Richard Henderson <rth@redhat.com>
To: gcc-patches@gcc.gnu.org
Cc: libffi-discuss@sourceware.org, gofrontend-dev@googlegroups.com
Subject: [PATCH 07/13] libffi: Support go closures on x86_64
Date: Fri, 10 Oct 2014 20:43:00 -0000
Message-Id: <1412973773-3942-8-git-send-email-rth@redhat.com>
In-Reply-To: <1412973773-3942-1-git-send-email-rth@redhat.com>
References: <1412973773-3942-1-git-send-email-rth@redhat.com>

Still missing changes for darwin, win64, and all 32-bit ABIs.

This also drops all of the hand-coded unwind info in favor of
gas-generated .cfi directives, as I can't be bothered to do the
updates by hand again.
---
 libffi/src/x86/ffi64.c     | 103 ++++++++++-----
 libffi/src/x86/ffitarget.h |   2 +
 libffi/src/x86/unix64.S    | 319 ++++++++++++++++++++++-----------------------
 3 files changed, 230 insertions(+), 194 deletions(-)

diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
index 1daa1c0..428168c 100644
--- a/libffi/src/x86/ffi64.c
+++ b/libffi/src/x86/ffi64.c
@@ -31,6 +31,7 @@
 
 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdint.h>
 
 #ifdef __x86_64__
 
@@ -48,10 +49,12 @@ struct register_args
   /* Registers for argument passing.  */
   UINT64 gpr[MAX_GPR_REGS];
   UINT128 sse[MAX_SSE_REGS];
+  UINT64 rax;	/* ssecount */
+  UINT64 r10;	/* static chain */
 };
 
 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
-			     void *raddr, void (*fnaddr)(void), unsigned ssecount);
+			     void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
 
 /* All reference to register classes here is identical to the code in
    gcc/config/i386/i386.c. Do *not* change one without the other.  */
@@ -341,6 +344,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
   enum x86_64_reg_class classes[MAX_CLASSES];
   size_t bytes;
 
+  if (cif->abi != FFI_UNIX64)
+    return FFI_BAD_ABI;
+
   gprcount = ssecount = 0;
 
   flags = cif->rtype->type;
@@ -402,8 +408,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
   return FFI_OK;
 }
 
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	      void **avalue, void *closure)
 {
   enum x86_64_reg_class classes[MAX_CLASSES];
   char *stack, *argp;
@@ -428,6 +435,8 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
   reg_args = (struct register_args *) stack;
   argp = stack + sizeof (struct register_args);
 
+  reg_args->r10 = (unsigned long) closure;
+
   gprcount = ssecount = 0;
 
   /* If the return value is passed in memory, add the pointer as the
@@ -488,13 +497,27 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 	    }
 	}
     }
+  reg_args->rax = ssecount;
 
   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
-		   cif->flags, rvalue, fn, ssecount);
+		   cif->flags, rvalue, fn);
 }
 
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
 
-extern void ffi_closure_unix64(void);
+extern void ffi_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
 
 ffi_status
 ffi_prep_closure_loc (ffi_closure* closure,
@@ -503,29 +526,26 @@ ffi_prep_closure_loc (ffi_closure* closure,
 		      void *user_data,
 		      void *codeloc)
 {
-  volatile unsigned short *tramp;
-
-  /* Sanity check on the cif ABI.  */
-  {
-    int abi = cif->abi;
-    if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
-      return FFI_BAD_ABI;
-  }
-
-  tramp = (volatile unsigned short *) &closure->tramp[0];
+  static const unsigned char trampoline[16] = {
+    /* leaq  -0x7(%rip),%r10   # 0x0  */
+    0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+    /* jmpq  *0x3(%rip)        # 0x10 */
+    0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+    /* nopl  (%rax) */
+    0x0f, 0x1f, 0x00
+  };
+  void (*dest)(void);
 
-  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
-  *((unsigned long long * volatile) &tramp[1])
-    = (unsigned long) ffi_closure_unix64;
-  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
-  *((unsigned long long * volatile) &tramp[6])
-    = (unsigned long) codeloc;
+  if (cif->abi != FFI_UNIX64)
+    return FFI_BAD_ABI;
 
-  /* Set the carry bit iff the function uses any sse registers.
-     This is clc or stc, together with the first byte of the jmp.  */
-  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+  if (cif->flags & (1 << 11))
+    dest = ffi_closure_unix64_sse;
+  else
+    dest = ffi_closure_unix64;
 
-  tramp[11] = 0xe3ff;		/* jmp *%r11	*/
+  memcpy (closure->tramp, trampoline, sizeof(trampoline));
+  *(UINT64 *)(closure->tramp + 16) = (uintptr_t)dest;
 
   closure->cif = cif;
   closure->fun = fun;
@@ -534,18 +554,20 @@ ffi_prep_closure_loc (ffi_closure* closure,
   return FFI_OK;
 }
 
-int
-ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
-			 struct register_args *reg_args, char *argp)
+int FFI_HIDDEN
+ffi_closure_unix64_inner(ffi_cif *cif,
+			 void (*fun)(ffi_cif*, void*, void**, void*),
+			 void *user_data,
+			 void *rvalue,
+			 struct register_args *reg_args,
+			 char *argp)
 {
-  ffi_cif *cif;
   void **avalue;
   ffi_type **arg_types;
   long i, avn;
   int gprcount, ssecount, ngpr, nsse;
   int ret;
 
-  cif = closure->cif;
   avalue = alloca(cif->nargs * sizeof(void *));
   gprcount = ssecount = 0;
 
@@ -634,10 +656,29 @@ ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
     }
 
   /* Invoke the closure.  */
-  closure->fun (cif, rvalue, avalue, closure->user_data);
+  fun (cif, rvalue, avalue, user_data);
 
   /* Tell assembly how to perform return type promotions.  */
   return ret;
 }
 
+extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+		     void (*fun)(ffi_cif*, void*, void**, void*))
+{
+  if (cif->abi != FFI_UNIX64)
+    return FFI_BAD_ABI;
+
+  closure->tramp = (cif->flags & (1 << 11)
+		    ? ffi_go_closure_unix64_sse
+		    : ffi_go_closure_unix64);
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+
 #endif /* __x86_64__ */
diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h
index 46f294c..592d6f8 100644
--- a/libffi/src/x86/ffitarget.h
+++ b/libffi/src/x86/ffitarget.h
@@ -111,6 +111,8 @@ typedef enum ffi_abi {
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
+
 #define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
 #define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
 #define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
index 7a6619a..3881f51 100644
--- a/libffi/src/x86/unix64.S
+++ b/libffi/src/x86/unix64.S
@@ -41,10 +41,11 @@
 
 	.align	2
 	.globl	ffi_call_unix64
+	.hidden	ffi_call_unix64
 	.type	ffi_call_unix64,@function
 
 ffi_call_unix64:
-.LUW0:
+	.cfi_startproc
 	movq	(%rsp), %r10		/* Load return address.  */
 	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
 	movq	%rdx, (%rax)		/* Save flags.  */
@@ -52,24 +53,36 @@ ffi_call_unix64:
 	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
 	movq	%r10, 24(%rax)		/* Relocate return address.  */
 	movq	%rax, %rbp		/* Finalize local stack frame.  */
-.LUW1:
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.cfi_def_cfa	%rbp, 32
+	.cfi_rel_offset	%rbp, 16
+
 	movq	%rdi, %r10		/* Save a copy of the register area. */
 	movq	%r8, %r11		/* Save a copy of the target fn.  */
 	movl	%r9d, %eax		/* Set number of SSE registers.  */
 
 	/* Load up all argument registers.  */
 	movq	(%r10), %rdi
-	movq	8(%r10), %rsi
-	movq	16(%r10), %rdx
-	movq	24(%r10), %rcx
-	movq	32(%r10), %r8
-	movq	40(%r10), %r9
+	movq	0x08(%r10), %rsi
+	movq	0x10(%r10), %rdx
+	movq	0x18(%r10), %rcx
+	movq	0x20(%r10), %r8
+	movq	0x28(%r10), %r9
+	movl	0xb0(%r10), %eax
 	testl	%eax, %eax
 	jnz	.Lload_sse
.Lret_from_load_sse:
 
-	/* Deallocate the reg arg area.  */
-	leaq	176(%r10), %rsp
+	/* Deallocate the reg arg area, except for r10, then load via pop.  */
+	leaq	0xb8(%r10), %rsp
+	popq	%r10
 
 	/* Call the user function.  */
 	call	*%r11
@@ -80,7 +93,9 @@ ffi_call_unix64:
 	movq	0(%rbp), %rcx		/* Reload flags.  */
 	movq	8(%rbp), %rdi		/* Reload raddr.  */
 	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
-.LUW2:
+	.cfi_remember_state
+	.cfi_def_cfa	%rsp, 8
+	.cfi_restore	%rbp
 
 	/* The first byte of the flags contains the FFI_TYPE.  */
 	movzbl	%cl, %r10d
@@ -89,6 +104,8 @@ ffi_call_unix64:
 	addq	%r11, %r10
 	jmp	*%r10
 
+	.section .rodata
+	.align	2
.Lstore_table:
 	.long	.Lst_void-.Lstore_table		/* FFI_TYPE_VOID */
 	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_INT */
@@ -105,6 +122,7 @@ ffi_call_unix64:
 	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_SINT64 */
 	.long	.Lst_struct-.Lstore_table	/* FFI_TYPE_STRUCT */
 	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_POINTER */
+	.previous
 
 	.align	2
.Lst_void:
@@ -187,49 +205,83 @@ ffi_call_unix64:
 	   It's not worth an indirect jump to load the exact set of SSE
 	   registers needed; zero or all is a good compromise.  */
 	.align	2
-.LUW3:
+	.cfi_restore_state
.Lload_sse:
-	movdqa	48(%r10), %xmm0
-	movdqa	64(%r10), %xmm1
-	movdqa	80(%r10), %xmm2
-	movdqa	96(%r10), %xmm3
-	movdqa	112(%r10), %xmm4
-	movdqa	128(%r10), %xmm5
-	movdqa	144(%r10), %xmm6
-	movdqa	160(%r10), %xmm7
+	movdqa	0x30(%r10), %xmm0
+	movdqa	0x40(%r10), %xmm1
+	movdqa	0x50(%r10), %xmm2
+	movdqa	0x60(%r10), %xmm3
+	movdqa	0x70(%r10), %xmm4
+	movdqa	0x80(%r10), %xmm5
+	movdqa	0x90(%r10), %xmm6
+	movdqa	0xa0(%r10), %xmm7
 	jmp	.Lret_from_load_sse
 
-.LUW4:
+	.cfi_endproc
 	.size	ffi_call_unix64,.-ffi_call_unix64
 
+/* 6 general registers, 8 vector registers,
+   16 bytes of rvalue, 8 bytes of alignment.  */
+#define ffi_closure_OFS_G	0
+#define ffi_closure_OFS_V	(6*8)
+#define ffi_closure_OFS_RVALUE	(ffi_closure_OFS_V + 8*16)
+#define ffi_closure_FS		(ffi_closure_OFS_RVALUE + 16 + 8)
+
+/* The location of rvalue within the red zone after deallocating the frame.  */
+#define ffi_closure_RED_RVALUE	(ffi_closure_OFS_RVALUE - ffi_closure_FS)
+
+	.align	2
+	.globl	ffi_closure_unix64_sse
+	.hidden	ffi_closure_unix64_sse
+	.type	ffi_closure_unix64_sse,@function
+
+ffi_closure_unix64_sse:
+	.cfi_startproc
+	subq	$ffi_closure_FS, %rsp
+	.cfi_adjust_cfa_offset	ffi_closure_FS
+
+	movdqa	%xmm0, ffi_closure_OFS_V+0x00(%rsp)
+	movdqa	%xmm1, ffi_closure_OFS_V+0x10(%rsp)
+	movdqa	%xmm2, ffi_closure_OFS_V+0x20(%rsp)
+	movdqa	%xmm3, ffi_closure_OFS_V+0x30(%rsp)
+	movdqa	%xmm4, ffi_closure_OFS_V+0x40(%rsp)
+	movdqa	%xmm5, ffi_closure_OFS_V+0x50(%rsp)
+	movdqa	%xmm6, ffi_closure_OFS_V+0x60(%rsp)
+	movdqa	%xmm7, ffi_closure_OFS_V+0x70(%rsp)
+	jmp	0f
+
+	.cfi_endproc
+	.size	ffi_closure_unix64_sse,.-ffi_closure_unix64_sse
+
 	.align	2
-	.globl ffi_closure_unix64
+	.globl	ffi_closure_unix64
+	.hidden	ffi_closure_unix64
 	.type	ffi_closure_unix64,@function
 
ffi_closure_unix64:
-.LUW5:
-	/* The carry flag is set by the trampoline iff SSE registers
-	   are used.  Don't clobber it before the branch instruction.  */
-	leaq	-200(%rsp), %rsp
-.LUW6:
-	movq	%rdi, (%rsp)
-	movq	%rsi, 8(%rsp)
-	movq	%rdx, 16(%rsp)
-	movq	%rcx, 24(%rsp)
-	movq	%r8, 32(%rsp)
-	movq	%r9, 40(%rsp)
-	jc	.Lsave_sse
-.Lret_from_save_sse:
-
-	movq	%r10, %rdi
-	leaq	176(%rsp), %rsi
-	movq	%rsp, %rdx
-	leaq	208(%rsp), %rcx
-	call	ffi_closure_unix64_inner@PLT
+	.cfi_startproc
+	subq	$ffi_closure_FS, %rsp
+	.cfi_adjust_cfa_offset	ffi_closure_FS
+0:
+	movq	%rdi, ffi_closure_OFS_G+0x00(%rsp)
+	movq	%rsi, ffi_closure_OFS_G+0x08(%rsp)
+	movq	%rdx, ffi_closure_OFS_G+0x10(%rsp)
+	movq	%rcx, ffi_closure_OFS_G+0x18(%rsp)
+	movq	%r8,  ffi_closure_OFS_G+0x20(%rsp)
+	movq	%r9,  ffi_closure_OFS_G+0x28(%rsp)
+
+	movq	24(%r10), %rdi				/* Load cif */
+	movq	32(%r10), %rsi				/* Load fun */
+	movq	40(%r10), %rdx				/* Load user_data */
.Ldo_closure:
+	leaq	ffi_closure_OFS_RVALUE(%rsp), %rcx	/* Load rvalue */
+	movq	%rsp, %r8				/* Load reg_args */
+	leaq	ffi_closure_FS+8(%rsp), %r9		/* Load argp */
+	call	ffi_closure_unix64_inner
 
 	/* Deallocate stack frame early; return value is now in redzone.  */
-	addq	$200, %rsp
-.LUW7:
+	addq	$ffi_closure_FS, %rsp
+	.cfi_adjust_cfa_offset	-ffi_closure_FS
 
 	/* The first byte of the return value contains the FFI_TYPE.  */
 	movzbl	%al, %r10d
@@ -238,6 +290,8 @@ ffi_closure_unix64:
 	addq	%r11, %r10
 	jmp	*%r10
 
+	.section .rodata
+	.align	2
.Lload_table:
 	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */
 	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_INT */
@@ -254,6 +308,7 @@ ffi_closure_unix64:
 	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_SINT64 */
 	.long	.Lld_struct-.Lload_table	/* FFI_TYPE_STRUCT */
 	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_POINTER */
+	.previous
 
 	.align	2
.Lld_void:
@@ -261,32 +316,32 @@ ffi_closure_unix64:
 
 	.align	2
.Lld_int8:
-	movzbl	-24(%rsp), %eax
+	movzbl	ffi_closure_RED_RVALUE(%rsp), %eax
 	ret
 	.align	2
.Lld_int16:
-	movzwl	-24(%rsp), %eax
+	movzwl	ffi_closure_RED_RVALUE(%rsp), %eax
 	ret
 	.align	2
.Lld_int32:
-	movl	-24(%rsp), %eax
+	movl	ffi_closure_RED_RVALUE(%rsp), %eax
 	ret
 	.align	2
.Lld_int64:
-	movq	-24(%rsp), %rax
+	movq	ffi_closure_RED_RVALUE(%rsp), %rax
 	ret
 	.align	2
.Lld_float:
-	movss	-24(%rsp), %xmm0
+	movss	ffi_closure_RED_RVALUE(%rsp), %xmm0
 	ret
 	.align	2
.Lld_double:
-	movsd	-24(%rsp), %xmm0
+	movsd	ffi_closure_RED_RVALUE(%rsp), %xmm0
 	ret
 	.align	2
.Lld_ldouble:
-	fldt	-24(%rsp)
+	fldt	ffi_closure_RED_RVALUE(%rsp)
 	ret
 
 	.align	2
@@ -296,131 +351,69 @@ ffi_closure_unix64:
 	   both rdx and xmm1 with the second word.  For the remaining,
 	   bit 8 set means xmm0 gets the second word, and bit 9 means
 	   that rax gets the second word.  */
-	movq	-24(%rsp), %rcx
-	movq	-16(%rsp), %rdx
-	movq	-16(%rsp), %xmm1
+	movq	ffi_closure_RED_RVALUE(%rsp), %rcx
+	movq	ffi_closure_RED_RVALUE+8(%rsp), %rdx
+	movq	ffi_closure_RED_RVALUE+8(%rsp), %xmm1
 	testl	$0x100, %eax
 	cmovnz	%rdx, %rcx
 	movd	%rcx, %xmm0
 	testl	$0x200, %eax
-	movq	-24(%rsp), %rax
+	movq	ffi_closure_RED_RVALUE(%rsp), %rax
 	cmovnz	%rdx, %rax
 	ret
 
-	/* See the comment above .Lload_sse; the same logic applies here.  */
-	.align	2
-.LUW8:
-.Lsave_sse:
-	movdqa	%xmm0, 48(%rsp)
-	movdqa	%xmm1, 64(%rsp)
-	movdqa	%xmm2, 80(%rsp)
-	movdqa	%xmm3, 96(%rsp)
-	movdqa	%xmm4, 112(%rsp)
-	movdqa	%xmm5, 128(%rsp)
-	movdqa	%xmm6, 144(%rsp)
-	movdqa	%xmm7, 160(%rsp)
-	jmp	.Lret_from_save_sse
-
-.LUW9:
+	.cfi_endproc
 	.size	ffi_closure_unix64,.-ffi_closure_unix64
 
-#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
-	.section	.eh_frame,"a",@unwind
-#else
-	.section	.eh_frame,"a",@progbits
-#endif
-.Lframe1:
-	.long	.LECIE1-.LSCIE1		/* CIE Length */
-.LSCIE1:
-	.long	0			/* CIE Identifier Tag */
-	.byte	1			/* CIE Version */
-	.ascii "zR\0"			/* CIE Augmentation */
-	.uleb128 1			/* CIE Code Alignment Factor */
-	.sleb128 -8			/* CIE Data Alignment Factor */
-	.byte	0x10			/* CIE RA Column */
-	.uleb128 1			/* Augmentation size */
-	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
-	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
-	.uleb128 7
-	.uleb128 8
-	.byte	0x80+16			/* DW_CFA_offset, %rip offset 1*-8 */
-	.uleb128 1
-	.align 8
-.LECIE1:
-.LSFDE1:
-	.long	.LEFDE1-.LASFDE1	/* FDE Length */
-.LASFDE1:
-	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
-#if HAVE_AS_X86_PCREL
-	.long	.LUW0-.			/* FDE initial location */
-#else
-	.long	.LUW0@rel
-#endif
-	.long	.LUW4-.LUW0		/* FDE address range */
-	.uleb128 0x0			/* Augmentation size */
-
-	.byte	0x4			/* DW_CFA_advance_loc4 */
-	.long	.LUW1-.LUW0
-
-	/* New stack frame based off rbp.  This is a itty bit of unwind
-	   trickery in that the CFA *has* changed.  There is no easy way
-	   to describe it correctly on entry to the function.  Fortunately,
-	   it doesn't matter too much since at all points we can correctly
-	   unwind back to ffi_call.  Note that the location to which we
-	   moved the return address is (the new) CFA-8, so from the
-	   perspective of the unwind info, it hasn't moved.  */
-	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
-	.uleb128 6
-	.uleb128 32
-	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
-	.uleb128 2
-	.byte	0xa			/* DW_CFA_remember_state */
-
-	.byte	0x4			/* DW_CFA_advance_loc4 */
-	.long	.LUW2-.LUW1
-	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
-	.uleb128 7
-	.uleb128 8
-	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
-
-	.byte	0x4			/* DW_CFA_advance_loc4 */
-	.long	.LUW3-.LUW2
-	.byte	0xb			/* DW_CFA_restore_state */
-
-	.align 8
-.LEFDE1:
-.LSFDE3:
-	.long	.LEFDE3-.LASFDE3	/* FDE Length */
-.LASFDE3:
-	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
-#if HAVE_AS_X86_PCREL
-	.long	.LUW5-.			/* FDE initial location */
-#else
-	.long	.LUW5@rel
-#endif
-	.long	.LUW9-.LUW5		/* FDE address range */
-	.uleb128 0x0			/* Augmentation size */
-
-	.byte	0x4			/* DW_CFA_advance_loc4 */
-	.long	.LUW6-.LUW5
-	.byte	0xe			/* DW_CFA_def_cfa_offset */
-	.uleb128 208
-	.byte	0xa			/* DW_CFA_remember_state */
-
-	.byte	0x4			/* DW_CFA_advance_loc4 */
-	.long	.LUW7-.LUW6
-	.byte	0xe			/* DW_CFA_def_cfa_offset */
-	.uleb128 8
-
-	.byte	0x4			/* DW_CFA_advance_loc4 */
-	.long	.LUW8-.LUW7
-	.byte	0xb			/* DW_CFA_restore_state */
-
-	.align 8
-.LEFDE3:
+	.align	2
+	.globl	ffi_go_closure_unix64_sse
+	.hidden	ffi_go_closure_unix64_sse
+	.type	ffi_go_closure_unix64_sse,@function
+
+ffi_go_closure_unix64_sse:
+	.cfi_startproc
+	subq	$ffi_closure_FS, %rsp
+	.cfi_adjust_cfa_offset	ffi_closure_FS
+
+	movdqa	%xmm0, ffi_closure_OFS_V+0x00(%rsp)
+	movdqa	%xmm1, ffi_closure_OFS_V+0x10(%rsp)
+	movdqa	%xmm2, ffi_closure_OFS_V+0x20(%rsp)
+	movdqa	%xmm3, ffi_closure_OFS_V+0x30(%rsp)
+	movdqa	%xmm4, ffi_closure_OFS_V+0x40(%rsp)
+	movdqa	%xmm5, ffi_closure_OFS_V+0x50(%rsp)
+	movdqa	%xmm6, ffi_closure_OFS_V+0x60(%rsp)
+	movdqa	%xmm7, ffi_closure_OFS_V+0x70(%rsp)
+	jmp	0f
+
+	.cfi_endproc
+	.size	ffi_go_closure_unix64_sse,.-ffi_go_closure_unix64_sse
 
-#endif /* __x86_64__ */
+	.align	2
+	.globl	ffi_go_closure_unix64
+	.hidden	ffi_go_closure_unix64
+	.type	ffi_go_closure_unix64,@function
+
+ffi_go_closure_unix64:
+	.cfi_startproc
+	subq	$ffi_closure_FS, %rsp
+	.cfi_adjust_cfa_offset	ffi_closure_FS
+0:
+	movq	%rdi, ffi_closure_OFS_G+0x00(%rsp)
+	movq	%rsi, ffi_closure_OFS_G+0x08(%rsp)
+	movq	%rdx, ffi_closure_OFS_G+0x10(%rsp)
+	movq	%rcx, ffi_closure_OFS_G+0x18(%rsp)
+	movq	%r8,  ffi_closure_OFS_G+0x20(%rsp)
+	movq	%r9,  ffi_closure_OFS_G+0x28(%rsp)
+
+	movq	8(%r10), %rdi			/* Load cif */
+	movq	16(%r10), %rsi			/* Load fun */
+	movq	%r10, %rdx			/* Load closure (user_data) */
+	jmp	.Ldo_closure
+
+	.cfi_endproc
+	.size	ffi_go_closure_unix64,.-ffi_go_closure_unix64
 
 #if defined __ELF__ && defined __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
+#endif /* x86_64 */
-- 
1.9.3
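
For reference, here is a minimal caller-side sketch of how the new entry points fit
together.  It is not part of the patch or its testsuite: it assumes the generic
ffi_go_closure type (tramp/cif/fun fields, as read at 0/8/16(%r10) above) and the
ffi_prep_go_closure/ffi_call_go declarations introduced earlier in this series are
visible through <ffi.h>, and it stands in for a Go-runtime caller by invoking the
closure's own trampoline through ffi_call_go; the names add_one, argt, argv are
made up for the example.

  #include <ffi.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Closure body: receives the ffi_go_closure itself as user_data,
     mirroring the static-chain (%r10) convention used above.  */
  static void
  add_one (ffi_cif *cif, void *rvalue, void **avalue, void *user_data)
  {
    (void) cif; (void) user_data;
    *(ffi_arg *) rvalue = *(int *) avalue[0] + 1;
  }

  int
  main (void)
  {
    ffi_cif cif;
    ffi_type *argt[1] = { &ffi_type_sint };
    ffi_go_closure gc;
    int n = 41;
    void *argv[1] = { &n };
    ffi_arg res = 0;

    if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, argt) != FFI_OK)
      return 1;
    if (ffi_prep_go_closure (&gc, &cif, add_one) != FFI_OK)
      return 1;

    /* ffi_call_go stores the closure pointer in the static-chain slot
       (reg_args->r10), which ffi_call_unix64 pops into %r10 just before
       the call; jumping to gc.tramp then dispatches back to add_one.  */
    ffi_call_go (&cif, (void (*)(void)) (uintptr_t) gc.tramp, &res, argv, &gc);

    printf ("%d\n", (int) res);	/* 42 */
    return 0;
  }

The same static-chain slot is what a Go caller would populate directly; a plain
ffi_call goes through ffi_call_int with closure == NULL, so an ordinary C callee
just sees a null %r10 it never looks at.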