From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 32409 invoked by alias); 28 Oct 2014 18:54:10 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 32328 invoked by uid 89); 28 Oct 2014 18:54:09 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-qc0-f175.google.com Received: from mail-qc0-f175.google.com (HELO mail-qc0-f175.google.com) (209.85.216.175) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 28 Oct 2014 18:54:06 +0000 Received: by mail-qc0-f175.google.com with SMTP id b13so1202816qcw.20 for ; Tue, 28 Oct 2014 11:54:04 -0700 (PDT) X-Received: by 10.224.90.3 with SMTP id g3mr7865182qam.90.1414522444047; Tue, 28 Oct 2014 11:54:04 -0700 (PDT) Received: from anchor.com (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id 69sm1717430qgy.19.2014.10.28.11.54.02 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 28 Oct 2014 11:54:03 -0700 (PDT) From: Richard Henderson To: libffi-discuss@sourceware.org Cc: Richard Henderson Subject: [PATCH 11/16] aarch64: Move return value handling into ffi_closure_SYSV Date: Tue, 28 Oct 2014 18:54:00 -0000 Message-Id: <1414522393-19169-12-git-send-email-rth@twiddle.net> In-Reply-To: <1414522393-19169-1-git-send-email-rth@twiddle.net> References: <1414522393-19169-1-git-send-email-rth@twiddle.net> X-SW-Source: 2014/txt/msg00147.txt.bz2 From: Richard Henderson As with the change to ffi_call_SYSV, this avoids copying data into a temporary buffer. 
--- src/aarch64/ffi.c | 196 +++++++------------------------------ src/aarch64/ffitarget.h | 2 +- src/aarch64/sysv.S | 249 +++++++++++++++++++++++++++--------------------- 3 files changed, 176 insertions(+), 271 deletions(-) diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c index ffa1363..c5a429a 100644 --- a/src/aarch64/ffi.c +++ b/src/aarch64/ffi.c @@ -71,9 +71,6 @@ ffi_clear_cache (void *start, void *end) #endif } -extern void -ffi_closure_SYSV (ffi_closure *); - /* Test for an FFI floating point representation. */ static unsigned @@ -211,69 +208,6 @@ is_hfa(const ffi_type *ty) return (ele_count << 8) | candidate; } -/* Test if an ffi_type is a candidate for passing in a register. - - This test does not check that sufficient registers of the - appropriate class are actually available, merely that IFF - sufficient registers are available then the argument will be passed - in register(s). - - Note that an ffi_type that is deemed to be a register candidate - will always be returned in registers. - - Returns 1 if a register candidate else 0. */ - -static int -is_register_candidate (ffi_type *ty) -{ - switch (ty->type) - { - case FFI_TYPE_VOID: - return 0; - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - case FFI_TYPE_UINT8: - case FFI_TYPE_UINT16: - case FFI_TYPE_UINT32: - case FFI_TYPE_UINT64: - case FFI_TYPE_POINTER: - case FFI_TYPE_SINT8: - case FFI_TYPE_SINT16: - case FFI_TYPE_SINT32: - case FFI_TYPE_INT: - case FFI_TYPE_SINT64: - return 1; - - case FFI_TYPE_STRUCT: - if (is_hfa (ty)) - { - return 1; - } - else if (ty->size > 16) - { - /* Too large. Will be replaced with a pointer to memory. The - pointer MAY be passed in a register, but the value will - not. This test specifically fails since the argument will - never be passed by value in registers. */ - return 0; - } - else - { - /* Might be passed in registers depending on the number of - registers required. 
*/ - return (ty->size + 7) / 8 < N_X_ARG_REG; - } - break; - - default: - FFI_ASSERT (0); - break; - } - - return 0; -} - /* Test if an ffi_type argument or result is a candidate for a vector register. */ @@ -797,42 +731,42 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue) memcpy (orig_rvalue, rvalue, rtype_size); } -static unsigned char trampoline [] = -{ 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */ - 0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */ - 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ -}; - /* Build a trampoline. */ -#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \ - ({unsigned char *__tramp = (unsigned char*)(TRAMP); \ - UINT64 __fun = (UINT64)(FUN); \ - UINT64 __ctx = (UINT64)(CTX); \ - UINT64 __flags = (UINT64)(FLAGS); \ - memcpy (__tramp, trampoline, sizeof (trampoline)); \ - memcpy (__tramp + 12, &__fun, sizeof (__fun)); \ - memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \ - memcpy (__tramp + 28, &__flags, sizeof (__flags)); \ - ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \ - }) +extern void ffi_closure_SYSV (void) FFI_HIDDEN; +extern void ffi_closure_SYSV_V (void) FFI_HIDDEN; ffi_status -ffi_prep_closure_loc (ffi_closure* closure, +ffi_prep_closure_loc (ffi_closure *closure, ffi_cif* cif, void (*fun)(ffi_cif*,void*,void**,void*), void *user_data, void *codeloc) { + static const unsigned char trampoline[16] = { + 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */ + 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */ + 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ + }; + char *tramp = closure->tramp; + void (*start)(void); + if (cif->abi != FFI_SYSV) return FFI_BAD_ABI; - FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc, - cif->aarch64_flags); - - closure->cif = cif; + closure->cif = cif; + closure->fun = fun; closure->user_data = user_data; - closure->fun = fun; + + memcpy (tramp, trampoline, sizeof(trampoline)); + + if (cif->flags & AARCH64_FLAG_ARG_V) + start = ffi_closure_SYSV_V; + else + start = ffi_closure_SYSV; + 
*(UINT64 *)(tramp + 16) = (uintptr_t)start; + + ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE); return FFI_OK; } @@ -853,20 +787,20 @@ ffi_prep_closure_loc (ffi_closure* closure, descriptors, invokes the wrapped function, then marshalls the return value back into the call context. */ -void FFI_HIDDEN -ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, - void *stack) +int FFI_HIDDEN +ffi_closure_SYSV_inner (ffi_cif *cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + struct call_context *context, + void *stack, void *rvalue) { - ffi_cif *cif = closure->cif; void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); - void *rvalue = NULL; - int i, h, nargs = cif->nargs; + int i, h, nargs, flags; struct arg_state state; - ffi_type *rtype; arg_init (&state); - for (i = 0; i < nargs; i++) + for (i = 0, nargs = cif->nargs; i < nargs; i++) { ffi_type *ty = cif->arg_types[i]; int t = ty->type; @@ -955,69 +889,11 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, } } - /* Figure out where the return value will be passed, either in registers - or in a memory block allocated by the caller and passed in x8. */ - rtype = cif->rtype; - if (is_register_candidate (rtype)) - { - size_t s = rtype->size; - int t; - - /* Register candidates are *always* returned in registers. */ - - /* Allocate a scratchpad for the return value, we will let the - callee scrible the result into the scratch pad then move the - contents into the appropriate return value location for the - call convention. */ - rvalue = alloca (s); - (closure->fun) (cif, rvalue, avalue, closure->user_data); - - /* Copy the return value into the call context so that it is returned - as expected to our caller. 
*/ - t = rtype->type; - switch (t) - { - case FFI_TYPE_VOID: - break; - - case FFI_TYPE_INT: - case FFI_TYPE_UINT8: - case FFI_TYPE_UINT16: - case FFI_TYPE_UINT32: - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT8: - case FFI_TYPE_SINT16: - case FFI_TYPE_SINT32: - case FFI_TYPE_SINT64: - case FFI_TYPE_POINTER: - context->x[0] = extend_integer_type (rvalue, t); - break; - - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - extend_hfa_type (&context->v[0], rvalue, 0x100 + t); - break; + flags = cif->flags; + if (flags & AARCH64_RET_IN_MEM) + rvalue = (void *)(uintptr_t)context->x8; - case FFI_TYPE_STRUCT: - h = is_hfa (cif->rtype); - if (h) - extend_hfa_type (&context->v[0], rvalue, h); - else - { - FFI_ASSERT (s <= 16); - memcpy (&context->x[0], rvalue, s); - } - break; + fun (cif, rvalue, avalue, user_data); - default: - abort(); - } - } - else - { - rvalue = (void *)(uintptr_t)context->x8; - (closure->fun) (cif, rvalue, avalue, closure->user_data); - } + return flags; } - diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h index 336f28a..b488bbe 100644 --- a/src/aarch64/ffitarget.h +++ b/src/aarch64/ffitarget.h @@ -42,7 +42,7 @@ typedef enum ffi_abi /* ---- Definitions for closures ----------------------------------------- */ #define FFI_CLOSURES 1 -#define FFI_TRAMPOLINE_SIZE 36 +#define FFI_TRAMPOLINE_SIZE 24 #define FFI_NATIVE_RAW_API 0 /* ---- Internal ---- */ diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S index ba15663..abd848d 100644 --- a/src/aarch64/sysv.S +++ b/src/aarch64/sysv.S @@ -39,15 +39,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #endif #endif +#ifdef __AARCH64EB__ +# define BE(X) X +#else +# define BE(X) 0 +#endif + .text .align 4 - .globl CNAME(ffi_call_SYSV) -#ifdef __ELF__ - .type CNAME(ffi_call_SYSV), #function - .hidden CNAME(ffi_call_SYSV) -#endif - /* ffi_call_SYSV extern void ffi_call_SYSV (void *stack, void *frame, void (*fn)(void), void *rvalue, int flags); @@ -179,131 +179,160 @@ CNAME(ffi_call_SYSV): nop cfi_endproc + + .globl CNAME(ffi_call_SYSV) #ifdef __ELF__ - .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) + .type CNAME(ffi_call_SYSV), #function + .hidden CNAME(ffi_call_SYSV) + .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) #endif -#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE) - /* ffi_closure_SYSV Closure invocation glue. This is the low level code invoked directly by the closure trampoline to setup and call a closure. - On entry x17 points to a struct trampoline_data, x16 has been clobbered + On entry x17 points to a struct ffi_closure, x16 has been clobbered all other registers are preserved. We allocate a call context and save the argument passing registers, then invoked the generic C ffi_closure_SYSV_inner() function to do all the real work, on return we load the result passing registers back from the call context. +*/ - On entry - - extern void - ffi_closure_SYSV (struct trampoline_data *); - - struct trampoline_data - { - UINT64 *ffi_closure; - UINT64 flags; - }; - - This function uses the following stack frame layout: - - == - saved x30(lr) - x29(fp)-> saved x29(fp) - saved x22 - saved x21 - ... - sp -> call_context - == +#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) - Voila! */ + .align 4 +CNAME(ffi_closure_SYSV_V): + cfi_startproc + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) - .text - .align 4 + /* Save the argument passing vector registers. 
*/ + stp q0, q1, [sp, #16 + 0] + stp q2, q3, [sp, #16 + 32] + stp q4, q5, [sp, #16 + 64] + stp q6, q7, [sp, #16 + 96] + b 0f + cfi_endproc - .globl CNAME(ffi_closure_SYSV) + .globl CNAME(ffi_closure_SYSV_V) #ifdef __ELF__ - .type CNAME(ffi_closure_SYSV), #function - .hidden CNAME(ffi_closure_SYSV) + .type CNAME(ffi_closure_SYSV_V), #function + .hidden CNAME(ffi_closure_SYSV_V) + .size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V) #endif - cfi_startproc -CNAME(ffi_closure_SYSV): - stp x29, x30, [sp, #-16]! - cfi_adjust_cfa_offset (16) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - - mov x29, sp - cfi_def_cfa_register (x29) - - sub sp, sp, #ffi_closure_SYSV_FS - - stp x21, x22, [x29, #-16] - cfi_rel_offset (x21, -16) - cfi_rel_offset (x22, -8) - - /* Load x21 with &call_context. */ - mov x21, sp - /* Preserve our struct trampoline_data * */ - mov x22, x17 - - /* Save the rest of the argument passing registers, including - the structure return pointer. */ - stp x0, x1, [x21, #16*N_V_ARG_REG + 0] - stp x2, x3, [x21, #16*N_V_ARG_REG + 16] - stp x4, x5, [x21, #16*N_V_ARG_REG + 32] - stp x6, x7, [x21, #16*N_V_ARG_REG + 48] - str x8, [x21, #16*N_V_ARG_REG + 64] - - /* Figure out if we should touch the vector registers. */ - ldr x0, [x22, #8] - tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f - - /* Save the argument passing vector registers. */ - stp q0, q1, [x21, #0] - stp q2, q3, [x21, #32] - stp q4, q5, [x21, #64] - stp q6, q7, [x21, #96] -1: - /* Load &ffi_closure.. */ - ldr x0, [x22, #0] - mov x1, x21 - /* Compute the location of the stack at the point that the - trampoline was called. */ - add x2, x29, #16 - - bl CNAME(ffi_closure_SYSV_inner) - - /* Figure out if we should touch the vector registers. */ - ldr x0, [x22, #8] - tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f - - /* Load the result passing vector registers. */ - ldp q0, q1, [x21, #0] - ldp q2, q3, [x21, #32] -1: - /* Load the result passing core registers. 
*/ - ldp x0, x1, [x21, #16*N_V_ARG_REG + 0] - - /* We are done, unwind our frame. */ - ldp x21, x22, [x29, #-16] - cfi_restore (x21) - cfi_restore (x22) - mov sp, x29 - cfi_def_cfa_register (sp) - - ldp x29, x30, [sp], #16 - cfi_adjust_cfa_offset (-16) - cfi_restore (x29) - cfi_restore (x30) - - ret + .align 4 + cfi_startproc +CNAME(ffi_closure_SYSV): + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) +0: + mov x29, sp + + /* Save the argument passing core registers. */ + stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] + stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] + stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] + stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] + str x8, [sp, #16 + 16*N_V_ARG_REG + 64] + + /* Load ffi_closure_inner arguments. */ + ldp x0, x1, [x17, #FFI_TRAMPOLINE_SIZE] /* load cif, fn */ + ldr x2, [x17, #FFI_TRAMPOLINE_SIZE+16] /* load user_data */ + add x3, sp, #16 /* load context */ + add x4, sp, #ffi_closure_SYSV_FS /* load stack */ + add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */ + bl CNAME(ffi_closure_SYSV_inner) + + /* Load the return value as directed. */ + adr x1, 0f + and w0, w0, #AARCH64_RET_MASK + add x1, x1, x0, lsl #3 + add x3, sp, #16+CALL_CONTEXT_SIZE + br x1 + + /* Note that each table entry is 2 insns, and thus 8 bytes. 
*/ + .align 4 +0: b 99f /* VOID */ + nop +1: ldr x0, [x3] /* INT64 */ + b 99f +2: ldp x0, x1, [x3] /* INT128 */ + b 99f +3: brk #1000 /* UNUSED */ + nop +4: brk #1000 /* UNUSED */ + nop +5: brk #1000 /* UNUSED */ + nop +6: brk #1000 /* UNUSED */ + nop +7: brk #1000 /* UNUSED */ + nop +8: ldr s3, [x3, #12] /* S4 */ + nop +9: ldr s2, [x3, #8] /* S3 */ + nop +10: ldp s0, s1, [x3] /* S2 */ + b 99f +11: ldr s0, [x3] /* S1 */ + b 99f +12: ldr d3, [x3, #24] /* D4 */ + nop +13: ldr d2, [x3, #16] /* D3 */ + nop +14: ldp d0, d1, [x3] /* D2 */ + b 99f +15: ldr d0, [x3] /* D1 */ + b 99f +16: ldr q3, [x3, #48] /* Q4 */ + nop +17: ldr q2, [x3, #32] /* Q3 */ + nop +18: ldp q0, q1, [x3] /* Q2 */ + b 99f +19: ldr q0, [x3] /* Q1 */ + b 99f +20: ldrb w0, [x3, #BE(7)] /* UINT8 */ + b 99f +21: brk #1000 /* reserved */ + nop +22: ldrh w0, [x3, #BE(6)] /* UINT16 */ + b 99f +23: brk #1000 /* reserved */ + nop +24: ldr w0, [x3, #BE(4)] /* UINT32 */ + b 99f +25: brk #1000 /* reserved */ + nop +26: ldrsb x0, [x3, #BE(7)] /* SINT8 */ + b 99f +27: brk #1000 /* reserved */ + nop +28: ldrsh x0, [x3, #BE(6)] /* SINT16 */ + b 99f +29: brk #1000 /* reserved */ + nop +30: ldrsw x0, [x3, #BE(4)] /* SINT32 */ + nop +31: /* reserved */ +99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS + cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS) + cfi_restore (x29) + cfi_restore (x30) + ret cfi_endproc + + .globl CNAME(ffi_closure_SYSV) #ifdef __ELF__ - .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV) + .type CNAME(ffi_closure_SYSV), #function + .hidden CNAME(ffi_closure_SYSV) + .size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV) #endif -- 1.9.3