From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 31921 invoked by alias); 28 Oct 2014 18:54:06 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 31855 invoked by uid 89); 28 Oct 2014 18:54:05 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS,T_FILL_THIS_FORM_SHORT autolearn=ham version=3.3.2 X-HELO: mail-qg0-f47.google.com Received: from mail-qg0-f47.google.com (HELO mail-qg0-f47.google.com) (209.85.192.47) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 28 Oct 2014 18:54:03 +0000 Received: by mail-qg0-f47.google.com with SMTP id j107so1039954qga.6 for ; Tue, 28 Oct 2014 11:54:00 -0700 (PDT) X-Received: by 10.224.46.66 with SMTP id i2mr7815650qaf.72.1414522440789; Tue, 28 Oct 2014 11:54:00 -0700 (PDT) Received: from anchor.com (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id 69sm1717430qgy.19.2014.10.28.11.53.59 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 28 Oct 2014 11:54:00 -0700 (PDT) From: Richard Henderson To: libffi-discuss@sourceware.org Cc: Richard Henderson Subject: [PATCH 09/16] aarch64: Merge prep_args with ffi_call Date: Tue, 28 Oct 2014 18:54:00 -0000 Message-Id: <1414522393-19169-10-git-send-email-rth@twiddle.net> In-Reply-To: <1414522393-19169-1-git-send-email-rth@twiddle.net> References: <1414522393-19169-1-git-send-email-rth@twiddle.net> X-SW-Source: 2014/txt/msg00137.txt.bz2 From: Richard Henderson Use the trick to allocate the stack frame for ffi_call_SYSV within ffi_call itself. --- src/aarch64/ffi.c | 193 ++++++++++++++++++++++++----------------------------- src/aarch64/sysv.S | 192 ++++++++++++++++------------------------------------ 2 files changed, 144 insertions(+), 241 deletions(-) diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c index d19384b..a067303 100644 --- a/src/aarch64/ffi.c +++ b/src/aarch64/ffi.c @@ -72,14 +72,6 @@ ffi_clear_cache (void *start, void *end) } extern void -ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *, - extended_cif *), - struct call_context *context, - extended_cif *, - size_t, - void (*fn)(void)); - -extern void ffi_closure_SYSV (ffi_closure *); /* Test for an FFI floating point representation. */ @@ -311,12 +303,11 @@ struct arg_state /* Initialize a procedure call argument marshalling state. */ static void -arg_init (struct arg_state *state, size_t call_frame_size) +arg_init (struct arg_state *state) { state->ngrn = 0; state->nsrn = 0; state->nsaa = 0; - #if defined (__APPLE__) state->allocating_variadic = 0; #endif @@ -529,27 +520,88 @@ allocate_int_to_reg_or_stack (struct call_context *context, return allocate_to_stack (state, stack, size, size); } -/* Marshall the arguments from FFI representation to procedure call - context and stack. */ +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + /* Round the stack up to a multiple of the stack alignment requirement. */ + cif->bytes = ALIGN(cif->bytes, 16); -static unsigned -aarch64_prep_args (struct call_context *context, unsigned char *stack, - extended_cif *ecif) + /* Initialize our flags. 
We are interested if this CIF will touch a + vector register, if so we will enable context save and load to + those registers, otherwise not. This is intended to be friendly + to lazy float context switching in the kernel. */ + cif->aarch64_flags = 0; + + if (is_v_register_candidate (cif->rtype)) + { + cif->aarch64_flags |= AARCH64_FLAG_ARG_V; + } + else + { + int i; + for (i = 0; i < cif->nargs; i++) + if (is_v_register_candidate (cif->arg_types[i])) + { + cif->aarch64_flags |= AARCH64_FLAG_ARG_V; + break; + } + } + +#if defined (__APPLE__) + cif->aarch64_nfixedargs = 0; +#endif + + return FFI_OK; +} + +#if defined (__APPLE__) + +/* Perform Apple-specific cif processing for variadic calls */ +ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, + unsigned int nfixedargs, + unsigned int ntotalargs) { - ffi_cif *cif = ecif->cif; - void **avalue = ecif->avalue; - int i, nargs = cif->nargs; + ffi_status status; + + status = ffi_prep_cif_machdep (cif); + + cif->aarch64_nfixedargs = nfixedargs; + + return status; +} + +#endif + +extern void ffi_call_SYSV (void *stack, void *frame, + void (*fn)(void), int flags) FFI_HIDDEN; + +/* Call a function with the provided arguments and capture the return + value. */ +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + struct call_context *context; + void *stack, *frame; struct arg_state state; + size_t stack_bytes; + int i, nargs = cif->nargs; + int h, t; + ffi_type *rtype; - arg_init (&state, cif->bytes); + /* Allocate consectutive stack for everything we'll need. */ + stack_bytes = cif->bytes; + stack = alloca (stack_bytes + 32 + sizeof(struct call_context)); + frame = stack + stack_bytes; + context = frame + 32; + arg_init (&state); for (i = 0; i < nargs; i++) { ffi_type *ty = cif->arg_types[i]; size_t s = ty->size; - int h, t = ty->type; void *a = avalue[i]; + t = ty->type; switch (t) { case FFI_TYPE_VOID: @@ -665,83 +717,12 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack, #endif } - return cif->aarch64_flags; -} - -ffi_status -ffi_prep_cif_machdep (ffi_cif *cif) -{ - /* Round the stack up to a multiple of the stack alignment requirement. */ - cif->bytes = ALIGN(cif->bytes, 16); - - /* Initialize our flags. We are interested if this CIF will touch a - vector register, if so we will enable context save and load to - those registers, otherwise not. This is intended to be friendly - to lazy float context switching in the kernel. */ - cif->aarch64_flags = 0; - - if (is_v_register_candidate (cif->rtype)) - { - cif->aarch64_flags |= AARCH64_FLAG_ARG_V; - } - else - { - int i; - for (i = 0; i < cif->nargs; i++) - if (is_v_register_candidate (cif->arg_types[i])) - { - cif->aarch64_flags |= AARCH64_FLAG_ARG_V; - break; - } - } - -#if defined (__APPLE__) - cif->aarch64_nfixedargs = 0; -#endif - - return FFI_OK; -} - -#if defined (__APPLE__) - -/* Perform Apple-specific cif processing for variadic calls */ -ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, - unsigned int nfixedargs, - unsigned int ntotalargs) -{ - ffi_status status; - - status = ffi_prep_cif_machdep (cif); - - cif->aarch64_nfixedargs = nfixedargs; - - return status; -} - -#endif - -/* Call a function with the provided arguments and capture the return - value. 
*/ -void -ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) -{ - extended_cif ecif; - struct call_context context; - size_t stack_bytes; - int h, t; - - ecif.cif = cif; - ecif.avalue = avalue; - ecif.rvalue = rvalue; - - stack_bytes = cif->bytes; - - memset (&context, 0, sizeof (context)); - if (is_register_candidate (cif->rtype)) + rtype = cif->rtype; + if (is_register_candidate (rtype)) { - ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); + ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags); - t = cif->rtype->type; + t = rtype->type; switch (t) { case FFI_TYPE_INT: @@ -754,33 +735,35 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) case FFI_TYPE_POINTER: case FFI_TYPE_UINT64: case FFI_TYPE_SINT64: - *(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t); + *(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t); break; case FFI_TYPE_FLOAT: case FFI_TYPE_DOUBLE: case FFI_TYPE_LONGDOUBLE: - compress_hfa_type (rvalue, &context.v[0], 0x100 + t); + compress_hfa_type (rvalue, &context->v[0], 0x100 + t); break; case FFI_TYPE_STRUCT: h = is_hfa (cif->rtype); if (h) - compress_hfa_type (rvalue, &context.v[0], h); - else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) - memcpy (rvalue, &context.x[0], cif->rtype->size); + compress_hfa_type (rvalue, &context->v[0], h); else - abort(); + { + FFI_ASSERT (rtype->size <= 16); + memcpy (rvalue, &context->x[0], rtype->size); + } break; default: - abort(); + FFI_ASSERT (0); + break; } } else { - context.x8 = (uintptr_t)rvalue; - ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); + context->x8 = (uintptr_t)rvalue; + ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags); } } @@ -851,7 +834,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, struct arg_state state; ffi_type *rtype; - arg_init (&state, ALIGN(cif->bytes, 16)); + arg_init (&state); for (i = 0; i < nargs; i++) { diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S index fa7ff5b..a5f636a 100644 --- a/src/aarch64/sysv.S +++ b/src/aarch64/sysv.S @@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define LIBFFI_ASM #include #include +#include #include "internal.h" #ifdef HAVE_MACHINE_ASM_H @@ -38,158 +39,77 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #endif #endif -#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off -#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off -#define cfi_restore(reg) .cfi_restore reg -#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg + .text + .align 2 - .text - .align 2 - - .globl CNAME(ffi_call_SYSV) + .globl CNAME(ffi_call_SYSV) #ifdef __ELF__ - .type CNAME(ffi_call_SYSV), #function + .type CNAME(ffi_call_SYSV), #function + .hidden CNAME(ffi_call_SYSV) #endif -/* ffi_call_SYSV() - - Create a stack frame, setup an argument context, call the callee - and extract the result. - - The maximum required argument stack size is provided, - ffi_call_SYSV() allocates that stack space then calls the - prepare_fn to populate register context and stack. The - argument passing registers are loaded from the register - context and the callee called, on return the register passing - register are saved back to the context. Our caller will - extract the return value from the final state of the saved - register context. 
- - Prototype: - - extern unsigned - ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *, - extended_cif *), - struct call_context *context, - extended_cif *, - size_t required_stack_size, - void (*fn)(void)); +/* ffi_call_SYSV + extern void ffi_call_SYSV (void *stack, void *frame, + void (*fn)(void), int flags); Therefore on entry we have: - x0 prepare_fn - x1 &context - x2 &ecif - x3 bytes - x4 fn - - This function uses the following stack frame layout: + x0 stack + x1 frame + x2 fn + x3 flags +*/ - == - saved x30(lr) - x29(fp)-> saved x29(fp) - saved x24 - saved x23 - saved x22 - sp' -> saved x21 - ... - sp -> (constructed callee stack arguments) - == - - Voila! */ - -#define ffi_call_SYSV_FS (8 * 4) - - .cfi_startproc + cfi_startproc CNAME(ffi_call_SYSV): - stp x29, x30, [sp, #-16]! - cfi_adjust_cfa_offset (16) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - - mov x29, sp - cfi_def_cfa_register (x29) - sub sp, sp, #ffi_call_SYSV_FS - - stp x21, x22, [sp, #0] - cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS) - cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS) - - stp x23, x24, [sp, #16] - cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS) - cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS) - - mov x21, x1 - mov x22, x2 - mov x24, x4 - - /* Allocate the stack space for the actual arguments, many - arguments will be passed in registers, but we assume - worst case and allocate sufficient stack for ALL of - the arguments. */ - sub sp, sp, x3 - - /* unsigned (*prepare_fn) (struct call_context *context, - unsigned char *stack, extended_cif *ecif); - */ - mov x23, x0 - mov x0, x1 - mov x1, sp - /* x2 already in place */ - blr x23 - - /* Preserve the flags returned. */ - mov x23, x0 - - /* Figure out if we should touch the vector registers. */ - tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f - - /* Load the vector argument passing registers. */ - ldp q0, q1, [x21, #0] - ldp q2, q3, [x21, #32] - ldp q4, q5, [x21, #64] - ldp q6, q7, [x21, #96] + /* Use a stack frame allocated by our caller. */ + cfi_def_cfa(x1, 32); + stp x29, x30, [x1] + mov x29, x1 + mov sp, x0 + cfi_def_cfa_register(x29) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + + str w3, [x29, #16] /* save flags */ + mov x9, x2 /* save fn */ + + /* Load the vector argument passing registers, if necessary. */ + tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f + ldp q0, q1, [x29, #32 + 0] + ldp q2, q3, [x29, #32 + 32] + ldp q4, q5, [x29, #32 + 64] + ldp q6, q7, [x29, #32 + 96] 1: - /* Load the core argument passing registers, including + /* Load the core argument passing registers, including the structure return pointer. */ - ldp x0, x1, [x21, #16*N_V_ARG_REG + 0] - ldp x2, x3, [x21, #16*N_V_ARG_REG + 16] - ldp x4, x5, [x21, #16*N_V_ARG_REG + 32] - ldp x6, x7, [x21, #16*N_V_ARG_REG + 48] - ldr x8, [x21, #16*N_V_ARG_REG + 64] - - blr x24 + ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0] + ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16] + ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32] + ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48] + ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64] - /* Save the core return registers. */ - stp x0, x1, [x21, #16*N_V_ARG_REG] + blr x9 /* call fn */ - /* Figure out if we should touch the vector registers. */ - tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f + ldr w3, [x29, #16] /* reload flags */ - /* Save the vector return registers. */ - stp q0, q1, [x21, #0] - stp q2, q3, [x21, #32] -1: - /* All done, unwind our stack frame. 
*/ - ldp x21, x22, [x29, # - ffi_call_SYSV_FS] - cfi_restore (x21) - cfi_restore (x22) - - ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16] - cfi_restore (x23) - cfi_restore (x24) - - mov sp, x29 + /* Partially deconstruct the stack frame. */ + mov sp, x29 cfi_def_cfa_register (sp) + ldp x29, x30, [x29] - ldp x29, x30, [sp], #16 - cfi_adjust_cfa_offset (-16) - cfi_restore (x29) - cfi_restore (x30) + /* Save the core return registers. */ + stp x0, x1, [sp, #32 + 16*N_V_ARG_REG] - ret + /* Save the vector return registers, if necessary. */ + tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f + stp q0, q1, [sp, #32 + 0] + stp q2, q3, [sp, #32 + 32] +1: + /* All done. */ + ret - .cfi_endproc + cfi_endproc #ifdef __ELF__ .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) #endif @@ -237,7 +157,7 @@ CNAME(ffi_call_SYSV): .align 2 .globl CNAME(ffi_closure_SYSV) - .cfi_startproc + cfi_startproc CNAME(ffi_closure_SYSV): stp x29, x30, [sp, #-16]! cfi_adjust_cfa_offset (16) @@ -310,7 +230,7 @@ CNAME(ffi_closure_SYSV): cfi_restore (x30) ret - .cfi_endproc + cfi_endproc #ifdef __ELF__ .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV) #endif -- 1.9.3
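
For readers unfamiliar with the code paths this patch touches, the following is a minimal, illustrative use of libffi's public API that exercises the rewritten ffi_call path (which now marshals arguments and sets up the ffi_call_SYSV frame itself). This example is not part of the patch; the choice of puts and a single pointer argument is arbitrary.

    #include <stdio.h>
    #include <ffi.h>

    int main(void)
    {
      ffi_cif cif;
      ffi_type *arg_types[1] = { &ffi_type_pointer };
      const char *msg = "hello from ffi_call";
      void *arg_values[1] = { &msg };
      ffi_arg rc;

      /* Describe a call to int puts(const char *).  */
      if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
                        &ffi_type_sint, arg_types) == FFI_OK)
        {
          /* ffi_call marshals the argument, allocates the stack frame
             for ffi_call_SYSV, and extracts the integer return value.  */
          ffi_call (&cif, FFI_FN (puts), &rc, arg_values);
          printf ("puts returned %d\n", (int) rc);
        }
      return 0;
    }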