From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 20371 invoked by alias); 29 Oct 2014 20:06:22 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 20280 invoked by uid 89); 29 Oct 2014 20:06:22 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-qg0-f49.google.com Received: from mail-qg0-f49.google.com (HELO mail-qg0-f49.google.com) (209.85.192.49) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Wed, 29 Oct 2014 20:06:17 +0000 Received: by mail-qg0-f49.google.com with SMTP id z60so88725qgd.36 for ; Wed, 29 Oct 2014 13:06:15 -0700 (PDT) X-Received: by 10.224.120.135 with SMTP id d7mr19749858qar.10.1414613162082; Wed, 29 Oct 2014 13:06:02 -0700 (PDT) Received: from pike.twiddle.home.com (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id a12sm5020495qai.1.2014.10.29.13.06.00 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 29 Oct 2014 13:06:01 -0700 (PDT) From: Richard Henderson To: libffi-discuss@sourceware.org Subject: [PATCH 05/10] arm: Rewrite ffi_call Date: Wed, 29 Oct 2014 20:06:00 -0000 Message-Id: <1414613147-10917-6-git-send-email-rth@twiddle.net> In-Reply-To: <1414613147-10917-1-git-send-email-rth@twiddle.net> References: <1414613147-10917-1-git-send-email-rth@twiddle.net> X-SW-Source: 2014/txt/msg00169.txt.bz2 Use the trick to allocate the stack frame for ffi_call_SYSV within ffi_call itself. 
--- src/arm/ffi.c | 285 ++++++++++++++++++---------------- src/arm/ffitarget.h | 2 +- src/arm/internal.h | 7 + src/arm/sysv.S | 440 ++++++++++++++++------------------------------------ 4 files changed, 294 insertions(+), 440 deletions(-) create mode 100644 src/arm/internal.h diff --git a/src/arm/ffi.c b/src/arm/ffi.c index d00ed89..c91b869 100644 --- a/src/arm/ffi.c +++ b/src/arm/ffi.c @@ -30,16 +30,13 @@ #include #include - #include +#include "internal.h" /* Forward declares. */ static int vfp_type_p (const ffi_type *); static void layout_vfp_args (ffi_cif *); -int ffi_prep_args_SYSV (char *stack, extended_cif *ecif, float *vfp_space); -int ffi_prep_args_VFP (char *stack, extended_cif *ecif, float *vfp_space); - static void * ffi_align (ffi_type *ty, void *p) { @@ -98,53 +95,44 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst) return ALIGN (z, 4); } -/* ffi_prep_args is called by the assembly routine once stack space - has been allocated for the function's arguments +/* ffi_prep_args is called once stack space has been allocated + for the function's arguments. The vfp_space parameter is the load area for VFP regs, the return value is cif->vfp_used (word bitset of VFP regs used for passing arguments). These are only used for the VFP hard-float ABI. 
*/ -int -ffi_prep_args_SYSV (char *stack, extended_cif *ecif, float *vfp_space) +static void +ffi_prep_args_SYSV (ffi_cif *cif, int flags, void *rvalue, + void **avalue, char *argp) { - register unsigned int i; - register void **p_argv; - register char *argp; - register ffi_type **p_arg; - argp = stack; + ffi_type **arg_types = cif->arg_types; + int i, n; - if (ecif->cif->flags == FFI_TYPE_STRUCT) + if (flags == ARM_TYPE_STRUCT) { - *(void **) argp = ecif->rvalue; + *(void **) argp = rvalue; argp += 4; } - p_argv = ecif->avalue; - - for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; - (i != 0); i--, p_arg++, p_argv++) + for (i = 0, n = cif->nargs; i < n; i++) { - argp = ffi_align (*p_arg, argp); - argp += ffi_put_arg (*p_arg, *p_argv, argp); + ffi_type *ty = arg_types[i]; + argp = ffi_align (ty, argp); + argp += ffi_put_arg (ty, avalue[i], argp); } - - return 0; } -int -ffi_prep_args_VFP (char *stack, extended_cif * ecif, float *vfp_space) +static void +ffi_prep_args_VFP (ffi_cif *cif, int flags, void *rvalue, + void **avalue, char *stack, char *vfp_space) { - register unsigned int i, vi = 0; - register void **p_argv; - register char *argp, *regp, *eo_regp; - register ffi_type **p_arg; + ffi_type **arg_types = cif->arg_types; + int i, n, vi = 0; + char *argp, *regp, *eo_regp; char stack_used = 0; char done_with_regs = 0; - /* Make sure we are using FFI_VFP. */ - FFI_ASSERT (ecif->cif->abi == FFI_VFP); - /* The first 4 words on the stack are used for values passed in core registers. */ regp = stack; @@ -152,37 +140,36 @@ ffi_prep_args_VFP (char *stack, extended_cif * ecif, float *vfp_space) /* If the function returns an FFI_TYPE_STRUCT in memory, that address is passed in r0 to the function. 
*/ - if (ecif->cif->flags == FFI_TYPE_STRUCT) + if (flags == ARM_TYPE_STRUCT) { - *(void **) regp = ecif->rvalue; + *(void **) regp = rvalue; regp += 4; } - p_argv = ecif->avalue; - - for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; - (i != 0); i--, p_arg++, p_argv++) + for (i = 0, n = cif->nargs; i < n; i++) { - int is_vfp_type = vfp_type_p (*p_arg); + ffi_type *ty = arg_types[i]; + void *a = avalue[i]; + int is_vfp_type = vfp_type_p (ty); /* Allocated in VFP registers. */ - if (vi < ecif->cif->vfp_nargs && is_vfp_type) + if (vi < cif->vfp_nargs && is_vfp_type) { - char *vfp_slot = (char *) (vfp_space + ecif->cif->vfp_args[vi++]); - ffi_put_arg (*p_arg, *p_argv, vfp_slot); + char *vfp_slot = vfp_space + cif->vfp_args[vi++] * 4; + ffi_put_arg (ty, a, vfp_slot); continue; } /* Try allocating in core registers. */ else if (!done_with_regs && !is_vfp_type) { - char *tregp = ffi_align (*p_arg, regp); - size_t size = (*p_arg)->size; + char *tregp = ffi_align (ty, regp); + size_t size = ty->size; size = (size < 4) ? 4 : size; // pad /* Check if there is space left in the aligned register area to place the argument. */ if (tregp + size <= eo_regp) { - regp = tregp + ffi_put_arg (*p_arg, *p_argv, tregp); + regp = tregp + ffi_put_arg (ty, a, tregp); done_with_regs = (regp == argp); // ensure we did not write into the stack area FFI_ASSERT (regp <= argp); @@ -195,87 +182,97 @@ ffi_prep_args_VFP (char *stack, extended_cif * ecif, float *vfp_space) { stack_used = 1; done_with_regs = 1; - argp = tregp + ffi_put_arg (*p_arg, *p_argv, tregp); + argp = tregp + ffi_put_arg (ty, a, tregp); FFI_ASSERT (eo_regp < argp); continue; } } /* Base case, arguments are passed on the stack */ stack_used = 1; - argp = ffi_align (*p_arg, argp); - argp += ffi_put_arg (*p_arg, *p_argv, argp); + argp = ffi_align (ty, argp); + argp += ffi_put_arg (ty, a, argp); } - /* Indicate the VFP registers used. 
*/ - return ecif->cif->vfp_used; } /* Perform machine dependent cif processing */ ffi_status -ffi_prep_cif_machdep (ffi_cif * cif) +ffi_prep_cif_machdep (ffi_cif *cif) { + int flags = 0, cabi = cif->abi; + size_t bytes; + /* Round the stack up to a multiple of 8 bytes. This isn't needed everywhere, but it is on some platforms, and it doesn't harm anything when it isn't needed. */ - cif->bytes = (cif->bytes + 7) & ~7; + bytes = ALIGN (cif->bytes, 8); + + /* Minimum stack space is the 4 register arguments that we pop. */ + if (bytes < 4*4) + bytes = 4*4; + cif->bytes = bytes; + + /* Map out the register placements of VFP register args. The VFP + hard-float calling conventions are slightly more sophisticated + than the base calling conventions, so we do it here instead of + in ffi_prep_args(). */ + if (cabi == FFI_VFP) + layout_vfp_args (cif); /* Set the return type flag */ switch (cif->rtype->type) { case FFI_TYPE_VOID: - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - cif->flags = (unsigned) cif->rtype->type; + flags = ARM_TYPE_VOID; + break; + + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_POINTER: + flags = ARM_TYPE_INT; break; case FFI_TYPE_SINT64: case FFI_TYPE_UINT64: - cif->flags = (unsigned) FFI_TYPE_SINT64; + flags = ARM_TYPE_INT64; + break; + + case FFI_TYPE_FLOAT: + flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_S : ARM_TYPE_INT); + break; + case FFI_TYPE_DOUBLE: + flags = (cabi == FFI_VFP ? 
ARM_TYPE_VFP_D : ARM_TYPE_INT64); break; case FFI_TYPE_STRUCT: - if (cif->abi == FFI_VFP) + if (cabi == FFI_VFP) { int h = vfp_type_p (cif->rtype); - if (h) - { - int ele_count = h >> 8; - int type_code = h & 0xff; - if (ele_count > 1) - { - if (type_code == FFI_TYPE_FLOAT) - type_code = FFI_TYPE_STRUCT_VFP_FLOAT; - else - type_code = FFI_TYPE_STRUCT_VFP_DOUBLE; - } - cif->flags = type_code; + + flags = ARM_TYPE_VFP_N; + if (h == 0x100 + FFI_TYPE_FLOAT) + flags = ARM_TYPE_VFP_S; + if (h == 0x100 + FFI_TYPE_DOUBLE) + flags = ARM_TYPE_VFP_D; + if (h != 0) break; - } - } - if (cif->rtype->size <= 4) - { - /* A Composite Type not larger than 4 bytes is returned in r0. */ - cif->flags = (unsigned) FFI_TYPE_INT; - } - else - { - /* A Composite Type larger than 4 bytes, or whose size cannot - be determined statically ... is stored in memory at an - address passed [in r0]. */ - cif->flags = (unsigned) FFI_TYPE_STRUCT; } + + /* A Composite Type not larger than 4 bytes is returned in r0. + A Composite Type larger than 4 bytes, or whose size cannot + be determined statically ... is stored in memory at an + address passed [in r0]. */ + flags = (cif->rtype->size <= 4 ? ARM_TYPE_INT : ARM_TYPE_STRUCT); break; default: - cif->flags = FFI_TYPE_INT; - break; + abort(); } - - /* Map out the register placements of VFP register args. The VFP - hard-float calling conventions are slightly more sophisticated - than the base calling conventions, so we do it here instead of - in ffi_prep_args(). */ - if (cif->abi == FFI_VFP) - layout_vfp_args (cif); + cif->flags = flags; return FFI_OK; } @@ -293,69 +290,83 @@ ffi_prep_cif_machdep_var (ffi_cif * cif, } /* Prototypes for assembly functions, in sysv.S. 
*/ -extern void ffi_call_SYSV (void (*fn) (void), extended_cif *, unsigned, - unsigned, unsigned *); -extern void ffi_call_VFP (void (*fn) (void), extended_cif *, unsigned, - unsigned, unsigned *); -void -ffi_call (ffi_cif * cif, void (*fn) (void), void *rvalue, void **avalue) +struct call_frame { - extended_cif ecif; - - int small_struct = (cif->flags == FFI_TYPE_INT - && cif->rtype->type == FFI_TYPE_STRUCT); - int vfp_struct = (cif->flags == FFI_TYPE_STRUCT_VFP_FLOAT - || cif->flags == FFI_TYPE_STRUCT_VFP_DOUBLE); - - unsigned int temp; - - ecif.cif = cif; - ecif.avalue = avalue; + void *fp; + void *lr; + void *rvalue; + int flags; +}; - /* If the return value is a struct and we don't have a return - value address then we need to make one. */ +extern void ffi_call_SYSV (void *stack, struct call_frame *, + void (*fn) (void)) FFI_HIDDEN; +extern void ffi_call_VFP (void *vfp_space, struct call_frame *, + void (*fn) (void), unsigned vfp_used) FFI_HIDDEN; - if ((rvalue == NULL) && (cif->flags == FFI_TYPE_STRUCT)) +void +ffi_call (ffi_cif * cif, void (*fn) (void), void *rvalue, void **avalue) +{ + int flags = cif->flags; + ffi_type *rtype = cif->rtype; + size_t bytes, rsize, vfp_size; + char *stack, *vfp_space, *new_rvalue; + struct call_frame *frame; + + rsize = 0; + if (rvalue == NULL) { - ecif.rvalue = alloca (cif->rtype->size); + /* If the return value is a struct and we don't have a return + value address then we need to make one. Otherwise the return + value is in registers and we can ignore them. */ + if (flags == ARM_TYPE_STRUCT) + rsize = rtype->size; + else + flags = ARM_TYPE_VOID; } - else if (small_struct) - ecif.rvalue = &temp; - else if (vfp_struct) + else if (flags == ARM_TYPE_VFP_N) { /* Largest case is double x 4. 
*/ - ecif.rvalue = alloca (32); + rsize = 32; } - else - ecif.rvalue = rvalue; + else if (flags == ARM_TYPE_INT && rtype->type == FFI_TYPE_STRUCT) + rsize = 4; - switch (cif->abi) - { - case FFI_SYSV: - ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); - break; + /* Largest case. */ + vfp_size = (cif->abi == FFI_VFP && cif->vfp_used ? 8*8: 0); - case FFI_VFP: -#ifdef __ARM_EABI__ - ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); - break; -#endif + bytes = cif->bytes; + stack = alloca (vfp_size + bytes + sizeof(struct call_frame) + rsize); - default: - FFI_ASSERT (0); - break; + vfp_space = NULL; + if (vfp_size) + { + vfp_space = stack; + stack += vfp_size; } - if (small_struct) + + frame = (struct call_frame *)(stack + bytes); + + new_rvalue = rvalue; + if (rsize) + new_rvalue = (void *)(frame + 1); + + frame->rvalue = new_rvalue; + frame->flags = flags; + + if (vfp_space) { - FFI_ASSERT (rvalue != NULL); - memcpy (rvalue, &temp, cif->rtype->size); + ffi_prep_args_VFP (cif, flags, new_rvalue, avalue, stack, vfp_space); + ffi_call_VFP (vfp_space, frame, fn, cif->vfp_used); } - else if (vfp_struct) + else { - FFI_ASSERT (rvalue != NULL); - memcpy (rvalue, ecif.rvalue, cif->rtype->size); + ffi_prep_args_SYSV (cif, flags, new_rvalue, avalue, stack); + ffi_call_SYSV (stack, frame, fn); } + + if (rvalue && rvalue != new_rvalue) + memcpy (rvalue, new_rvalue, rtype->size); } /** private members **/ diff --git a/src/arm/ffitarget.h b/src/arm/ffitarget.h index 26d494d..6355904 100644 --- a/src/arm/ffitarget.h +++ b/src/arm/ffitarget.h @@ -53,7 +53,7 @@ typedef enum ffi_abi { #define FFI_EXTRA_CIF_FIELDS \ int vfp_used; \ - short vfp_reg_free, vfp_nargs; \ + unsigned short vfp_reg_free, vfp_nargs; \ signed char vfp_args[16] \ /* Internally used. 
*/ diff --git a/src/arm/internal.h b/src/arm/internal.h new file mode 100644 index 0000000..6cf0b2a --- /dev/null +++ b/src/arm/internal.h @@ -0,0 +1,7 @@ +#define ARM_TYPE_VFP_S 0 +#define ARM_TYPE_VFP_D 1 +#define ARM_TYPE_VFP_N 2 +#define ARM_TYPE_INT64 3 +#define ARM_TYPE_INT 4 +#define ARM_TYPE_VOID 5 +#define ARM_TYPE_STRUCT 6 diff --git a/src/arm/sysv.S b/src/arm/sysv.S index 541bbe9..b967d97 100644 --- a/src/arm/sysv.S +++ b/src/arm/sysv.S @@ -1,8 +1,8 @@ /* ----------------------------------------------------------------------- sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc. Copyright (c) 2011 Plausible Labs Cooperative, Inc. - - ARM Foreign Function Interface + + ARM Foreign Function Interface Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -28,219 +28,155 @@ #define LIBFFI_ASM #include #include -#ifdef HAVE_MACHINE_ASM_H -#include -#else -#ifdef __USER_LABEL_PREFIX__ -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ -#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x) -#else -#define CNAME(x) x -#endif -#ifdef __APPLE__ -#define ENTRY(x) .globl _##x; _##x: -#else -#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x): -#endif /* __APPLE__ */ -#endif - -#ifdef __ELF__ -#define LSYM(x) .x -#else -#define LSYM(x) x -#endif - -/* Use the SOFTFP return value ABI on Mac OS X, as per the iOS ABI - Function Call Guide */ -#ifdef __APPLE__ -#define __SOFTFP__ -#endif - -/* We need a better way of testing for this, but for now, this is all - we can do. */ -@ This selects the minimum architecture level required. 
-#define __ARM_ARCH__ 3 - -#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) -# undef __ARM_ARCH__ -# define __ARM_ARCH__ 4 -#endif - -#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -# undef __ARM_ARCH__ -# define __ARM_ARCH__ 5 -#endif - -#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ +#include +#include "internal.h" + +/* GCC 4.8 provides __ARM_ARCH; construct it otherwise. */ +#ifndef __ARM_ARCH +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_6M__) -# undef __ARM_ARCH__ -# define __ARM_ARCH__ 6 -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7EM__) -# undef __ARM_ARCH__ -# define __ARM_ARCH__ 7 -#endif - -#if __ARM_ARCH__ >= 5 -# define call_reg(x) blx x -#elif defined (__ARM_ARCH_4T__) -# define call_reg(x) mov lr, pc ; bx x -# if defined(__thumb__) || defined(__THUMB_INTERWORK__) -# define __INTERWORKING__ +# define __ARM_ARCH 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH 5 +# else +# define __ARM_ARCH 4 # endif -#else -# define call_reg(x) mov lr, pc ; mov pc, x #endif /* Conditionally compile unwinder directives. 
*/ +.macro UNWIND text:vararg #ifdef __ARM_EABI__ -#define UNWIND -#else -#define UNWIND @ + \text #endif +.endm +#if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__ARM_EABI__) + .cfi_sections .debug_frame +#endif -.syntax unified - -#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) -#define ARM_FUNC_START(name) \ - .text; \ - .align 2; \ - .thumb; \ - .thumb_func; \ - ENTRY(name); \ - bx pc; \ - nop; \ - .arm; \ - UNWIND .fnstart; \ -_L__##name: +#define CONCAT(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +#ifdef __USER_LABEL_PREFIX__ +# define CNAME(X) CONCAT (__USER_LABEL_PREFIX__, X) #else -#define ARM_FUNC_START(name) \ - .text; \ - .align 2; \ - .arm; \ - ENTRY(name); \ - UNWIND .fnstart +# define CNAME(X) X #endif - -.macro RETLDM regs=, cond=, dirn=ia -#if defined (__INTERWORKING__) - .ifc "\regs","" - ldr\cond lr, [sp], #4 - .else - ldm\cond\dirn sp!, {\regs, lr} - .endif - bx\cond lr +#ifdef __ELF__ +# define SIZE(X) .size CNAME(X), . - CNAME(X) +# define TYPE(X, Y) .type CNAME(X), Y #else - .ifc "\regs","" - ldr\cond pc, [sp], #4 - .else - ldm\cond\dirn sp!, {\regs, pc} - .endif +# define SIZE(X) +# define TYPE(X, Y) #endif -.endm - - @ r0: ffi_prep_args - @ r1: &ecif - @ r2: cif->bytes - @ r3: fig->flags - @ sp+0: ecif.rvalue - - @ This assumes we are using gas. -ARM_FUNC_START(ffi_call_SYSV) - @ Save registers - stmfd sp!, {r0-r3, fp, lr} - UNWIND .save {r0-r3, fp, lr} - mov fp, sp - - UNWIND .setfp fp, sp - - @ Make room for all of the new args. 
- sub sp, fp, r2 - - @ Place all of the ffi_prep_args in position - mov r0, sp - @ r1 already set - @ Call ffi_prep_args(stack, &ecif) - bl CNAME(ffi_prep_args_SYSV) +#define ARM_FUNC_START(name, gl) \ + .align 3; \ + .ifne gl; .globl CNAME(name); FFI_HIDDEN(CNAME(name)); .endif; \ + TYPE(name, %function); \ + CNAME(name): - @ move first 4 parameters in registers - ldmia sp, {r0-r3} +#define ARM_FUNC_END(name) \ + SIZE(name) - @ and adjust stack - sub lr, fp, sp @ cif->bytes == fp - sp - ldr ip, [fp] @ load fn() in advance - cmp lr, #16 - movhs lr, #16 - add sp, sp, lr +/* Aid in defining a jump table with 8 bytes between entries. */ +.macro E index + .if . - 0b - 8*\index + .error "type table out of sync" + .endif +.endm - @ call (fn) (...) - call_reg(ip) - - @ Remove the space we pushed for the args - mov sp, fp + .text + .syntax unified + .arm + + /* We require interworking on LDM, which implies ARMv5T, + which implies the existence of BLX. */ + .arch armv5t + + /* Note that we use STC and LDC to encode VFP instructions, + so that we do not need ".fpu vfp", nor get that added to + the object file attributes. These will not be executed + unless the FFI_VFP abi is used. */ + + @ r0: stack + @ r1: frame + @ r2: fn + @ r3: vfp_used + +ARM_FUNC_START(ffi_call_VFP, 1) + UNWIND .fnstart + cfi_startproc + + cmp r3, #3 @ load only d0 if possible + ldcle p11, cr0, [r0] @ vldrle d0, [r0] + ldcgt p11, cr0, [r0], {16} @ vldmgt r0, {d0-d7} + add r0, r0, #64 @ discard the vfp register args + /* FALLTHRU */ +ARM_FUNC_END(ffi_call_VFP) + +ARM_FUNC_START(ffi_call_SYSV, 1) + stm r1, {fp, lr} + mov fp, r1 + + @ This is a bit of a lie wrt the origin of the unwind info, but + @ now we've got the usual frame pointer and two saved registers. 
+ UNWIND .save {fp,lr} + UNWIND .setfp fp, sp + cfi_def_cfa(fp, 8) + cfi_rel_offset(fp, 0) + cfi_rel_offset(lr, 4) + + mov sp, r0 @ install the stack pointer + mov lr, r2 @ move the fn pointer out of the way + ldmia sp!, {r0-r3} @ move first 4 parameters in registers. + blx lr @ call fn @ Load r2 with the pointer to storage for the return value - ldr r2, [sp, #24] - - @ Load r3 with the return type code - ldr r3, [sp, #12] - - @ If the return value pointer is NULL, assume no return value. - cmp r2, #0 - beq LSYM(Lepilogue) + @ Load r3 with the return type code + ldr r2, [fp, #8] + ldr r3, [fp, #12] -@ return INT - cmp r3, #FFI_TYPE_INT -#if defined(__SOFTFP__) || defined(__ARM_EABI__) - cmpne r3, #FFI_TYPE_FLOAT -#endif - streq r0, [r2] - beq LSYM(Lepilogue) - - @ return INT64 - cmp r3, #FFI_TYPE_SINT64 -#if defined(__SOFTFP__) || defined(__ARM_EABI__) - cmpne r3, #FFI_TYPE_DOUBLE -#endif - stmiaeq r2, {r0, r1} - -#if !defined(__SOFTFP__) && !defined(__ARM_EABI__) - beq LSYM(Lepilogue) - -@ return FLOAT - cmp r3, #FFI_TYPE_FLOAT - stfeqs f0, [r2] - beq LSYM(Lepilogue) - -@ return DOUBLE or LONGDOUBLE - cmp r3, #FFI_TYPE_DOUBLE - stfeqd f0, [r2] -#endif - -LSYM(Lepilogue): -#if defined (__INTERWORKING__) - ldmia sp!, {r0-r3,fp, lr} - bx lr -#else - ldmia sp!, {r0-r3,fp, pc} -#endif - -.ffi_call_SYSV_end: - UNWIND .fnend -#ifdef __ELF__ - .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) -#endif + @ Deallocate the stack with the arguments. + mov sp, fp + cfi_def_cfa_register(sp) + + @ Store values stored in registers. 
+ .align 3 + add pc, pc, r3, lsl #3 + nop +0: +E ARM_TYPE_VFP_S + stc p10, cr0, [r2] @ vstr s0, [r2] + pop {fp,pc} +E ARM_TYPE_VFP_D + stc p11, cr0, [r2] @ vstr d0, [r2] + pop {fp,pc} +E ARM_TYPE_VFP_N + stc p11, cr0, [r2], {8} @ vstm r2, {d0-d3} + pop {fp,pc} +E ARM_TYPE_INT64 + str r1, [r2, #4] + nop +E ARM_TYPE_INT + str r0, [r2] + pop {fp,pc} +E ARM_TYPE_VOID + pop {fp,pc} + nop +E ARM_TYPE_STRUCT + pop {fp,pc} + + cfi_endproc + UNWIND .fnend +ARM_FUNC_END(ffi_call_SYSV) /* @@ -251,7 +187,8 @@ LSYM(Lepilogue): void *args; */ -ARM_FUNC_START(ffi_closure_SYSV) +ARM_FUNC_START(ffi_closure_SYSV, 1) + UNWIND .fnstart UNWIND .pad #16 add ip, sp, #16 stmfd sp!, {ip, lr} @@ -310,116 +247,16 @@ ARM_FUNC_START(ffi_closure_SYSV) ldfd f0, [sp] b .Lclosure_epilogue #endif - -.ffi_closure_SYSV_end: UNWIND .fnend -#ifdef __ELF__ - .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) -#endif +ARM_FUNC_END(ffi_closure_SYSV) /* Below are VFP hard-float ABI call and closure implementations. Add VFP FPU directive here. This is only compiled into the library under EABI. */ #ifdef __ARM_EABI__ - .fpu vfp - - @ r0: fn - @ r1: &ecif - @ r2: cif->bytes - @ r3: fig->flags - @ sp+0: ecif.rvalue - -ARM_FUNC_START(ffi_call_VFP) - @ Save registers - stmfd sp!, {r0-r3, fp, lr} - UNWIND .save {r0-r3, fp, lr} - mov fp, sp - UNWIND .setfp fp, sp - - @ Make room for all of the new args. 
- sub sp, sp, r2 - - @ Make room for loading VFP args - sub sp, sp, #64 - - @ Place all of the ffi_prep_args in position - mov r0, sp - @ r1 already set - sub r2, fp, #64 @ VFP scratch space - - @ Call ffi_prep_args(stack, &ecif, vfp_space) - bl CNAME(ffi_prep_args_VFP) - - @ Load VFP register args if needed - cmp r0, #0 - mov ip, fp - beq LSYM(Lbase_args) - - @ Load only d0 if possible - cmp r0, #3 - sub ip, fp, #64 - flddle d0, [ip] - fldmiadgt ip, {d0-d7} - -LSYM(Lbase_args): - @ move first 4 parameters in registers - ldmia sp, {r0-r3} - - @ and adjust stack - sub lr, ip, sp @ cif->bytes == (fp - 64) - sp - ldr ip, [fp] @ load fn() in advance - cmp lr, #16 - movhs lr, #16 - add sp, sp, lr - - @ call (fn) (...) - call_reg(ip) - - @ Remove the space we pushed for the args - mov sp, fp - - @ Load r2 with the pointer to storage for - @ the return value - ldr r2, [sp, #24] - - @ Load r3 with the return type code - ldr r3, [sp, #12] - - @ If the return value pointer is NULL, - @ assume no return value. 
- cmp r2, #0 - beq LSYM(Lepilogue_vfp) - - cmp r3, #FFI_TYPE_INT - streq r0, [r2] - beq LSYM(Lepilogue_vfp) - - cmp r3, #FFI_TYPE_SINT64 - stmeqia r2, {r0, r1} - beq LSYM(Lepilogue_vfp) - - cmp r3, #FFI_TYPE_FLOAT - fstseq s0, [r2] - beq LSYM(Lepilogue_vfp) - - cmp r3, #FFI_TYPE_DOUBLE - fstdeq d0, [r2] - beq LSYM(Lepilogue_vfp) - - cmp r3, #FFI_TYPE_STRUCT_VFP_FLOAT - cmpne r3, #FFI_TYPE_STRUCT_VFP_DOUBLE - fstmiadeq r2, {d0-d3} - -LSYM(Lepilogue_vfp): - RETLDM "r0-r3,fp" - -.ffi_call_VFP_end: - UNWIND .fnend - .size CNAME(ffi_call_VFP),.ffi_call_VFP_end-CNAME(ffi_call_VFP) - - -ARM_FUNC_START(ffi_closure_VFP) +ARM_FUNC_START(ffi_closure_VFP, 1) + UNWIND .fnstart fstmfdd sp!, {d0-d7} @ r0-r3, then d0-d7 UNWIND .pad #80 @@ -475,16 +312,15 @@ ARM_FUNC_START(ffi_closure_VFP) .Lretdouble_struct_vfp: fldmiad sp, {d0-d3} b .Lclosure_epilogue_vfp - -.ffi_closure_VFP_end: UNWIND .fnend - .size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP) +ARM_FUNC_END(ffi_closure_VFP) #endif -ENTRY(ffi_arm_trampoline) +ARM_FUNC_START(ffi_arm_trampoline, 1) stmfd sp!, {r0-r3} ldr r0, [pc] ldr pc, [pc] +ARM_FUNC_END(ffi_arm_trampoline) #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",%progbits -- 1.9.3