From: Richard Henderson
To: libffi-discuss@sourceware.org
Cc: Richard Henderson
Subject: [PATCH 05/16] aarch64: Reduce the size of register_context
Date: Tue, 28 Oct 2014 18:54:00 -0000
Message-Id: <1414522393-19169-6-git-send-email-rth@twiddle.net>
In-Reply-To: <1414522393-19169-1-git-send-email-rth@twiddle.net>
References: <1414522393-19169-1-git-send-email-rth@twiddle.net>
X-SW-Source: 2014/txt/msg00138.txt.bz2

From: Richard Henderson

We don't need to store 32 general and vector registers.
Only 8 of each are used for parameter passing.
---
 src/aarch64/ffi.c       |  35 ++++++++---------
 src/aarch64/ffitarget.h |   6 ---
 src/aarch64/internal.h  |  26 +++++++++++++
 src/aarch64/sysv.S      | 100 +++++++++++++++++++++++-------------------------
 4 files changed, 91 insertions(+), 76 deletions(-)
 create mode 100644 src/aarch64/internal.h

diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index a6fcc11..58d088b 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <ffi.h>
 #include <ffi_common.h>
+#include "internal.h"
 
 /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
    all further uses in this file will refer to the 128-bit type.  */
@@ -35,38 +37,35 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 # define FFI_TYPE_LONGDOUBLE 4
 #endif
 
-#define N_X_ARG_REG 8
-#define N_V_ARG_REG 8
-
-#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
-
 union _d
 {
   UINT64 d;
   UINT32 s[2];
 };
 
+struct _v
+{
+  union _d d[2] __attribute__((aligned(16)));
+};
+
 struct call_context
 {
-  UINT64 x [AARCH64_N_XREG];
-  struct
-  {
-    union _d d[2];
-  } v [AARCH64_N_VREG];
+  struct _v v[N_V_ARG_REG];
+  UINT64 x[N_X_ARG_REG];
+  UINT64 x8;
 };
 
 #if defined (__clang__) && defined (__APPLE__)
-extern void
-sys_icache_invalidate (void *start, size_t len);
+extern void sys_icache_invalidate (void *start, size_t len);
 #endif
 
 static inline void
 ffi_clear_cache (void *start, void *end)
 {
 #if defined (__clang__) && defined (__APPLE__)
-        sys_icache_invalidate (start, (char *)end - (char *)start);
+  sys_icache_invalidate (start, (char *)end - (char *)start);
 #elif defined (__GNUC__)
-        __builtin___clear_cache (start, end);
+  __builtin___clear_cache (start, end);
 #else
 #error "Missing builtin to flush instruction cache"
 #endif
@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 
   if (is_v_register_candidate (cif->rtype))
     {
-      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+      cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
     }
   else
     {
@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
      for (i = 0; i < cif->nargs; i++)
        if (is_v_register_candidate (cif->arg_types[i]))
          {
-           cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+           cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
            break;
          }
    }
@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
     }
   else
     {
-      memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
+      context.x8 = (uintptr_t)rvalue;
       ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
                      stack_bytes, fn);
     }
@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
     }
   else
     {
-      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
+      rvalue = (void *)(uintptr_t)context->x8;
       (closure->fun) (cif, rvalue, avalue, closure->user_data);
     }
 }
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
index 4bbced2..336f28a 100644
--- a/src/aarch64/ffitarget.h
+++ b/src/aarch64/ffitarget.h
@@ -54,10 +54,4 @@ typedef enum ffi_abi
 #define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
 #endif
 
-#define AARCH64_FFI_WITH_V_BIT 0
-
-#define AARCH64_N_XREG 32
-#define AARCH64_N_VREG 32
-#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
-
 #endif
diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h
new file mode 100644
index 0000000..b6b6104
--- /dev/null
+++ b/src/aarch64/internal.h
@@ -0,0 +1,26 @@
+/*
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define AARCH64_FLAG_ARG_V_BIT 0
+#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
+
+#define N_X_ARG_REG 8
+#define N_V_ARG_REG 8
+#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index 169eab8..70870db 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
+#include "internal.h"
 
 #ifdef HAVE_MACHINE_ASM_H
 #include <machine/asm.h>
@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 #define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
 
         .text
+        .align 2
+
         .globl CNAME(ffi_call_SYSV)
 #ifdef __ELF__
         .type CNAME(ffi_call_SYSV), #function
 #endif
-#ifdef __APPLE__
-        .align 2
-#endif
 
 /* ffi_call_SYSV()
 
@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV):
         mov     x23, x0
 
         /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Load the vector argument passing registers.  */
-        ldp     q0, q1, [x21, #8*32 + 0]
-        ldp     q2, q3, [x21, #8*32 + 32]
-        ldp     q4, q5, [x21, #8*32 + 64]
-        ldp     q6, q7, [x21, #8*32 + 96]
+        ldp     q0, q1, [x21, #0]
+        ldp     q2, q3, [x21, #32]
+        ldp     q4, q5, [x21, #64]
+        ldp     q6, q7, [x21, #96]
 1:
-        /* Load the core argument passing registers.  */
-        ldp     x0, x1, [x21, #0]
-        ldp     x2, x3, [x21, #16]
-        ldp     x4, x5, [x21, #32]
-        ldp     x6, x7, [x21, #48]
-
-        /* Don't forget x8 which may be holding the address of a return buffer.
-         */
-        ldr     x8, [x21, #8*8]
+        /* Load the core argument passing registers, including
+           the structure return pointer.  */
+        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        ldp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        ldp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        ldp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
+        ldr     x8, [x21, #16*N_V_ARG_REG + 64]
 
         blr     x24
 
         /* Save the core argument passing registers.  */
-        stp     x0, x1, [x21, #0]
-        stp     x2, x3, [x21, #16]
-        stp     x4, x5, [x21, #32]
-        stp     x6, x7, [x21, #48]
+        stp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        stp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        stp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        stp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
 
         /* Note nothing useful ever comes back in x8!  */
 
         /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Save the vector argument passing registers.  */
-        stp     q0, q1, [x21, #8*32 + 0]
-        stp     q2, q3, [x21, #8*32 + 32]
-        stp     q4, q5, [x21, #8*32 + 64]
-        stp     q6, q7, [x21, #8*32 + 96]
+        stp     q0, q1, [x21, #0]
+        stp     q2, q3, [x21, #32]
+        stp     q4, q5, [x21, #64]
+        stp     q6, q7, [x21, #96]
 1:
         /* All done, unwind our stack frame.  */
         ldp     x21, x22, [x29, # - ffi_call_SYSV_FS]
@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV):
         .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
 #endif
 
-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
 
 /* ffi_closure_SYSV
 
@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV):
    Voila!  */
 
         .text
-        .globl CNAME(ffi_closure_SYSV)
-#ifdef __APPLE__
         .align 2
-#endif
+
+        .globl CNAME(ffi_closure_SYSV)
         .cfi_startproc
 CNAME(ffi_closure_SYSV):
         stp     x29, x30, [sp, #-16]!
@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV):
         /* Preserve our struct trampoline_data *  */
         mov     x22, x17
 
-        /* Save the rest of the argument passing registers.  */
-        stp     x0, x1, [x21, #0]
-        stp     x2, x3, [x21, #16]
-        stp     x4, x5, [x21, #32]
-        stp     x6, x7, [x21, #48]
-        /* Don't forget we may have been given a result scratch pad address.
-         */
-        str     x8, [x21, #64]
+        /* Save the rest of the argument passing registers, including
+           the structure return pointer.  */
+        stp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        stp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        stp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        stp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
+        str     x8, [x21, #16*N_V_ARG_REG + 64]
 
         /* Figure out if we should touch the vector registers.  */
         ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x0, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Save the argument passing vector registers.  */
-        stp     q0, q1, [x21, #8*32 + 0]
-        stp     q2, q3, [x21, #8*32 + 32]
-        stp     q4, q5, [x21, #8*32 + 64]
-        stp     q6, q7, [x21, #8*32 + 96]
+        stp     q0, q1, [x21, #0]
+        stp     q2, q3, [x21, #32]
+        stp     q4, q5, [x21, #64]
+        stp     q6, q7, [x21, #96]
 1:
         /* Load &ffi_closure..  */
         ldr     x0, [x22, #0]
@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV):
 
         /* Figure out if we should touch the vector registers.  */
         ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x0, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Load the result passing vector registers.  */
-        ldp     q0, q1, [x21, #8*32 + 0]
-        ldp     q2, q3, [x21, #8*32 + 32]
-        ldp     q4, q5, [x21, #8*32 + 64]
-        ldp     q6, q7, [x21, #8*32 + 96]
+        ldp     q0, q1, [x21, #0]
+        ldp     q2, q3, [x21, #32]
+        ldp     q4, q5, [x21, #64]
+        ldp     q6, q7, [x21, #96]
 1:
         /* Load the result passing core registers.  */
-        ldp     x0, x1, [x21, #0]
-        ldp     x2, x3, [x21, #16]
-        ldp     x4, x5, [x21, #32]
-        ldp     x6, x7, [x21, #48]
+        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        ldp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        ldp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        ldp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
         /* Note nothing useful is returned in x8.  */
 
         /* We are done, unwind our frame.  */
-- 
1.9.3
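
[A rough, standalone sketch of the size arithmetic behind the commit message, not part of the patch itself.  The register counts and the CALL_CONTEXT_SIZE expression are taken from the diff above; the OLD_/NEW_ macro names and the main() harness are purely illustrative, and the trailing 16 in the new size is read here as the x8 slot padded to keep the context 16-byte aligned.]

    /* Illustrative only; constants mirror the diff.  The old context held all
       32 X registers (8 bytes each) and all 32 V registers (16 bytes each);
       the new one holds only the 8 argument registers of each bank plus x8.  */
    #include <stdio.h>

    #define N_X_ARG_REG 8
    #define N_V_ARG_REG 8

    /* Old: AARCH64_CALL_CONTEXT_SIZE = 32 * 8 + 32 * 16.  */
    #define OLD_CALL_CONTEXT_SIZE (32 * 8 + 32 * 16)

    /* New: CALL_CONTEXT_SIZE = 8 V regs * 16 + 8 X regs * 8 + 16 for x8/padding.  */
    #define NEW_CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)

    int
    main (void)
    {
      printf ("old call_context: %d bytes\n", OLD_CALL_CONTEXT_SIZE);   /* 768 */
      printf ("new call_context: %d bytes\n", NEW_CALL_CONTEXT_SIZE);   /* 208 */
      printf ("saved per context: %d bytes\n",
              OLD_CALL_CONTEXT_SIZE - NEW_CALL_CONTEXT_SIZE);           /* 560 */
      return 0;
    }

Since the patch defines ffi_closure_SYSV_FS as (8 * 2 + CALL_CONTEXT_SIZE), the stack frame carved out by ffi_closure_SYSV shrinks by the same amount.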