[PATCH 05/16] aarch64: Reduce the size of register_context

public inbox for libffi-discuss@sourceware.org
 help / color / mirror / Atom feed

From: Richard Henderson <rth@twiddle.net>
To: libffi-discuss@sourceware.org
Cc: Richard Henderson <rth@redhat.com>
Subject: [PATCH 05/16] aarch64: Reduce the size of register_context
Date: Tue, 28 Oct 2014 18:54:00 -0000	[thread overview]
Message-ID: <1414522393-19169-6-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1414522393-19169-1-git-send-email-rth@twiddle.net>

From: Richard Henderson <rth@redhat.com>

We don't need to store 32 general and vector registers.
Only 8 of each are used for parameter passing.
---
 src/aarch64/ffi.c       |  35 ++++++++---------
 src/aarch64/ffitarget.h |   6 ---
 src/aarch64/internal.h  |  26 +++++++++++++
 src/aarch64/sysv.S      | 100 +++++++++++++++++++++++-------------------------
 4 files changed, 91 insertions(+), 76 deletions(-)
 create mode 100644 src/aarch64/internal.h

diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index a6fcc11..58d088b 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <ffi.h>
 #include <ffi_common.h>
+#include "internal.h"
 
 /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
    all further uses in this file will refer to the 128-bit type.  */
@@ -35,38 +37,35 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 # define FFI_TYPE_LONGDOUBLE 4
 #endif
 
-#define N_X_ARG_REG 8
-#define N_V_ARG_REG 8
-
-#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
-
 union _d
 {
   UINT64 d;
   UINT32 s[2];
 };
 
+struct _v
+{
+  union _d d[2] __attribute__((aligned(16)));
+};
+
 struct call_context
 {
-  UINT64 x [AARCH64_N_XREG];
-  struct
-  {
-    union _d d[2];
-  } v [AARCH64_N_VREG];
+  struct _v v[N_V_ARG_REG];
+  UINT64 x[N_X_ARG_REG];
+  UINT64 x8;
 };
 
 #if defined (__clang__) && defined (__APPLE__)
-extern void
-sys_icache_invalidate (void *start, size_t len);
+extern void sys_icache_invalidate (void *start, size_t len);
 #endif
 
 static inline void
 ffi_clear_cache (void *start, void *end)
 {
 #if defined (__clang__) && defined (__APPLE__)
-	sys_icache_invalidate (start, (char *)end - (char *)start);
+  sys_icache_invalidate (start, (char *)end - (char *)start);
 #elif defined (__GNUC__)
-	__builtin___clear_cache (start, end);
+  __builtin___clear_cache (start, end);
 #else
 #error "Missing builtin to flush instruction cache"
 #endif
@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 
   if (is_v_register_candidate (cif->rtype))
     {
-      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+      cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
     }
   else
     {
@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
       for (i = 0; i < cif->nargs; i++)
         if (is_v_register_candidate (cif->arg_types[i]))
           {
-            cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+            cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
             break;
           }
     }
@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
           }
         else
           {
-            memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
+	    context.x8 = (uintptr_t)rvalue;
             ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
 			   stack_bytes, fn);
           }
@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
     }
   else
     {
-      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
+      rvalue = (void *)(uintptr_t)context->x8;
       (closure->fun) (cif, rvalue, avalue, closure->user_data);
     }
 }
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
index 4bbced2..336f28a 100644
--- a/src/aarch64/ffitarget.h
+++ b/src/aarch64/ffitarget.h
@@ -54,10 +54,4 @@ typedef enum ffi_abi
 #define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
 #endif
 
-#define AARCH64_FFI_WITH_V_BIT 0
-
-#define AARCH64_N_XREG 32
-#define AARCH64_N_VREG 32
-#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
-
 #endif
diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h
new file mode 100644
index 0000000..b6b6104
--- /dev/null
+++ b/src/aarch64/internal.h
@@ -0,0 +1,26 @@
+/* 
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define AARCH64_FLAG_ARG_V_BIT	0
+#define AARCH64_FLAG_ARG_V	(1 << AARCH64_FLAG_ARG_V_BIT)
+
+#define N_X_ARG_REG		8
+#define N_V_ARG_REG		8
+#define CALL_CONTEXT_SIZE	(N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index 169eab8..70870db 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
+#include "internal.h"
 
 #ifdef HAVE_MACHINE_ASM_H
 #include <machine/asm.h>
@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define cfi_def_cfa_register(reg)	.cfi_def_cfa_register reg
 
         .text
+        .align 2
+
         .globl CNAME(ffi_call_SYSV)
 #ifdef __ELF__
         .type CNAME(ffi_call_SYSV), #function
 #endif
-#ifdef __APPLE__
-        .align 2
-#endif
 
 /* ffi_call_SYSV()
 
@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV):
         mov     x23, x0
 
         /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Load the vector argument passing registers.  */
-        ldp     q0, q1, [x21, #8*32 +  0]
-        ldp     q2, q3, [x21, #8*32 + 32]
-        ldp     q4, q5, [x21, #8*32 + 64]
-        ldp     q6, q7, [x21, #8*32 + 96]
+        ldp     q0, q1, [x21, #0]
+        ldp     q2, q3, [x21, #32]
+        ldp     q4, q5, [x21, #64]
+        ldp     q6, q7, [x21, #96]
 1:
-        /* Load the core argument passing registers.  */
-        ldp     x0, x1, [x21,  #0]
-        ldp     x2, x3, [x21, #16]
-        ldp     x4, x5, [x21, #32]
-        ldp     x6, x7, [x21, #48]
-
-        /* Don't forget x8 which may be holding the address of a return buffer.
-	 */
-        ldr     x8,     [x21, #8*8]
+        /* Load the core argument passing registers, including
+	   the structure return pointer.  */
+        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        ldp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        ldp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        ldp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
+        ldr     x8,     [x21, #16*N_V_ARG_REG + 64]
 
         blr     x24
 
         /* Save the core argument passing registers.  */
-        stp     x0, x1, [x21,  #0]
-        stp     x2, x3, [x21, #16]
-        stp     x4, x5, [x21, #32]
-        stp     x6, x7, [x21, #48]
+        stp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        stp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        stp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        stp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
 
         /* Note nothing useful ever comes back in x8!  */
 
         /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Save the vector argument passing registers.  */
-        stp     q0, q1, [x21, #8*32 + 0]
-        stp     q2, q3, [x21, #8*32 + 32]
-        stp     q4, q5, [x21, #8*32 + 64]
-        stp     q6, q7, [x21, #8*32 + 96]
+        stp     q0, q1, [x21, #0]
+        stp     q2, q3, [x21, #32]
+        stp     q4, q5, [x21, #64]
+        stp     q6, q7, [x21, #96]
 1:
         /* All done, unwind our stack frame.  */
         ldp     x21, x22, [x29,  # - ffi_call_SYSV_FS]
@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV):
         .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
 #endif
 
-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
 
 /* ffi_closure_SYSV
 
@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV):
    Voila!  */
 
         .text
-        .globl CNAME(ffi_closure_SYSV)
-#ifdef __APPLE__
         .align 2
-#endif
+
+        .globl CNAME(ffi_closure_SYSV)
         .cfi_startproc
 CNAME(ffi_closure_SYSV):
         stp     x29, x30, [sp, #-16]!
@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV):
         /* Preserve our struct trampoline_data *  */
         mov     x22, x17
 
-        /* Save the rest of the argument passing registers.  */
-        stp     x0, x1, [x21, #0]
-        stp     x2, x3, [x21, #16]
-        stp     x4, x5, [x21, #32]
-        stp     x6, x7, [x21, #48]
-        /* Don't forget we may have been given a result scratch pad address.
-	 */
-        str     x8,     [x21, #64]
+        /* Save the rest of the argument passing registers, including
+	   the structure return pointer.  */
+        stp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        stp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        stp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        stp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
+        str     x8,     [x21, #16*N_V_ARG_REG + 64]
 
         /* Figure out if we should touch the vector registers.  */
         ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x0, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Save the argument passing vector registers.  */
-        stp     q0, q1, [x21, #8*32 + 0]
-        stp     q2, q3, [x21, #8*32 + 32]
-        stp     q4, q5, [x21, #8*32 + 64]
-        stp     q6, q7, [x21, #8*32 + 96]
+        stp     q0, q1, [x21, #0]
+        stp     q2, q3, [x21, #32]
+        stp     q4, q5, [x21, #64]
+        stp     q6, q7, [x21, #96]
 1:
         /* Load &ffi_closure..  */
         ldr     x0, [x22, #0]
@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV):
 
         /* Figure out if we should touch the vector registers.  */
         ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
+        tbz     x0, #AARCH64_FLAG_ARG_V_BIT, 1f
 
         /* Load the result passing vector registers.  */
-        ldp     q0, q1, [x21, #8*32 + 0]
-        ldp     q2, q3, [x21, #8*32 + 32]
-        ldp     q4, q5, [x21, #8*32 + 64]
-        ldp     q6, q7, [x21, #8*32 + 96]
+        ldp     q0, q1, [x21, #0]
+        ldp     q2, q3, [x21, #32]
+        ldp     q4, q5, [x21, #64]
+        ldp     q6, q7, [x21, #96]
 1:
         /* Load the result passing core registers.  */
-        ldp     x0, x1, [x21,  #0]
-        ldp     x2, x3, [x21, #16]
-        ldp     x4, x5, [x21, #32]
-        ldp     x6, x7, [x21, #48]
+        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
+        ldp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
+        ldp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
+        ldp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
         /* Note nothing useful is returned in x8.  */
 
         /* We are done, unwind our frame.  */
-- 
1.9.3

next prev parent reply	other threads:[~2014-10-28 18:54 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-28 18:53 [PATCH 00/16] Go closures for aarch64 Richard Henderson
2014-10-28 18:53 ` [PATCH 03/16] aarch64: Always distinguish LONGDOUBLE Richard Henderson
2014-10-28 18:53 ` [PATCH 01/16] aarch64: Fix non-apple compilation Richard Henderson
2014-10-28 18:54 ` [PATCH 12/16] aarch64: Unify scalar fp and hfa handling Richard Henderson
2014-10-28 18:54 ` [PATCH 11/16] aarch64: Move return value handling into ffi_closure_SYSV Richard Henderson
2014-10-28 18:54 ` [PATCH 08/16] aarch64: Tidy up abi manipulation Richard Henderson
2017-09-17 14:24   ` Andreas Schwab
2014-10-28 18:54 ` [PATCH 14/16] aarch64: Add support for complex types Richard Henderson
2014-10-28 18:54 ` [PATCH 15/16] aarch64: Move x8 out of call_context Richard Henderson
2014-10-28 18:54 ` [PATCH 10/16] aarch64: Move return value handling into ffi_call_SYSV Richard Henderson
2014-10-28 18:54 ` [PATCH 04/16] aarch64: Simplify AARCH64_STACK_ALIGN Richard Henderson
2014-10-28 18:54 ` [PATCH 07/16] aarch64: Treat void return as not passed in registers Richard Henderson
2014-10-28 18:54 ` Richard Henderson [this message]
2014-10-28 18:54 ` [PATCH 13/16] aarch64: Remove aarch64_flags Richard Henderson
2014-10-28 18:54 ` [PATCH 09/16] aarch64: Merge prep_args with ffi_call Richard Henderson
2014-10-28 18:54 ` [PATCH 16/16] aarch64: Add support for Go closures Richard Henderson
2014-10-28 18:54 ` [PATCH 02/16] aarch64: Improve is_hfa Richard Henderson
2014-10-28 18:54 ` [PATCH 06/16] aarch64: Use correct return registers Richard Henderson
2014-11-10 10:12 ` [PATCH 00/16] Go closures for aarch64 James Greenhalgh
2014-11-10 13:22   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1414522393-19169-6-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=libffi-discuss@sourceware.org \
    --cc=rth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).