public inbox for libffi-discuss@sourceware.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: libffi-discuss@sourceware.org
Cc: davem@davemloft.net
Subject: [PATCH 4/8] sparc: Preprocess float point struct return
Date: Tue, 28 Oct 2014 19:46:00 -0000	[thread overview]
Message-ID: <1414525555-21256-5-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1414525555-21256-1-git-send-email-rth@twiddle.net>

We can eliminate recursion and speed up structure return
by flattening a nested structure tree into a bitmask.
---
 src/sparc/ffi64.c    | 126 ++++++++++++++++++++++++++++++++++++++++-----------
 src/sparc/internal.h |   2 +
 src/sparc/v9.S       |  11 ++---
 3 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
index 7ed928d..65ae438 100644
--- a/src/sparc/ffi64.c
+++ b/src/sparc/ffi64.c
@@ -42,41 +42,103 @@
 #endif
 
 #ifdef SPARC64
-/* Perform machine dependent cif processing */
 
-int FFI_HIDDEN
-ffi_v9_layout_struct (ffi_type *arg, int off, void *d, void *si, void *sf)
+/* Flatten the contents of a structure to the parts that are passed in
+   floating point registers.  The return is a bit mask wherein bit N
+   set means bytes [4*N, 4*N+3] are passed in %fN.
+
+   We encode both the (running) size (maximum 32) and mask (maximum 255)
+   into one integer.  The size is placed in the low byte, so that align
+   and addition work correctly.  The mask is placed in the second byte.  */
+
+static int
+ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
 {
   ffi_type **elts, *t;
 
-  for (elts = arg->elements; (t = *elts) != NULL; elts++)
+  for (elts = struct_type->elements; (t = *elts) != NULL; elts++)
     {
       size_t z = t->size;
-      void *src = si;
+      int o, m;
 
-      off = ALIGN(off, t->alignment);
+      size_mask = ALIGN(size_mask, t->alignment);
       switch (t->type)
 	{
 	case FFI_TYPE_STRUCT:
-	  off = ffi_v9_layout_struct(t, off, d, si, sf);
-	  off = ALIGN(off, FFI_SIZEOF_ARG);
+	  size_mask = ffi_struct_float_mask (t, size_mask);
+	  size_mask = ALIGN(size_mask, FFI_SIZEOF_ARG);
 	  continue;
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
 	case FFI_TYPE_LONGDOUBLE:
-	  /* Note that closures start with the argument offset,
-	     so that we know when to stop looking at fp regs.  */
-	  if (off < 128)
-	    src = sf;
+	  m = (1 << (z / 4)) - 1;	/* compute mask for type */
+	  o = (size_mask >> 2) & 0x3f;	/* extract word offset */
+	  size_mask |= m << (o + 8);	/* insert mask into place */
 	  break;
 	}
-      memcpy(d + off, src + off, z);
-      off += z;
+      size_mask += z;
+    }
+
+  size_mask = ALIGN(size_mask, struct_type->alignment);
+  FFI_ASSERT ((size_mask & 0xff) == struct_type->size);
+
+  return size_mask;
+}
+
+/* Merge floating point data into integer data.  If the structure is
+   entirely floating point, simply return a pointer to the fp data.  */
+
+static void *
+ffi_struct_float_merge (int size_mask, void *vi, void *vf)
+{
+  int size = size_mask & 0xff;
+  int mask = size_mask >> 8;
+  int n = size >> 2;
+
+  if (mask == 0)
+    return vi;
+  else if (mask == (1 << n) - 1)
+    return vf;
+  else
+    {
+      unsigned int *wi = vi, *wf = vf;
+      int i;
+
+      for (i = 0; i < n; ++i)
+	if ((mask >> i) & 1)
+	  wi[i] = wf[i];
+
+      return vi;
     }
+}
+
+/* Similar, but place the data into VD in the end.  */
 
-  return off;
+void FFI_HIDDEN
+ffi_struct_float_copy (int size_mask, void *vd, void *vi, void *vf)
+{
+  int size = size_mask & 0xff;
+  int mask = size_mask >> 8;
+  int n = size >> 2;
+
+  if (mask == 0)
+    ;
+  else if (mask == (1 << n) - 1)
+    vi = vf;
+  else
+    {
+      unsigned int *wd = vd, *wi = vi, *wf = vf;
+      int i;
+
+      for (i = 0; i < n; ++i)
+	wd[i] = ((mask >> i) & 1 ? wf : wi)[i];
+      return;
+    }
+  memcpy (vd, vi, size);
 }
 
+/* Perform machine dependent cif processing */
+
 ffi_status FFI_HIDDEN
 ffi_prep_cif_machdep(ffi_cif *cif)
 {
@@ -108,7 +170,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 	  bytes = 8;
 	}
       else
-	flags = SPARC_RET_STRUCT;
+	{
+	  flags = ffi_struct_float_mask (rtype, 0) << SPARC_FLTMASK_SHIFT;
+	  flags |= SPARC_RET_STRUCT;
+	}
       break;
 
     case FFI_TYPE_SINT8:
@@ -343,7 +408,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
   ffi_cif *cif;
   ffi_type **arg_types;
   void **avalue;
-  int i, argn, nargs, flags;
+  int i, argn, argx, nargs, flags;
 
   cif = closure->cif;
   arg_types = cif->arg_types;
@@ -364,12 +429,13 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
     argn = 0;
 
   /* Grab the addresses of the arguments from the stack frame.  */
-  for (i = 0; i < nargs; i++)
+  for (i = 0; i < nargs; i++, argn = argx)
     {
       ffi_type *ty = arg_types[i];
-      void *a = &gpr[argn++];
+      void *a = &gpr[argn];
       size_t z;
 
+      argx = argn + 1;
       switch (ty->type)
 	{
 	case FFI_TYPE_STRUCT:
@@ -378,25 +444,31 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
 	    a = *(void **)a;
 	  else
 	    {
-	      if (--argn < 16)
-	        ffi_v9_layout_struct(arg_types[i], 8*argn, gpr, gpr, fpr);
-	      argn += ALIGN (z, 8) / 8;
+	      argx = argn + ALIGN (z, 8) / 8;
+	      if (argn < 16)
+		{
+		  int size_mask = ffi_struct_float_mask (ty, 0);
+		  int argn_mask = (0xffff00 >> argn) & 0xff00;
+
+		  /* Eliminate fp registers off the end.  */
+		  size_mask = (size_mask & 0xff) | (size_mask & argn_mask);
+		  a = ffi_struct_float_merge (size_mask, gpr+argn, fpr+argn);
+		}
 	    }
 	  break;
 
 	case FFI_TYPE_LONGDOUBLE:
-	  if (--argn & 1)
-	    argn++;
+	  argn = ALIGN (argn, 2);
 	  a = (argn < 16 ? fpr : gpr) + argn;
-	  argn += 2;
+	  argx = argn + 2;
 	  break;
 	case FFI_TYPE_DOUBLE:
 	  if (argn <= 16)
-	    a = fpr + argn - 1;
+	    a = fpr + argn;
 	  break;
 	case FFI_TYPE_FLOAT:
 	  if (argn <= 16)
-	    a = fpr + argn - 1;
+	    a = fpr + argn;
 	  a += 4;
 	  break;
 
diff --git a/src/sparc/internal.h b/src/sparc/internal.h
index df7c305..3018928 100644
--- a/src/sparc/internal.h
+++ b/src/sparc/internal.h
@@ -16,3 +16,5 @@
 #define SPARC_FLAG_RET_MASK	15
 #define SPARC_FLAG_RET_IN_MEM	32
 #define SPARC_FLAG_FP_ARGS	64
+
+#define SPARC_FLTMASK_SHIFT	8
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index e2fe036..3d91f2d 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -177,12 +177,11 @@ E 15
 	std	%f6, [%l2+56]
 
 	! Copy the structure into place.
-	ldx	[%i0+16], %o0			! load rtype from cif
-	mov	0, %o1				! load off
-	mov	%i2, %o2			! load dst
-	mov	%l2, %o3			! load src_int
-	call	C(ffi_v9_layout_struct)
-	 add	%l2, 32, %o4			! load src_fp
+	srl	%l0, SPARC_FLTMASK_SHIFT, %o0	! load size_mask
+	mov	%i2, %o1			! load dst
+	mov	%l2, %o2			! load src_gp
+	call	C(ffi_struct_float_copy)
+	 add	%l2, 32, %o3			! load src_fp
 
 	return	%i7+8
 	 nop
-- 
1.9.3

  parent reply	other threads:[~2014-10-28 19:46 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
2014-10-28 19:46 ` [PATCH 3/8] sparc: Rewrite everything Richard Henderson
2014-10-29 18:10   ` David Miller
2014-10-29 20:01     ` Richard Henderson
2014-10-29 20:11       ` David Miller
2014-10-29 20:44         ` Richard Henderson
2014-10-30  4:52           ` David Miller
2014-10-28 19:46 ` [PATCH 1/8] sparc: Eliminate long double ifdefs Richard Henderson
2014-10-28 19:46 ` Richard Henderson [this message]
2014-10-28 19:46 ` [PATCH 2/8] sparc: Tidy up symbols Richard Henderson
2014-10-28 19:46 ` [PATCH 8/8] sparc: Re-add abi compliant structure support Richard Henderson
2014-10-28 19:46 ` [PATCH 5/8] sparc: Handle more cases of structure return directly Richard Henderson
2014-10-28 19:46 ` [PATCH 7/8] sparc: Add support for Go closures Richard Henderson
2014-10-28 19:46 ` [PATCH 6/8] sparc: Add support for complex types Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1414525555-21256-5-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=davem@davemloft.net \
    --cc=libffi-discuss@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).