public inbox for libffi-discuss@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/8] sparc: Eliminate long double ifdefs
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
                   ` (2 preceding siblings ...)
  2014-10-28 19:46 ` [PATCH 2/8] sparc: Tidy up symbols Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-28 19:46 ` [PATCH 4/8] sparc: Preprocess float point struct return Richard Henderson
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

---
 src/sparc/ffi.c | 46 ++++++++++++++++------------------------------
 1 file changed, 16 insertions(+), 30 deletions(-)

diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 9f0fded..c2a0d14 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -27,9 +27,19 @@
 
 #include <ffi.h>
 #include <ffi_common.h>
-
 #include <stdlib.h>
 
+/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
+   all further uses in this file will refer to the 128-bit type.  */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
+#endif
+
 
 /* ffi_prep_args is called by the assembly routine once stack space
    has been allocated for the function's arguments */
@@ -72,10 +82,7 @@ void ffi_prep_args_v8(char *stack, extended_cif *ecif)
       size_t z;
 
 	  if ((*p_arg)->type == FFI_TYPE_STRUCT
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	      || (*p_arg)->type == FFI_TYPE_LONGDOUBLE
-#endif
-	      )
+	      || (*p_arg)->type == FFI_TYPE_LONGDOUBLE)
 	    {
 	      *(unsigned int *) argp = (unsigned long)(* p_argv);
 	      z = sizeof(int);
@@ -176,9 +183,7 @@ int ffi_prep_args_v9(char *stack, extended_cif *ecif)
 	  /* FALLTHROUGH */
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 	case FFI_TYPE_LONGDOUBLE:
-#endif
 	  ret = 1; /* We should promote into FP regs as well as integer.  */
 	  break;
 	}
@@ -296,9 +301,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
     case FFI_TYPE_VOID:
     case FFI_TYPE_FLOAT:
     case FFI_TYPE_DOUBLE:
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     case FFI_TYPE_LONGDOUBLE:
-#endif
       cif->flags = cif->rtype->type;
       break;
 
@@ -351,9 +354,7 @@ int ffi_v9_layout_struct(ffi_type *arg, int off, char *ret, char *intg, char *fl
 	  break;
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 	case FFI_TYPE_LONGDOUBLE:
-#endif
 	  memmove(ret + off, flt + off, (*ptr)->size);
 	  off += (*ptr)->size;
 	  break;
@@ -412,10 +413,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
       FFI_ASSERT(0);
 #else
       if (rvalue && (cif->rtype->type == FFI_TYPE_STRUCT
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	  || cif->flags == FFI_TYPE_LONGDOUBLE
-#endif
-	  ))
+	  || cif->flags == FFI_TYPE_LONGDOUBLE))
 	{
 	  /* For v8, we need an "unimp" with size of returning struct */
 	  /* behind "call", so we alloc some executable space for it. */
@@ -551,11 +549,7 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure,
 
   /* Copy the caller's structure return address so that the closure
      returns the data directly to the caller.  */
-  if (cif->flags == FFI_TYPE_STRUCT
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE  
-      || cif->flags == FFI_TYPE_LONGDOUBLE
-#endif
-     )
+  if (cif->flags == FFI_TYPE_STRUCT || cif->flags == FFI_TYPE_LONGDOUBLE)
     rvalue = (void *) gpr[0];
 
   /* Always skip the structure return address.  */
@@ -565,10 +559,7 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure,
   for (i = 0; i < cif->nargs; i++)
     {
       if (arg_types[i]->type == FFI_TYPE_STRUCT
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	  || arg_types[i]->type == FFI_TYPE_LONGDOUBLE
-#endif
-         )
+	  || arg_types[i]->type == FFI_TYPE_LONGDOUBLE)
 	{
 	  /* Straight copy of invisible reference.  */
 	  avalue[i] = (void *)gpr[argn++];
@@ -656,17 +647,12 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure,
 	  argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
 
 	  /* Align on a 16-byte boundary.  */
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 	  if (arg_types[i]->type == FFI_TYPE_LONGDOUBLE && (argn % 2) != 0)
 	    argn++;
-#endif
 	  if (i < fp_slot_max
 	      && (arg_types[i]->type == FFI_TYPE_FLOAT
 		  || arg_types[i]->type == FFI_TYPE_DOUBLE
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-		  || arg_types[i]->type == FFI_TYPE_LONGDOUBLE
-#endif
-		  ))
+		  || arg_types[i]->type == FFI_TYPE_LONGDOUBLE))
 	    avalue[i] = ((char *) &fpr[argn]) - arg_types[i]->size;
 	  else
 	    avalue[i] = ((char *) &gpr[argn]) - arg_types[i]->size;
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 8/8] sparc: Re-add abi compliant structure support
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-28 19:46 ` [PATCH 3/8] sparc: Rewrite everything Richard Henderson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

The original code, removed in the "rewrite" patch, was incorrect for
large structures, and required dynamic allocation of a trampoline on
every ffi_call.

Instead, allocate a 4096-entry table of all possible structure returns,
one entry for each value of the 12-bit size field in the unimp.  The
table is 80k (4096 entries of 20 bytes each), but is read-only and
demand-paged, which ought to be better than allocating the trampoline.

This is difficult to test with gcc.  One can only use -O0 at present.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63668.
---
 src/sparc/ffi.c      | 19 +++++++++++++------
 src/sparc/ffi64.c    |  2 +-
 src/sparc/internal.h |  2 +-
 src/sparc/v8.S       | 29 ++++++++++++++++++++++-------
 src/sparc/v9.S       |  2 +-
 5 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 19c3586..d5212d8 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -66,7 +66,8 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       break;
     case FFI_TYPE_LONGDOUBLE:
     case FFI_TYPE_STRUCT:
-      flags = SPARC_RET_STRUCT;
+      flags = (rtype->size & 0xfff) << SPARC_SIZEMASK_SHIFT;
+      flags |= SPARC_RET_STRUCT;
       break;
     case FFI_TYPE_SINT8:
       flags = SPARC_RET_SINT8;
@@ -187,7 +188,7 @@ ffi_prep_args_v8(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
 
   if (rvalue == NULL)
     {
-      if (flags == SPARC_RET_STRUCT)
+      if ((flags & SPARC_FLAG_RET_MASK) == SPARC_RET_STRUCT)
 	{
 	  /* Since we pass the pointer to the callee, we need a value.
 	     We allowed for this space in ffi_call, before ffi_call_v8
@@ -290,7 +291,8 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 
   /* If we've not got a return value, we need to create one if we've
      got to pass the return value to the callee.  Otherwise ignore it.  */
-  if (rvalue == NULL && cif->flags == SPARC_RET_STRUCT)
+  if (rvalue == NULL
+      && (cif->flags & SPARC_FLAG_RET_MASK) == SPARC_RET_STRUCT)
     bytes += ALIGN (cif->rtype->size, 8);
 
   ffi_call_v8(cif, fn, rvalue, avalue, -bytes, closure);
@@ -382,9 +384,14 @@ ffi_closure_sparc_inner_v8(ffi_cif *cif,
   avalue = alloca(nargs * sizeof(void *));
 
   /* Copy the caller's structure return address so that the closure
-     returns the data directly to the caller.  */
-  if (flags == SPARC_RET_STRUCT)
-    rvalue = (void *)*argp;
+     returns the data directly to the caller.  Also install it so we
+     can return the address in %o0.  */
+  if ((flags & SPARC_FLAG_RET_MASK) == SPARC_RET_STRUCT)
+    {
+      void *new_rvalue = (void *)*argp;
+      *(void **)rvalue = new_rvalue;
+      rvalue = new_rvalue;
+    }
 
   /* Always skip the structure return address.  */
   argp++;
diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
index 02f3d75..a4e41d2 100644
--- a/src/sparc/ffi64.c
+++ b/src/sparc/ffi64.c
@@ -197,7 +197,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 	  int all_mask = (1 << word_size) - 1;
 	  int fp_mask = size_mask >> 8;
 
-	  flags = (size_mask << SPARC_FLTMASK_SHIFT) | SPARC_RET_STRUCT;
+	  flags = (size_mask << SPARC_SIZEMASK_SHIFT) | SPARC_RET_STRUCT;
 
 	  /* For special cases of all-int or all-fp, we can return
 	     the value directly without popping through a struct copy.  */
diff --git a/src/sparc/internal.h b/src/sparc/internal.h
index f9387d4..0a66472 100644
--- a/src/sparc/internal.h
+++ b/src/sparc/internal.h
@@ -23,4 +23,4 @@
 #define SPARC_FLAG_RET_IN_MEM	32
 #define SPARC_FLAG_FP_ARGS	64
 
-#define SPARC_FLTMASK_SHIFT	8
+#define SPARC_SIZEMASK_SHIFT	8
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index 66cf76f..3a811ef 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -91,6 +91,7 @@ C(ffi_call_v8):
 
 	add	%sp, 32, %sp		! deallocate prep frame
 	and	%o0, SPARC_FLAG_RET_MASK, %l0	! save return type
+	srl	%o0, SPARC_SIZEMASK_SHIFT, %l1	! save return size
 	ld	[%sp+64+4], %o0		! load all argument registers
 	ld	[%sp+64+8], %o1
 	ld	[%sp+64+12], %o2
@@ -182,22 +183,35 @@ E SPARC_RET_F_1
 	ret
 	 restore
 
-	! Struct returning functions expect and skip the unimp here.
 	.align	8
-8:	call	%i1
-	 mov	%i5, %g2		! load static chain
-	unimp	4
+9:	sth	%o0, [%i2]
 	ret
 	 restore
-
 	.align	8
-9:	sth	%o0, [%i2]
+10:	stb	%o0, [%i2]
 	ret
 	 restore
+
+	! Struct returning functions expect and skip the unimp here.
+	! To make it worse, conforming callees examine the unimp and
+	! make sure the low 12 bits of the unimp match the size of
+	! the struct being returned.
 	.align	8
-10:	stb	%o0, [%i2]
+8:	call	1f				! load pc in %o7
+	 sll	%l1, 2, %l0			! size * 4
+1:	sll	%l1, 4, %l1			! size * 16
+	add	%l0, %l1, %l0			! size * 20
+	add	%o7, %l0, %o7			! o7 = 8b + size*20
+	jmp	%o7+(2f-8b)
+	 mov	%i5, %g2			! load static chain
+2:
+.rept	0x1000
+	call	%i1
+	 nop
+	unimp	(. - 2b) / 20
 	ret
 	 restore
+.endr
 
 	cfi_endproc
 	.size	C(ffi_call_v8),. - C(ffi_call_v8)
@@ -275,6 +289,7 @@ E SPARC_RET_VOID
 	ret
 	 restore
 E SPARC_RET_STRUCT
+	ld	[%i2], %i0
 	jmp	%i7+12
 	 restore
 E SPARC_RET_UINT8
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index d848f9a..52732d3 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -188,7 +188,7 @@ E SPARC_RET_F_1
 	std	%f6, [%l2+56]
 
 	! Copy the structure into place.
-	srl	%l0, SPARC_FLTMASK_SHIFT, %o0	! load size_mask
+	srl	%l0, SPARC_SIZEMASK_SHIFT, %o0	! load size_mask
 	mov	%i2, %o1			! load dst
 	mov	%l2, %o2			! load src_gp
 	call	C(ffi_struct_float_copy)
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 6/8] sparc: Add support for complex types
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
                   ` (5 preceding siblings ...)
  2014-10-28 19:46 ` [PATCH 7/8] sparc: Add support for Go closures Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-28 19:46 ` [PATCH 5/8] sparc: Handle more cases of structure return directly Richard Henderson
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

---
 src/sparc/ffi.c                     |  92 +++++++++++++++++++++++++--
 src/sparc/ffi64.c                   |  48 ++++++++++++---
 src/sparc/ffitarget.h               |   1 +
 src/sparc/internal.h                |  10 +--
 src/sparc/v8.S                      | 120 ++++++++++++++++++++++++------------
 src/sparc/v9.S                      |   8 +--
 testsuite/libffi.call/call.exp      |  12 ++--
 testsuite/libffi.call/complex_int.c |   4 +-
 8 files changed, 223 insertions(+), 72 deletions(-)

diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 1b8f48e..d319c03 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -90,6 +90,40 @@ ffi_prep_cif_machdep(ffi_cif *cif)
     case FFI_TYPE_UINT64:
       flags = SPARC_RET_INT64;
       break;
+    case FFI_TYPE_COMPLEX:
+      rtt = rtype->elements[0]->type;
+      switch (rtt)
+	{
+	case FFI_TYPE_FLOAT:
+	  flags = SPARC_RET_F_2;
+	  break;
+	case FFI_TYPE_DOUBLE:
+	  flags = SPARC_RET_F_4;
+	  break;
+	case FFI_TYPE_LONGDOUBLE:
+	  flags = SPARC_RET_F_8;
+	  break;
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	  flags = SPARC_RET_INT128;
+	  break;
+	case FFI_TYPE_INT:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	  flags = SPARC_RET_INT64;
+	  break;
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+	  flags = SP_V8_RET_CPLX16;
+	  break;
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+	  flags = SP_V8_RET_CPLX8;
+	  break;
+	default:
+	  abort();
+	}
+      break;
     default:
       abort();
     }
@@ -102,11 +136,24 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       size_t z = ty->size;
       int tt = ty->type;
 
-      if (tt == FFI_TYPE_STRUCT || tt == FFI_TYPE_LONGDOUBLE)
-	/* Passed by reference.  */
-	z = 4;
-      else
-	z = ALIGN(z, 4);
+      switch (tt)
+	{
+	case FFI_TYPE_STRUCT:
+	case FFI_TYPE_LONGDOUBLE:
+	by_reference:
+	  /* Passed by reference.  */
+	  z = 4;
+	  break;
+
+	case FFI_TYPE_COMPLEX:
+	  tt = ty->elements[0]->type;
+	  if (tt == FFI_TYPE_FLOAT || z > 8)
+	    goto by_reference;
+	  /* FALLTHRU */
+
+	default:
+	  z = ALIGN(z, 4);
+	}
       bytes += z;
     }
 
@@ -169,11 +216,14 @@ ffi_prep_args_v8(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
     {
       ffi_type *ty = p_arg[i];
       void *a = avalue[i];
+      int tt = ty->type;
+      size_t z;
 
-      switch (ty->type)
+      switch (tt)
 	{
 	case FFI_TYPE_STRUCT:
 	case FFI_TYPE_LONGDOUBLE:
+	by_reference:
 	  *argp++ = (unsigned long)a;
 	  break;
 
@@ -205,6 +255,23 @@ ffi_prep_args_v8(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
 	  *argp++ = *(SINT16 *)a;
 	  break;
 
+        case FFI_TYPE_COMPLEX:
+	  tt = ty->elements[0]->type;
+	  z = ty->size;
+	  if (tt == FFI_TYPE_FLOAT || z > 8)
+	    goto by_reference;
+	  if (z < 4)
+	    {
+	      memcpy((char *)argp + 4 - z, a, z);
+	      argp++;
+	    }
+	  else
+	    {
+	      memcpy(argp, a, z);
+	      argp += z / 4;
+	    }
+	  break;
+
 	default:
 	  abort();
 	}
@@ -299,11 +366,13 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure, void *rvalue,
       ffi_type *ty = arg_types[i];
       int tt = ty->type;
       void *a = argp;
+      size_t z;
 
       switch (tt)
 	{
 	case FFI_TYPE_STRUCT:
 	case FFI_TYPE_LONGDOUBLE:
+	by_reference:
 	  /* Straight copy of invisible reference.  */
 	  a = (void *)*argp;
 	  break;
@@ -336,6 +405,17 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure, void *rvalue,
 	  a += 3;
 	  break;
 
+        case FFI_TYPE_COMPLEX:
+	  tt = ty->elements[0]->type;
+	  z = ty->size;
+	  if (tt == FFI_TYPE_FLOAT || z > 8)
+	    goto by_reference;
+	  if (z < 4)
+	    a += 4 - z;
+	  else if (z > 4)
+	    argp++;
+	  break;
+
 	default:
 	  abort();
 	}
diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
index ab3ed09..1e2d3f4 100644
--- a/src/sparc/ffi64.c
+++ b/src/sparc/ffi64.c
@@ -52,14 +52,28 @@
    and addition work correctly.  The mask is placed in the second byte.  */
 
 static int
-ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
+ffi_struct_float_mask (ffi_type *outer_type, int size_mask)
 {
-  ffi_type **elts, *t;
+  ffi_type **elts;
+  ffi_type *t;
 
-  for (elts = struct_type->elements; (t = *elts) != NULL; elts++)
+  if (outer_type->type == FFI_TYPE_COMPLEX)
+    {
+      int m = 0, tt = outer_type->elements[0]->type;
+      size_t z = outer_type->size;
+
+      if (tt == FFI_TYPE_FLOAT
+	  || tt == FFI_TYPE_DOUBLE
+	  || tt == FFI_TYPE_LONGDOUBLE)
+        m = (1 << (z / 4)) - 1;
+      return (m << 8) | z;
+    }
+  FFI_ASSERT (outer_type->type == FFI_TYPE_STRUCT);
+
+  for (elts = outer_type->elements; (t = *elts) != NULL; elts++)
     {
       size_t z = t->size;
-      int o, m;
+      int o, m, tt;
 
       size_mask = ALIGN(size_mask, t->alignment);
       switch (t->type)
@@ -67,6 +81,13 @@ ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
 	case FFI_TYPE_STRUCT:
 	  size_mask = ffi_struct_float_mask (t, size_mask);
 	  continue;
+	case FFI_TYPE_COMPLEX:
+	  tt = t->elements[0]->type;
+	  if (tt != FFI_TYPE_FLOAT
+	      && tt != FFI_TYPE_DOUBLE
+	      && tt != FFI_TYPE_LONGDOUBLE)
+	    break;
+	  /* FALLTHRU */
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
 	case FFI_TYPE_LONGDOUBLE:
@@ -78,8 +99,8 @@ ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
       size_mask += z;
     }
 
-  size_mask = ALIGN(size_mask, struct_type->alignment);
-  FFI_ASSERT ((size_mask & 0xff) == struct_type->size);
+  size_mask = ALIGN(size_mask, outer_type->alignment);
+  FFI_ASSERT ((size_mask & 0xff) == outer_type->size);
 
   return size_mask;
 }
@@ -162,6 +183,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       flags = SPARC_RET_F_4;
       break;
 
+    case FFI_TYPE_COMPLEX:
     case FFI_TYPE_STRUCT:
       if (rtype->size > 32)
 	{
@@ -194,7 +216,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 	      {
 	      case 1: flags = SPARC_RET_F_1; break;
 	      case 2: flags = SPARC_RET_F_2; break;
-	      case 3: flags = SPARC_RET_F_3; break;
+	      case 3: flags = SP_V9_RET_F_3; break;
 	      case 4: flags = SPARC_RET_F_4; break;
 	      /* 5 word structures skipped; handled via RET_STRUCT.  */
 	      case 6: flags = SPARC_RET_F_6; break;
@@ -218,7 +240,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       break;
     case FFI_TYPE_INT:
     case FFI_TYPE_SINT32:
-      flags = SPARC_RET_SINT32;
+      flags = SP_V9_RET_SINT32;
       break;
     case FFI_TYPE_UINT32:
       flags = SPARC_RET_UINT32;
@@ -242,6 +264,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 
       switch (ty->type)
 	{
+	case FFI_TYPE_COMPLEX:
 	case FFI_TYPE_STRUCT:
 	  /* Large structs passed by reference.  */
 	  if (z > 16)
@@ -249,7 +272,12 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 	      a = z = 8;
 	      break;
 	    }
-	  /* ??? FALLTHRU -- check for fp members in the struct.  */
+	  /* Small structs may be passed in integer or fp regs or both.  */
+	  if (bytes >= 16*8)
+	    break;
+	  if ((ffi_struct_float_mask (ty, 0) & 0xff00) == 0)
+	    break;
+	  /* FALLTHRU */
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
 	case FFI_TYPE_LONGDOUBLE:
@@ -351,6 +379,7 @@ ffi_prep_args_v9(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
 	  break;
 
 	case FFI_TYPE_LONGDOUBLE:
+	case FFI_TYPE_COMPLEX:
 	case FFI_TYPE_STRUCT:
 	  z = ty->size;
 	  if (z > 16)
@@ -466,6 +495,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
       argx = argn + 1;
       switch (ty->type)
 	{
+	case FFI_TYPE_COMPLEX:
 	case FFI_TYPE_STRUCT:
 	  z = ty->size;
 	  if (z > 16)
diff --git a/src/sparc/ffitarget.h b/src/sparc/ffitarget.h
index ff4dc0b..f70c937 100644
--- a/src/sparc/ffitarget.h
+++ b/src/sparc/ffitarget.h
@@ -58,6 +58,7 @@ typedef enum ffi_abi {
 #endif
 
 #define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
+#define FFI_TARGET_HAS_COMPLEX_TYPE
 
 /* ---- Definitions for closures ----------------------------------------- */
 
diff --git a/src/sparc/internal.h b/src/sparc/internal.h
index b4494d9..f9387d4 100644
--- a/src/sparc/internal.h
+++ b/src/sparc/internal.h
@@ -5,16 +5,18 @@
 #define SPARC_RET_UINT16	4
 #define SPARC_RET_SINT16	5
 #define SPARC_RET_UINT32	6
-#define SPARC_RET_SINT32	7	/* v9 only */
+#define SP_V9_RET_SINT32	7	/* v9 only */
+#define SP_V8_RET_CPLX16	7	/* v8 only */
 #define SPARC_RET_INT64		8
-#define SPARC_RET_INT128	9	/* v9 only */
+#define SPARC_RET_INT128	9
 
 /* Note that F_7 is missing, and is handled by SPARC_RET_STRUCT.  */
 #define SPARC_RET_F_8		10
-#define SPARC_RET_F_6		11	/* v9 only */
+#define SPARC_RET_F_6		11
 #define SPARC_RET_F_4		12
 #define SPARC_RET_F_2		13
-#define SPARC_RET_F_3		14	/* v9 only */
+#define SP_V9_RET_F_3		14	/* v9 only */
+#define SP_V8_RET_CPLX8		14	/* v8 only */
 #define SPARC_RET_F_1		15
 
 #define SPARC_FLAG_RET_MASK	15
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index 4adcf6d..e76d813 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -1,8 +1,8 @@
 /* -----------------------------------------------------------------------
    v8.S - Copyright (c) 2013  The Written Word, Inc.
 	  Copyright (c) 1996, 1997, 2003, 2004, 2008  Red Hat, Inc.
-   
-   SPARC Foreign Function Interface 
+
+   SPARC Foreign Function Interface
 
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
@@ -25,7 +25,7 @@
    DEALINGS IN THE SOFTWARE.
    ----------------------------------------------------------------------- */
 
-#define LIBFFI_ASM	
+#define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_cfi.h>
@@ -45,7 +45,7 @@
 
 	.text
 
-#ifndef __GNUC__	
+#ifndef __GNUC__
         .align 8
 	.globl	C(ffi_flush_icache)
 	.type	C(ffi_flush_icache),@function
@@ -75,7 +75,7 @@ C(ffi_flush_icache):
 	.globl	C(ffi_call_v8)
 	.type	C(ffi_call_v8),@function
 	FFI_HIDDEN(C(ffi_call_v8))
-	
+
 C(ffi_call_v8):
 	cfi_startproc
 	! Allocate a stack frame sized by ffi_call.
@@ -139,26 +139,44 @@ E SPARC_RET_UINT32
 7:	st	%o0, [%i2]
 	ret
 	 restore
-E SPARC_RET_SINT32
-	unimp
+E SP_V8_RET_CPLX16
+	sth	%o0, [%i2+2]
+	b	9f
+	 srl	%o0, 16, %o0
 E SPARC_RET_INT64
-	std	%o0, [%i2]
+	st	%o0, [%i2]
+	st	%o1, [%i2+4]
 	ret
 	 restore
 E SPARC_RET_INT128
-	unimp
+	std	%o0, [%i2]
+	std	%o2, [%i2+8]
+	ret
+	 restore
 E SPARC_RET_F_8
-	unimp
+	st	%f7, [%i2+7*4]
+	nop
+	st	%f6, [%i2+6*4]
+	nop
 E SPARC_RET_F_6
-	unimp
+	st	%f5, [%i2+5*4]
+	nop
+	st	%f4, [%i2+4*4]
+	nop
 E SPARC_RET_F_4
-	unimp
+	st	%f3, [%i2+3*4]
+	nop
+	st	%f2, [%i2+2*4]
+	nop
 E SPARC_RET_F_2
-	std	%f0, [%i2]
+	st	%f1, [%i2+4]
+	st	%f0, [%i2]
 	ret
 	 restore
-E SPARC_RET_F_3
-	unimp
+E SP_V8_RET_CPLX8
+	stb	%o0, [%i2+1]
+	b	10f
+	 srl	%o0, 8, %o0
 E SPARC_RET_F_1
 	st	%f0, [%i2]
 	ret
@@ -172,16 +190,22 @@ E SPARC_RET_F_1
 	ret
 	 restore
 
+	.align	8
+9:	sth	%o0, [%i2]
+	ret
+	 restore
+	.align	8
+10:	stb	%o0, [%i2]
+	ret
+	 restore
+
 	cfi_endproc
 	.size	C(ffi_call_v8),. - C(ffi_call_v8)
 
 
-#undef STACKFRAME
-#define	STACKFRAME	104	/* 16*4 register window +
-				   1*4 struct return +	
-				   6*4 args backing store +
-				   2*4 return storage +
-				   1*4 alignment */
+/* 16*4 register window + 1*4 struct return + 6*4 args backing store
+   + 8*4 return storage + 1*4 alignment.  */
+#define	STACKFRAME	(16*4 + 4 + 6*4 + 8*4 + 4)
 
 /* ffi_closure_v8(...)
 
@@ -211,7 +235,7 @@ C(ffi_closure_v8):
 
 	! Call ffi_closure_sparc_inner to do the bulk of the work.
 	mov	%g2, %o0
-	add	%fp, -8, %o1
+	add	%fp, -8*4, %o1
 	call	ffi_closure_sparc_inner_v8
 	 add	%fp,  64, %o2
 
@@ -220,8 +244,8 @@ C(ffi_closure_v8):
 1:	sll	%o0, 4, %o0	! o0 = o0 * 16
 	add	%o7, %o0, %o7	! o7 = 0b + o0*16
 	jmp	%o7+(2f-0b)
-	 nop
-
+	 add	%fp, -8*4, %i2
+	 
 	! Note that each entry is 4 insns, enforced by the E macro.
 	.align	16
 2:
@@ -232,47 +256,63 @@ E SPARC_RET_STRUCT
 	jmp	%i7+12
 	 restore
 E SPARC_RET_UINT8
-	ldub	[%fp-8+3], %i0
+	ldub	[%i2+3], %i0
 	ret
 	 restore
 E SPARC_RET_SINT8
-	ldsb	[%fp-8+3], %i0
+	ldsb	[%i2+3], %i0
 	ret
 	 restore
 E SPARC_RET_UINT16
-	lduh	[%fp-8+2], %i0
+	lduh	[%i2+2], %i0
 	ret
 	 restore
 E SPARC_RET_SINT16
-	ldsh	[%fp-8+2], %i0
+	ldsh	[%i2+2], %i0
 	ret
 	 restore
 E SPARC_RET_UINT32
-	ld	[%fp-8], %i0
+	ld	[%i2], %i0
+	ret
+	 restore
+E SP_V8_RET_CPLX16
+	ld	[%i2], %i0
 	ret
 	 restore
-E SPARC_RET_SINT32
-	unimp
 E SPARC_RET_INT64
-	ldd	[%fp-8], %i0
+	ldd	[%i2], %i0
 	ret
 	 restore
 E SPARC_RET_INT128
-	unimp
+	ldd	[%i2], %i0
+	ldd	[%i2+8], %i2
+	ret
+	 restore
 E SPARC_RET_F_8
-	unimp
+	ld	[%i2+7*4], %f7
+	nop
+	ld	[%i2+6*4], %f6
+	nop
 E SPARC_RET_F_6
-	unimp
+	ld	[%i2+5*4], %f5
+	nop
+	ld	[%i2+4*4], %f4
+	nop
 E SPARC_RET_F_4
-	unimp
+	ld	[%i2+3*4], %f3
+	nop
+	ld	[%i2+2*4], %f2
+	nop
 E SPARC_RET_F_2
-	ldd	[%fp-8], %f0
+	ldd	[%i2], %f0
+	ret
+	 restore
+E SP_V8_RET_CPLX8
+	lduh	[%i2], %i0
 	ret
 	 restore
-E SPARC_RET_F_3
-	unimp
 E SPARC_RET_F_1
-	ld	[%fp-8], %f0
+	ld	[%i2], %f0
 	ret
 	 restore
 
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index d893d2f..5c3f27b 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -137,7 +137,7 @@ E SPARC_RET_UINT32
 	srl	%o0, 0, %i0
 	return	%i7+8
 	 stx	%o0, [%o2]
-E SPARC_RET_SINT32
+E SP_V9_RET_SINT32
 	sra	%o0, 0, %i0
 	return	%i7+8
 	 stx	%o0, [%o2]
@@ -167,7 +167,7 @@ E SPARC_RET_F_4
 E SPARC_RET_F_2
 	return	%i7+8
 	 std	%f0, [%o2]
-E SPARC_RET_F_3
+E SP_V9_RET_F_3
 	st	%f2, [%i2+2*4]
 	nop
 	st	%f1, [%i2+1*4]
@@ -294,7 +294,7 @@ E SPARC_RET_UINT32
 	lduw	[FP-160+4], %i0
 	return	%i7+8
 	 nop
-E SPARC_RET_SINT32
+E SP_V9_RET_SINT32
 	ldsw	[FP-160+4], %i0
 	return	%i7+8
 	 nop
@@ -326,7 +326,7 @@ E SPARC_RET_F_2
 	ldd	[FP-160], %f0
 	return	%i7+8
 	 nop
-E SPARC_RET_F_3
+E SP_V9_RET_F_3
 	ld	[FP-160+2*4], %f2
 	nop
 	ld	[FP-160+1*4], %f1
diff --git a/testsuite/libffi.call/call.exp b/testsuite/libffi.call/call.exp
index 5177f07..f144a65 100644
--- a/testsuite/libffi.call/call.exp
+++ b/testsuite/libffi.call/call.exp
@@ -24,16 +24,14 @@ set ctlist [lsearch -inline -all -glob [lsort [glob -nocomplain -- $srcdir/$subd
 
 run-many-tests $tlist ""
 
-if { ![istarget s390*] } {
-
+# ??? We really should preprocess ffi.h and grep
+# for FFI_TARGET_HAS_COMPLEX_TYPE.
+if { [istarget s390*] || [istarget sparc*] } {
+    run-many-tests $ctlist ""
+} else {
     foreach test $ctlist {
 	unsupported "$test"
     }
-
-} else {
-
-  run-many-tests $ctlist ""
-
 }
 
 dg-finish
diff --git a/testsuite/libffi.call/complex_int.c b/testsuite/libffi.call/complex_int.c
index 4c8e864..bac3190 100644
--- a/testsuite/libffi.call/complex_int.c
+++ b/testsuite/libffi.call/complex_int.c
@@ -12,9 +12,9 @@
 
 _Complex int f_complex(_Complex int c, int x, int *py)
 {
-  c = -(2 * creal (c)) + (cimag (c) + 1)* I;
+  __real__ c = -2 * __real__ c;
+  __imag__ c = __imag__ c + 1;
   *py += x;
-
   return c;
 }
 
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 7/8] sparc: Add support for Go closures
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
                   ` (4 preceding siblings ...)
  2014-10-28 19:46 ` [PATCH 4/8] sparc: Preprocess float point struct return Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-28 19:46 ` [PATCH 6/8] sparc: Add support for complex types Richard Henderson
  2014-10-28 19:46 ` [PATCH 5/8] sparc: Handle more cases of structure return directly Richard Henderson
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

---
 src/sparc/ffi.c       | 45 +++++++++++++++++++++++++++++++++++++--------
 src/sparc/ffi64.c     | 45 +++++++++++++++++++++++++++++++++++++--------
 src/sparc/ffitarget.h |  1 +
 src/sparc/v8.S        | 38 ++++++++++++++++++++++++++++++--------
 src/sparc/v9.S        | 33 ++++++++++++++++++++++++++++-----
 5 files changed, 133 insertions(+), 29 deletions(-)

diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index d319c03..19c3586 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -176,7 +176,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 }
 
 extern void ffi_call_v8(ffi_cif *cif, void (*fn)(void), void *rvalue,
-			void **avalue, size_t bytes) FFI_HIDDEN;
+			void **avalue, size_t bytes, void *closure) FFI_HIDDEN;
 
 int FFI_HIDDEN
 ffi_prep_args_v8(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
@@ -280,8 +280,9 @@ ffi_prep_args_v8(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
   return flags;
 }
 
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	      void **avalue, void *closure)
 {
   size_t bytes = cif->bytes;
 
@@ -292,7 +293,20 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
   if (rvalue == NULL && cif->flags == SPARC_RET_STRUCT)
     bytes += ALIGN (cif->rtype->size, 8);
 
-  ffi_call_v8(cif, fn, rvalue, avalue, -bytes);
+  ffi_call_v8(cif, fn, rvalue, avalue, -bytes, closure);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
 }
 
 #ifdef __GNUC__
@@ -308,6 +322,7 @@ extern void ffi_flush_icache (void *) FFI_HIDDEN;
 #endif
 
 extern void ffi_closure_v8(void) FFI_HIDDEN;
+extern void ffi_go_closure_v8(void) FFI_HIDDEN;
 
 ffi_status
 ffi_prep_closure_loc (ffi_closure *closure,
@@ -337,16 +352,30 @@ ffi_prep_closure_loc (ffi_closure *closure,
   return FFI_OK;
 }
 
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
+		     void (*fun)(ffi_cif*, void*, void**, void*))
+{
+  if (cif->abi != FFI_V8)
+    return FFI_BAD_ABI;
+
+  closure->tramp = ffi_go_closure_v8;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+
 int FFI_HIDDEN
-ffi_closure_sparc_inner_v8(ffi_closure *closure, void *rvalue,
+ffi_closure_sparc_inner_v8(ffi_cif *cif, 
+			   void (*fun)(ffi_cif*, void*, void**, void*),
+			   void *user_data, void *rvalue,
 			   unsigned long *argp)
 {
-  ffi_cif *cif;
   ffi_type **arg_types;
   void **avalue;
   int i, nargs, flags;
 
-  cif = closure->cif;
   arg_types = cif->arg_types;
   nargs = cif->nargs;
   flags = cif->flags;
@@ -424,7 +453,7 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure, void *rvalue,
     }
 
   /* Invoke the closure.  */
-  (closure->fun) (cif, rvalue, avalue, closure->user_data);
+  fun (cif, rvalue, avalue, user_data);
 
   /* Tell ffi_closure_sparc how to perform return type promotions.  */
   return flags;
diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
index 1e2d3f4..02f3d75 100644
--- a/src/sparc/ffi64.c
+++ b/src/sparc/ffi64.c
@@ -305,7 +305,7 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 }
 
 extern void ffi_call_v9(ffi_cif *cif, void (*fn)(void), void *rvalue,
-			void **avalue, size_t bytes) FFI_HIDDEN;
+			void **avalue, size_t bytes, void *closure) FFI_HIDDEN;
 
 /* ffi_prep_args is called by the assembly routine once stack space
    has been allocated for the function's arguments */
@@ -402,8 +402,9 @@ ffi_prep_args_v9(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
   return flags;
 }
 
-void
-ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static void
+ffi_call_int(ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
 {
   size_t bytes = cif->bytes;
 
@@ -412,7 +413,20 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
   if (rvalue == NULL && (cif->flags & SPARC_FLAG_RET_IN_MEM))
     bytes += ALIGN (cif->rtype->size, 16);
 
-  ffi_call_v9(cif, fn, rvalue, avalue, -bytes);
+  ffi_call_v9(cif, fn, rvalue, avalue, -bytes, closure);
+}
+
+void
+ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int(cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go(ffi_cif *cif, void (*fn)(void), void *rvalue,
+	    void **avalue, void *closure)
+{
+  ffi_call_int(cif, fn, rvalue, avalue, closure);
 }
 
 #ifdef __GNUC__
@@ -426,6 +440,7 @@ extern void ffi_flush_icache (void *) FFI_HIDDEN;
 #endif
 
 extern void ffi_closure_v9(void) FFI_HIDDEN;
+extern void ffi_go_closure_v9(void) FFI_HIDDEN;
 
 ffi_status
 ffi_prep_closure_loc (ffi_closure* closure,
@@ -458,16 +473,30 @@ ffi_prep_closure_loc (ffi_closure* closure,
   return FFI_OK;
 }
 
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+		     void (*fun)(ffi_cif*, void*, void**, void*))
+{
+  if (cif->abi != FFI_V9)
+    return FFI_BAD_ABI;
+
+  closure->tramp = ffi_go_closure_v9;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+
 int FFI_HIDDEN
-ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
+ffi_closure_sparc_inner_v9(ffi_cif *cif,
+			   void (*fun)(ffi_cif*, void*, void**, void*),
+			   void *user_data, void *rvalue,
 			   unsigned long *gpr, unsigned long *fpr)
 {
-  ffi_cif *cif;
   ffi_type **arg_types;
   void **avalue;
   int i, argn, argx, nargs, flags;
 
-  cif = closure->cif;
   arg_types = cif->arg_types;
   nargs = cif->nargs;
   flags = cif->flags;
@@ -555,7 +584,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
     }
 
   /* Invoke the closure.  */
-  (closure->fun) (cif, rvalue, avalue, closure->user_data);
+  fun (cif, rvalue, avalue, user_data);
 
   /* Tell ffi_closure_sparc how to perform return type promotions.  */
   return flags;
diff --git a/src/sparc/ffitarget.h b/src/sparc/ffitarget.h
index f70c937..6982903 100644
--- a/src/sparc/ffitarget.h
+++ b/src/sparc/ffitarget.h
@@ -63,6 +63,7 @@ typedef enum ffi_abi {
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
 #define FFI_NATIVE_RAW_API 0
 
 #ifdef SPARC64
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index e76d813..66cf76f 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -102,7 +102,7 @@ C(ffi_call_v8):
 
 	! Call foreign function
 	call	%i1
-	 nop
+	 mov	%i5, %g2		! load static chain
 
 0:	call	1f		! load pc in %o7
 	 sll	%l0, 4, %l0
@@ -185,7 +185,7 @@ E SPARC_RET_F_1
 	! Struct returning functions expect and skip the unimp here.
 	.align	8
 8:	call	%i1
-	 nop
+	 mov	%i5, %g2		! load static chain
 	unimp	4
 	ret
 	 restore
@@ -211,20 +211,43 @@ E SPARC_RET_F_1
 
    Receives the closure argument in %g2.   */
 
+#ifdef HAVE_AS_REGISTER_PSEUDO_OP
+	.register	%g2, #scratch
+#endif
+
+	.align 8
+	.globl	C(ffi_go_closure_v8)
+	.type	C(ffi_go_closure_v8),@function
+	FFI_HIDDEN(C(ffi_go_closure_v8))
+
+C(ffi_go_closure_v8):
+	cfi_startproc
+	save	%sp, -STACKFRAME, %sp
+	cfi_def_cfa_register(%fp)
+	cfi_window_save
+
+	ld	[%g2+4], %o0			! load cif
+	ld	[%g2+8], %o1			! load fun
+	b	0f
+	 mov	%g2, %o2			! load user_data
+	cfi_endproc
+	.size	C(ffi_go_closure_v8), . - C(ffi_go_closure_v8)
+
 	.align 8
 	.globl	C(ffi_closure_v8)
 	.type	C(ffi_closure_v8),@function
 	FFI_HIDDEN(C(ffi_closure_v8))
 
 C(ffi_closure_v8):
-#ifdef HAVE_AS_REGISTER_PSEUDO_OP
-		.register	%g2, #scratch
-#endif
 	cfi_startproc
 	save	%sp, -STACKFRAME, %sp
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
 
+	ld	[%g2+FFI_TRAMPOLINE_SIZE], %o0		! load cif
+	ld	[%g2+FFI_TRAMPOLINE_SIZE+4], %o1	! load fun
+	ld	[%g2+FFI_TRAMPOLINE_SIZE+8], %o2	! load user_data
+0:
 	! Store all of the potential argument registers in va_list format.
 	st	%i0, [%fp+68+0]
 	st	%i1, [%fp+68+4]
@@ -234,10 +257,9 @@ C(ffi_closure_v8):
 	st	%i5, [%fp+68+20]
 
 	! Call ffi_closure_sparc_inner to do the bulk of the work.
-	mov	%g2, %o0
-	add	%fp, -8*4, %o1
+	add	%fp, -8*4, %o3
 	call	ffi_closure_sparc_inner_v8
-	 add	%fp,  64, %o2
+	 add	%fp,  64, %o4
 
 0:	call	1f
 	 and	%o0, SPARC_FLAG_RET_MASK, %o0
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index 5c3f27b..d848f9a 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -94,8 +94,9 @@ C(ffi_call_v9):
 	ldx	[%sp+STACK_BIAS+128+16], %o2
 	ldx	[%sp+STACK_BIAS+128+24], %o3
 	ldx	[%sp+STACK_BIAS+128+32], %o4
+	ldx	[%sp+STACK_BIAS+128+40], %o5
 	call	%i1
-	 ldx	[%sp+STACK_BIAS+128+40], %o5
+	 mov	%i5, %g5			! load static chain
 
 0:	call	1f		! load pc in %o7
 	 and	%l0, SPARC_FLAG_RET_MASK, %l1
@@ -211,6 +212,25 @@ E SPARC_RET_F_1
    Receives the closure argument in %g1.   */
 
 	.align 8
+	.globl	C(ffi_go_closure_v9)
+	.type	C(ffi_go_closure_v9),@function
+	FFI_HIDDEN(C(ffi_go_closure_v9))
+
+C(ffi_go_closure_v9):
+	cfi_startproc
+	save	%sp, -STACKFRAME, %sp
+	cfi_def_cfa_register(%fp)
+	cfi_window_save
+
+	ldx	[%g5+8], %o0
+	ldx	[%g5+16], %o1
+	b	0f
+	 mov	%g5, %o2
+
+	cfi_endproc
+	.size	C(ffi_go_closure_v9), . - C(ffi_go_closure_v9)
+
+	.align 8
 	.globl	C(ffi_closure_v9)
 	.type	C(ffi_closure_v9),@function
 	FFI_HIDDEN(C(ffi_closure_v9))
@@ -221,6 +241,10 @@ C(ffi_closure_v9):
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
 
+	ldx	[%g1+FFI_TRAMPOLINE_SIZE], %o0
+	ldx	[%g1+FFI_TRAMPOLINE_SIZE+8], %o1
+	ldx	[%g1+FFI_TRAMPOLINE_SIZE+16], %o2
+0:
 	! Store all of the potential argument registers in va_list format.
 	stx	%i0, [FP+128+0]
 	stx	%i1, [FP+128+8]
@@ -248,11 +272,10 @@ C(ffi_closure_v9):
 	std     %f30, [FP-8]
 
 	! Call ffi_closure_sparc_inner to do the bulk of the work.
-	mov	%g1, %o0
-	add	%fp, STACK_BIAS-160, %o1
-	add	%fp, STACK_BIAS+128, %o2
+	add	%fp, STACK_BIAS-160, %o3
+	add	%fp, STACK_BIAS+128, %o4
 	call	C(ffi_closure_sparc_inner_v9)
-	 add	%fp, STACK_BIAS-128, %o3
+	 add	%fp, STACK_BIAS-128, %o5
 
 0:	call	1f		! load pc in %o7
 	 and	%o0, SPARC_FLAG_RET_MASK, %o0
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 0/8] Go closures for Sparc
@ 2014-10-28 19:46 Richard Henderson
  2014-10-28 19:46 ` [PATCH 8/8] sparc: Re-add abi compliant structure support Richard Henderson
                   ` (7 more replies)
  0 siblings, 8 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

This patch set splits apart the v8 and v9 abis.  It will never be
possible to swap between them at runtime, so give up pretending.

It then streamlines handling of v9 structures, which is then used
by the addition of support for complex.


r~


Richard Henderson (8):
  sparc: Eliminate long double ifdefs
  sparc: Tidy up symbols
  sparc: Rewrite everything
  sparc: Preprocess float point struct return
  sparc: Handle more cases of structure return directly
  sparc: Add support for complex types
  sparc: Add support for Go closures
  sparc: Re-add abi compliant structure support

 Makefile.am                         |   4 +-
 src/prep_cif.c                      |  11 -
 src/sparc/ffi.c                     | 861 ++++++++++++++----------------------
 src/sparc/ffi64.c                   | 592 +++++++++++++++++++++++++
 src/sparc/ffitarget.h               |  15 +-
 src/sparc/internal.h                |  26 ++
 src/sparc/v8.S                      | 551 ++++++++++++-----------
 src/sparc/v9.S                      | 504 ++++++++++++---------
 testsuite/libffi.call/call.exp      |  12 +-
 testsuite/libffi.call/complex_int.c |   4 +-
 10 files changed, 1530 insertions(+), 1050 deletions(-)
 create mode 100644 src/sparc/ffi64.c
 create mode 100644 src/sparc/internal.h

-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 5/8] sparc: Handle more cases of structure return directly
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
                   ` (6 preceding siblings ...)
  2014-10-28 19:46 ` [PATCH 6/8] sparc: Add support for complex types Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

---
 src/sparc/ffi.c      |   4 +-
 src/sparc/ffi64.c    |  40 +++++++++++++++---
 src/sparc/internal.h |  24 ++++++-----
 src/sparc/v8.S       |  77 +++++++++++++++++------------------
 src/sparc/v9.S       | 112 ++++++++++++++++++++++++++++++---------------------
 5 files changed, 152 insertions(+), 105 deletions(-)

diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 7542847..1b8f48e 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -59,10 +59,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       flags = SPARC_RET_VOID;
       break;
     case FFI_TYPE_FLOAT:
-      flags = SPARC_RET_FLOAT;
+      flags = SPARC_RET_F_1;
       break;
     case FFI_TYPE_DOUBLE:
-      flags = SPARC_RET_DOUBLE;
+      flags = SPARC_RET_F_2;
       break;
     case FFI_TYPE_LONGDOUBLE:
     case FFI_TYPE_STRUCT:
diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
index 65ae438..ab3ed09 100644
--- a/src/sparc/ffi64.c
+++ b/src/sparc/ffi64.c
@@ -66,7 +66,6 @@ ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
 	{
 	case FFI_TYPE_STRUCT:
 	  size_mask = ffi_struct_float_mask (t, size_mask);
-	  size_mask = ALIGN(size_mask, FFI_SIZEOF_ARG);
 	  continue;
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
@@ -154,13 +153,13 @@ ffi_prep_cif_machdep(ffi_cif *cif)
       flags = SPARC_RET_VOID;
       break;
     case FFI_TYPE_FLOAT:
-      flags = SPARC_RET_FLOAT;
+      flags = SPARC_RET_F_1;
       break;
     case FFI_TYPE_DOUBLE:
-      flags = SPARC_RET_DOUBLE;
+      flags = SPARC_RET_F_2;
       break;
     case FFI_TYPE_LONGDOUBLE:
-      flags = SPARC_RET_LDOUBLE;
+      flags = SPARC_RET_F_4;
       break;
 
     case FFI_TYPE_STRUCT:
@@ -171,8 +170,37 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 	}
       else
 	{
-	  flags = ffi_struct_float_mask (rtype, 0) << SPARC_FLTMASK_SHIFT;
-	  flags |= SPARC_RET_STRUCT;
+	  int size_mask = ffi_struct_float_mask (rtype, 0);
+	  int word_size = (size_mask >> 2) & 0x3f;
+	  int all_mask = (1 << word_size) - 1;
+	  int fp_mask = size_mask >> 8;
+
+	  flags = (size_mask << SPARC_FLTMASK_SHIFT) | SPARC_RET_STRUCT;
+
+	  /* For special cases of all-int or all-fp, we can return
+	     the value directly without popping through a struct copy.  */
+	  if (fp_mask == 0)
+	    {
+	      if (rtype->alignment >= 8)
+		{
+		  if (rtype->size == 8)
+		    flags = SPARC_RET_INT64;
+		  else if (rtype->size == 16)
+		    flags = SPARC_RET_INT128;
+		}
+	    }
+	  else if (fp_mask == all_mask)
+	    switch (word_size)
+	      {
+	      case 1: flags = SPARC_RET_F_1; break;
+	      case 2: flags = SPARC_RET_F_2; break;
+	      case 3: flags = SPARC_RET_F_3; break;
+	      case 4: flags = SPARC_RET_F_4; break;
+	      /* 5 word structures skipped; handled via RET_STRUCT.  */
+	      case 6: flags = SPARC_RET_F_6; break;
+	      /* 7 word structures skipped; handled via RET_STRUCT.  */
+	      case 8: flags = SPARC_RET_F_8; break;
+	      }
 	}
       break;
 
diff --git a/src/sparc/internal.h b/src/sparc/internal.h
index 3018928..b4494d9 100644
--- a/src/sparc/internal.h
+++ b/src/sparc/internal.h
@@ -1,17 +1,21 @@
 #define SPARC_RET_VOID		0
 #define SPARC_RET_STRUCT	1
-#define SPARC_RET_FLOAT		2
-#define SPARC_RET_DOUBLE	3
-#define SPARC_RET_UINT8		4
-#define SPARC_RET_SINT8		5
-#define SPARC_RET_UINT16	6
-#define SPARC_RET_SINT16	7
+#define SPARC_RET_UINT8		2
+#define SPARC_RET_SINT8		3
+#define SPARC_RET_UINT16	4
+#define SPARC_RET_SINT16	5
+#define SPARC_RET_UINT32	6
+#define SPARC_RET_SINT32	7	/* v9 only */
 #define SPARC_RET_INT64		8
-#define SPARC_RET_UINT32	9
+#define SPARC_RET_INT128	9	/* v9 only */
 
-/* These two are only used for V9.  */
-#define SPARC_RET_SINT32	10
-#define SPARC_RET_LDOUBLE	11
+/* Note that F_7 is missing, and is handled by SPARC_RET_STRUCT.  */
+#define SPARC_RET_F_8		10
+#define SPARC_RET_F_6		11	/* v9 only */
+#define SPARC_RET_F_4		12
+#define SPARC_RET_F_2		13
+#define SPARC_RET_F_3		14	/* v9 only */
+#define SPARC_RET_F_1		15
 
 #define SPARC_FLAG_RET_MASK	15
 #define SPARC_FLAG_RET_IN_MEM	32
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index b0d50a3..4adcf6d 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -118,15 +118,6 @@ E SPARC_RET_VOID
 	 restore
 E SPARC_RET_STRUCT
 	unimp
-E SPARC_RET_FLOAT
-	st	%f0, [%i2]
-	ret
-	 restore
-E SPARC_RET_DOUBLE
-	std	%f0, [%i2]
-	ret
-	 restore
-	nop
 E SPARC_RET_UINT8
 	and	%o0, 0xff, %o0
 	st	%o0, [%i2]
@@ -144,28 +135,34 @@ E SPARC_RET_SINT16
 	sll	%o0, 16, %o0
 	b	7f
 	 sra	%o0, 16, %o0
-E SPARC_RET_INT64
-	std	%o0, [%i2]
-	ret
-	 restore
 E SPARC_RET_UINT32
 7:	st	%o0, [%i2]
 	ret
 	 restore
-
-	! Unused entries.  Don't allow bad data to do worse things.
-E 10
+E SPARC_RET_SINT32
 	unimp
-E 11
+E SPARC_RET_INT64
+	std	%o0, [%i2]
+	ret
+	 restore
+E SPARC_RET_INT128
 	unimp
-E 12
+E SPARC_RET_F_8
 	unimp
-E 13
+E SPARC_RET_F_6
 	unimp
-E 14
+E SPARC_RET_F_4
 	unimp
-E 15
+E SPARC_RET_F_2
+	std	%f0, [%i2]
+	ret
+	 restore
+E SPARC_RET_F_3
 	unimp
+E SPARC_RET_F_1
+	st	%f0, [%i2]
+	ret
+	 restore
 
 	! Struct returning functions expect and skip the unimp here.
 	.align	8
@@ -234,14 +231,6 @@ E SPARC_RET_VOID
 E SPARC_RET_STRUCT
 	jmp	%i7+12
 	 restore
-E SPARC_RET_FLOAT
-	ld	[%fp-8], %f0
-	ret
-	 restore
-E SPARC_RET_DOUBLE
-	ldd	[%fp-8], %f0
-	ret
-	 restore
 E SPARC_RET_UINT8
 	ldub	[%fp-8+3], %i0
 	ret
@@ -258,28 +247,34 @@ E SPARC_RET_SINT16
 	ldsh	[%fp-8+2], %i0
 	ret
 	 restore
-E SPARC_RET_INT64
-	ldd	[%fp-8], %i0
-	ret
-	 restore
 E SPARC_RET_UINT32
 	ld	[%fp-8], %i0
 	ret
 	 restore
-
-	! Unused entries.  Don't allow bad data to do worse things.
-E 10
+E SPARC_RET_SINT32
 	unimp
-E 11
+E SPARC_RET_INT64
+	ldd	[%fp-8], %i0
+	ret
+	 restore
+E SPARC_RET_INT128
 	unimp
-E 12
+E SPARC_RET_F_8
 	unimp
-E 13
+E SPARC_RET_F_6
 	unimp
-E 14
+E SPARC_RET_F_4
 	unimp
-E 15
+E SPARC_RET_F_2
+	ldd	[%fp-8], %f0
+	ret
+	 restore
+E SPARC_RET_F_3
 	unimp
+E SPARC_RET_F_1
+	ld	[%fp-8], %f0
+	ret
+	 restore
 
 	cfi_endproc
 	.size	C(ffi_closure_v8), . - C(ffi_closure_v8)
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index 3d91f2d..d893d2f 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -114,12 +114,6 @@ E SPARC_RET_STRUCT
 	sub	%sp, 64, %sp
 	b	8f
 	 stx	%o0, [%l2]
-E SPARC_RET_FLOAT
-	return	%i7+8
-	 st	%f0, [%o2]
-E SPARC_RET_DOUBLE
-	return	%i7+8
-	 std	%f0, [%o2]
 E SPARC_RET_UINT8
 	and	%o0, 0xff, %i0
 	return	%i7+8
@@ -139,10 +133,6 @@ E SPARC_RET_SINT16
 	sra	%o0, 16, %i0
 	return	%i7+8
 	 stx	%o0, [%o2]
-E SPARC_RET_INT64
-	stx	%o0, [%i2]
-	return	%i7+8
-	 nop
 E SPARC_RET_UINT32
 	srl	%o0, 0, %i0
 	return	%i7+8
@@ -151,20 +141,40 @@ E SPARC_RET_SINT32
 	sra	%o0, 0, %i0
 	return	%i7+8
 	 stx	%o0, [%o2]
-E SPARC_RET_LDOUBLE
-	std	%f0, [%i2]
+E SPARC_RET_INT64
+	stx	%o0, [%i2]
+	return	%i7+8
+	 nop
+E SPARC_RET_INT128
+	stx	%o0, [%i2]
+	stx	%o1, [%i2+8]
+	return	%i7+8
+	 nop
+E SPARC_RET_F_8
+	st	%f7, [%i2+7*4]
+	nop
+	st	%f6, [%i2+6*4]
+	nop
+E SPARC_RET_F_6
+	st	%f5, [%i2+5*4]
+	nop
+	st	%f4, [%i2+4*4]
+	nop
+E SPARC_RET_F_4
+	std	%f2, [%i2+2*4]
+	return	%i7+8
+	 std	%f0, [%o2]
+E SPARC_RET_F_2
 	return	%i7+8
-	 std	%f2, [%o2+8]
-
-	! Unused entries.  Don't allow bad data to do worse things.
-E 12
-	unimp
-E 13
-	unimp
-E 14
-	unimp
-E 15
-	unimp
+	 std	%f0, [%o2]
+E SPARC_RET_F_3
+	st	%f2, [%i2+2*4]
+	nop
+	st	%f1, [%i2+1*4]
+	nop
+E SPARC_RET_F_1
+	return	%i7+8
+	 st	%f0, [%o2]
 
 	! Finish the SPARC_RET_STRUCT sequence.
 	.align	8
@@ -264,14 +274,6 @@ E SPARC_RET_STRUCT
 	ldd	[FP-160], %f0
 	b	8f
 	 ldx	[FP-152], %i1
-E SPARC_RET_FLOAT
-	ld	[FP-160], %f0
-	return	%i7+8
-	 nop
-E SPARC_RET_DOUBLE
-	ldd	[FP-160], %f0
-	return	%i7+8
-	 nop
 E SPARC_RET_UINT8
 	ldub	[FP-160+7], %i0
 	return	%i7+8
@@ -288,10 +290,6 @@ E SPARC_RET_SINT16
 	ldsh	[FP-160+6], %i0
 	return	%i7+8
 	 nop
-E SPARC_RET_INT64
-	ldx	[FP-160], %i0
-	return	%i7+8
-	 nop
 E SPARC_RET_UINT32
 	lduw	[FP-160+4], %i0
 	return	%i7+8
@@ -300,21 +298,43 @@ E SPARC_RET_SINT32
 	ldsw	[FP-160+4], %i0
 	return	%i7+8
 	 nop
-E SPARC_RET_LDOUBLE
+E SPARC_RET_INT64
+	ldx	[FP-160], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_INT128
+	ldx	[FP-160], %i0
+	ldx	[FP-160+8], %i1
+	return	%i7+8
+	 nop
+E SPARC_RET_F_8
+	ld	[FP-160+7*4], %f7
+	nop
+	ld	[FP-160+6*4], %f6
+	nop
+E SPARC_RET_F_6
+	ld	[FP-160+5*4], %f5
+	nop
+	ld	[FP-160+4*4], %f4
+	nop
+E SPARC_RET_F_4
 	ldd	[FP-160], %f0
 	ldd	[FP-160+8], %f2
 	return	%i7+8
 	 nop
-
-	! Unused entries.  Don't allow bad data to do worse things.
-E 12
-	unimp
-E 13
-	unimp
-E 14
-	unimp
-E 15
-	unimp
+E SPARC_RET_F_2
+	ldd	[FP-160], %f0
+	return	%i7+8
+	 nop
+E SPARC_RET_F_3
+	ld	[FP-160+2*4], %f2
+	nop
+	ld	[FP-160+1*4], %f1
+	nop
+E SPARC_RET_F_1
+	ld	[FP-160], %f0
+	return	%i7+8
+	 nop
 
 	! Finish the SPARC_RET_STRUCT sequence.
 	.align	8
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/8] sparc: Preprocess float point struct return
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
                   ` (3 preceding siblings ...)
  2014-10-28 19:46 ` [PATCH 1/8] sparc: Eliminate long double ifdefs Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-28 19:46 ` [PATCH 7/8] sparc: Add support for Go closures Richard Henderson
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

We can eliminate recursion and speed structure return
by flattening a nested structure tree into a bitmask.
---
 src/sparc/ffi64.c    | 126 ++++++++++++++++++++++++++++++++++++++++-----------
 src/sparc/internal.h |   2 +
 src/sparc/v9.S       |  11 ++---
 3 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
index 7ed928d..65ae438 100644
--- a/src/sparc/ffi64.c
+++ b/src/sparc/ffi64.c
@@ -42,41 +42,103 @@
 #endif
 
 #ifdef SPARC64
-/* Perform machine dependent cif processing */
 
-int FFI_HIDDEN
-ffi_v9_layout_struct (ffi_type *arg, int off, void *d, void *si, void *sf)
+/* Flatten the contents of a structure to the parts that are passed in
+   floating point registers.  The return is a bit mask wherein bit N
+   set means bytes [4*n, 4*n+3] are passed in %fN.
+
+   We encode both the (running) size (maximum 32) and mask (maximum 255)
+   into one integer.  The size is placed in the low byte, so that align
+   and addition work correctly.  The mask is placed in the second byte.  */
+
+static int
+ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
 {
   ffi_type **elts, *t;
 
-  for (elts = arg->elements; (t = *elts) != NULL; elts++)
+  for (elts = struct_type->elements; (t = *elts) != NULL; elts++)
     {
       size_t z = t->size;
-      void *src = si;
+      int o, m;
 
-      off = ALIGN(off, t->alignment);
+      size_mask = ALIGN(size_mask, t->alignment);
       switch (t->type)
 	{
 	case FFI_TYPE_STRUCT:
-	  off = ffi_v9_layout_struct(t, off, d, si, sf);
-	  off = ALIGN(off, FFI_SIZEOF_ARG);
+	  size_mask = ffi_struct_float_mask (t, size_mask);
+	  size_mask = ALIGN(size_mask, FFI_SIZEOF_ARG);
 	  continue;
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
 	case FFI_TYPE_LONGDOUBLE:
-	  /* Note that closures start with the argument offset,
-	     so that we know when to stop looking at fp regs.  */
-	  if (off < 128)
-	    src = sf;
+	  m = (1 << (z / 4)) - 1;	/* compute mask for type */
+	  o = (size_mask >> 2) & 0x3f;	/* extract word offset */
+	  size_mask |= m << (o + 8);	/* insert mask into place */
 	  break;
 	}
-      memcpy(d + off, src + off, z);
-      off += z;
+      size_mask += z;
+    }
+
+  size_mask = ALIGN(size_mask, struct_type->alignment);
+  FFI_ASSERT ((size_mask & 0xff) == struct_type->size);
+
+  return size_mask;
+}
+
+/* Merge floating point data into integer data.  If the structure is
+   entirely floating point, simply return a pointer to the fp data.  */
+
+static void *
+ffi_struct_float_merge (int size_mask, void *vi, void *vf)
+{
+  int size = size_mask & 0xff;
+  int mask = size_mask >> 8;
+  int n = size >> 2;
+
+  if (mask == 0)
+    return vi;
+  else if (mask == (1 << n) - 1)
+    return vf;
+  else
+    {
+      unsigned int *wi = vi, *wf = vf;
+      int i;
+
+      for (i = 0; i < n; ++i)
+	if ((mask >> i) & 1)
+	  wi[i] = wf[i];
+
+      return vi;
     }
+}
+
+/* Similar, but place the data into VD in the end.  */
 
-  return off;
+void FFI_HIDDEN
+ffi_struct_float_copy (int size_mask, void *vd, void *vi, void *vf)
+{
+  int size = size_mask & 0xff;
+  int mask = size_mask >> 8;
+  int n = size >> 2;
+
+  if (mask == 0)
+    ;
+  else if (mask == (1 << n) - 1)
+    vi = vf;
+  else
+    {
+      unsigned int *wd = vd, *wi = vi, *wf = vf;
+      int i;
+
+      for (i = 0; i < n; ++i)
+	wd[i] = ((mask >> i) & 1 ? wf : wi)[i];
+      return;
+    }
+  memcpy (vd, vi, size);
 }
 
+/* Perform machine dependent cif processing */
+
 ffi_status FFI_HIDDEN
 ffi_prep_cif_machdep(ffi_cif *cif)
 {
@@ -108,7 +170,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
 	  bytes = 8;
 	}
       else
-	flags = SPARC_RET_STRUCT;
+	{
+	  flags = ffi_struct_float_mask (rtype, 0) << SPARC_FLTMASK_SHIFT;
+	  flags |= SPARC_RET_STRUCT;
+	}
       break;
 
     case FFI_TYPE_SINT8:
@@ -343,7 +408,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
   ffi_cif *cif;
   ffi_type **arg_types;
   void **avalue;
-  int i, argn, nargs, flags;
+  int i, argn, argx, nargs, flags;
 
   cif = closure->cif;
   arg_types = cif->arg_types;
@@ -364,12 +429,13 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
     argn = 0;
 
   /* Grab the addresses of the arguments from the stack frame.  */
-  for (i = 0; i < nargs; i++)
+  for (i = 0; i < nargs; i++, argn = argx)
     {
       ffi_type *ty = arg_types[i];
-      void *a = &gpr[argn++];
+      void *a = &gpr[argn];
       size_t z;
 
+      argx = argn + 1;
       switch (ty->type)
 	{
 	case FFI_TYPE_STRUCT:
@@ -378,25 +444,31 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
 	    a = *(void **)a;
 	  else
 	    {
-	      if (--argn < 16)
-	        ffi_v9_layout_struct(arg_types[i], 8*argn, gpr, gpr, fpr);
-	      argn += ALIGN (z, 8) / 8;
+	      argx = argn + ALIGN (z, 8) / 8;
+	      if (argn < 16)
+		{
+		  int size_mask = ffi_struct_float_mask (ty, 0);
+		  int argn_mask = (0xffff00 >> argn) & 0xff00;
+
+		  /* Eliminate fp registers off the end.  */
+		  size_mask = (size_mask & 0xff) | (size_mask & argn_mask);
+		  a = ffi_struct_float_merge (size_mask, gpr+argn, fpr+argn);
+		}
 	    }
 	  break;
 
 	case FFI_TYPE_LONGDOUBLE:
-	  if (--argn & 1)
-	    argn++;
+	  argn = ALIGN (argn, 2);
 	  a = (argn < 16 ? fpr : gpr) + argn;
-	  argn += 2;
+	  argx = argn + 2;
 	  break;
 	case FFI_TYPE_DOUBLE:
 	  if (argn <= 16)
-	    a = fpr + argn - 1;
+	    a = fpr + argn;
 	  break;
 	case FFI_TYPE_FLOAT:
 	  if (argn <= 16)
-	    a = fpr + argn - 1;
+	    a = fpr + argn;
 	  a += 4;
 	  break;
 
diff --git a/src/sparc/internal.h b/src/sparc/internal.h
index df7c305..3018928 100644
--- a/src/sparc/internal.h
+++ b/src/sparc/internal.h
@@ -16,3 +16,5 @@
 #define SPARC_FLAG_RET_MASK	15
 #define SPARC_FLAG_RET_IN_MEM	32
 #define SPARC_FLAG_FP_ARGS	64
+
+#define SPARC_FLTMASK_SHIFT	8
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index e2fe036..3d91f2d 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -177,12 +177,11 @@ E 15
 	std	%f6, [%l2+56]
 
 	! Copy the structure into place.
-	ldx	[%i0+16], %o0			! load rtype from cif
-	mov	0, %o1				! load off
-	mov	%i2, %o2			! load dst
-	mov	%l2, %o3			! load src_int
-	call	C(ffi_v9_layout_struct)
-	 add	%l2, 32, %o4			! load src_fp
+	srl	%l0, SPARC_FLTMASK_SHIFT, %o0	! load size_mask
+	mov	%i2, %o1			! load dst
+	mov	%l2, %o2			! load src_gp
+	call	C(ffi_struct_float_copy)
+	 add	%l2, 32, %o3			! load src_fp
 
 	return	%i7+8
 	 nop
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/8] sparc: Rewrite everything
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
  2014-10-28 19:46 ` [PATCH 8/8] sparc: Re-add abi compliant structure support Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-29 18:10   ` David Miller
  2014-10-28 19:46 ` [PATCH 2/8] sparc: Tidy up symbols Richard Henderson
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

It's impossible to call between v8 and v9 ABIs, because of the stack bias
in the v9 ABI.  So let's not pretend it's just not implemented yet.  Split
the v9 code out to a separate file.

The register windows prevent ffi_call from setting up the entire stack
frame in the assembly, but we needn't make an indirect call back to prep_args.
---
 Makefile.am           |   4 +-
 src/prep_cif.c        |  11 -
 src/sparc/ffi.c       | 735 ++++++++++++++------------------------------------
 src/sparc/ffi64.c     | 433 +++++++++++++++++++++++++++++
 src/sparc/ffitarget.h |  13 +-
 src/sparc/internal.h  |  18 ++
 src/sparc/v8.S        | 309 +++++++++++----------
 src/sparc/v9.S        | 330 ++++++++++++++---------
 8 files changed, 1036 insertions(+), 817 deletions(-)
 create mode 100644 src/sparc/ffi64.c
 create mode 100644 src/sparc/internal.h

diff --git a/Makefile.am b/Makefile.am
index 0e40451..579faf5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,7 +36,7 @@ EXTRA_DIST = LICENSE ChangeLog.v1 ChangeLog.libgcj			\
 	 src/sh/ffi.c src/sh/sysv.S src/sh/ffitarget.h src/sh64/ffi.c	\
 	 src/sh64/sysv.S src/sh64/ffitarget.h src/sparc/v8.S		\
 	 src/sparc/v9.S src/sparc/ffitarget.h src/sparc/ffi.c		\
-	 src/x86/darwin64.S src/x86/ffi.c src/x86/sysv.S		\
+	 src/sparc/ffi64.c src/x86/darwin64.S src/x86/ffi.c src/x86/sysv.S \
 	 src/x86/win32.S src/x86/darwin.S src/x86/win64.S		\
 	 src/x86/freebsd.S src/x86/ffi64.c src/x86/unix64.S		\
 	 src/x86/ffitarget.h src/pa/ffitarget.h src/pa/ffi.c		\
@@ -144,7 +144,7 @@ nodist_libffi_la_SOURCES += src/x86/win32.S
 endif
 endif
 if SPARC
-nodist_libffi_la_SOURCES += src/sparc/ffi.c src/sparc/v8.S src/sparc/v9.S
+nodist_libffi_la_SOURCES += src/sparc/ffi.c src/sparc/ffi64.c src/sparc/v8.S src/sparc/v9.S
 endif
 if ALPHA
 nodist_libffi_la_SOURCES += src/alpha/ffi.c src/alpha/osf.S
diff --git a/src/prep_cif.c b/src/prep_cif.c
index be5eae3..5881ceb 100644
--- a/src/prep_cif.c
+++ b/src/prep_cif.c
@@ -147,9 +147,6 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
 #if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
   /* Make space for the return structure pointer */
   if (cif->rtype->type == FFI_TYPE_STRUCT
-#ifdef SPARC
-      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
-#endif
 #ifdef TILE
       && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
 #endif
@@ -179,14 +176,6 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
       FFI_ASSERT_VALID_TYPE(*ptr);
 
 #if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
-#ifdef SPARC
-      if (((*ptr)->type == FFI_TYPE_STRUCT
-	   && ((*ptr)->size > 16 || cif->abi != FFI_V9))
-	  || ((*ptr)->type == FFI_TYPE_LONGDOUBLE
-	      && cif->abi != FFI_V9))
-	bytes += sizeof(void*);
-      else
-#endif
 	{
 	  /* Add any padding if necessary */
 	  if (((*ptr)->alignment - 1) & bytes)
diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 1f38f54..7542847 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -28,6 +28,9 @@
 #include <ffi.h>
 #include <ffi_common.h>
 #include <stdlib.h>
+#include "internal.h"
+
+#ifndef SPARC64
 
 /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
    all further uses in this file will refer to the 128-bit type.  */
@@ -40,632 +43,310 @@
 # define FFI_TYPE_LONGDOUBLE 4
 #endif
 
-
-/* ffi_prep_args is called by the assembly routine once stack space
-   has been allocated for the function's arguments */
-
-void FFI_HIDDEN
-ffi_prep_args_v8(char *stack, extended_cif *ecif)
-{
-  int i;
-  void **p_argv;
-  char *argp;
-  ffi_type **p_arg;
-
-  /* Skip 16 words for the window save area */
-  argp = stack + 16*sizeof(int);
-
-  /* This should only really be done when we are returning a structure,
-     however, it's faster just to do it all the time...
-
-  if ( ecif->cif->rtype->type == FFI_TYPE_STRUCT ) */
-  *(int *) argp = (long)ecif->rvalue;
-
-  /* And 1 word for the  structure return value. */
-  argp += sizeof(int);
-
-#ifdef USING_PURIFY
-  /* Purify will probably complain in our assembly routine, unless we
-     zero out this memory. */
-
-  ((int*)argp)[0] = 0;
-  ((int*)argp)[1] = 0;
-  ((int*)argp)[2] = 0;
-  ((int*)argp)[3] = 0;
-  ((int*)argp)[4] = 0;
-  ((int*)argp)[5] = 0;
-#endif
-
-  p_argv = ecif->avalue;
-
-  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; i; i--, p_arg++)
-    {
-      size_t z;
-
-	  if ((*p_arg)->type == FFI_TYPE_STRUCT
-	      || (*p_arg)->type == FFI_TYPE_LONGDOUBLE)
-	    {
-	      *(unsigned int *) argp = (unsigned long)(* p_argv);
-	      z = sizeof(int);
-	    }
-	  else
-	    {
-	      z = (*p_arg)->size;
-	      if (z < sizeof(int))
-		{
-		  z = sizeof(int);
-		  switch ((*p_arg)->type)
-		    {
-		    case FFI_TYPE_SINT8:
-		      *(signed int *) argp = *(SINT8 *)(* p_argv);
-		      break;
-		      
-		    case FFI_TYPE_UINT8:
-		      *(unsigned int *) argp = *(UINT8 *)(* p_argv);
-		      break;
-		      
-		    case FFI_TYPE_SINT16:
-		      *(signed int *) argp = *(SINT16 *)(* p_argv);
-		      break;
-		      
-		    case FFI_TYPE_UINT16:
-		      *(unsigned int *) argp = *(UINT16 *)(* p_argv);
-		      break;
-
-		    default:
-		      FFI_ASSERT(0);
-		    }
-		}
-	      else
-		{
-		  memcpy(argp, *p_argv, z);
-		}
-	    }
-	  p_argv++;
-	  argp += z;
-    }
-  
-  return;
-}
-
-int FFI_HIDDEN
-ffi_prep_args_v9(char *stack, extended_cif *ecif)
-{
-  int i, ret = 0;
-  int tmp;
-  void **p_argv;
-  char *argp;
-  ffi_type **p_arg;
-
-  tmp = 0;
-
-  /* Skip 16 words for the window save area */
-  argp = stack + 16*sizeof(long long);
-
-#ifdef USING_PURIFY
-  /* Purify will probably complain in our assembly routine, unless we
-     zero out this memory. */
-
-  ((long long*)argp)[0] = 0;
-  ((long long*)argp)[1] = 0;
-  ((long long*)argp)[2] = 0;
-  ((long long*)argp)[3] = 0;
-  ((long long*)argp)[4] = 0;
-  ((long long*)argp)[5] = 0;
-#endif
-
-  p_argv = ecif->avalue;
-
-  if (ecif->cif->rtype->type == FFI_TYPE_STRUCT &&
-      ecif->cif->rtype->size > 32)
-    {
-      *(unsigned long long *) argp = (unsigned long)ecif->rvalue;
-      argp += sizeof(long long);
-      tmp = 1;
-    }
-
-  for (i = 0, p_arg = ecif->cif->arg_types; i < ecif->cif->nargs;
-       i++, p_arg++)
-    {
-      size_t z;
-
-      z = (*p_arg)->size;
-      switch ((*p_arg)->type)
-	{
-	case FFI_TYPE_STRUCT:
-	  if (z > 16)
-	    {
-	      /* For structures larger than 16 bytes we pass reference.  */
-	      *(unsigned long long *) argp = (unsigned long)* p_argv;
-	      argp += sizeof(long long);
-	      tmp++;
-	      p_argv++;
-	      continue;
-	    }
-	  /* FALLTHROUGH */
-	case FFI_TYPE_FLOAT:
-	case FFI_TYPE_DOUBLE:
-	case FFI_TYPE_LONGDOUBLE:
-	  ret = 1; /* We should promote into FP regs as well as integer.  */
-	  break;
-	}
-      if (z < sizeof(long long))
-	{
-	  switch ((*p_arg)->type)
-	    {
-	    case FFI_TYPE_SINT8:
-	      *(signed long long *) argp = *(SINT8 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_UINT8:
-	      *(unsigned long long *) argp = *(UINT8 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_SINT16:
-	      *(signed long long *) argp = *(SINT16 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_UINT16:
-	      *(unsigned long long *) argp = *(UINT16 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_SINT32:
-	      *(signed long long *) argp = *(SINT32 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_UINT32:
-	      *(unsigned long long *) argp = *(UINT32 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_FLOAT:
-	      *(float *) (argp + 4) = *(FLOAT32 *)(* p_argv); /* Right justify */
-	      break;
-
-	    case FFI_TYPE_STRUCT:
-	      memcpy(argp, *p_argv, z);
-	      break;
-
-	    default:
-	      FFI_ASSERT(0);
-	    }
-	  z = sizeof(long long);
-	  tmp++;
-	}
-      else if (z == sizeof(long long))
-	{
-	  memcpy(argp, *p_argv, z);
-	  z = sizeof(long long);
-	  tmp++;
-	}
-      else
-	{
-	  if ((tmp & 1) && (*p_arg)->alignment > 8)
-	    {
-	      tmp++;
-	      argp += sizeof(long long);
-	    }
-	  memcpy(argp, *p_argv, z);
-	  z = 2 * sizeof(long long);
-	  tmp += 2;
-	}
-      p_argv++;
-      argp += z;
-    }
-
-  return ret;
-}
-
 /* Perform machine dependent cif processing */
 ffi_status FFI_HIDDEN
 ffi_prep_cif_machdep(ffi_cif *cif)
 {
-  int wordsize;
-
-  if (cif->abi != FFI_V9)
-    {
-      wordsize = 4;
-
-      /* If we are returning a struct, this will already have been added.
-	 Otherwise we need to add it because it's always got to be there! */
-
-      if (cif->rtype->type != FFI_TYPE_STRUCT)
-	cif->bytes += wordsize;
-
-      /* sparc call frames require that space is allocated for 6 args,
-	 even if they aren't used. Make that space if necessary. */
-  
-      if (cif->bytes < 4*6+4)
-	cif->bytes = 4*6+4;
-    }
-  else
-    {
-      wordsize = 8;
-
-      /* sparc call frames require that space is allocated for 6 args,
-	 even if they aren't used. Make that space if necessary. */
-  
-      if (cif->bytes < 8*6)
-	cif->bytes = 8*6;
-    }
-
-  /* Adjust cif->bytes. to include 16 words for the window save area,
-     and maybe the struct/union return pointer area, */
-
-  cif->bytes += 16 * wordsize;
-
-  /* The stack must be 2 word aligned, so round bytes up
-     appropriately. */
-
-  cif->bytes = ALIGN(cif->bytes, 2 * wordsize);
+  ffi_type *rtype = cif->rtype;
+  int rtt = rtype->type;
+  size_t bytes;
+  int i, n, flags;
 
   /* Set the return type flag */
-  switch (cif->rtype->type)
+  switch (rtt)
     {
     case FFI_TYPE_VOID:
+      flags = SPARC_RET_VOID;
+      break;
     case FFI_TYPE_FLOAT:
+      flags = SPARC_RET_FLOAT;
+      break;
     case FFI_TYPE_DOUBLE:
-    case FFI_TYPE_LONGDOUBLE:
-      cif->flags = cif->rtype->type;
+      flags = SPARC_RET_DOUBLE;
       break;
-
+    case FFI_TYPE_LONGDOUBLE:
     case FFI_TYPE_STRUCT:
-      if (cif->abi == FFI_V9 && cif->rtype->size > 32)
-	cif->flags = FFI_TYPE_VOID;
-      else
-	cif->flags = FFI_TYPE_STRUCT;
+      flags = SPARC_RET_STRUCT;
       break;
-
     case FFI_TYPE_SINT8:
+      flags = SPARC_RET_SINT8;
+      break;
     case FFI_TYPE_UINT8:
+      flags = SPARC_RET_UINT8;
+      break;
     case FFI_TYPE_SINT16:
+      flags = SPARC_RET_SINT16;
+      break;
     case FFI_TYPE_UINT16:
-      if (cif->abi == FFI_V9)
-	cif->flags = FFI_TYPE_INT;
-      else
-	cif->flags = cif->rtype->type;
+      flags = SPARC_RET_UINT16;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_POINTER:
+      flags = SPARC_RET_UINT32;
       break;
-
     case FFI_TYPE_SINT64:
     case FFI_TYPE_UINT64:
-      if (cif->abi == FFI_V9)
-	cif->flags = FFI_TYPE_INT;
-      else
-	cif->flags = FFI_TYPE_SINT64;
+      flags = SPARC_RET_INT64;
       break;
-
     default:
-      cif->flags = FFI_TYPE_INT;
-      break;
+      abort();
     }
+  cif->flags = flags;
+
+  bytes = 0;
+  for (i = 0, n = cif->nargs; i < n; ++i)
+    {
+      ffi_type *ty = cif->arg_types[i];
+      size_t z = ty->size;
+      int tt = ty->type;
+
+      if (tt == FFI_TYPE_STRUCT || tt == FFI_TYPE_LONGDOUBLE)
+	/* Passed by reference.  */
+	z = 4;
+      else
+	z = ALIGN(z, 4);
+      bytes += z;
+    }
+
+  /* Sparc call frames require that space is allocated for 6 args,
+     even if they aren't used. Make that space if necessary.  */
+  if (bytes < 6 * 4)
+    bytes = 6 * 4;
+
+  /* The ABI always requires space for the struct return pointer.  */
+  bytes += 4;
+
+  /* The stack must be 2 word aligned, so round bytes up appropriately. */
+  bytes = ALIGN(bytes, 2 * 4);
+
+  /* Include the call frame to prep_args.  */
+  bytes += 4*16 + 4*8;
+  cif->bytes = bytes;
+
   return FFI_OK;
 }
 
-static int
-ffi_v9_layout_struct(ffi_type *arg, int off, char *ret, char *intg, char *flt)
+extern void ffi_call_v8(ffi_cif *cif, void (*fn)(void), void *rvalue,
+			void **avalue, size_t bytes) FFI_HIDDEN;
+
+int FFI_HIDDEN
+ffi_prep_args_v8(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
 {
-  ffi_type **ptr = &arg->elements[0];
+  ffi_type **p_arg;
+  int flags = cif->flags;
+  int i, nargs;
 
-  while (*ptr != NULL)
+  if (rvalue == NULL)
     {
-      if (off & ((*ptr)->alignment - 1))
-	off = ALIGN(off, (*ptr)->alignment);
+      if (flags == SPARC_RET_STRUCT)
+	{
+	  /* Since we pass the pointer to the callee, we need a value.
+	     We allowed for this space in ffi_call, before ffi_call_v8
+	     alloca'd the space.  */
+	  rvalue = (char *)argp + cif->bytes;
+	}
+      else
+	{
+	  /* Otherwise, we can ignore the return value.  */
+	  flags = SPARC_RET_VOID;
+	}
+    }
+
+  /* This could only really be done when we are returning a structure.
+     However, the space is reserved so we can do it unconditionally.  */
+  *argp++ = (unsigned long)rvalue;
 
-      switch ((*ptr)->type)
+#ifdef USING_PURIFY
+  /* Purify will probably complain in our assembly routine,
+     unless we zero out this memory. */
+  memset(argp, 0, 6*4);
+#endif
+
+  p_arg = cif->arg_types;
+  for (i = 0, nargs = cif->nargs; i < nargs; i++)
+    {
+      ffi_type *ty = p_arg[i];
+      void *a = avalue[i];
+
+      switch (ty->type)
 	{
 	case FFI_TYPE_STRUCT:
-	  off = ffi_v9_layout_struct(*ptr, off, ret, intg, flt);
-	  off = ALIGN(off, FFI_SIZEOF_ARG);
+	case FFI_TYPE_LONGDOUBLE:
+	  *argp++ = (unsigned long)a;
 	  break;
-	case FFI_TYPE_FLOAT:
+
 	case FFI_TYPE_DOUBLE:
-	case FFI_TYPE_LONGDOUBLE:
-	  memmove(ret + off, flt + off, (*ptr)->size);
-	  off += (*ptr)->size;
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	  memcpy(argp, a, 8);
+	  argp += 2;
 	  break;
-	default:
-	  memmove(ret + off, intg + off, (*ptr)->size);
-	  off += (*ptr)->size;
+
+	case FFI_TYPE_INT:
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_POINTER:
+	  *argp++ = *(unsigned *)a;
 	  break;
-	}
-      ptr++;
-    }
-  return off;
-}
 
+	case FFI_TYPE_UINT8:
+	  *argp++ = *(UINT8 *)a;
+	  break;
+	case FFI_TYPE_SINT8:
+	  *argp++ = *(SINT8 *)a;
+	  break;
+	case FFI_TYPE_UINT16:
+	  *argp++ = *(UINT16 *)a;
+	  break;
+	case FFI_TYPE_SINT16:
+	  *argp++ = *(SINT16 *)a;
+	  break;
 
-#ifdef SPARC64
-extern int ffi_call_v9(void *, extended_cif *, unsigned, 
-		       unsigned, unsigned *, void (*fn)(void)) FFI_HIDDEN;
-#else
-extern int ffi_call_v8(void *, extended_cif *, unsigned, 
-		       unsigned, unsigned *, void (*fn)(void)) FFI_HIDDEN;
-#endif
+	default:
+	  abort();
+	}
+    }
 
-#ifndef __GNUC__
-void ffi_flush_icache (void *, size_t) FFI_HIDDEN;
-#endif
+  return flags;
+}
 
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 {
-  extended_cif ecif;
-  void *rval = rvalue;
+  size_t bytes = cif->bytes;
 
-  ecif.cif = cif;
-  ecif.avalue = avalue;
+  FFI_ASSERT (cif->abi == FFI_V8);
 
-  /* If the return value is a struct and we don't have a return	*/
-  /* value address then we need to make one		        */
+  /* If we've not got a return value, we need to create one if we've
+     got to pass the return value to the callee.  Otherwise ignore it.  */
+  if (rvalue == NULL && cif->flags == SPARC_RET_STRUCT)
+    bytes += ALIGN (cif->rtype->size, 8);
 
-  ecif.rvalue = rvalue;
-  if (cif->rtype->type == FFI_TYPE_STRUCT)
-    {
-      if (cif->rtype->size <= 32)
-	rval = alloca(64);
-      else
-	{
-	  rval = NULL;
-	  if (rvalue == NULL)
-	    ecif.rvalue = alloca(cif->rtype->size);
-	}
-    }
+  ffi_call_v8(cif, fn, rvalue, avalue, -bytes);
+}
 
-  switch (cif->abi) 
-    {
-    case FFI_V8:
-#ifdef SPARC64
-      /* We don't yet support calling 32bit code from 64bit */
-      FFI_ASSERT(0);
-#else
-      if (rvalue && (cif->rtype->type == FFI_TYPE_STRUCT
-	  || cif->flags == FFI_TYPE_LONGDOUBLE))
-	{
-	  /* For v8, we need an "unimp" with size of returning struct */
-	  /* behind "call", so we alloc some executable space for it. */
-	  /* l7 is used, we need to make sure v8.S doesn't use %l7.   */
-	  unsigned int *call_struct = NULL;
-	  ffi_closure_alloc(32, (void **)&call_struct);
-	  if (call_struct)
-	    {
-	      unsigned long f = (unsigned long)fn;
-	      call_struct[0] = 0xae10001f;		 /* mov   %i7, %l7	 */
-	      call_struct[1] = 0xbe10000f;		 /* mov   %o7, %i7	 */
-	      call_struct[2] = 0x03000000 | f >> 10;     /* sethi %hi(fn), %g1	 */
-	      call_struct[3] = 0x9fc06000 | (f & 0x3ff); /* jmp %g1+%lo(fn), %o7 */
-	      call_struct[4] = 0x01000000;		 /* nop			 */
-	      if (cif->rtype->size < 0x7f)
-		call_struct[5] = cif->rtype->size;	 /* unimp		 */
-	      else
-		call_struct[5] = 0x01000000;	     	 /* nop			 */
-	      call_struct[6] = 0x81c7e008;		 /* ret			 */
-	      call_struct[7] = 0xbe100017;		 /* mov   %l7, %i7	 */
 #ifdef __GNUC__
-	      asm volatile ("iflush %0; iflush %0+8; iflush %0+16; iflush %0+24" : :
-			    "r" (call_struct) : "memory");
-	      /* SPARC v8 requires 5 instructions for flush to be visible */
-	      asm volatile ("nop; nop; nop; nop; nop");
-#else
-	      ffi_flush_icache (call_struct, 32);
-#endif
-	      ffi_call_v8(ffi_prep_args_v8, &ecif, cif->bytes,
-			  cif->flags, rvalue, call_struct);
-	      ffi_closure_free(call_struct);
-	    }
-	  else
-	    {
-	      ffi_call_v8(ffi_prep_args_v8, &ecif, cif->bytes,
-			  cif->flags, rvalue, fn);
-	    }
-	}
-      else
-	{
-	  ffi_call_v8(ffi_prep_args_v8, &ecif, cif->bytes,
-		      cif->flags, rvalue, fn);
-	}
-#endif
-      break;
-    case FFI_V9:
-#ifdef SPARC64
-      ffi_call_v9(ffi_prep_args_v9, &ecif, cif->bytes,
-		  cif->flags, rval, fn);
-      if (rvalue && rval && cif->rtype->type == FFI_TYPE_STRUCT)
-	ffi_v9_layout_struct(cif->rtype, 0, (char *)rvalue, (char *)rval, ((char *)rval)+32);
+static inline void
+ffi_flush_icache (void *p)
+{
+  /* SPARC v8 requires 5 instructions for flush to be visible */
+  asm volatile ("iflush	%0; iflush %0+8; nop; nop; nop; nop; nop"
+		: : "r" (p) : "memory");
+}
 #else
-      /* And vice versa */
-      FFI_ASSERT(0);
+extern void ffi_flush_icache (void *) FFI_HIDDEN;
 #endif
-      break;
-    default:
-      FFI_ASSERT(0);
-      break;
-    }
-}
-
 
-#ifdef SPARC64
-extern void ffi_closure_v9(void) FFI_HIDDEN;
-#else
 extern void ffi_closure_v8(void) FFI_HIDDEN;
-#endif
 
 ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
-		      ffi_cif* cif,
+ffi_prep_closure_loc (ffi_closure *closure,
+		      ffi_cif *cif,
 		      void (*fun)(ffi_cif*, void*, void**, void*),
 		      void *user_data,
 		      void *codeloc)
 {
   unsigned int *tramp = (unsigned int *) &closure->tramp[0];
-  unsigned long fn;
-#ifdef SPARC64
-  /* Trampoline address is equal to the closure address.  We take advantage
-     of that to reduce the trampoline size by 8 bytes. */
-  if (cif->abi != FFI_V9)
-    return FFI_BAD_ABI;
-  fn = (unsigned long) ffi_closure_v9;
-  tramp[0] = 0x83414000;	/* rd	%pc, %g1	*/
-  tramp[1] = 0xca586010;	/* ldx	[%g1+16], %g5	*/
-  tramp[2] = 0x81c14000;	/* jmp	%g5		*/
-  tramp[3] = 0x01000000;	/* nop			*/
-  *((unsigned long *) &tramp[4]) = fn;
-#else
-  unsigned long ctx = (unsigned long) codeloc;
+  unsigned long ctx = (unsigned long) closure;
+  unsigned long fn = (unsigned long) ffi_closure_v8;
+
   if (cif->abi != FFI_V8)
     return FFI_BAD_ABI;
-  fn = (unsigned long) ffi_closure_v8;
+
   tramp[0] = 0x03000000 | fn >> 10;	/* sethi %hi(fn), %g1	*/
   tramp[1] = 0x05000000 | ctx >> 10;	/* sethi %hi(ctx), %g2	*/
   tramp[2] = 0x81c06000 | (fn & 0x3ff);	/* jmp   %g1+%lo(fn)	*/
   tramp[3] = 0x8410a000 | (ctx & 0x3ff);/* or    %g2, %lo(ctx)	*/
-#endif
 
   closure->cif = cif;
   closure->fun = fun;
   closure->user_data = user_data;
 
-  /* Flush the Icache.  closure is 8 bytes aligned.  */
-#ifdef __GNUC__
-#ifdef SPARC64
-  asm volatile ("flush	%0; flush %0+8" : : "r" (closure) : "memory");
-#else
-  asm volatile ("iflush	%0; iflush %0+8" : : "r" (closure) : "memory");
-  /* SPARC v8 requires 5 instructions for flush to be visible */
-  asm volatile ("nop; nop; nop; nop; nop");
-#endif
-#else
-  ffi_flush_icache (closure, 16);
-#endif
+  ffi_flush_icache (closure);
 
   return FFI_OK;
 }
 
 int FFI_HIDDEN
 ffi_closure_sparc_inner_v8(ffi_closure *closure, void *rvalue,
-			   unsigned long *gpr, unsigned long *scratch)
+			   unsigned long *argp)
 {
   ffi_cif *cif;
   ffi_type **arg_types;
   void **avalue;
-  int i, argn;
+  int i, nargs, flags;
 
   cif = closure->cif;
   arg_types = cif->arg_types;
-  avalue = alloca(cif->nargs * sizeof(void *));
+  nargs = cif->nargs;
+  flags = cif->flags;
+  avalue = alloca(nargs * sizeof(void *));
 
   /* Copy the caller's structure return address so that the closure
      returns the data directly to the caller.  */
-  if (cif->flags == FFI_TYPE_STRUCT || cif->flags == FFI_TYPE_LONGDOUBLE)
-    rvalue = (void *) gpr[0];
+  if (flags == SPARC_RET_STRUCT)
+    rvalue = (void *)*argp;
 
   /* Always skip the structure return address.  */
-  argn = 1;
+  argp++;
 
   /* Grab the addresses of the arguments from the stack frame.  */
-  for (i = 0; i < cif->nargs; i++)
+  for (i = 0; i < nargs; i++)
     {
-      if (arg_types[i]->type == FFI_TYPE_STRUCT
-	  || arg_types[i]->type == FFI_TYPE_LONGDOUBLE)
+      ffi_type *ty = arg_types[i];
+      int tt = ty->type;
+      void *a = argp;
+
+      switch (tt)
 	{
+	case FFI_TYPE_STRUCT:
+	case FFI_TYPE_LONGDOUBLE:
 	  /* Straight copy of invisible reference.  */
-	  avalue[i] = (void *)gpr[argn++];
-	}
-      else if ((arg_types[i]->type == FFI_TYPE_DOUBLE
-	       || arg_types[i]->type == FFI_TYPE_SINT64
-	       || arg_types[i]->type == FFI_TYPE_UINT64)
-	       /* gpr is 8-byte aligned.  */
-	       && (argn % 2) != 0)
-	{
-	  /* Align on a 8-byte boundary.  */
-	  scratch[0] = gpr[argn];
-	  scratch[1] = gpr[argn+1];
-	  avalue[i] = scratch;
-	  scratch -= 2;
-	  argn += 2;
-	}
-      else
-	{
-	  /* Always right-justify.  */
-	  argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
-	  avalue[i] = ((char *) &gpr[argn]) - arg_types[i]->size;
-	}
-    }
-
-  /* Invoke the closure.  */
-  (closure->fun) (cif, rvalue, avalue, closure->user_data);
-
-  /* Tell ffi_closure_sparc how to perform return type promotions.  */
-  return cif->rtype->type;
-}
-
-int FFI_HIDDEN
-ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
-			   unsigned long *gpr, double *fpr)
-{
-  ffi_cif *cif;
-  ffi_type **arg_types;
-  void **avalue;
-  int i, argn, fp_slot_max;
-
-  cif = closure->cif;
-  arg_types = cif->arg_types;
-  avalue = alloca(cif->nargs * sizeof(void *));
-
-  /* Copy the caller's structure return address so that the closure
-     returns the data directly to the caller.  */
-  if (cif->flags == FFI_TYPE_VOID
-      && cif->rtype->type == FFI_TYPE_STRUCT)
-    {
-      rvalue = (void *) gpr[0];
-      /* Skip the structure return address.  */
-      argn = 1;
-    }
-  else
-    argn = 0;
-
-  fp_slot_max = 16 - argn;
+	  a = (void *)*argp;
+	  break;
 
-  /* Grab the addresses of the arguments from the stack frame.  */
-  for (i = 0; i < cif->nargs; i++)
-    {
-      if (arg_types[i]->type == FFI_TYPE_STRUCT)
-	{
-	  if (arg_types[i]->size > 16)
-	    {
-	      /* Straight copy of invisible reference.  */
-	      avalue[i] = (void *)gpr[argn++];
-	    }
-	  else
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	  if ((unsigned long)a & 7)
 	    {
-	      /* Left-justify.  */
-	      ffi_v9_layout_struct(arg_types[i],
-				   0,
-				   (char *) &gpr[argn],
-				   (char *) &gpr[argn],
-				   (char *) &fpr[argn]);
-	      avalue[i] = &gpr[argn];
-	      argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
+	      /* Align on a 8-byte boundary.  */
+	      UINT64 *tmp = alloca(8);
+	      *tmp = ((UINT64)argp[0] << 32) | argp[1];
+	      a = tmp;
 	    }
+	  argp++;
+	  break;
+
+	case FFI_TYPE_INT:
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_POINTER:
+	  break;
+        case FFI_TYPE_UINT16:
+        case FFI_TYPE_SINT16:
+	  a += 2;
+	  break;
+        case FFI_TYPE_UINT8:
+        case FFI_TYPE_SINT8:
+	  a += 3;
+	  break;
+
+	default:
+	  abort();
 	}
-      else
-	{
-	  /* Right-justify.  */
-	  argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
-
-	  /* Align on a 16-byte boundary.  */
-	  if (arg_types[i]->type == FFI_TYPE_LONGDOUBLE && (argn % 2) != 0)
-	    argn++;
-	  if (i < fp_slot_max
-	      && (arg_types[i]->type == FFI_TYPE_FLOAT
-		  || arg_types[i]->type == FFI_TYPE_DOUBLE
-		  || arg_types[i]->type == FFI_TYPE_LONGDOUBLE))
-	    avalue[i] = ((char *) &fpr[argn]) - arg_types[i]->size;
-	  else
-	    avalue[i] = ((char *) &gpr[argn]) - arg_types[i]->size;
-	}
+      argp++;
+      avalue[i] = a;
     }
 
   /* Invoke the closure.  */
   (closure->fun) (cif, rvalue, avalue, closure->user_data);
 
   /* Tell ffi_closure_sparc how to perform return type promotions.  */
-  return cif->rtype->type;
+  return flags;
 }
+#endif /* !SPARC64 */
diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c
new file mode 100644
index 0000000..7ed928d
--- /dev/null
+++ b/src/sparc/ffi64.c
@@ -0,0 +1,433 @@
+/* -----------------------------------------------------------------------
+   ffi64.c - Copyright (c) 2011, 2013 Anthony Green
+           Copyright (c) 1996, 2003-2004, 2007-2008 Red Hat, Inc.
+
+   SPARC Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include "internal.h"
+
+/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
+   all further uses in this file will refer to the 128-bit type.  */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
+#endif
+
+#ifdef SPARC64
+/* Perform machine dependent cif processing */
+
+int FFI_HIDDEN
+ffi_v9_layout_struct (ffi_type *arg, int off, void *d, void *si, void *sf)
+{
+  ffi_type **elts, *t;
+
+  for (elts = arg->elements; (t = *elts) != NULL; elts++)
+    {
+      size_t z = t->size;
+      void *src = si;
+
+      off = ALIGN(off, t->alignment);
+      switch (t->type)
+	{
+	case FFI_TYPE_STRUCT:
+	  off = ffi_v9_layout_struct(t, off, d, si, sf);
+	  off = ALIGN(off, FFI_SIZEOF_ARG);
+	  continue;
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	  /* Note that closures start with the argument offset,
+	     so that we know when to stop looking at fp regs.  */
+	  if (off < 128)
+	    src = sf;
+	  break;
+	}
+      memmove(d + off, src + off, z);	/* D may alias SRC (closures).  */
+      off += z;
+    }
+
+  return off;
+}
+
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  ffi_type *rtype = cif->rtype;
+  int rtt = rtype->type;
+  size_t bytes = 0;
+  int i, n, flags;
+
+  /* Set the return type flag */
+  switch (rtt)
+    {
+    case FFI_TYPE_VOID:
+      flags = SPARC_RET_VOID;
+      break;
+    case FFI_TYPE_FLOAT:
+      flags = SPARC_RET_FLOAT;
+      break;
+    case FFI_TYPE_DOUBLE:
+      flags = SPARC_RET_DOUBLE;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      flags = SPARC_RET_LDOUBLE;
+      break;
+
+    case FFI_TYPE_STRUCT:
+      if (rtype->size > 32)
+	{
+	  flags = SPARC_RET_VOID | SPARC_FLAG_RET_IN_MEM;
+	  bytes = 8;
+	}
+      else
+	flags = SPARC_RET_STRUCT;
+      break;
+
+    case FFI_TYPE_SINT8:
+      flags = SPARC_RET_SINT8;
+      break;
+    case FFI_TYPE_UINT8:
+      flags = SPARC_RET_UINT8;
+      break;
+    case FFI_TYPE_SINT16:
+      flags = SPARC_RET_SINT16;
+      break;
+    case FFI_TYPE_UINT16:
+      flags = SPARC_RET_UINT16;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      flags = SPARC_RET_SINT32;
+      break;
+    case FFI_TYPE_UINT32:
+      flags = SPARC_RET_UINT32;
+      break;
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_POINTER:
+      flags = SPARC_RET_INT64;
+      break;
+
+    default:
+      abort();
+    }
+
+  /* BYTES already holds the struct-return pointer slot, if any.  */
+  for (i = 0, n = cif->nargs; i < n; ++i)
+    {
+      ffi_type *ty = cif->arg_types[i];
+      size_t z = ty->size;
+      size_t a = ty->alignment;
+
+      switch (ty->type)
+	{
+	case FFI_TYPE_STRUCT:
+	  /* Large structs passed by reference.  */
+	  if (z > 16)
+	    {
+	      a = z = 8;
+	      break;
+	    }
+	  /* ??? FALLTHRU -- check for fp members in the struct.  */
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	  flags |= SPARC_FLAG_FP_ARGS;
+	  break;
+	}
+      bytes = ALIGN(bytes, a);
+      bytes += ALIGN(z, 8);
+    }
+
+  /* Sparc call frames require that space is allocated for 6 args,
+     even if they aren't used. Make that space if necessary. */
+  if (bytes < 6 * 8)
+    bytes = 6 * 8;
+
+  /* The stack must be 2 word aligned, so round bytes up appropriately. */
+  bytes = ALIGN(bytes, 16);
+
+  /* Include the call frame to prep_args.  */
+  bytes += 8*16 + 8*8;
+
+  cif->bytes = bytes;
+  cif->flags = flags;
+  return FFI_OK;
+}
+
+extern void ffi_call_v9(ffi_cif *cif, void (*fn)(void), void *rvalue,
+			void **avalue, size_t bytes) FFI_HIDDEN;
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments */
+
+int FFI_HIDDEN
+ffi_prep_args_v9(ffi_cif *cif, unsigned long *argp, void *rvalue, void **avalue)
+{
+  ffi_type **p_arg;
+  int flags = cif->flags;
+  int i, nargs;
+
+  if (rvalue == NULL)
+    {
+      if (flags & SPARC_FLAG_RET_IN_MEM)
+	{
+	  /* Since we pass the pointer to the callee, we need a value.
+	     We allowed for this space in ffi_call, before ffi_call_v9
+	     alloca'd the space.  */
+	  rvalue = (char *)argp + cif->bytes;
+	}
+      else
+	{
+	  /* Ignore the return value, but still report FP arguments.  */
+	  flags = SPARC_RET_VOID | (flags & SPARC_FLAG_FP_ARGS);
+	}
+    }
+
+#ifdef USING_PURIFY
+  /* Purify will probably complain in our assembly routine,
+     unless we zero out this memory. */
+  memset(argp, 0, 6*8);
+#endif
+
+  if (flags & SPARC_FLAG_RET_IN_MEM)
+    *argp++ = (unsigned long)rvalue;
+
+  p_arg = cif->arg_types;
+  for (i = 0, nargs = cif->nargs; i < nargs; i++)
+    {
+      ffi_type *ty = p_arg[i];
+      void *a = avalue[i];
+      size_t z;
+
+      switch (ty->type)
+	{
+	case FFI_TYPE_SINT8:
+	  *argp++ = *(SINT8 *)a;
+	  break;
+	case FFI_TYPE_UINT8:
+	  *argp++ = *(UINT8 *)a;
+	  break;
+	case FFI_TYPE_SINT16:
+	  *argp++ = *(SINT16 *)a;
+	  break;
+	case FFI_TYPE_UINT16:
+	  *argp++ = *(UINT16 *)a;
+	  break;
+	case FFI_TYPE_INT:
+	case FFI_TYPE_SINT32:
+	  *argp++ = *(SINT32 *)a;
+	  break;
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_FLOAT:
+	  *argp++ = *(UINT32 *)a;
+	  break;
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_POINTER:
+	case FFI_TYPE_DOUBLE:
+	  *argp++ = *(UINT64 *)a;
+	  break;
+
+	case FFI_TYPE_LONGDOUBLE:
+	case FFI_TYPE_STRUCT:
+	  z = ty->size;
+	  if (z > 16)
+	    {
+	      /* For structures larger than 16 bytes we pass reference.  */
+	      *argp++ = (unsigned long)a;
+	      break;
+	    }
+	  if (((unsigned long)argp & 15) && ty->alignment > 8)
+	    argp++;
+	  memcpy(argp, a, z);
+	  argp += ALIGN(z, 8) / 8;
+	  break;
+
+	default:
+	  abort();
+	}
+    }
+
+  return flags;
+}
+
+void
+ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  size_t bytes = cif->bytes;
+
+  FFI_ASSERT (cif->abi == FFI_V9);
+
+  if (rvalue == NULL && (cif->flags & SPARC_FLAG_RET_IN_MEM))
+    bytes += ALIGN (cif->rtype->size, 16);
+
+  ffi_call_v9(cif, fn, rvalue, avalue, -bytes);
+}
+
+#ifdef __GNUC__
+static inline void
+ffi_flush_icache (void *p)
+{
+  asm volatile ("flush	%0; flush %0+8" : : "r" (p) : "memory");
+}
+#else
+extern void ffi_flush_icache (void *) FFI_HIDDEN;
+#endif
+
+extern void ffi_closure_v9(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  unsigned int *tramp = (unsigned int *) &closure->tramp[0];
+  unsigned long fn;
+
+  if (cif->abi != FFI_V9)
+    return FFI_BAD_ABI;
+
+  /* Trampoline address is equal to the closure address.  We take advantage
+     of that to reduce the trampoline size by 8 bytes. */
+  fn = (unsigned long) ffi_closure_v9;
+  tramp[0] = 0x83414000;	/* rd	%pc, %g1	*/
+  tramp[1] = 0xca586010;	/* ldx	[%g1+16], %g5	*/
+  tramp[2] = 0x81c14000;	/* jmp	%g5		*/
+  tramp[3] = 0x01000000;	/* nop			*/
+  *((unsigned long *) &tramp[4]) = fn;
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  ffi_flush_icache (closure);
+
+  return FFI_OK;
+}
+
+int FFI_HIDDEN
+ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
+			   unsigned long *gpr, unsigned long *fpr)
+{
+  ffi_cif *cif;
+  ffi_type **arg_types;
+  void **avalue;
+  int i, argn, nargs, flags;
+
+  cif = closure->cif;
+  arg_types = cif->arg_types;
+  nargs = cif->nargs;
+  flags = cif->flags;
+
+  avalue = alloca(nargs * sizeof(void *));
+
+  /* Copy the caller's structure return address so that the closure
+     returns the data directly to the caller.  */
+  if (flags & SPARC_FLAG_RET_IN_MEM)
+    {
+      rvalue = (void *) gpr[0];
+      /* Skip the structure return address.  */
+      argn = 1;
+    }
+  else
+    argn = 0;
+
+  /* Grab the addresses of the arguments from the stack frame.  */
+  for (i = 0; i < nargs; i++)
+    {
+      ffi_type *ty = arg_types[i];
+      void *a = &gpr[argn++];
+      size_t z;
+
+      switch (ty->type)
+	{
+	case FFI_TYPE_STRUCT:
+	  z = ty->size;
+	  if (z > 16)
+	    a = *(void **)a;
+	  else
+	    {
+	      if (--argn < 16)
+	        ffi_v9_layout_struct(arg_types[i], 8*argn, gpr, gpr, fpr);
+	      argn += ALIGN (z, 8) / 8;
+	    }
+	  break;
+
+	case FFI_TYPE_LONGDOUBLE:
+	  if (--argn & 1)
+	    argn++;
+	  a = (argn < 16 ? fpr : gpr) + argn;
+	  argn += 2;
+	  break;
+	case FFI_TYPE_DOUBLE:
+	  if (argn <= 16)
+	    a = fpr + argn - 1;
+	  break;
+	case FFI_TYPE_FLOAT:
+	  if (argn <= 16)
+	    a = fpr + argn - 1;
+	  a += 4;
+	  break;
+
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_POINTER:
+	  break;
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	  a += 4;
+	  break;
+        case FFI_TYPE_UINT16:
+        case FFI_TYPE_SINT16:
+	  a += 6;
+	  break;
+        case FFI_TYPE_UINT8:
+        case FFI_TYPE_SINT8:
+	  a += 7;
+	  break;
+
+	default:
+	  abort();
+	}
+      avalue[i] = a;
+    }
+
+  /* Invoke the closure.  */
+  (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell ffi_closure_sparc how to perform return type promotions.  */
+  return flags;
+}
+#endif /* SPARC64 */
diff --git a/src/sparc/ffitarget.h b/src/sparc/ffitarget.h
index d89f787..ff4dc0b 100644
--- a/src/sparc/ffitarget.h
+++ b/src/sparc/ffitarget.h
@@ -46,18 +46,19 @@ typedef signed long            ffi_sarg;
 
 typedef enum ffi_abi {
   FFI_FIRST_ABI = 0,
-  FFI_V8,
-  FFI_V8PLUS,
-  FFI_V9,
-  FFI_LAST_ABI,
 #ifdef SPARC64
-  FFI_DEFAULT_ABI = FFI_V9
+  FFI_V9,
+  FFI_DEFAULT_ABI = FFI_V9,
 #else
-  FFI_DEFAULT_ABI = FFI_V8
+  FFI_V8,
+  FFI_DEFAULT_ABI = FFI_V8,
 #endif
+  FFI_LAST_ABI
 } ffi_abi;
 #endif
 
+#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
+
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
diff --git a/src/sparc/internal.h b/src/sparc/internal.h
new file mode 100644
index 0000000..df7c305
--- /dev/null
+++ b/src/sparc/internal.h
@@ -0,0 +1,18 @@
+#define SPARC_RET_VOID		0
+#define SPARC_RET_STRUCT	1
+#define SPARC_RET_FLOAT		2
+#define SPARC_RET_DOUBLE	3
+#define SPARC_RET_UINT8		4
+#define SPARC_RET_SINT8		5
+#define SPARC_RET_UINT16	6
+#define SPARC_RET_SINT16	7
+#define SPARC_RET_INT64		8
+#define SPARC_RET_UINT32	9
+
+/* These two are only used for V9.  */
+#define SPARC_RET_SINT32	10
+#define SPARC_RET_LDOUBLE	11
+
+#define SPARC_FLAG_RET_MASK	15	/* bits of flags holding the SPARC_RET_* code */
+#define SPARC_FLAG_RET_IN_MEM	32	/* result is written through a caller-supplied address */
+#define SPARC_FLAG_FP_ARGS	64	/* fp argument registers must be loaded before the call */
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index 10c66ba..b0d50a3 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -29,9 +29,9 @@
 #include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_cfi.h>
+#include "internal.h"
 
-#define STACKFRAME 96		/* Minimum stack framesize for SPARC */
-#define ARGS (64+4)		/* Offset of register area in frame */
+#ifndef SPARC64
 
 #define C2(X, Y)  X ## Y
 #define C1(X, Y)  C2(X, Y)
@@ -53,26 +53,24 @@
 
 C(ffi_flush_icache):
 	cfi_startproc
-        add %o0, %o1, %o2
-#ifdef SPARC64	
-1:	flush %o0
-#else	
 1:	iflush %o0
-#endif
-	add %o0, 8, %o0
-	cmp %o0, %o2
-	blt 1b
+	iflush %o0+8		! also flush the following doubleword
 	nop
 	nop
 	nop
 	nop
 	nop
 	retl
-	nop
+	 nop
 	cfi_endproc
 	.size	C(ffi_flush_icache), . - C(ffi_flush_icache)
 #endif
 
+.macro E index
+	.align	16
+	.org	2b + \index * 16
+.endm
+
         .align 8
 	.globl	C(ffi_call_v8)
 	.type	C(ffi_call_v8),@function
@@ -80,104 +78,104 @@ C(ffi_flush_icache):
 	
 C(ffi_call_v8):
 	cfi_startproc
-	save	%sp, -STACKFRAME, %sp
+	! Allocate a stack frame sized by ffi_call.
+	save	%sp, %o4, %sp
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
-	
-	sub	%sp, %i2, %sp	! alloca() space in stack for frame to set up
-	add	%sp, STACKFRAME, %l0	! %l0 has start of 
-					! frame to set up
-
-	mov	%l0, %o0	! call routine to set up frame
-	call	%i0
-	mov	%i1, %o1	! (delay)
-
-	ld	[%l0+ARGS], %o0	! call foreign function
-	ld	[%l0+ARGS+4], %o1
-	ld	[%l0+ARGS+8], %o2
-	ld	[%l0+ARGS+12], %o3
-	ld	[%l0+ARGS+16], %o4
-	ld	[%l0+ARGS+20], %o5
-	call	%i5
-	mov	%l0, %sp	! (delay) switch to frame
-	nop			! STRUCT returning functions skip 12 instead of 8 bytes
-
-	! If the return value pointer is NULL, assume no return value.
-	tst	%i4
-	bz	L(done)
-	nop
-
-	cmp	%i3, FFI_TYPE_INT
-	be,a	L(done)
-	st	%o0, [%i4]	! (delay)
-
-	cmp	%i3, FFI_TYPE_FLOAT
-	be,a	L(done)
-	st	%f0, [%i4+0]	! (delay)
 
-	cmp	%i3, FFI_TYPE_DOUBLE
-	be,a	L(double)
-	st	%f0, [%i4+0]	! (delay)
-
-	cmp	%i3, FFI_TYPE_SINT8
-	be,a	L(sint8)
-	sll	%o0, 24, %o0	! (delay)
-
-	cmp	%i3, FFI_TYPE_UINT8
-	be,a	L(uint8)
-	sll	%o0, 24, %o0	! (delay)
-
-	cmp	%i3, FFI_TYPE_SINT16
-	be,a	L(sint16)
-	sll	%o0, 16, %o0	! (delay)
-
-	cmp	%i3, FFI_TYPE_UINT16
-	be,a	L(uint16)
-	sll	%o0, 16, %o0	! (delay)
-
-	cmp	%i3, FFI_TYPE_SINT64
-	be,a	L(longlong)
-	st	%o0, [%i4+0]	! (delay)
-
-L(done):
+	mov	%i0, %o0		! copy cif
+	add	%sp, 64+32, %o1		! load args area
+	mov	%i2, %o2		! copy rvalue
+	call	C(ffi_prep_args_v8)
+	 mov	%i3, %o3		! copy avalue
+
+	add	%sp, 32, %sp		! deallocate prep frame
+	and	%o0, SPARC_FLAG_RET_MASK, %l0	! save return type
+	ld	[%sp+64+4], %o0		! load all argument registers
+	ld	[%sp+64+8], %o1
+	ld	[%sp+64+12], %o2
+	ld	[%sp+64+16], %o3
+	cmp	%l0, SPARC_RET_STRUCT	! struct return needs an unimp 4
+	ld	[%sp+64+20], %o4
+	be	8f
+	 ld	[%sp+64+24], %o5
+
+	! Call foreign function
+	call	%i1
+	 nop
+
+0:	call	1f		! load pc in %o7
+	 sll	%l0, 4, %l0
+1:	add	%o7, %l0, %o7	! o7 = 0b + ret_type*16
+	jmp	%o7+(2f-0b)
+	 nop
+
+	! Note that each entry is 4 insns, enforced by the E macro.
+	.align	16
+2:
+E SPARC_RET_VOID
 	ret
-	restore
-
-L(double):
-	st	%f1, [%i4+4]
+	 restore
+E SPARC_RET_STRUCT
+	unimp
+E SPARC_RET_FLOAT
+	st	%f0, [%i2]
 	ret
-	restore
-
-L(sint8):
-	sra	%o0, 24, %o0
-	st	%o0, [%i4+0]
+	 restore
+E SPARC_RET_DOUBLE
+	std	%f0, [%i2]
 	ret
-	restore
-
-L(uint8):
-	srl	%o0, 24, %o0
-	st	%o0, [%i4+0]
+	 restore
+	nop
+E SPARC_RET_UINT8
+	and	%o0, 0xff, %o0
+	st	%o0, [%i2]
 	ret
-	restore
-
-L(sint16):
-	sra	%o0, 16, %o0
-	st	%o0, [%i4+0]
+	 restore
+E SPARC_RET_SINT8
+	sll	%o0, 24, %o0
+	b	7f
+	 sra	%o0, 24, %o0
+E SPARC_RET_UINT16
+	sll	%o0, 16, %o0
+	b	7f
+	 srl	%o0, 16, %o0
+E SPARC_RET_SINT16
+	sll	%o0, 16, %o0
+	b	7f
+	 sra	%o0, 16, %o0
+E SPARC_RET_INT64
+	std	%o0, [%i2]
 	ret
-	restore
-
-L(uint16):
-	srl	%o0, 16, %o0
-	st	%o0, [%i4+0]
+	 restore
+E SPARC_RET_UINT32
+7:	st	%o0, [%i2]
 	ret
-	restore
+	 restore
 
-L(longlong):
-	st	%o1, [%i4+4]
+	! Unused entries.  Don't allow bad data to do worse things.
+E 10
+	unimp
+E 11
+	unimp
+E 12
+	unimp
+E 13
+	unimp
+E 14
+	unimp
+E 15
+	unimp
+
+	! Struct returning functions expect and skip the unimp here.
+	.align	8
+8:	call	%i1
+	 nop
+	unimp	4
 	ret
-	restore
-	cfi_endproc
+	 restore
 
+	cfi_endproc
 	.size	C(ffi_call_v8),. - C(ffi_call_v8)
 
 
@@ -185,7 +183,8 @@ L(longlong):
 #define	STACKFRAME	104	/* 16*4 register window +
 				   1*4 struct return +	
 				   6*4 args backing store +
-				   3*4 locals */
+				   2*4 return storage +
+				   1*4 alignment */
 
 /* ffi_closure_v8(...)
 
@@ -201,15 +200,7 @@ C(ffi_closure_v8):
 		.register	%g2, #scratch
 #endif
 	cfi_startproc
-	! Reserve frame space for all arguments in case
-	! we need to align them on a 8-byte boundary.
-	ld	[%g2+FFI_TRAMPOLINE_SIZE], %g1
-	ld	[%g1+4], %g1
-	sll	%g1, 3, %g1
-	add	%g1, STACKFRAME, %g1
-	! %g1 == STACKFRAME + 8*nargs
-	neg	%g1
-	save	%sp, %g1, %sp
+	save	%sp, -STACKFRAME, %sp
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
 
@@ -224,55 +215,75 @@ C(ffi_closure_v8):
 	! Call ffi_closure_sparc_inner to do the bulk of the work.
 	mov	%g2, %o0
 	add	%fp, -8, %o1
-	add	%fp,  64, %o2
 	call	ffi_closure_sparc_inner_v8
-	 add	%fp, -16, %o3
-
-	! Load up the return value in the proper type.
-	! See ffi_prep_cif_machdep for the list of cases.
-	cmp	%o0, FFI_TYPE_VOID
-	be	L(done1)
-
-	cmp	%o0, FFI_TYPE_INT
-	be	L(done1)
-	 ld	[%fp-8], %i0
-
-	cmp	%o0, FFI_TYPE_FLOAT
-	be,a	L(done1)
-	 ld	[%fp-8], %f0
-
-	cmp	%o0, FFI_TYPE_DOUBLE
-	be,a	L(done1)
-	 ldd	[%fp-8], %f0
-
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	cmp	%o0, FFI_TYPE_LONGDOUBLE
-	be	L(done2)
-#endif
-
-	cmp	%o0, FFI_TYPE_STRUCT
-	be	L(done2)
-
-	cmp	%o0, FFI_TYPE_SINT64
-	be,a	L(done1)
-	 ldd	[%fp-8], %i0
-
-	cmp	%o0, FFI_TYPE_UINT64
-	be,a	L(done1)
-	 ldd	[%fp-8], %i0
-
-	ld	[%fp-8], %i0
-L(done1):
-	jmp	%i7+8
+	 add	%fp,  64, %o2
+
+0:	call	1f
+	 and	%o0, SPARC_FLAG_RET_MASK, %o0
+1:	sll	%o0, 4, %o0	! o0 = o0 * 16
+	add	%o7, %o0, %o7	! o7 = 0b + o0*16
+	jmp	%o7+(2f-0b)
+	 nop
+
+	! Note that each entry is 4 insns, enforced by the E macro.
+	.align	16
+2:
+E SPARC_RET_VOID
+	ret
 	 restore
-L(done2):
-	! Skip 'unimp'.
+E SPARC_RET_STRUCT
 	jmp	%i7+12
 	 restore
+E SPARC_RET_FLOAT
+	ld	[%fp-8], %f0
+	ret
+	 restore
+E SPARC_RET_DOUBLE
+	ldd	[%fp-8], %f0
+	ret
+	 restore
+E SPARC_RET_UINT8
+	ldub	[%fp-8+3], %i0
+	ret
+	 restore
+E SPARC_RET_SINT8
+	ldsb	[%fp-8+3], %i0
+	ret
+	 restore
+E SPARC_RET_UINT16
+	lduh	[%fp-8+2], %i0
+	ret
+	 restore
+E SPARC_RET_SINT16
+	ldsh	[%fp-8+2], %i0
+	ret
+	 restore
+E SPARC_RET_INT64
+	ldd	[%fp-8], %i0
+	ret
+	 restore
+E SPARC_RET_UINT32
+	ld	[%fp-8], %i0
+	ret
+	 restore
+
+	! Unused entries.  Don't allow bad data to do worse things.
+E 10
+	unimp
+E 11
+	unimp
+E 12
+	unimp
+E 13
+	unimp
+E 14
+	unimp
+E 15
+	unimp
 
 	cfi_endproc
 	.size	C(ffi_closure_v8), . - C(ffi_closure_v8)
-
+#endif /* !SPARC64 */
 #if defined __ELF__ && defined __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index aba468e..e2fe036 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -28,10 +28,9 @@
 #include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_cfi.h>
+#include "internal.h"
 
 #ifdef SPARC64
-/* Only compile this in for 64bit builds, because otherwise the object file
-   will have inproper architecture due to used instructions.  */
 
 #define C2(X, Y)  X ## Y
 #define C1(X, Y)  C2(X, Y)
@@ -43,12 +42,14 @@
 #endif
 #define L(Y)	C1(.L, Y)
 
+.macro E index
+	.align	16
+	.org	2b + \index * 16
+.endm
 
-#define STACKFRAME 176		/* Minimum stack framesize for SPARC 64-bit */
 #define STACK_BIAS 2047
-#define ARGS (128)		/* Offset of register area in frame */
 
-.text
+	.text
         .align 8
 	.globl	C(ffi_call_v9)
 	.type	C(ffi_call_v9),@function
@@ -56,86 +57,135 @@
 
 C(ffi_call_v9):
 	cfi_startproc
-	save	%sp, -STACKFRAME, %sp
+	save	%sp, %o4, %sp
 	cfi_def_cfa_register(%fp)
 	cfi_window_save
 	
-	sub	%sp, %i2, %sp	! alloca() space in stack for frame to set up
-	add	%sp, STACKFRAME+STACK_BIAS, %l0	! %l0 has start of 
-						! frame to set up
-
-	mov	%l0, %o0	! call routine to set up frame
-	call	%i0
-	 mov	%i1, %o1	! (delay)
-	brz,pt	%o0, 1f
-	 ldx	[%l0+ARGS], %o0	! call foreign function
-
-	ldd	[%l0+ARGS], %f0
-	ldd	[%l0+ARGS+8], %f2
-	ldd	[%l0+ARGS+16], %f4
-	ldd	[%l0+ARGS+24], %f6
-	ldd	[%l0+ARGS+32], %f8
-	ldd	[%l0+ARGS+40], %f10
-	ldd	[%l0+ARGS+48], %f12
-	ldd	[%l0+ARGS+56], %f14
-	ldd	[%l0+ARGS+64], %f16
-	ldd	[%l0+ARGS+72], %f18
-	ldd	[%l0+ARGS+80], %f20
-	ldd	[%l0+ARGS+88], %f22
-	ldd	[%l0+ARGS+96], %f24
-	ldd	[%l0+ARGS+104], %f26
-	ldd	[%l0+ARGS+112], %f28
-	ldd	[%l0+ARGS+120], %f30
-
-1:	ldx	[%l0+ARGS+8], %o1
-	ldx	[%l0+ARGS+16], %o2
-	ldx	[%l0+ARGS+24], %o3
-	ldx	[%l0+ARGS+32], %o4
-	ldx	[%l0+ARGS+40], %o5
-	call	%i5
-	 sub	%l0, STACK_BIAS, %sp	! (delay) switch to frame
-
-	! If the return value pointer is NULL, assume no return value.
-	brz,pn	%i4, L(done)
-	 nop
-
-	cmp	%i3, FFI_TYPE_INT
-	be,a,pt	%icc, L(done)
-	 stx	%o0, [%i4+0]	! (delay)
-
-	cmp	%i3, FFI_TYPE_FLOAT
-	be,a,pn	%icc, L(done)
-	 st	%f0, [%i4+0]	! (delay)
-
-	cmp	%i3, FFI_TYPE_DOUBLE
-	be,a,pn	%icc, L(done)
-	 std	%f0, [%i4+0]	! (delay)
-
-	cmp	%i3, FFI_TYPE_STRUCT
-	be,pn	%icc, L(dostruct)
-
-	cmp	%i3, FFI_TYPE_LONGDOUBLE
-	bne,pt	%icc, L(done)
-	 nop
-	std	%f0, [%i4+0]
-	std	%f2, [%i4+8]
-
-L(done):
-	ret
-	 restore
-
-L(dostruct):
-	/* This will not work correctly for unions. */
-	stx	%o0, [%i4+0]
-	stx	%o1, [%i4+8]
-	stx	%o2, [%i4+16]
-	stx	%o3, [%i4+24]
-	std	%f0, [%i4+32]
-	std	%f2, [%i4+40]
-	std	%f4, [%i4+48]
-	std	%f6, [%i4+56]
-	ret
-	 restore
+	mov	%i0, %o0			! copy cif
+	add	%sp, STACK_BIAS+128+48, %o1	! load args area
+	mov	%i2, %o2			! copy rvalue
+	call	C(ffi_prep_args_v9)
+	 mov	%i3, %o3			! copy avalue
+
+	andcc	%o0, SPARC_FLAG_FP_ARGS, %g0	! need fp regs?
+	add	%sp, 48, %sp			! deallocate prep frame
+	be,pt	%xcc, 1f
+	 mov	%o0, %l0			! save flags
+
+	ldd	[%sp+STACK_BIAS+128], %f0	! load all fp arg regs
+	ldd	[%sp+STACK_BIAS+128+8], %f2
+	ldd	[%sp+STACK_BIAS+128+16], %f4
+	ldd	[%sp+STACK_BIAS+128+24], %f6
+	ldd	[%sp+STACK_BIAS+128+32], %f8
+	ldd	[%sp+STACK_BIAS+128+40], %f10
+	ldd	[%sp+STACK_BIAS+128+48], %f12
+	ldd	[%sp+STACK_BIAS+128+56], %f14
+	ldd	[%sp+STACK_BIAS+128+64], %f16
+	ldd	[%sp+STACK_BIAS+128+72], %f18
+	ldd	[%sp+STACK_BIAS+128+80], %f20
+	ldd	[%sp+STACK_BIAS+128+88], %f22
+	ldd	[%sp+STACK_BIAS+128+96], %f24
+	ldd	[%sp+STACK_BIAS+128+104], %f26
+	ldd	[%sp+STACK_BIAS+128+112], %f28
+	ldd	[%sp+STACK_BIAS+128+120], %f30
+
+1:	ldx	[%sp+STACK_BIAS+128], %o0	! load all int arg regs
+	ldx	[%sp+STACK_BIAS+128+8], %o1
+	ldx	[%sp+STACK_BIAS+128+16], %o2
+	ldx	[%sp+STACK_BIAS+128+24], %o3
+	ldx	[%sp+STACK_BIAS+128+32], %o4
+	call	%i1
+	 ldx	[%sp+STACK_BIAS+128+40], %o5
+
+0:	call	1f		! load pc in %o7
+	 and	%l0, SPARC_FLAG_RET_MASK, %l1
+1:	sll	%l1, 4, %l1
+	add	%o7, %l1, %o7	! o7 = 0b + ret_type*16
+	jmp	%o7+(2f-0b)
+	 nop
+
+	.align	16
+2:
+E SPARC_RET_VOID
+	return	%i7+8
+	 nop
+E SPARC_RET_STRUCT
+	add	%sp, STACK_BIAS-64+128+48, %l2
+	sub	%sp, 64, %sp
+	b	8f
+	 stx	%o0, [%l2]
+E SPARC_RET_FLOAT
+	return	%i7+8
+	 st	%f0, [%o2]
+E SPARC_RET_DOUBLE
+	return	%i7+8
+	 std	%f0, [%o2]
+E SPARC_RET_UINT8
+	and	%o0, 0xff, %i0
+	return	%i7+8
+	  stx	%o0, [%o2]
+E SPARC_RET_SINT8
+	sll	%o0, 24, %o0
+	sra	%o0, 24, %i0
+	return	%i7+8
+	 stx	%o0, [%o2]
+E SPARC_RET_UINT16
+	sll	%o0, 16, %o0
+	srl	%o0, 16, %i0
+	return	%i7+8
+	 stx	%o0, [%o2]
+E SPARC_RET_SINT16
+	sll	%o0, 16, %o0
+	sra	%o0, 16, %i0
+	return	%i7+8
+	 stx	%o0, [%o2]
+E SPARC_RET_INT64
+	stx	%o0, [%i2]
+	return	%i7+8
+	 nop
+E SPARC_RET_UINT32
+	srl	%o0, 0, %i0
+	return	%i7+8
+	 stx	%o0, [%o2]
+E SPARC_RET_SINT32
+	sra	%o0, 0, %i0
+	return	%i7+8
+	 stx	%o0, [%o2]
+E SPARC_RET_LDOUBLE
+	std	%f0, [%i2]
+	return	%i7+8
+	 std	%f2, [%o2+8]
+
+	! Unused entries.  Don't allow bad data to do worse things.
+E 12
+	unimp
+E 13
+	unimp
+E 14
+	unimp
+E 15
+	unimp
+
+	! Finish the SPARC_RET_STRUCT sequence.
+	.align	8
+8:	stx	%o1, [%l2+8]
+	stx	%o2, [%l2+16]
+	stx	%o3, [%l2+24]
+	std	%f0, [%l2+32]
+	std	%f2, [%l2+40]
+	std	%f4, [%l2+48]
+	std	%f6, [%l2+56]
+
+	! Copy the structure into place.
+	ldx	[%i0+16], %o0			! load rtype from cif
+	mov	0, %o1				! load off
+	mov	%i2, %o2			! load dst
+	mov	%l2, %o3			! load src_int
+	call	C(ffi_v9_layout_struct)
+	 add	%l2, 32, %o4			! load src_fp
+
+	return	%i7+8
+	 nop
 
 	cfi_endproc
 	.size	C(ffi_call_v9), . - C(ffi_call_v9)
@@ -195,54 +245,90 @@ C(ffi_closure_v9):
 	call	C(ffi_closure_sparc_inner_v9)
 	 add	%fp, STACK_BIAS-128, %o3
 
-	! Load up the return value in the proper type.
-	! See ffi_prep_cif_machdep for the list of cases.
-	cmp	%o0, FFI_TYPE_VOID
-	be,pn	%icc, L(done1)
-
-	cmp	%o0, FFI_TYPE_INT
-	be,pn	%icc, L(integer)
-
-	cmp	%o0, FFI_TYPE_FLOAT
-	be,a,pn	%icc, L(done1)
-	 ld	[FP-160], %f0
-
-	cmp	%o0, FFI_TYPE_DOUBLE
-	be,a,pn	%icc, L(done1)
-	 ldd	[FP-160], %f0
+0:	call	1f		! load pc in %o7
+	 and	%o0, SPARC_FLAG_RET_MASK, %o0
+1:	sll	%o0, 4, %o0	! o0 = o0 * 16
+	add	%o7, %o0, %o7	! o7 = 0b + o0*16
+	jmp	%o7+(2f-0b)
+	 nop
 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-	cmp	%o0, FFI_TYPE_LONGDOUBLE
-	be,a,pn	%icc, L(longdouble1)
-	 ldd	[FP-160], %f0
-#endif
+	! Note that we cannot load the data in the delay slot of
+	! the return insn because the data is in the stack frame
+	! that is deallocated by the return.
+	.align	16
+2:
+E SPARC_RET_VOID
+	return	%i7+8
+	 nop
+E SPARC_RET_STRUCT
+	ldx	[FP-160], %i0
+	ldd	[FP-160], %f0
+	b	8f
+	 ldx	[FP-152], %i1
+E SPARC_RET_FLOAT
+	ld	[FP-160], %f0
+	return	%i7+8
+	 nop
+E SPARC_RET_DOUBLE
+	ldd	[FP-160], %f0
+	return	%i7+8
+	 nop
+E SPARC_RET_UINT8
+	ldub	[FP-160+7], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_SINT8
+	ldsb	[FP-160+7], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_UINT16
+	lduh	[FP-160+6], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_SINT16
+	ldsh	[FP-160+6], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_INT64
+	ldx	[FP-160], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_UINT32
+	lduw	[FP-160+4], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_SINT32
+	ldsw	[FP-160+4], %i0
+	return	%i7+8
+	 nop
+E SPARC_RET_LDOUBLE
+	ldd	[FP-160], %f0
+	ldd	[FP-160+8], %f2
+	return	%i7+8
+	 nop
 
-	! FFI_TYPE_STRUCT
-	ldx	[FP-152], %i1
+	! Unused entries.  Don't allow bad data to do worse things.
+E 12
+	unimp
+E 13
+	unimp
+E 14
+	unimp
+E 15
+	unimp
+
+	! Finish the SPARC_RET_STRUCT sequence.
+	.align	8
+8:	ldd	[FP-152], %f2
 	ldx	[FP-144], %i2
-	ldx	[FP-136], %i3
-	ldd	[FP-160], %f0
-	ldd	[FP-152], %f2
 	ldd	[FP-144], %f4
+	ldx	[FP-136], %i3
 	ldd	[FP-136], %f6
-
-L(integer):
-	ldx	[FP-160], %i0
-
-L(done1):
-	ret
-	 restore
-
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-L(longdouble1):
-	ldd	[FP-152], %f2
-	ret
-	 restore
-#endif
+	return	%i7+8
+	 nop
 
 	cfi_endproc
 	.size	C(ffi_closure_v9), . - C(ffi_closure_v9)
-
 #endif /* SPARC64 */
 #ifdef __linux__
 	.section	.note.GNU-stack,"",@progbits
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/8] sparc: Tidy up symbols
  2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
  2014-10-28 19:46 ` [PATCH 8/8] sparc: Re-add abi compliant structure support Richard Henderson
  2014-10-28 19:46 ` [PATCH 3/8] sparc: Rewrite everything Richard Henderson
@ 2014-10-28 19:46 ` Richard Henderson
  2014-10-28 19:46 ` [PATCH 1/8] sparc: Eliminate long double ifdefs Richard Henderson
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2014-10-28 19:46 UTC (permalink / raw)
  To: libffi-discuss; +Cc: davem

Convert the assembly to use local labels, .type annotations, and hidden
visibility annotations.  I do retain the _prefix for the symbols, but given
that it wasn't done consistently across all symbols, I doubt it's actually needed.
---
 src/sparc/ffi.c |  34 +++++-----
 src/sparc/v8.S  | 198 +++++++++++++++++++-------------------------------------
 src/sparc/v9.S  | 156 ++++++++++++++------------------------------
 3 files changed, 133 insertions(+), 255 deletions(-)

diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index c2a0d14..1f38f54 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -44,7 +44,8 @@
 /* ffi_prep_args is called by the assembly routine once stack space
    has been allocated for the function's arguments */
 
-void ffi_prep_args_v8(char *stack, extended_cif *ecif)
+void FFI_HIDDEN
+ffi_prep_args_v8(char *stack, extended_cif *ecif)
 {
   int i;
   void **p_argv;
@@ -127,7 +128,8 @@ void ffi_prep_args_v8(char *stack, extended_cif *ecif)
   return;
 }
 
-int ffi_prep_args_v9(char *stack, extended_cif *ecif)
+int FFI_HIDDEN
+ffi_prep_args_v9(char *stack, extended_cif *ecif)
 {
   int i, ret = 0;
   int tmp;
@@ -254,7 +256,8 @@ int ffi_prep_args_v9(char *stack, extended_cif *ecif)
 }
 
 /* Perform machine dependent cif processing */
-ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep(ffi_cif *cif)
 {
   int wordsize;
 
@@ -337,7 +340,8 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
   return FFI_OK;
 }
 
-int ffi_v9_layout_struct(ffi_type *arg, int off, char *ret, char *intg, char *flt)
+static int
+ffi_v9_layout_struct(ffi_type *arg, int off, char *ret, char *intg, char *flt)
 {
   ffi_type **ptr = &arg->elements[0];
 
@@ -371,14 +375,14 @@ int ffi_v9_layout_struct(ffi_type *arg, int off, char *ret, char *intg, char *fl
 
 #ifdef SPARC64
 extern int ffi_call_v9(void *, extended_cif *, unsigned, 
-		       unsigned, unsigned *, void (*fn)(void));
+		       unsigned, unsigned *, void (*fn)(void)) FFI_HIDDEN;
 #else
 extern int ffi_call_v8(void *, extended_cif *, unsigned, 
-		       unsigned, unsigned *, void (*fn)(void));
+		       unsigned, unsigned *, void (*fn)(void)) FFI_HIDDEN;
 #endif
 
 #ifndef __GNUC__
-void ffi_flush_icache (void *, size_t);
+void ffi_flush_icache (void *, size_t) FFI_HIDDEN;
 #endif
 
 void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
@@ -478,9 +482,9 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 
 
 #ifdef SPARC64
-extern void ffi_closure_v9(void);
+extern void ffi_closure_v9(void) FFI_HIDDEN;
 #else
-extern void ffi_closure_v8(void);
+extern void ffi_closure_v8(void) FFI_HIDDEN;
 #endif
 
 ffi_status
@@ -534,9 +538,9 @@ ffi_prep_closure_loc (ffi_closure* closure,
   return FFI_OK;
 }
 
-int
-ffi_closure_sparc_inner_v8(ffi_closure *closure,
-  void *rvalue, unsigned long *gpr, unsigned long *scratch)
+int FFI_HIDDEN
+ffi_closure_sparc_inner_v8(ffi_closure *closure, void *rvalue,
+			   unsigned long *gpr, unsigned long *scratch)
 {
   ffi_cif *cif;
   ffi_type **arg_types;
@@ -592,9 +596,9 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure,
   return cif->rtype->type;
 }
 
-int
-ffi_closure_sparc_inner_v9(ffi_closure *closure,
-  void *rvalue, unsigned long *gpr, double *fpr)
+int FFI_HIDDEN
+ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
+			   unsigned long *gpr, double *fpr)
 {
   ffi_cif *cif;
   ffi_type **arg_types;
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index 6bf7ac0..10c66ba 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -28,18 +28,31 @@
 #define LIBFFI_ASM	
 #include <fficonfig.h>
 #include <ffi.h>
+#include <ffi_cfi.h>
 
 #define STACKFRAME 96		/* Minimum stack framesize for SPARC */
 #define ARGS (64+4)		/* Offset of register area in frame */
 
-#ifndef __GNUC__	
+#define C2(X, Y)  X ## Y
+#define C1(X, Y)  C2(X, Y)
+
+#ifdef __USER_LABEL_PREFIX__
+# define C(Y)	C1(__USER_LABEL_PREFIX__, Y)
+#else
+# define C(Y)	Y
+#endif
+#define L(Y)	C1(.L, Y)
+
 	.text
+
+#ifndef __GNUC__	
         .align 8
-.globl ffi_flush_icache
-.globl _ffi_flush_icache
+	.globl	C(ffi_flush_icache)
+	.type	C(ffi_flush_icache),@function
+	FFI_HIDDEN(C(ffi_flush_icache))
 
-ffi_flush_icache:
-_ffi_flush_icache:	
+C(ffi_flush_icache):
+	cfi_startproc
         add %o0, %o1, %o2
 #ifdef SPARC64	
 1:	flush %o0
@@ -56,20 +69,20 @@ _ffi_flush_icache:
 	nop
 	retl
 	nop
-.ffi_flush_icache_end:
-	.size	ffi_flush_icache,.ffi_flush_icache_end-ffi_flush_icache
+	cfi_endproc
+	.size	C(ffi_flush_icache), . - C(ffi_flush_icache)
 #endif
 
-	.text
         .align 8
-.globl ffi_call_v8
-.globl _ffi_call_v8
+	.globl	C(ffi_call_v8)
+	.type	C(ffi_call_v8),@function
+	FFI_HIDDEN(C(ffi_call_v8))
 	
-ffi_call_v8:
-_ffi_call_v8:
-.LLFB1:
+C(ffi_call_v8):
+	cfi_startproc
 	save	%sp, -STACKFRAME, %sp
-.LLCFI0:
+	cfi_def_cfa_register(%fp)
+	cfi_window_save
 	
 	sub	%sp, %i2, %sp	! alloca() space in stack for frame to set up
 	add	%sp, STACKFRAME, %l0	! %l0 has start of 
@@ -91,81 +104,81 @@ _ffi_call_v8:
 
 	! If the return value pointer is NULL, assume no return value.
 	tst	%i4
-	bz	done
+	bz	L(done)
 	nop
 
 	cmp	%i3, FFI_TYPE_INT
-	be,a	done
+	be,a	L(done)
 	st	%o0, [%i4]	! (delay)
 
 	cmp	%i3, FFI_TYPE_FLOAT
-	be,a	done
+	be,a	L(done)
 	st	%f0, [%i4+0]	! (delay)
 
 	cmp	%i3, FFI_TYPE_DOUBLE
-	be,a	double
+	be,a	L(double)
 	st	%f0, [%i4+0]	! (delay)
 
 	cmp	%i3, FFI_TYPE_SINT8
-	be,a	sint8
+	be,a	L(sint8)
 	sll	%o0, 24, %o0	! (delay)
 
 	cmp	%i3, FFI_TYPE_UINT8
-	be,a	uint8
+	be,a	L(uint8)
 	sll	%o0, 24, %o0	! (delay)
 
 	cmp	%i3, FFI_TYPE_SINT16
-	be,a	sint16
+	be,a	L(sint16)
 	sll	%o0, 16, %o0	! (delay)
 
 	cmp	%i3, FFI_TYPE_UINT16
-	be,a	uint16
+	be,a	L(uint16)
 	sll	%o0, 16, %o0	! (delay)
 
 	cmp	%i3, FFI_TYPE_SINT64
-	be,a	longlong
+	be,a	L(longlong)
 	st	%o0, [%i4+0]	! (delay)
-done:
+
+L(done):
 	ret
 	restore
 
-double:
+L(double):
 	st	%f1, [%i4+4]
 	ret
 	restore
 
-sint8:
+L(sint8):
 	sra	%o0, 24, %o0
 	st	%o0, [%i4+0]
 	ret
 	restore
 
-uint8:
+L(uint8):
 	srl	%o0, 24, %o0
 	st	%o0, [%i4+0]
 	ret
 	restore
 
-sint16:
+L(sint16):
 	sra	%o0, 16, %o0
 	st	%o0, [%i4+0]
 	ret
 	restore
 
-uint16:
+L(uint16):
 	srl	%o0, 16, %o0
 	st	%o0, [%i4+0]
 	ret
 	restore
 
-longlong:
+L(longlong):
 	st	%o1, [%i4+4]
 	ret
 	restore
-.LLFE1:
+	cfi_endproc
 
-.ffi_call_v8_end:
-	.size	ffi_call_v8,.ffi_call_v8_end-ffi_call_v8
+	.size	C(ffi_call_v8),. - C(ffi_call_v8)
 
 
 #undef STACKFRAME
@@ -178,15 +191,16 @@ longlong:
 
    Receives the closure argument in %g2.   */
 
-	.text
 	.align 8
-	.globl ffi_closure_v8
+	.globl	C(ffi_closure_v8)
+	.type	C(ffi_closure_v8),@function
+	FFI_HIDDEN(C(ffi_closure_v8))
 
-ffi_closure_v8:
+C(ffi_closure_v8):
 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
 		.register	%g2, #scratch
 #endif
-.LLFB2:
+	cfi_startproc
 	! Reserve frame space for all arguments in case
 	! we need to align them on a 8-byte boundary.
 	ld	[%g2+FFI_TRAMPOLINE_SIZE], %g1
@@ -196,7 +210,8 @@ ffi_closure_v8:
 	! %g1 == STACKFRAME + 8*nargs
 	neg	%g1
 	save	%sp, %g1, %sp
-.LLCFI1:
+	cfi_def_cfa_register(%fp)
+	cfi_window_save
 
 	! Store all of the potential argument registers in va_list format.
 	st	%i0, [%fp+68+0]
@@ -216,130 +231,47 @@ ffi_closure_v8:
 	! Load up the return value in the proper type.
 	! See ffi_prep_cif_machdep for the list of cases.
 	cmp	%o0, FFI_TYPE_VOID
-	be	done1
+	be	L(done1)
 
 	cmp	%o0, FFI_TYPE_INT
-	be	done1
+	be	L(done1)
 	 ld	[%fp-8], %i0
 
 	cmp	%o0, FFI_TYPE_FLOAT
-	be,a	done1
+	be,a	L(done1)
 	 ld	[%fp-8], %f0
 
 	cmp	%o0, FFI_TYPE_DOUBLE
-	be,a	done1
+	be,a	L(done1)
 	 ldd	[%fp-8], %f0
 
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 	cmp	%o0, FFI_TYPE_LONGDOUBLE
-	be	done2
+	be	L(done2)
 #endif
 
 	cmp	%o0, FFI_TYPE_STRUCT
-	be	done2
+	be	L(done2)
 
 	cmp	%o0, FFI_TYPE_SINT64
-	be,a	done1
+	be,a	L(done1)
 	 ldd	[%fp-8], %i0
 
 	cmp	%o0, FFI_TYPE_UINT64
-	be,a	done1
+	be,a	L(done1)
 	 ldd	[%fp-8], %i0
 
 	ld	[%fp-8], %i0
-done1:
+L(done1):
 	jmp	%i7+8
 	 restore
-done2:
+L(done2):
 	! Skip 'unimp'.
 	jmp	%i7+12
 	 restore
-.LLFE2:
-
-.ffi_closure_v8_end:
-	.size	ffi_closure_v8,.ffi_closure_v8_end-ffi_closure_v8
 
-#ifdef SPARC64
-#define WS 8
-#define nword	xword
-#define uanword	uaxword
-#else
-#define WS 4
-#define nword	long
-#define uanword	uaword
-#endif
-
-#ifdef HAVE_RO_EH_FRAME
-	.section	".eh_frame",#alloc
-#else
-	.section	".eh_frame",#alloc,#write
-#endif
-.LLframe1:
-	.uaword	.LLECIE1-.LLSCIE1	! Length of Common Information Entry
-.LLSCIE1:
-	.uaword	0x0	! CIE Identifier Tag
-	.byte	0x1	! CIE Version
-	.ascii "zR\0"	! CIE Augmentation
-	.byte	0x1	! uleb128 0x1; CIE Code Alignment Factor
-	.byte	0x80-WS	! sleb128 -WS; CIE Data Alignment Factor
-	.byte	0xf	! CIE RA Column
-	.byte	0x1	! uleb128 0x1; Augmentation size
-#ifdef HAVE_AS_SPARC_UA_PCREL
-	.byte	0x1b	! FDE Encoding (pcrel sdata4)
-#else
-	.byte	0x50	! FDE Encoding (aligned absolute)
-#endif
-	.byte	0xc	! DW_CFA_def_cfa
-	.byte	0xe	! uleb128 0xe
-	.byte	0x0	! uleb128 0x0
-	.align	WS
-.LLECIE1:
-.LLSFDE1:
-	.uaword	.LLEFDE1-.LLASFDE1	! FDE Length
-.LLASFDE1:
-	.uaword	.LLASFDE1-.LLframe1	! FDE CIE offset
-#ifdef HAVE_AS_SPARC_UA_PCREL
-	.uaword	%r_disp32(.LLFB1)
-	.uaword	.LLFE1-.LLFB1	! FDE address range
-#else
-	.align	WS
-	.nword	.LLFB1
-	.uanword .LLFE1-.LLFB1	! FDE address range
-#endif
-	.byte	0x0	! uleb128 0x0; Augmentation size
-	.byte	0x4	! DW_CFA_advance_loc4
-	.uaword	.LLCFI0-.LLFB1
-	.byte	0xd	! DW_CFA_def_cfa_register
-	.byte	0x1e	! uleb128 0x1e
-	.byte	0x2d	! DW_CFA_GNU_window_save
-	.byte	0x9	! DW_CFA_register
-	.byte	0xf	! uleb128 0xf
-	.byte	0x1f	! uleb128 0x1f
-	.align	WS
-.LLEFDE1:
-.LLSFDE2:
-	.uaword	.LLEFDE2-.LLASFDE2	! FDE Length
-.LLASFDE2:
-	.uaword	.LLASFDE2-.LLframe1	! FDE CIE offset
-#ifdef HAVE_AS_SPARC_UA_PCREL
-	.uaword	%r_disp32(.LLFB2)
-	.uaword	.LLFE2-.LLFB2	! FDE address range
-#else
-	.align	WS
-	.nword	.LLFB2
-	.uanword .LLFE2-.LLFB2	! FDE address range
-#endif
-	.byte	0x0	! uleb128 0x0; Augmentation size
-	.byte	0x4	! DW_CFA_advance_loc4
-	.uaword	.LLCFI1-.LLFB2
-	.byte	0xd	! DW_CFA_def_cfa_register
-	.byte	0x1e	! uleb128 0x1e
-	.byte	0x2d	! DW_CFA_GNU_window_save
-	.byte	0x9	! DW_CFA_register
-	.byte	0xf	! uleb128 0xf
-	.byte	0x1f	! uleb128 0x1f
-	.align	WS
-.LLEFDE2:
+	cfi_endproc
+	.size	C(ffi_closure_v8), . - C(ffi_closure_v8)
 
 #if defined __ELF__ && defined __linux__
 	.section	.note.GNU-stack,"",@progbits
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index bf31a2b..aba468e 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -27,25 +27,38 @@
 #define LIBFFI_ASM	
 #include <fficonfig.h>
 #include <ffi.h>
+#include <ffi_cfi.h>
 
 #ifdef SPARC64
 /* Only compile this in for 64bit builds, because otherwise the object file
    will have inproper architecture due to used instructions.  */
 
+#define C2(X, Y)  X ## Y
+#define C1(X, Y)  C2(X, Y)
+
+#ifdef __USER_LABEL_PREFIX__
+# define C(Y)	C1(__USER_LABEL_PREFIX__, Y)
+#else
+# define C(Y)	Y
+#endif
+#define L(Y)	C1(.L, Y)
+
+
 #define STACKFRAME 176		/* Minimum stack framesize for SPARC 64-bit */
 #define STACK_BIAS 2047
 #define ARGS (128)		/* Offset of register area in frame */
 
 .text
         .align 8
-.globl ffi_call_v9
-.globl _ffi_call_v9
+	.globl	C(ffi_call_v9)
+	.type	C(ffi_call_v9),@function
+	FFI_HIDDEN(C(ffi_call_v9))
 
-ffi_call_v9:
-_ffi_call_v9:
-.LLFB1:
+C(ffi_call_v9):
+	cfi_startproc
 	save	%sp, -STACKFRAME, %sp
-.LLCFI0:
+	cfi_def_cfa_register(%fp)
+	cfi_window_save
 	
 	sub	%sp, %i2, %sp	! alloca() space in stack for frame to set up
 	add	%sp, STACKFRAME+STACK_BIAS, %l0	! %l0 has start of 
@@ -83,34 +96,35 @@ _ffi_call_v9:
 	 sub	%l0, STACK_BIAS, %sp	! (delay) switch to frame
 
 	! If the return value pointer is NULL, assume no return value.
-	brz,pn	%i4, done
+	brz,pn	%i4, L(done)
 	 nop
 
 	cmp	%i3, FFI_TYPE_INT
-	be,a,pt	%icc, done
+	be,a,pt	%icc, L(done)
 	 stx	%o0, [%i4+0]	! (delay)
 
 	cmp	%i3, FFI_TYPE_FLOAT
-	be,a,pn	%icc, done
+	be,a,pn	%icc, L(done)
 	 st	%f0, [%i4+0]	! (delay)
 
 	cmp	%i3, FFI_TYPE_DOUBLE
-	be,a,pn	%icc, done
+	be,a,pn	%icc, L(done)
 	 std	%f0, [%i4+0]	! (delay)
 
 	cmp	%i3, FFI_TYPE_STRUCT
-	be,pn	%icc, dostruct
+	be,pn	%icc, L(dostruct)
 
 	cmp	%i3, FFI_TYPE_LONGDOUBLE
-	bne,pt	%icc, done
+	bne,pt	%icc, L(done)
 	 nop
 	std	%f0, [%i4+0]
 	std	%f2, [%i4+8]
 
-done:	ret
+L(done):
+	ret
 	 restore
 
-dostruct:
+L(dostruct):
 	/* This will not work correctly for unions. */
 	stx	%o0, [%i4+0]
 	stx	%o1, [%i4+8]
@@ -122,10 +136,9 @@ dostruct:
 	std	%f6, [%i4+56]
 	ret
 	 restore
-.LLFE1:
 
-.ffi_call_v9_end:
-	.size	ffi_call_v9,.ffi_call_v9_end-ffi_call_v9
+	cfi_endproc
+	.size	C(ffi_call_v9), . - C(ffi_call_v9)
 
 
 #undef STACKFRAME
@@ -138,14 +151,16 @@ dostruct:
 
    Receives the closure argument in %g1.   */
 
-	.text
 	.align 8
-	.globl ffi_closure_v9
+	.globl	C(ffi_closure_v9)
+	.type	C(ffi_closure_v9),@function
+	FFI_HIDDEN(C(ffi_closure_v9))
 
-ffi_closure_v9:
-.LLFB2:
+C(ffi_closure_v9):
+	cfi_startproc
 	save	%sp, -STACKFRAME, %sp
-.LLCFI1:
+	cfi_def_cfa_register(%fp)
+	cfi_window_save
 
 	! Store all of the potential argument registers in va_list format.
 	stx	%i0, [FP+128+0]
@@ -177,28 +192,28 @@ ffi_closure_v9:
 	mov	%g1, %o0
 	add	%fp, STACK_BIAS-160, %o1
 	add	%fp, STACK_BIAS+128, %o2
-	call	ffi_closure_sparc_inner_v9
+	call	C(ffi_closure_sparc_inner_v9)
 	 add	%fp, STACK_BIAS-128, %o3
 
 	! Load up the return value in the proper type.
 	! See ffi_prep_cif_machdep for the list of cases.
 	cmp	%o0, FFI_TYPE_VOID
-	be,pn	%icc, done1
+	be,pn	%icc, L(done1)
 
 	cmp	%o0, FFI_TYPE_INT
-	be,pn	%icc, integer
+	be,pn	%icc, L(integer)
 
 	cmp	%o0, FFI_TYPE_FLOAT
-	be,a,pn	%icc, done1
+	be,a,pn	%icc, L(done1)
 	 ld	[FP-160], %f0
 
 	cmp	%o0, FFI_TYPE_DOUBLE
-	be,a,pn	%icc, done1
+	be,a,pn	%icc, L(done1)
 	 ldd	[FP-160], %f0
 
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 	cmp	%o0, FFI_TYPE_LONGDOUBLE
-	be,a,pn	%icc, longdouble1
+	be,a,pn	%icc, L(longdouble1)
 	 ldd	[FP-160], %f0
 #endif
 
@@ -211,97 +226,24 @@ ffi_closure_v9:
 	ldd	[FP-144], %f4
 	ldd	[FP-136], %f6
 
-integer:
+L(integer):
 	ldx	[FP-160], %i0
 
-done1:
+L(done1):
 	ret
 	 restore
 
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-longdouble1:
+L(longdouble1):
 	ldd	[FP-152], %f2
 	ret
 	 restore
 #endif
-.LLFE2:
 
-.ffi_closure_v9_end:
-	.size	ffi_closure_v9,.ffi_closure_v9_end-ffi_closure_v9
-
-#ifdef HAVE_RO_EH_FRAME
-	.section	".eh_frame",#alloc
-#else
-	.section	".eh_frame",#alloc,#write
-#endif
-.LLframe1:
-	.uaword	.LLECIE1-.LLSCIE1	! Length of Common Information Entry
-.LLSCIE1:
-	.uaword	0x0	! CIE Identifier Tag
-	.byte	0x1	! CIE Version
-	.ascii "zR\0"	! CIE Augmentation
-	.byte	0x1	! uleb128 0x1; CIE Code Alignment Factor
-	.byte	0x78	! sleb128 -8; CIE Data Alignment Factor
-	.byte	0xf	! CIE RA Column
-	.byte	0x1	! uleb128 0x1; Augmentation size
-#ifdef HAVE_AS_SPARC_UA_PCREL
-	.byte	0x1b	! FDE Encoding (pcrel sdata4)
-#else
-	.byte	0x50	! FDE Encoding (aligned absolute)
-#endif
-	.byte	0xc	! DW_CFA_def_cfa
-	.byte	0xe	! uleb128 0xe
-	.byte	0xff,0xf	! uleb128 0x7ff
-	.align 8
-.LLECIE1:
-.LLSFDE1:
-	.uaword	.LLEFDE1-.LLASFDE1	! FDE Length
-.LLASFDE1:
-	.uaword	.LLASFDE1-.LLframe1	! FDE CIE offset
-#ifdef HAVE_AS_SPARC_UA_PCREL
-	.uaword	%r_disp32(.LLFB1)
-	.uaword	.LLFE1-.LLFB1		! FDE address range
-#else
-	.align 8
-	.xword	.LLFB1
-	.uaxword	.LLFE1-.LLFB1	! FDE address range
-#endif
-	.byte	0x0	! uleb128 0x0; Augmentation size
-	.byte	0x4	! DW_CFA_advance_loc4
-	.uaword	.LLCFI0-.LLFB1
-	.byte	0xd	! DW_CFA_def_cfa_register
-	.byte	0x1e	! uleb128 0x1e
-	.byte	0x2d	! DW_CFA_GNU_window_save
-	.byte	0x9	! DW_CFA_register
-	.byte	0xf	! uleb128 0xf
-	.byte	0x1f	! uleb128 0x1f
-	.align 8
-.LLEFDE1:
-.LLSFDE2:
-	.uaword	.LLEFDE2-.LLASFDE2	! FDE Length
-.LLASFDE2:
-	.uaword	.LLASFDE2-.LLframe1	! FDE CIE offset
-#ifdef HAVE_AS_SPARC_UA_PCREL
-	.uaword	%r_disp32(.LLFB2)
-	.uaword	.LLFE2-.LLFB2		! FDE address range
-#else
-	.align 8
-	.xword	.LLFB2
-	.uaxword	.LLFE2-.LLFB2	! FDE address range
-#endif
-	.byte	0x0	! uleb128 0x0; Augmentation size
-	.byte	0x4	! DW_CFA_advance_loc4
-	.uaword	.LLCFI1-.LLFB2
-	.byte	0xd	! DW_CFA_def_cfa_register
-	.byte	0x1e	! uleb128 0x1e
-	.byte	0x2d	! DW_CFA_GNU_window_save
-	.byte	0x9	! DW_CFA_register
-	.byte	0xf	! uleb128 0xf
-	.byte	0x1f	! uleb128 0x1f
-	.align 8
-.LLEFDE2:
-#endif
+	cfi_endproc
+	.size	C(ffi_closure_v9), . - C(ffi_closure_v9)
 
+#endif /* SPARC64 */
 #ifdef __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/8] sparc: Rewrite everything
  2014-10-28 19:46 ` [PATCH 3/8] sparc: Rewrite everything Richard Henderson
@ 2014-10-29 18:10   ` David Miller
  2014-10-29 20:01     ` Richard Henderson
  0 siblings, 1 reply; 14+ messages in thread
From: David Miller @ 2014-10-29 18:10 UTC (permalink / raw)
  To: rth; +Cc: libffi-discuss

From: Richard Henderson <rth@twiddle.net>
Date: Tue, 28 Oct 2014 12:45:50 -0700

> +    case FFI_TYPE_STRUCT:
> +      if (rtype->size > 32)
> +	{
> +	  flags = SPARC_RET_VOID | SPARC_FLAG_RET_IN_MEM;
> +	  bytes = 8;
> +	}
> +      else
> +	flags = SPARC_RET_STRUCT;
> +      break;

Here you use a cutoff of 32 bytes.

> +	case FFI_TYPE_LONGDOUBLE:
> +	case FFI_TYPE_STRUCT:
> +	  z = ty->size;
> +	  if (z > 16)
> +	    {
> +	      /* For structures larger than 16 bytes we pass reference.  */
> +	      *argp++ = (unsigned long)a;
> +	      break;
> +	    }
> +	  if (((unsigned long)argp & 15) && ty->alignment > 8)
> +	    argp++;
> +	  memcpy(argp, a, z);
> +	  argp += ALIGN(z, 8) / 8;
> +	  break;

Yet here in args prep, the cutoff is 16.

Maybe I'm missing something?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/8] sparc: Rewrite everything
  2014-10-29 18:10   ` David Miller
@ 2014-10-29 20:01     ` Richard Henderson
  2014-10-29 20:11       ` David Miller
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2014-10-29 20:01 UTC (permalink / raw)
  To: David Miller; +Cc: libffi-discuss

On 10/29/2014 11:10 AM, David Miller wrote:
> From: Richard Henderson <rth@twiddle.net>
> Date: Tue, 28 Oct 2014 12:45:50 -0700
> 
>> +    case FFI_TYPE_STRUCT:
>> +      if (rtype->size > 32)
>> +	{
>> +	  flags = SPARC_RET_VOID | SPARC_FLAG_RET_IN_MEM;
>> +	  bytes = 8;
>> +	}
>> +      else
>> +	flags = SPARC_RET_STRUCT;
>> +      break;
> 
> Here you use a cutoff of 32 bytes.

Return type.

> 
>> +	case FFI_TYPE_LONGDOUBLE:
>> +	case FFI_TYPE_STRUCT:
>> +	  z = ty->size;
>> +	  if (z > 16)
>> +	    {
>> +	      /* For structures larger than 16 bytes we pass reference.  */
>> +	      *argp++ = (unsigned long)a;
>> +	      break;
>> +	    }
>> +	  if (((unsigned long)argp & 15) && ty->alignment > 8)
>> +	    argp++;
>> +	  memcpy(argp, a, z);
>> +	  argp += ALIGN(z, 8) / 8;
>> +	  break;
> 
> Yet here in args prep, the cutoff is 16.

Parameter type.

> Maybe I'm missing something?

The two limits are in fact different.  In gcc, see sparc_return_in_memory and
sparc_pass_by_reference.


r~


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/8] sparc: Rewrite everything
  2014-10-29 20:01     ` Richard Henderson
@ 2014-10-29 20:11       ` David Miller
  2014-10-29 20:44         ` Richard Henderson
  0 siblings, 1 reply; 14+ messages in thread
From: David Miller @ 2014-10-29 20:11 UTC (permalink / raw)
  To: rth; +Cc: libffi-discuss

From: Richard Henderson <rth@twiddle.net>
Date: Wed, 29 Oct 2014 13:01:45 -0700

> On 10/29/2014 11:10 AM, David Miller wrote:
>> Maybe I'm missing something?
> 
> The two limits are in fact different.  In gcc, see sparc_return_in_memory and
> sparc_pass_by_reference.

My bad, thanks for clarifying.

That's the only thing that caught my eye.  I think for most v9 chips a
'return' is slightly more expensive than a 'ret/restore'.  'return' is
good for saving an instruction when you can put something in that
delay slot, but if you can't then you might as well do 'ret/restore'.

Series otherwise looks great, nice work!

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/8] sparc: Rewrite everything
  2014-10-29 20:11       ` David Miller
@ 2014-10-29 20:44         ` Richard Henderson
  2014-10-30  4:52           ` David Miller
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2014-10-29 20:44 UTC (permalink / raw)
  To: David Miller; +Cc: libffi-discuss

On 10/29/2014 01:10 PM, David Miller wrote:
> From: Richard Henderson <rth@twiddle.net>
> Date: Wed, 29 Oct 2014 13:01:45 -0700
> 
>> On 10/29/2014 11:10 AM, David Miller wrote:
>>> Maybe I'm missing something?
>>
>> The two limits are in fact different.  In gcc, see sparc_return_in_memory and
>> sparc_pass_by_reference.
> 
> My bad, thanks for clarifying.
> 
> That's the only thing that caught my eye.  I think for most v9 chips a
> 'return' is slightly more expensive than a 'ret/restore'.  'return' is
> good for saving an instruction when you can put something in that
> delay slot, but if you can't then you might as well do 'ret/restore'.

Ah right, thanks.

The one other microarchitecture question I had was wrt call/ret pairing.

I was assuming that, for prediction purposes, "ret" vs "jmp" must be based on
the register used -- %i7 or %o7.  Thus my call ... jmp %o7+const hopefully
keeps any call/return prediction stack in sync?


r~

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/8] sparc: Rewrite everything
  2014-10-29 20:44         ` Richard Henderson
@ 2014-10-30  4:52           ` David Miller
  0 siblings, 0 replies; 14+ messages in thread
From: David Miller @ 2014-10-30  4:52 UTC (permalink / raw)
  To: rth; +Cc: libffi-discuss

From: Richard Henderson <rth@twiddle.net>
Date: Wed, 29 Oct 2014 13:44:37 -0700

> The one other microarchitecture question I had was wrt call/ret pairing.
> 
> I was assuming that, for prediction purposes, "ret" vs "jmp" must be based on
> the register used -- %i7 or %o7.  Thus my call ... jmp %o7+const hopefully
> keeps any call/return prediction stack in sync?

Yes, that should ensure a return address stack prediction hit.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2014-10-30  4:52 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-28 19:46 [PATCH 0/8] Go closures for Sparc Richard Henderson
2014-10-28 19:46 ` [PATCH 8/8] sparc: Re-add abi compliant structure support Richard Henderson
2014-10-28 19:46 ` [PATCH 3/8] sparc: Rewrite everything Richard Henderson
2014-10-29 18:10   ` David Miller
2014-10-29 20:01     ` Richard Henderson
2014-10-29 20:11       ` David Miller
2014-10-29 20:44         ` Richard Henderson
2014-10-30  4:52           ` David Miller
2014-10-28 19:46 ` [PATCH 2/8] sparc: Tidy up symbols Richard Henderson
2014-10-28 19:46 ` [PATCH 1/8] sparc: Eliminate long double ifdefs Richard Henderson
2014-10-28 19:46 ` [PATCH 4/8] sparc: Preprocess float point struct return Richard Henderson
2014-10-28 19:46 ` [PATCH 7/8] sparc: Add support for Go closures Richard Henderson
2014-10-28 19:46 ` [PATCH 6/8] sparc: Add support for complex types Richard Henderson
2014-10-28 19:46 ` [PATCH 5/8] sparc: Handle more cases of structure return directly Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).