From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 9268 invoked by alias); 28 Oct 2014 19:46:42 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 9206 invoked by uid 89); 28 Oct 2014 19:46:41 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-qa0-f53.google.com Received: from mail-qa0-f53.google.com (HELO mail-qa0-f53.google.com) (209.85.216.53) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 28 Oct 2014 19:46:38 +0000 Received: by mail-qa0-f53.google.com with SMTP id n8so1013595qaq.26 for ; Tue, 28 Oct 2014 12:46:36 -0700 (PDT) X-Received: by 10.224.130.198 with SMTP id u6mr8164753qas.99.1414525594093; Tue, 28 Oct 2014 12:46:34 -0700 (PDT) Received: from anchor.com (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id 11sm2042715qgj.34.2014.10.28.12.46.32 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 28 Oct 2014 12:46:33 -0700 (PDT) From: Richard Henderson To: libffi-discuss@sourceware.org Cc: davem@davemloft.net Subject: [PATCH 4/8] sparc: Preprocess float point struct return Date: Tue, 28 Oct 2014 19:46:00 -0000 Message-Id: <1414525555-21256-5-git-send-email-rth@twiddle.net> In-Reply-To: <1414525555-21256-1-git-send-email-rth@twiddle.net> References: <1414525555-21256-1-git-send-email-rth@twiddle.net> X-SW-Source: 2014/txt/msg00152.txt.bz2 We can eliminate recursion and speed structure return by flattening a nested structure tree into a bitmask. 
--- src/sparc/ffi64.c | 126 ++++++++++++++++++++++++++++++++++++++++----------- src/sparc/internal.h | 2 + src/sparc/v9.S | 11 ++--- 3 files changed, 106 insertions(+), 33 deletions(-) diff --git a/src/sparc/ffi64.c b/src/sparc/ffi64.c index 7ed928d..65ae438 100644 --- a/src/sparc/ffi64.c +++ b/src/sparc/ffi64.c @@ -42,41 +42,103 @@ #endif #ifdef SPARC64 -/* Perform machine dependent cif processing */ -int FFI_HIDDEN -ffi_v9_layout_struct (ffi_type *arg, int off, void *d, void *si, void *sf) +/* Flatten the contents of a structure to the parts that are passed in + floating point registers. The return is a bit mask wherein bit N + set means bytes [4*N, 4*N+3] are passed in %fN. + + We encode both the (running) size (maximum 32) and mask (maximum 255) + into one integer. The size is placed in the low byte, so that align + and addition work correctly. The mask is placed in the second byte. */ + +static int +ffi_struct_float_mask (ffi_type *struct_type, int size_mask) { ffi_type **elts, *t; - for (elts = arg->elements; (t = *elts) != NULL; elts++) + for (elts = struct_type->elements; (t = *elts) != NULL; elts++) { size_t z = t->size; - void *src = si; + int o, m; - off = ALIGN(off, t->alignment); + size_mask = ALIGN(size_mask, t->alignment); switch (t->type) { case FFI_TYPE_STRUCT: - off = ffi_v9_layout_struct(t, off, d, si, sf); - off = ALIGN(off, FFI_SIZEOF_ARG); + size_mask = ffi_struct_float_mask (t, size_mask); + size_mask = ALIGN(size_mask, FFI_SIZEOF_ARG); continue; case FFI_TYPE_FLOAT: case FFI_TYPE_DOUBLE: case FFI_TYPE_LONGDOUBLE: - /* Note that closures start with the argument offset, - so that we know when to stop looking at fp regs. 
*/ - if (off < 128) - src = sf; + m = (1 << (z / 4)) - 1; /* compute mask for type */ + o = (size_mask >> 2) & 0x3f; /* extract word offset */ + size_mask |= m << (o + 8); /* insert mask into place */ break; } - memcpy(d + off, src + off, z); - off += z; + size_mask += z; + } + + size_mask = ALIGN(size_mask, struct_type->alignment); + FFI_ASSERT ((size_mask & 0xff) == struct_type->size); + + return size_mask; +} + +/* Merge floating point data into integer data. If the structure is + entirely floating point, simply return a pointer to the fp data. */ + +static void * +ffi_struct_float_merge (int size_mask, void *vi, void *vf) +{ + int size = size_mask & 0xff; + int mask = size_mask >> 8; + int n = size >> 2; + + if (mask == 0) + return vi; + else if (mask == (1 << n) - 1) + return vf; + else + { + unsigned int *wi = vi, *wf = vf; + int i; + + for (i = 0; i < n; ++i) + if ((mask >> i) & 1) + wi[i] = wf[i]; + + return vi; } +} + +/* Similar, but place the data into VD in the end. */ - return off; +void FFI_HIDDEN +ffi_struct_float_copy (int size_mask, void *vd, void *vi, void *vf) +{ + int size = size_mask & 0xff; + int mask = size_mask >> 8; + int n = size >> 2; + + if (mask == 0) + ; + else if (mask == (1 << n) - 1) + vi = vf; + else + { + unsigned int *wd = vd, *wi = vi, *wf = vf; + int i; + + for (i = 0; i < n; ++i) + wd[i] = ((mask >> i) & 1 ? 
wf : wi)[i]; + return; + } + memcpy (vd, vi, size); } +/* Perform machine dependent cif processing */ + ffi_status FFI_HIDDEN ffi_prep_cif_machdep(ffi_cif *cif) { @@ -108,7 +170,10 @@ ffi_prep_cif_machdep(ffi_cif *cif) bytes = 8; } else - flags = SPARC_RET_STRUCT; + { + flags = ffi_struct_float_mask (rtype, 0) << SPARC_FLTMASK_SHIFT; + flags |= SPARC_RET_STRUCT; + } break; case FFI_TYPE_SINT8: @@ -343,7 +408,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue, ffi_cif *cif; ffi_type **arg_types; void **avalue; - int i, argn, nargs, flags; + int i, argn, argx, nargs, flags; cif = closure->cif; arg_types = cif->arg_types; @@ -364,12 +429,13 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue, argn = 0; /* Grab the addresses of the arguments from the stack frame. */ - for (i = 0; i < nargs; i++) + for (i = 0; i < nargs; i++, argn = argx) { ffi_type *ty = arg_types[i]; - void *a = &gpr[argn++]; + void *a = &gpr[argn]; size_t z; + argx = argn + 1; switch (ty->type) { case FFI_TYPE_STRUCT: @@ -378,25 +444,31 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue, a = *(void **)a; else { - if (--argn < 16) - ffi_v9_layout_struct(arg_types[i], 8*argn, gpr, gpr, fpr); - argn += ALIGN (z, 8) / 8; + argx = argn + ALIGN (z, 8) / 8; + if (argn < 16) + { + int size_mask = ffi_struct_float_mask (ty, 0); + int argn_mask = (0xffff00 >> argn) & 0xff00; + + /* Eliminate fp registers off the end. */ + size_mask = (size_mask & 0xff) | (size_mask & argn_mask); + a = ffi_struct_float_merge (size_mask, gpr+argn, fpr+argn); + } } break; case FFI_TYPE_LONGDOUBLE: - if (--argn & 1) - argn++; + argn = ALIGN (argn, 2); a = (argn < 16 ? 
fpr : gpr) + argn; - argn += 2; + argx = argn + 2; break; case FFI_TYPE_DOUBLE: if (argn <= 16) - a = fpr + argn - 1; + a = fpr + argn; break; case FFI_TYPE_FLOAT: if (argn <= 16) - a = fpr + argn - 1; + a = fpr + argn; a += 4; break; diff --git a/src/sparc/internal.h b/src/sparc/internal.h index df7c305..3018928 100644 --- a/src/sparc/internal.h +++ b/src/sparc/internal.h @@ -16,3 +16,5 @@ #define SPARC_FLAG_RET_MASK 15 #define SPARC_FLAG_RET_IN_MEM 32 #define SPARC_FLAG_FP_ARGS 64 + +#define SPARC_FLTMASK_SHIFT 8 diff --git a/src/sparc/v9.S b/src/sparc/v9.S index e2fe036..3d91f2d 100644 --- a/src/sparc/v9.S +++ b/src/sparc/v9.S @@ -177,12 +177,11 @@ E 15 std %f6, [%l2+56] ! Copy the structure into place. - ldx [%i0+16], %o0 ! load rtype from cif - mov 0, %o1 ! load off - mov %i2, %o2 ! load dst - mov %l2, %o3 ! load src_int - call C(ffi_v9_layout_struct) - add %l2, 32, %o4 ! load src_fp + srl %l0, SPARC_FLTMASK_SHIFT, %o0 ! load size_mask + mov %i2, %o1 ! load dst + mov %l2, %o2 ! load src_gp + call C(ffi_struct_float_copy) + add %l2, 32, %o3 ! load src_fp return %i7+8 nop -- 1.9.3