From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 5671 invoked by alias); 13 Nov 2013 15:25:31 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 5653 invoked by uid 89); 13 Nov 2013 15:25:30 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.2 required=5.0 tests=AWL,BAYES_50,FREEMAIL_FROM,RDNS_NONE,SPF_PASS autolearn=no version=3.3.2 X-HELO: mail-pa0-f48.google.com Received: from Unknown (HELO mail-pa0-f48.google.com) (209.85.220.48) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Wed, 13 Nov 2013 15:25:28 +0000 Received: by mail-pa0-f48.google.com with SMTP id bj1so545755pad.35 for ; Wed, 13 Nov 2013 07:25:17 -0800 (PST) X-Received: by 10.68.225.232 with SMTP id rn8mr41294240pbc.32.1384356317647; Wed, 13 Nov 2013 07:25:17 -0800 (PST) Received: from bubble.grove.modra.org ([101.166.26.37]) by mx.google.com with ESMTPSA id sy10sm52457473pac.15.2013.11.13.07.25.15 for (version=TLSv1.1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Wed, 13 Nov 2013 07:25:17 -0800 (PST) Received: by bubble.grove.modra.org (Postfix, from userid 1000) id 0B2A4EA006D; Thu, 14 Nov 2013 01:55:12 +1030 (CST) Date: Wed, 13 Nov 2013 15:25:00 -0000 From: Alan Modra To: libffi-discuss@sourceware.org Subject: Support PowerPC64 ELFv2 ABI Message-ID: <20131113152512.GF20756@bubble.grove.modra.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) X-SW-Source: 2013/txt/msg00204.txt.bz2 Finally, this adds _CALL_ELF == 2 support. ELFv1 objects can't be linked with ELFv2 objects, so this is one case where preprocessor tests in ffi.c are fine. Also, there is no need to define a new FFI_ELFv2 or somesuch value in enum ffi_abi. 
FFI_LINUX64 will happily serve both ABIs. * src/powerpc/ffitarget.h (FFI_V2_TYPE_FLOAT_HOMOG, FFI_V2_TYPE_DOUBLE_HOMOG, FFI_V2_TYPE_SMALL_STRUCT): Define. (FFI_TRAMPOLINE_SIZE): Define variant for ELFv2. * src/powerpc/ffi.c (FLAG_ARG_NEEDS_PSAVE): Define. (discover_homogeneous_aggregate): New function. (ffi_prep_args64): Adjust start of param save area for ELFv2. Handle homogeneous floating point struct parms. (ffi_prep_cif_machdep_core): Adjust space calculation for ELFv2. Handle ELFv2 return values. Set FLAG_ARG_NEEDS_PSAVE. Handle homogeneous floating point structs. (ffi_call): Increase size of smst_buffer for ELFv2. Handle ELFv2. (flush_icache): Compile for ELFv2. (ffi_prep_closure_loc): Set up ELFv2 trampoline. (ffi_closure_helper_LINUX64): Don't return all structs directly to caller. Handle homogeneous floating point structs. Handle ELFv2 struct return values. * src/powerpc/linux64.S (ffi_call_LINUX64): Set up r2 for ELFv2. Adjust TOC save location. Call function pointer using r12. Handle FLAG_RETURNS_SMST. Don't predict branches. * src/powerpc/linux64_closure.S (ffi_closure_LINUX64): Set up r2 for ELFv2. Define ELFv2 versions of STACKFRAME, PARMSAVE, and RETVAL. Handle possibly missing parameter save area. Handle ELFv2 return values. (.note.GNU-stack): Move inside outer #ifdef. 
diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c index cd63e26..69356a2 100644 --- a/src/powerpc/ffi.c +++ b/src/powerpc/ffi.c @@ -49,6 +49,7 @@ enum { FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */ FLAG_ARG_NEEDS_COPY = 1 << (31- 7), + FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by ELFv2 */ #ifndef __NO_FPRS__ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */ #endif @@ -389,6 +390,45 @@ enum { }; enum { ASM_NEEDS_REGISTERS64 = 4 }; +#if _CALL_ELF == 2 +static unsigned int +discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum) +{ + switch (t->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + *elnum = 1; + return (int) t->type; + + case FFI_TYPE_STRUCT:; + { + unsigned int base_elt = 0, total_elnum = 0; + ffi_type **el = t->elements; + while (*el) + { + unsigned int el_elt, el_elnum = 0; + el_elt = discover_homogeneous_aggregate (*el, &el_elnum); + if (el_elt == 0 + || (base_elt && base_elt != el_elt)) + return 0; + base_elt = el_elt; + total_elnum += el_elnum; + if (total_elnum > 8) + return 0; + el++; + } + *elnum = total_elnum; + return base_elt; + } + + default: + return 0; + } +} +#endif + + /* ffi_prep_args64 is called by the assembly routine once stack space has been allocated for the function's arguments. 
@@ -476,7 +516,11 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack) stacktop.c = (char *) stack + bytes; gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64; gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64; +#if _CALL_ELF == 2 + rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64; +#else rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64; +#endif fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64; fparg_count = 0; next_arg.ul = gpr_base.ul; @@ -498,6 +542,8 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack) i < nargs; i++, ptr++, p_argv.v++) { + unsigned int elt, elnum; + switch ((*ptr)->type) { case FFI_TYPE_FLOAT: @@ -555,28 +601,79 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack) if (align > 1) next_arg.p = ALIGN (next_arg.p, align); #endif - words = ((*ptr)->size + 7) / 8; - if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul) + elt = 0; +#if _CALL_ELF == 2 + elt = discover_homogeneous_aggregate (*ptr, &elnum); +#endif + if (elt) { - size_t first = gpr_end.c - next_arg.c; - memcpy (next_arg.c, *p_argv.c, first); - memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first); - next_arg.c = rest.c + words * 8 - first; + union { + void *v; + float *f; + double *d; + } arg; + + arg.v = *p_argv.v; + if (elt == FFI_TYPE_FLOAT) + { + do + { + double_tmp = *arg.f++; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 + && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.f = (float) double_tmp; + if (++next_arg.f == gpr_end.f) + next_arg.f = rest.f; + fparg_count++; + } + while (--elnum != 0); + if ((next_arg.p & 3) != 0) + { + if (++next_arg.f == gpr_end.f) + next_arg.f = rest.f; + } + } + else + do + { + double_tmp = *arg.d++; + if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) + *fpr_base.d++ = double_tmp; + else + *next_arg.d = double_tmp; + if (++next_arg.d == gpr_end.d) + next_arg.d = rest.d; + fparg_count++; + } + while (--elnum != 0); } else { - char *where = 
next_arg.c; + words = ((*ptr)->size + 7) / 8; + if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul) + { + size_t first = gpr_end.c - next_arg.c; + memcpy (next_arg.c, *p_argv.c, first); + memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first); + next_arg.c = rest.c + words * 8 - first; + } + else + { + char *where = next_arg.c; #ifndef __LITTLE_ENDIAN__ - /* Structures with size less than eight bytes are passed - left-padded. */ - if ((*ptr)->size < 8) - where += 8 - (*ptr)->size; + /* Structures with size less than eight bytes are passed + left-padded. */ + if ((*ptr)->size < 8) + where += 8 - (*ptr)->size; #endif - memcpy (where, *p_argv.c, (*ptr)->size); - next_arg.ul += words; - if (next_arg.ul == gpr_end.ul) - next_arg.ul = rest.ul; + memcpy (where, *p_argv.c, (*ptr)->size); + next_arg.ul += words; + if (next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + } } break; @@ -632,11 +729,10 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) unsigned type = cif->rtype->type; unsigned size = cif->rtype->size; + /* The machine-independent calculation of cif->bytes doesn't work + for us. Redo the calculation. */ if (cif->abi != FFI_LINUX64) { - /* All the machine-independent calculation of cif->bytes will be wrong. - Redo the calculation for SYSV. */ - /* Space for the frame pointer, callee's LR, and the asm's temp regs. */ bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int); @@ -646,13 +742,20 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) else { /* 64-bit ABI. */ +#if _CALL_ELF == 2 + /* Space for backchain, CR, LR, TOC and the asm's temp regs. */ + bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long); + /* Space for the general registers. */ + bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long); +#else /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp regs. */ bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long); /* Space for the mandatory parm save area and general registers. 
*/ bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long); +#endif } /* Return value handling. The rules for SYSV are as follows: @@ -730,6 +833,25 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) flags |= FLAG_RETURNS_SMST; break; } +#if _CALL_ELF == 2 + if (cif->abi == FFI_LINUX64) + { + unsigned int elt, elnum; + elt = discover_homogeneous_aggregate (cif->rtype, &elnum); + if (elt) + { + if (elt == FFI_TYPE_DOUBLE) + flags |= FLAG_RETURNS_64BITS; + flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST; + break; + } + if (size <= 16) + { + flags |= FLAG_RETURNS_SMST; + break; + } + } +#endif intarg_count++; flags |= FLAG_RETVAL_REFERENCE; /* Fall through. */ @@ -845,6 +967,7 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) else for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) { + unsigned int elt, elnum; #ifdef __STRUCT_PARM_ALIGN__ unsigned int align; #endif @@ -855,12 +978,16 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) case FFI_TYPE_LONGDOUBLE: fparg_count += 2; intarg_count += 2; + if (fparg_count > NUM_FPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; break; #endif case FFI_TYPE_FLOAT: case FFI_TYPE_DOUBLE: fparg_count++; intarg_count++; + if (fparg_count > NUM_FPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; break; case FFI_TYPE_STRUCT: @@ -873,6 +1000,21 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) intarg_count = ALIGN (intarg_count, align); #endif intarg_count += ((*ptr)->size + 7) / 8; + elt = 0; +#if _CALL_ELF == 2 + elt = discover_homogeneous_aggregate (*ptr, &elnum); +#endif + if (elt) + { + fparg_count += elnum; + if (fparg_count > NUM_FPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; + } + else + { + if (intarg_count > NUM_GPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; + } break; case FFI_TYPE_POINTER: @@ -888,9 +1030,11 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) /* Everything else is passed as a 8-byte word in a GPR, either the object itself or a pointer to it. 
*/ intarg_count++; + if (intarg_count > NUM_GPR_ARG_REGISTERS) + flags |= FLAG_ARG_NEEDS_PSAVE; break; default: - FFI_ASSERT (0); + FFI_ASSERT (0); } } @@ -928,8 +1072,13 @@ ffi_prep_cif_machdep_core (ffi_cif *cif) #endif /* Stack space. */ - if (intarg_count > NUM_GPR_ARG_REGISTERS64) - bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long); + if (cif->abi == FFI_LINUX64) + { + if (intarg_count > NUM_GPR_ARG_REGISTERS64) + bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long); + } + else if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0) + bytes += intarg_count * sizeof (long); } /* The stack space allocated needs to be a multiple of 16 bytes. */ @@ -957,6 +1106,10 @@ ffi_prep_cif_machdep_var (ffi_cif *cif, unsigned int ntotalargs MAYBE_UNUSED) { cif->nfixedargs = nfixedargs; +#if _CALL_ELF == 2 + if (cif->abi == FFI_LINUX64) + cif->flags |= FLAG_ARG_NEEDS_PSAVE; +#endif return ffi_prep_cif_machdep_core (cif); } @@ -976,8 +1129,11 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) * * We bounce-buffer SYSV small struct return values so that sysv.S * can write r3 and r4 to memory without worrying about struct size. + * + * For ELFv2 ABI, use a bounce buffer for homogeneous structs too, + * for similar reasons. */ - unsigned int smst_buffer[2]; + unsigned long smst_buffer[8]; extended_cif ecif; ecif.cif = cif; @@ -1019,10 +1175,11 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) #ifndef __LITTLE_ENDIAN__ /* The SYSV ABI returns a structure of up to 4 bytes in size left-padded in r3. */ - if (rsize <= 4) + if (cif->abi == FFI_SYSV && rsize <= 4) memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize); /* The SYSV ABI returns a structure of up to 8 bytes in size - left-padded in r3/r4. */ + left-padded in r3/r4, and the ELFv2 ABI similarly returns a + structure of up to 8 bytes in size left-padded in r3. 
*/ else if (rsize <= 8) memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize); else @@ -1032,7 +1189,7 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) } -#ifndef POWERPC64 +#if !defined POWERPC64 || _CALL_ELF == 2 #define MIN_CACHE_LINE_SIZE 8 static void @@ -1056,6 +1213,22 @@ ffi_prep_closure_loc (ffi_closure *closure, void *codeloc) { #ifdef POWERPC64 +# if _CALL_ELF == 2 + unsigned int *tramp = (unsigned int *) &closure->tramp[0]; + + if (cif->abi != FFI_LINUX64) + return FFI_BAD_ABI; + + tramp[0] = 0xe96c0018; /* 0: ld 11,2f-0b(12) */ + tramp[1] = 0xe98c0010; /* ld 12,1f-0b(12) */ + tramp[2] = 0x7d8903a6; /* mtctr 12 */ + tramp[3] = 0x4e800420; /* bctr */ + /* 1: .quad function_addr */ + /* 2: .quad context */ + *(void **) &tramp[4] = (void *) ffi_closure_LINUX64; + *(void **) &tramp[6] = codeloc; + flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE); +# else void **tramp = (void **) &closure->tramp[0]; if (cif->abi != FFI_LINUX64) @@ -1063,6 +1236,7 @@ ffi_prep_closure_loc (ffi_closure *closure, /* Copy function address and TOC from ffi_closure_LINUX64. */ memcpy (tramp, (char *) ffi_closure_LINUX64, 16); tramp[2] = codeloc; +# endif #else unsigned int *tramp; @@ -1419,9 +1593,10 @@ ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue, cif = closure->cif; avalue = alloca (cif->nargs * sizeof (void *)); - /* Copy the caller's structure return value address so that the closure - returns the data directly to the caller. */ - if (cif->rtype->type == FFI_TYPE_STRUCT) + /* Copy the caller's structure return value address so that the + closure returns the data directly to the caller. */ + if (cif->rtype->type == FFI_TYPE_STRUCT + && (cif->flags & FLAG_RETURNS_SMST) == 0) { rvalue = (void *) *pst; pst++; @@ -1435,6 +1610,8 @@ ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue, /* Grab the addresses of the arguments from the stack frame. 
*/ while (i < avn) { + unsigned int elt, elnum; + switch (arg_types[i]->type) { case FFI_TYPE_SINT8: @@ -1476,14 +1653,75 @@ ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue, if (align > 1) pst = (unsigned long *) ALIGN ((size_t) pst, align); #endif -#ifndef __LITTLE_ENDIAN__ - /* Structures with size less than eight bytes are passed - left-padded. */ - if (arg_types[i]->size < 8) - avalue[i] = (char *) pst + 8 - arg_types[i]->size; + elt = 0; +#if _CALL_ELF == 2 + elt = discover_homogeneous_aggregate (arg_types[i], &elnum); +#endif + if (elt) + { + union { + void *v; + unsigned long *ul; + float *f; + double *d; + size_t p; + } to, from; + + /* Repackage the aggregate from its parts. The + aggregate size is not greater than the space taken by + the registers so store back to the register/parameter + save arrays. */ + if (pfr + elnum <= end_pfr) + to.v = pfr; + else + to.v = pst; + + avalue[i] = to.v; + from.ul = pst; + if (elt == FFI_TYPE_FLOAT) + { + do + { + if (pfr < end_pfr && i < nfixedargs) + { + *to.f = (float) pfr->d; + pfr++; + } + else + *to.f = *from.f; + to.f++; + from.f++; + } + while (--elnum != 0); + } + else + { + do + { + if (pfr < end_pfr && i < nfixedargs) + { + *to.d = pfr->d; + pfr++; + } + else + *to.d = *from.d; + to.d++; + from.d++; + } + while (--elnum != 0); + } + } else + { +#ifndef __LITTLE_ENDIAN__ + /* Structures with size less than eight bytes are passed + left-padded. */ + if (arg_types[i]->size < 8) + avalue[i] = (char *) pst + 8 - arg_types[i]->size; + else #endif - avalue[i] = pst; + avalue[i] = pst; + } pst += (arg_types[i]->size + 7) / 8; break; @@ -1554,5 +1792,14 @@ ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue, (closure->fun) (cif, rvalue, avalue, closure->user_data); /* Tell ffi_closure_LINUX64 how to perform return type promotions. 
*/ + if ((cif->flags & FLAG_RETURNS_SMST) != 0) + { + if ((cif->flags & FLAG_RETURNS_FP) == 0) + return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1; + else if ((cif->flags & FLAG_RETURNS_64BITS) != 0) + return FFI_V2_TYPE_DOUBLE_HOMOG; + else + return FFI_V2_TYPE_FLOAT_HOMOG; + } return cif->rtype->type; } diff --git a/src/powerpc/ffitarget.h b/src/powerpc/ffitarget.h index 2a7e9a1..2be728e 100644 --- a/src/powerpc/ffitarget.h +++ b/src/powerpc/ffitarget.h @@ -122,14 +122,23 @@ typedef enum ffi_abi { defined in ffi.c, to determine the exact return type and its size. */ #define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2) -#if defined(POWERPC64) || defined(POWERPC_AIX) +/* Used by ELFv2 for homogenous structure returns. */ +#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_TYPE_LAST + 1) +#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_TYPE_LAST + 2) +#define FFI_V2_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 3) + +#if _CALL_ELF == 2 +# define FFI_TRAMPOLINE_SIZE 32 +#else +# if defined(POWERPC64) || defined(POWERPC_AIX) # if defined(POWERPC_DARWIN64) # define FFI_TRAMPOLINE_SIZE 48 # else # define FFI_TRAMPOLINE_SIZE 24 # endif -#else /* POWERPC || POWERPC_AIX */ +# else /* POWERPC || POWERPC_AIX */ # define FFI_TRAMPOLINE_SIZE 40 +# endif #endif #ifndef LIBFFI_ASM diff --git a/src/powerpc/linux64.S b/src/powerpc/linux64.S index 7f89934..85b8aaa 100644 --- a/src/powerpc/linux64.S +++ b/src/powerpc/linux64.S @@ -32,15 +32,22 @@ #ifdef __powerpc64__ .hidden ffi_call_LINUX64 .globl ffi_call_LINUX64 +# if _CALL_ELF == 2 + .text +ffi_call_LINUX64: + addis %r2, %r12, .TOC.-ffi_call_LINUX64@ha + addi %r2, %r2, .TOC.-ffi_call_LINUX64@l + .localentry ffi_call_LINUX64, . 
- ffi_call_LINUX64 +# else .section ".opd","aw" .align 3 ffi_call_LINUX64: -#ifdef _CALL_LINUX +# ifdef _CALL_LINUX .quad .L.ffi_call_LINUX64,.TOC.@tocbase,0 .type ffi_call_LINUX64,@function .text .L.ffi_call_LINUX64: -#else +# else .hidden .ffi_call_LINUX64 .globl .ffi_call_LINUX64 .quad .ffi_call_LINUX64,.TOC.@tocbase,0 @@ -48,7 +55,8 @@ ffi_call_LINUX64: .type .ffi_call_LINUX64,@function .text .ffi_call_LINUX64: -#endif +# endif +# endif .LFB1: mflr %r0 std %r28, -32(%r1) @@ -63,26 +71,35 @@ ffi_call_LINUX64: mr %r31, %r5 /* flags, */ mr %r30, %r6 /* rvalue, */ mr %r29, %r7 /* function address. */ +/* Save toc pointer, not for the ffi_prep_args64 call, but for the later + bctrl function call. */ +# if _CALL_ELF == 2 + std %r2, 24(%r1) +# else std %r2, 40(%r1) +# endif /* Call ffi_prep_args64. */ mr %r4, %r1 -#ifdef _CALL_LINUX +# if defined _CALL_LINUX || _CALL_ELF == 2 bl ffi_prep_args64 -#else +# else bl .ffi_prep_args64 -#endif +# endif - ld %r0, 0(%r29) +# if _CALL_ELF == 2 + mr %r12, %r29 +# else + ld %r12, 0(%r29) ld %r2, 8(%r29) ld %r11, 16(%r29) - +# endif /* Now do the call. */ /* Set up cr1 with bits 4-7 of the flags. */ mtcrf 0x40, %r31 /* Get the address to call into CTR. */ - mtctr %r0 + mtctr %r12 /* Load all those argument registers. */ ld %r3, -32-(8*8)(%r28) ld %r4, -32-(7*8)(%r28) @@ -117,12 +134,17 @@ ffi_call_LINUX64: /* This must follow the call immediately, the unwinder uses this to find out if r2 has been saved or not. */ +# if _CALL_ELF == 2 + ld %r2, 24(%r1) +# else ld %r2, 40(%r1) +# endif /* Now, deal with the return value. */ mtcrf 0x01, %r31 - bt- 30, .Ldone_return_value - bt- 29, .Lfp_return_value + bt 31, .Lstruct_return_value + bt 30, .Ldone_return_value + bt 29, .Lfp_return_value std %r3, 0(%r30) /* Fall through... 
*/ @@ -147,14 +169,48 @@ ffi_call_LINUX64: .Lfloat_return_value: stfs %f1, 0(%r30) b .Ldone_return_value + +.Lstruct_return_value: + bf 29, .Lsmall_struct + bf 28, .Lfloat_homog_return_value + stfd %f1, 0(%r30) + stfd %f2, 8(%r30) + stfd %f3, 16(%r30) + stfd %f4, 24(%r30) + stfd %f5, 32(%r30) + stfd %f6, 40(%r30) + stfd %f7, 48(%r30) + stfd %f8, 56(%r30) + b .Ldone_return_value + +.Lfloat_homog_return_value: + stfs %f1, 0(%r30) + stfs %f2, 4(%r30) + stfs %f3, 8(%r30) + stfs %f4, 12(%r30) + stfs %f5, 16(%r30) + stfs %f6, 20(%r30) + stfs %f7, 24(%r30) + stfs %f8, 28(%r30) + b .Ldone_return_value + +.Lsmall_struct: + std %r3, 0(%r30) + std %r4, 8(%r30) + b .Ldone_return_value + .LFE1: .long 0 .byte 0,12,0,1,128,4,0,0 -#ifdef _CALL_LINUX +# if _CALL_ELF == 2 + .size ffi_call_LINUX64,.-ffi_call_LINUX64 +# else +# ifdef _CALL_LINUX .size ffi_call_LINUX64,.-.L.ffi_call_LINUX64 -#else +# else .size .ffi_call_LINUX64,.-.ffi_call_LINUX64 -#endif +# endif +# endif .section .eh_frame,EH_FRAME_FLAGS,@progbits .Lframe1: diff --git a/src/powerpc/linux64_closure.S b/src/powerpc/linux64_closure.S index bc677fc..9b6b5f3 100644 --- a/src/powerpc/linux64_closure.S +++ b/src/powerpc/linux64_closure.S @@ -33,15 +33,22 @@ #ifdef __powerpc64__ FFI_HIDDEN (ffi_closure_LINUX64) .globl ffi_closure_LINUX64 +# if _CALL_ELF == 2 + .text +ffi_closure_LINUX64: + addis %r2, %r12, .TOC.-ffi_closure_LINUX64@ha + addi %r2, %r2, .TOC.-ffi_closure_LINUX64@l + .localentry ffi_closure_LINUX64, . 
- ffi_closure_LINUX64 +# else .section ".opd","aw" .align 3 ffi_closure_LINUX64: -#ifdef _CALL_LINUX +# ifdef _CALL_LINUX .quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0 .type ffi_closure_LINUX64,@function .text .L.ffi_closure_LINUX64: -#else +# else FFI_HIDDEN (.ffi_closure_LINUX64) .globl .ffi_closure_LINUX64 .quad .ffi_closure_LINUX64,.TOC.@tocbase,0 @@ -49,15 +56,52 @@ ffi_closure_LINUX64: .type .ffi_closure_LINUX64,@function .text .ffi_closure_LINUX64: -#endif +# endif +# endif +# if _CALL_ELF == 2 +# 32 byte special reg save area + 64 byte parm save area and retval +# + 13*8 fpr save area + round to 16 +# define STACKFRAME 208 +# define PARMSAVE 32 +# No parameter save area is needed for the call to ffi_closure_helper_LINUX64, +# so return value can start there. +# define RETVAL PARMSAVE +# else # 48 bytes special reg save area + 64 bytes parm save area # + 16 bytes retval area + 13*8 bytes fpr save area + round to 16 # define STACKFRAME 240 # define PARMSAVE 48 # define RETVAL PARMSAVE+64 +# endif .LFB1: +# if _CALL_ELF == 2 + ld %r12, FFI_TRAMPOLINE_SIZE(%r11) # closure->cif + mflr %r0 + lwz %r12, 28(%r12) # cif->flags + mtcrf 0x40, %r12 + addi %r12, %r1, PARMSAVE + bt 7, .Lparmsave + # Our caller has not allocated a parameter save area. + # We need to allocate one here and use it to pass gprs to + # ffi_closure_helper_LINUX64. The return value area will do. + addi %r12, %r1, -STACKFRAME+RETVAL +.Lparmsave: + std %r0, 16(%r1) + # Save general regs into parm save area + std %r3, 0(%r12) + std %r4, 8(%r12) + std %r5, 16(%r12) + std %r6, 24(%r12) + std %r7, 32(%r12) + std %r8, 40(%r12) + std %r9, 48(%r12) + std %r10, 56(%r12) + + # load up the pointer to the parm save area + mr %r5, %r12 +# else mflr %r0 # Save general regs into parm save area # This is the parameter save area set up by our caller. 
@@ -74,6 +118,7 @@ ffi_closure_LINUX64: # load up the pointer to the parm save area addi %r5, %r1, PARMSAVE +# endif # next save fpr 1 to fpr 13 stfd %f1, -104+(0*8)(%r1) @@ -103,11 +148,11 @@ ffi_closure_LINUX64: mr %r3, %r11 # make the call -#ifdef _CALL_LINUX +# if defined _CALL_LINUX || _CALL_ELF == 2 bl ffi_closure_helper_LINUX64 -#else +# else bl .ffi_closure_helper_LINUX64 -#endif +# endif .Lret: # now r3 contains the return type @@ -116,10 +161,12 @@ ffi_closure_LINUX64: # look up the proper starting point in table # by using return type as offset + ld %r0, STACKFRAME+16(%r1) + cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + bge .Lsmall mflr %r4 # move address of .Lret to r4 sldi %r3, %r3, 4 # now multiply return type by 16 addi %r4, %r4, .Lret_type0 - .Lret - ld %r0, STACKFRAME+16(%r1) add %r3, %r3, %r4 # add contents of table to table address mtctr %r3 bctr # jump to it @@ -234,15 +281,73 @@ ffi_closure_LINUX64: mtlr %r0 addi %r1, %r1, STACKFRAME blr -# esac +# case FFI_V2_TYPE_FLOAT_HOMOG + lfs %f1, RETVAL+0(%r1) + lfs %f2, RETVAL+4(%r1) + lfs %f3, RETVAL+8(%r1) + b .Lmorefloat +# case FFI_V2_TYPE_DOUBLE_HOMOG + lfd %f1, RETVAL+0(%r1) + lfd %f2, RETVAL+8(%r1) + lfd %f3, RETVAL+16(%r1) + lfd %f4, RETVAL+24(%r1) + mtlr %r0 + lfd %f5, RETVAL+32(%r1) + lfd %f6, RETVAL+40(%r1) + lfd %f7, RETVAL+48(%r1) + lfd %f8, RETVAL+56(%r1) + addi %r1, %r1, STACKFRAME + blr +.Lmorefloat: + lfs %f4, RETVAL+12(%r1) + mtlr %r0 + lfs %f5, RETVAL+16(%r1) + lfs %f6, RETVAL+20(%r1) + lfs %f7, RETVAL+24(%r1) + lfs %f8, RETVAL+28(%r1) + addi %r1, %r1, STACKFRAME + blr +.Lsmall: +# ifdef __LITTLE_ENDIAN__ + ld %r3,RETVAL+0(%r1) + mtlr %r0 + ld %r4,RETVAL+8(%r1) + addi %r1, %r1, STACKFRAME + blr +# else + # A struct smaller than a dword is returned in the low bits of r3 + # ie. right justified. Larger structs are passed left justified + # in r3 and r4. The return value area on the stack will have + # the structs as they are usually stored in memory. 
+ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes? + neg %r5, %r3 + ld %r3,RETVAL+0(%r1) + blt .Lsmalldown + mtlr %r0 + ld %r4,RETVAL+8(%r1) + addi %r1, %r1, STACKFRAME + blr +.Lsmalldown: + addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7 + mtlr %r0 + sldi %r5, %r5, 3 + addi %r1, %r1, STACKFRAME + srd %r3, %r3, %r5 + blr +# endif + .LFE1: .long 0 .byte 0,12,0,1,128,0,0,0 -#ifdef _CALL_LINUX +# if _CALL_ELF == 2 + .size ffi_closure_LINUX64,.-ffi_closure_LINUX64 +# else +# ifdef _CALL_LINUX .size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64 -#else +# else .size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64 -#endif +# endif +# endif .section .eh_frame,EH_FRAME_FLAGS,@progbits .Lframe1: -- Alan Modra Australia Development Lab, IBM