From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 8955 invoked by alias); 28 Oct 2014 18:32:24 -0000 Mailing-List: contact libffi-discuss-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libffi-discuss-owner@sourceware.org Received: (qmail 8813 invoked by uid 89); 28 Oct 2014 18:32:24 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-qa0-f52.google.com Received: from mail-qa0-f52.google.com (HELO mail-qa0-f52.google.com) (209.85.216.52) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 28 Oct 2014 18:32:22 +0000 Received: by mail-qa0-f52.google.com with SMTP id u7so893672qaz.25 for ; Tue, 28 Oct 2014 11:32:19 -0700 (PDT) X-Received: by 10.140.34.102 with SMTP id k93mr7238770qgk.21.1414521139336; Tue, 28 Oct 2014 11:32:19 -0700 (PDT) Received: from anchor.com (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id j1sm1948207qao.38.2014.10.28.11.32.18 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 28 Oct 2014 11:32:18 -0700 (PDT) From: Richard Henderson To: libffi-discuss@sourceware.org Subject: [PATCH 8/8] x86_64: Add support for complex types Date: Tue, 28 Oct 2014 18:32:00 -0000 Message-Id: <1414521094-18403-9-git-send-email-rth@twiddle.net> In-Reply-To: <1414521094-18403-1-git-send-email-rth@twiddle.net> References: <1414521094-18403-1-git-send-email-rth@twiddle.net> X-SW-Source: 2014/txt/msg00128.txt.bz2 --- src/x86/ffi64.c | 97 +++++++++++++++++++++++++++++++++++++++++++++------- src/x86/internal64.h | 6 ++-- src/x86/unix64.S | 63 ++++++++++++++++++---------------- 3 files changed, 122 insertions(+), 44 deletions(-) diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c index a03061b..650f7bb 100644 --- a/src/x86/ffi64.c +++ b/src/x86/ffi64.c @@ -171,6 +171,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], case FFI_TYPE_UINT64: case FFI_TYPE_SINT64: case FFI_TYPE_POINTER: + do_integer: { size_t size = byte_offset + type->size; @@ -301,11 +302,42 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], } return words; } - - default: - FFI_ASSERT(0); + case FFI_TYPE_COMPLEX: + { + ffi_type *inner = type->elements[0]; + switch (inner->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + goto do_integer; + + case FFI_TYPE_FLOAT: + classes[0] = X86_64_SSE_CLASS; + if (byte_offset % 8) + { + classes[1] = X86_64_SSESF_CLASS; + return 2; + } + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = classes[1] = X86_64_SSEDF_CLASS; + return 2; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_COMPLEX_X87_CLASS; + return 1; +#endif + } + } } - return 0; /* Never reached. */ + abort(); } /* Examine the argument and return set number of register required in each @@ -360,7 +392,7 @@ ffi_prep_cif_machdep (ffi_cif *cif) { int gprcount, ssecount, i, avn, ngpr, nsse, flags; enum x86_64_reg_class classes[MAX_CLASSES]; - size_t bytes, n; + size_t bytes, n, rtype_size; ffi_type *rtype; if (cif->abi != FFI_UNIX64) @@ -369,6 +401,7 @@ ffi_prep_cif_machdep (ffi_cif *cif) gprcount = ssecount = 0; rtype = cif->rtype; + rtype_size = rtype->size; switch (rtype->type) { case FFI_TYPE_VOID: @@ -421,16 +454,54 @@ ffi_prep_cif_machdep (ffi_cif *cif) } else { - /* Mark which registers the result appears in. */ _Bool sse0 = SSE_CLASS_P (classes[0]); - _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); - if (sse0) - flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX); - else - flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX); - /* Mark the true size of the structure. */ - flags |= rtype->size << UNIX64_SIZE_SHIFT; + if (rtype_size == 4 && sse0) + flags = UNIX64_RET_XMM32; + else if (rtype_size == 8) + flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64; + else + { + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && sse1) + flags = UNIX64_RET_ST_XMM0_XMM1; + else if (sse0) + flags = UNIX64_RET_ST_XMM0_RAX; + else if (sse1) + flags = UNIX64_RET_ST_RAX_XMM0; + else + flags = UNIX64_RET_ST_RAX_RDX; + flags |= rtype_size << UNIX64_SIZE_SHIFT; + } + } + break; + case FFI_TYPE_COMPLEX: + switch (rtype->elements[0]->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT); + break; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87_2; + break; +#endif + default: + return FFI_BAD_TYPEDEF; } break; default: diff --git a/src/x86/internal64.h b/src/x86/internal64.h index 07b1b10..512e955 100644 --- a/src/x86/internal64.h +++ b/src/x86/internal64.h @@ -9,11 +9,13 @@ #define UNIX64_RET_XMM32 8 #define UNIX64_RET_XMM64 9 #define UNIX64_RET_X87 10 -#define UNIX64_RET_ST_RAX_RDX 11 +#define UNIX64_RET_X87_2 11 #define UNIX64_RET_ST_XMM0_RAX 12 #define UNIX64_RET_ST_RAX_XMM0 13 #define UNIX64_RET_ST_XMM0_XMM1 14 -#define UNIX64_RET_LAST 14 +#define UNIX64_RET_ST_RAX_RDX 15 + +#define UNIX64_RET_LAST 15 #define UNIX64_FLAG_RET_IN_MEM (1 << 10) #define UNIX64_FLAG_XMM_ARGS (1 << 11) diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 0151229..6066bbf 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -156,9 +156,10 @@ E UNIX64_RET_XMM64 E UNIX64_RET_X87 fstpt (%rdi) ret -E UNIX64_RET_ST_RAX_RDX - movq %rdx, 8(%rsi) - jmp 2f +E UNIX64_RET_X87_2 + fstpt (%rdi) + fstpt 16(%rdi) + ret E UNIX64_RET_ST_XMM0_RAX movq %rax, 8(%rsi) jmp 3f @@ -167,14 +168,15 @@ E UNIX64_RET_ST_RAX_XMM0 jmp 2f E UNIX64_RET_ST_XMM0_XMM1 movq %xmm1, 8(%rsi) - - .align 8 -3: movq %xmm0, (%rsi) + jmp 3f +E UNIX64_RET_ST_RAX_RDX + movq %rdx, 8(%rsi) +2: movq %rax, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret .align 8 -2: movq %rax, (%rsi) +3: movq %xmm0, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret @@ -201,11 +203,11 @@ E UNIX64_RET_ST_XMM0_XMM1 .size ffi_call_unix64,.-ffi_call_unix64 /* 6 general registers, 8 vector registers, - 16 bytes of rvalue, 8 bytes of alignment. */ + 32 bytes of rvalue, 8 bytes of alignment. */ #define ffi_closure_OFS_G 0 #define ffi_closure_OFS_V (6*8) #define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16) -#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 16 + 8) +#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8) /* The location of rvalue within the red zone after deallocating the frame. */ #define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) @@ -275,6 +277,7 @@ ffi_closure_unix64: leaq 0f(%rip), %r11 ja 9f leaq (%r11, %r10, 8), %r10 + leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 .align 8 @@ -282,52 +285,54 @@ ffi_closure_unix64: E UNIX64_RET_VOID ret E UNIX64_RET_UINT8 - movzbl ffi_closure_RED_RVALUE(%rsp), %eax + movzbl (%rsi), %eax ret E UNIX64_RET_UINT16 - movzwl ffi_closure_RED_RVALUE(%rsp), %eax + movzwl (%rsi), %eax ret E UNIX64_RET_UINT32 - movl ffi_closure_RED_RVALUE(%rsp), %eax + movl (%rsi), %eax ret E UNIX64_RET_SINT8 - movsbl ffi_closure_RED_RVALUE(%rsp), %eax + movsbl (%rsi), %eax ret E UNIX64_RET_SINT16 - movswl ffi_closure_RED_RVALUE(%rsp), %eax + movswl (%rsi), %eax ret E UNIX64_RET_SINT32 - movl ffi_closure_RED_RVALUE(%rsp), %eax + movl (%rsi), %eax ret E UNIX64_RET_INT64 - movq ffi_closure_RED_RVALUE(%rsp), %rax + movq (%rsi), %rax ret E UNIX64_RET_XMM32 - movd ffi_closure_RED_RVALUE(%rsp), %xmm0 + movd (%rsi), %xmm0 ret E UNIX64_RET_XMM64 - movq ffi_closure_RED_RVALUE(%rsp), %xmm0 + movq (%rsi), %xmm0 ret E UNIX64_RET_X87 - fldt ffi_closure_RED_RVALUE(%rsp) + fldt (%rsi) + ret +E UNIX64_RET_X87_2 + fldt 16(%rsi) + fldt (%rsi) ret -E UNIX64_RET_ST_RAX_RDX - movq ffi_closure_RED_RVALUE+8(%rsp), %rdx - jmp 2f E UNIX64_RET_ST_XMM0_RAX - movq ffi_closure_RED_RVALUE+8(%rsp), %rax + movq 8(%rsi), %rax jmp 3f E UNIX64_RET_ST_RAX_XMM0 - movq ffi_closure_RED_RVALUE+8(%rsp), %xmm0 + movq 8(%rsi), %xmm0 jmp 2f E UNIX64_RET_ST_XMM0_XMM1 - movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1 - - .align 8 -3: movq ffi_closure_RED_RVALUE(%rsp), %xmm0 + movq 8(%rsi), %xmm1 + jmp 3f +E UNIX64_RET_ST_RAX_RDX + movq 8(%rsi), %rdx +2: movq (%rsi), %rax ret .align 8 -2: movq ffi_closure_RED_RVALUE(%rsp), %rax +3: movq (%rsi), %xmm0 ret 9: call abort@PLT -- 1.9.3