This is the mail archive of the
libffi-discuss@sourceware.org
mailing list for the libffi project.
[PATCH 8/8] x86_64: Add support for complex types
- From: Richard Henderson <rth at twiddle dot net>
- To: libffi-discuss at sourceware dot org
- Date: Tue, 28 Oct 2014 11:31:34 -0700
- Subject: [PATCH 8/8] x86_64: Add support for complex types
- Authentication-results: sourceware.org; auth=none
- References: <1414521094-18403-1-git-send-email-rth at twiddle dot net>
---
src/x86/ffi64.c | 97 +++++++++++++++++++++++++++++++++++++++++++++-------
src/x86/internal64.h | 6 ++--
src/x86/unix64.S | 63 ++++++++++++++++++----------------
3 files changed, 122 insertions(+), 44 deletions(-)
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index a03061b..650f7bb 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -171,6 +171,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
+ do_integer:
{
size_t size = byte_offset + type->size;
@@ -301,11 +302,42 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
}
return words;
}
-
- default:
- FFI_ASSERT(0);
+ case FFI_TYPE_COMPLEX:
+ {
+ ffi_type *inner = type->elements[0];
+ switch (inner->type)
+ {
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ goto do_integer;
+
+ case FFI_TYPE_FLOAT:
+ classes[0] = X86_64_SSE_CLASS;
+ if (byte_offset % 8)
+ {
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+#endif
+ }
+ }
}
- return 0; /* Never reached. */
+ abort();
}
/* Examine the argument and return set number of register required in each
@@ -360,7 +392,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
{
int gprcount, ssecount, i, avn, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t bytes, n;
+ size_t bytes, n, rtype_size;
ffi_type *rtype;
if (cif->abi != FFI_UNIX64)
@@ -369,6 +401,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
gprcount = ssecount = 0;
rtype = cif->rtype;
+ rtype_size = rtype->size;
switch (rtype->type)
{
case FFI_TYPE_VOID:
@@ -421,16 +454,54 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
else
{
- /* Mark which registers the result appears in. */
_Bool sse0 = SSE_CLASS_P (classes[0]);
- _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
- if (sse0)
- flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
- else
- flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
- /* Mark the true size of the structure. */
- flags |= rtype->size << UNIX64_SIZE_SHIFT;
+ if (rtype_size == 4 && sse0)
+ flags = UNIX64_RET_XMM32;
+ else if (rtype_size == 8)
+ flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+ else
+ {
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && sse1)
+ flags = UNIX64_RET_ST_XMM0_XMM1;
+ else if (sse0)
+ flags = UNIX64_RET_ST_XMM0_RAX;
+ else if (sse1)
+ flags = UNIX64_RET_ST_RAX_XMM0;
+ else
+ flags = UNIX64_RET_ST_RAX_RDX;
+ flags |= rtype_size << UNIX64_SIZE_SHIFT;
+ }
+ }
+ break;
+ case FFI_TYPE_COMPLEX:
+ switch (rtype->elements[0]->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+ break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87_2;
+ break;
+#endif
+ default:
+ return FFI_BAD_TYPEDEF;
}
break;
default:
diff --git a/src/x86/internal64.h b/src/x86/internal64.h
index 07b1b10..512e955 100644
--- a/src/x86/internal64.h
+++ b/src/x86/internal64.h
@@ -9,11 +9,13 @@
#define UNIX64_RET_XMM32 8
#define UNIX64_RET_XMM64 9
#define UNIX64_RET_X87 10
-#define UNIX64_RET_ST_RAX_RDX 11
+#define UNIX64_RET_X87_2 11
#define UNIX64_RET_ST_XMM0_RAX 12
#define UNIX64_RET_ST_RAX_XMM0 13
#define UNIX64_RET_ST_XMM0_XMM1 14
-#define UNIX64_RET_LAST 14
+#define UNIX64_RET_ST_RAX_RDX 15
+
+#define UNIX64_RET_LAST 15
#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
#define UNIX64_FLAG_XMM_ARGS (1 << 11)
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 0151229..6066bbf 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -156,9 +156,10 @@ E UNIX64_RET_XMM64
E UNIX64_RET_X87
fstpt (%rdi)
ret
-E UNIX64_RET_ST_RAX_RDX
- movq %rdx, 8(%rsi)
- jmp 2f
+E UNIX64_RET_X87_2
+ fstpt (%rdi)
+ fstpt 16(%rdi)
+ ret
E UNIX64_RET_ST_XMM0_RAX
movq %rax, 8(%rsi)
jmp 3f
@@ -167,14 +168,15 @@ E UNIX64_RET_ST_RAX_XMM0
jmp 2f
E UNIX64_RET_ST_XMM0_XMM1
movq %xmm1, 8(%rsi)
-
- .align 8
-3: movq %xmm0, (%rsi)
+ jmp 3f
+E UNIX64_RET_ST_RAX_RDX
+ movq %rdx, 8(%rsi)
+2: movq %rax, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
.align 8
-2: movq %rax, (%rsi)
+3: movq %xmm0, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
@@ -201,11 +203,11 @@ E UNIX64_RET_ST_XMM0_XMM1
.size ffi_call_unix64,.-ffi_call_unix64
/* 6 general registers, 8 vector registers,
- 16 bytes of rvalue, 8 bytes of alignment. */
+ 32 bytes of rvalue, 8 bytes of alignment. */
#define ffi_closure_OFS_G 0
#define ffi_closure_OFS_V (6*8)
#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
-#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 16 + 8)
+#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8)
/* The location of rvalue within the red zone after deallocating the frame. */
#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
@@ -275,6 +277,7 @@ ffi_closure_unix64:
leaq 0f(%rip), %r11
ja 9f
leaq (%r11, %r10, 8), %r10
+ leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
.align 8
@@ -282,52 +285,54 @@ ffi_closure_unix64:
E UNIX64_RET_VOID
ret
E UNIX64_RET_UINT8
- movzbl ffi_closure_RED_RVALUE(%rsp), %eax
+ movzbl (%rsi), %eax
ret
E UNIX64_RET_UINT16
- movzwl ffi_closure_RED_RVALUE(%rsp), %eax
+ movzwl (%rsi), %eax
ret
E UNIX64_RET_UINT32
- movl ffi_closure_RED_RVALUE(%rsp), %eax
+ movl (%rsi), %eax
ret
E UNIX64_RET_SINT8
- movsbl ffi_closure_RED_RVALUE(%rsp), %eax
+ movsbl (%rsi), %eax
ret
E UNIX64_RET_SINT16
- movswl ffi_closure_RED_RVALUE(%rsp), %eax
+ movswl (%rsi), %eax
ret
E UNIX64_RET_SINT32
- movl ffi_closure_RED_RVALUE(%rsp), %eax
+ movl (%rsi), %eax
ret
E UNIX64_RET_INT64
- movq ffi_closure_RED_RVALUE(%rsp), %rax
+ movq (%rsi), %rax
ret
E UNIX64_RET_XMM32
- movd ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movd (%rsi), %xmm0
ret
E UNIX64_RET_XMM64
- movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movq (%rsi), %xmm0
ret
E UNIX64_RET_X87
- fldt ffi_closure_RED_RVALUE(%rsp)
+ fldt (%rsi)
+ ret
+E UNIX64_RET_X87_2
+ fldt 16(%rsi)
+ fldt (%rsi)
ret
-E UNIX64_RET_ST_RAX_RDX
- movq ffi_closure_RED_RVALUE+8(%rsp), %rdx
- jmp 2f
E UNIX64_RET_ST_XMM0_RAX
- movq ffi_closure_RED_RVALUE+8(%rsp), %rax
+ movq 8(%rsi), %rax
jmp 3f
E UNIX64_RET_ST_RAX_XMM0
- movq ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+ movq 8(%rsi), %xmm0
jmp 2f
E UNIX64_RET_ST_XMM0_XMM1
- movq ffi_closure_RED_RVALUE+8(%rsp), %xmm1
-
- .align 8
-3: movq ffi_closure_RED_RVALUE(%rsp), %xmm0
+ movq 8(%rsi), %xmm1
+ jmp 3f
+E UNIX64_RET_ST_RAX_RDX
+ movq 8(%rsi), %rdx
+2: movq (%rsi), %rax
ret
.align 8
-2: movq ffi_closure_RED_RVALUE(%rsp), %rax
+3: movq (%rsi), %xmm0
ret
9: call abort@PLT
--
1.9.3