This is the mail archive of the
libffi-discuss@sourceware.org
mailing list for the libffi project.
[PATCH 05/16] aarch64: Reduce the size of register_context
- From: Richard Henderson <rth at twiddle dot net>
- To: libffi-discuss at sourceware dot org
- Cc: Richard Henderson <rth at redhat dot com>
- Date: Tue, 28 Oct 2014 11:53:02 -0700
- Subject: [PATCH 05/16] aarch64: Reduce the size of register_context
- Authentication-results: sourceware.org; auth=none
- References: <1414522393-19169-1-git-send-email-rth at twiddle dot net>
From: Richard Henderson <rth@redhat.com>
We don't need to store all 32 general and all 32 vector registers.
Only 8 of each are used for parameter passing, plus x8, which holds
the structure return pointer.
---
src/aarch64/ffi.c | 35 ++++++++---------
src/aarch64/ffitarget.h | 6 ---
src/aarch64/internal.h | 26 +++++++++++++
src/aarch64/sysv.S | 100 +++++++++++++++++++++++-------------------------
4 files changed, 91 insertions(+), 76 deletions(-)
create mode 100644 src/aarch64/internal.h
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index a6fcc11..58d088b 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <ffi.h>
#include <ffi_common.h>
+#include "internal.h"
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
all further uses in this file will refer to the 128-bit type. */
@@ -35,38 +37,35 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
# define FFI_TYPE_LONGDOUBLE 4
#endif
-#define N_X_ARG_REG 8
-#define N_V_ARG_REG 8
-
-#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
-
union _d
{
UINT64 d;
UINT32 s[2];
};
+struct _v
+{
+ union _d d[2] __attribute__((aligned(16)));
+};
+
struct call_context
{
- UINT64 x [AARCH64_N_XREG];
- struct
- {
- union _d d[2];
- } v [AARCH64_N_VREG];
+ struct _v v[N_V_ARG_REG];
+ UINT64 x[N_X_ARG_REG];
+ UINT64 x8;
};
#if defined (__clang__) && defined (__APPLE__)
-extern void
-sys_icache_invalidate (void *start, size_t len);
+extern void sys_icache_invalidate (void *start, size_t len);
#endif
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
- sys_icache_invalidate (start, (char *)end - (char *)start);
+ sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
- __builtin___clear_cache (start, end);
+ __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
if (is_v_register_candidate (cif->rtype))
{
- cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+ cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
}
else
{
@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
for (i = 0; i < cif->nargs; i++)
if (is_v_register_candidate (cif->arg_types[i]))
{
- cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+ cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
break;
}
}
@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
else
{
- memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
+ context.x8 = (uintptr_t)rvalue;
ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
stack_bytes, fn);
}
@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
}
else
{
- memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
+ rvalue = (void *)(uintptr_t)context->x8;
(closure->fun) (cif, rvalue, avalue, closure->user_data);
}
}
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
index 4bbced2..336f28a 100644
--- a/src/aarch64/ffitarget.h
+++ b/src/aarch64/ffitarget.h
@@ -54,10 +54,4 @@ typedef enum ffi_abi
#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
#endif
-#define AARCH64_FFI_WITH_V_BIT 0
-
-#define AARCH64_N_XREG 32
-#define AARCH64_N_VREG 32
-#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
-
#endif
diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h
new file mode 100644
index 0000000..b6b6104
--- /dev/null
+++ b/src/aarch64/internal.h
@@ -0,0 +1,26 @@
+/*
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#define AARCH64_FLAG_ARG_V_BIT 0
+#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
+
+#define N_X_ARG_REG 8
+#define N_V_ARG_REG 8
+#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index 169eab8..70870db 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include "internal.h"
#ifdef HAVE_MACHINE_ASM_H
#include <machine/asm.h>
@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
.text
+ .align 2
+
.globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
.type CNAME(ffi_call_SYSV), #function
#endif
-#ifdef __APPLE__
- .align 2
-#endif
/* ffi_call_SYSV()
@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV):
mov x23, x0
/* Figure out if we should touch the vector registers. */
- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
+ tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
/* Load the vector argument passing registers. */
- ldp q0, q1, [x21, #8*32 + 0]
- ldp q2, q3, [x21, #8*32 + 32]
- ldp q4, q5, [x21, #8*32 + 64]
- ldp q6, q7, [x21, #8*32 + 96]
+ ldp q0, q1, [x21, #0]
+ ldp q2, q3, [x21, #32]
+ ldp q4, q5, [x21, #64]
+ ldp q6, q7, [x21, #96]
1:
- /* Load the core argument passing registers. */
- ldp x0, x1, [x21, #0]
- ldp x2, x3, [x21, #16]
- ldp x4, x5, [x21, #32]
- ldp x6, x7, [x21, #48]
-
- /* Don't forget x8 which may be holding the address of a return buffer.
- */
- ldr x8, [x21, #8*8]
+ /* Load the core argument passing registers, including
+ the structure return pointer. */
+ ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
+ ldr x8, [x21, #16*N_V_ARG_REG + 64]
blr x24
/* Save the core argument passing registers. */
- stp x0, x1, [x21, #0]
- stp x2, x3, [x21, #16]
- stp x4, x5, [x21, #32]
- stp x6, x7, [x21, #48]
+ stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
+ stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
+ stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
+ stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
/* Note nothing useful ever comes back in x8! */
/* Figure out if we should touch the vector registers. */
- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
+ tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
/* Save the vector argument passing registers. */
- stp q0, q1, [x21, #8*32 + 0]
- stp q2, q3, [x21, #8*32 + 32]
- stp q4, q5, [x21, #8*32 + 64]
- stp q6, q7, [x21, #8*32 + 96]
+ stp q0, q1, [x21, #0]
+ stp q2, q3, [x21, #32]
+ stp q4, q5, [x21, #64]
+ stp q6, q7, [x21, #96]
1:
/* All done, unwind our stack frame. */
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV):
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
#endif
-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
/* ffi_closure_SYSV
@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV):
Voila! */
.text
- .globl CNAME(ffi_closure_SYSV)
-#ifdef __APPLE__
.align 2
-#endif
+
+ .globl CNAME(ffi_closure_SYSV)
.cfi_startproc
CNAME(ffi_closure_SYSV):
stp x29, x30, [sp, #-16]!
@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV):
/* Preserve our struct trampoline_data * */
mov x22, x17
- /* Save the rest of the argument passing registers. */
- stp x0, x1, [x21, #0]
- stp x2, x3, [x21, #16]
- stp x4, x5, [x21, #32]
- stp x6, x7, [x21, #48]
- /* Don't forget we may have been given a result scratch pad address.
- */
- str x8, [x21, #64]
+ /* Save the rest of the argument passing registers, including
+ the structure return pointer. */
+ stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
+ stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
+ stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
+ stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
+ str x8, [x21, #16*N_V_ARG_REG + 64]
/* Figure out if we should touch the vector registers. */
ldr x0, [x22, #8]
- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
+ tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
/* Save the argument passing vector registers. */
- stp q0, q1, [x21, #8*32 + 0]
- stp q2, q3, [x21, #8*32 + 32]
- stp q4, q5, [x21, #8*32 + 64]
- stp q6, q7, [x21, #8*32 + 96]
+ stp q0, q1, [x21, #0]
+ stp q2, q3, [x21, #32]
+ stp q4, q5, [x21, #64]
+ stp q6, q7, [x21, #96]
1:
/* Load &ffi_closure.. */
ldr x0, [x22, #0]
@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV):
/* Figure out if we should touch the vector registers. */
ldr x0, [x22, #8]
- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
+ tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
/* Load the result passing vector registers. */
- ldp q0, q1, [x21, #8*32 + 0]
- ldp q2, q3, [x21, #8*32 + 32]
- ldp q4, q5, [x21, #8*32 + 64]
- ldp q6, q7, [x21, #8*32 + 96]
+ ldp q0, q1, [x21, #0]
+ ldp q2, q3, [x21, #32]
+ ldp q4, q5, [x21, #64]
+ ldp q6, q7, [x21, #96]
1:
/* Load the result passing core registers. */
- ldp x0, x1, [x21, #0]
- ldp x2, x3, [x21, #16]
- ldp x4, x5, [x21, #32]
- ldp x6, x7, [x21, #48]
+ ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
/* Note nothing useful is returned in x8. */
/* We are done, unwind our frame. */
--
1.9.3