This is the mail archive of the
libffi-discuss@sourceware.org
mailing list for the libffi project.
[PATCH 05/10] arm: Rewrite ffi_call
- From: Richard Henderson <rth at twiddle dot net>
- To: libffi-discuss at sourceware dot org
- Date: Wed, 29 Oct 2014 13:05:42 -0700
- Subject: [PATCH 05/10] arm: Rewrite ffi_call
- Authentication-results: sourceware.org; auth=none
- References: <1414613147-10917-1-git-send-email-rth at twiddle dot net>
Use the trick to allocate the stack frame for ffi_call_SYSV
within ffi_call itself.
---
src/arm/ffi.c | 285 ++++++++++++++++++----------------
src/arm/ffitarget.h | 2 +-
src/arm/internal.h | 7 +
src/arm/sysv.S | 440 ++++++++++++++++------------------------------------
4 files changed, 294 insertions(+), 440 deletions(-)
create mode 100644 src/arm/internal.h
diff --git a/src/arm/ffi.c b/src/arm/ffi.c
index d00ed89..c91b869 100644
--- a/src/arm/ffi.c
+++ b/src/arm/ffi.c
@@ -30,16 +30,13 @@
#include <ffi.h>
#include <ffi_common.h>
-
#include <stdlib.h>
+#include "internal.h"
/* Forward declares. */
static int vfp_type_p (const ffi_type *);
static void layout_vfp_args (ffi_cif *);
-int ffi_prep_args_SYSV (char *stack, extended_cif *ecif, float *vfp_space);
-int ffi_prep_args_VFP (char *stack, extended_cif *ecif, float *vfp_space);
-
static void *
ffi_align (ffi_type *ty, void *p)
{
@@ -98,53 +95,44 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst)
return ALIGN (z, 4);
}
-/* ffi_prep_args is called by the assembly routine once stack space
- has been allocated for the function's arguments
+/* ffi_prep_args is called once stack space has been allocated
+ for the function's arguments.
The vfp_space parameter is the load area for VFP regs, the return
value is cif->vfp_used (word bitset of VFP regs used for passing
arguments). These are only used for the VFP hard-float ABI.
*/
-int
-ffi_prep_args_SYSV (char *stack, extended_cif *ecif, float *vfp_space)
+static void
+ffi_prep_args_SYSV (ffi_cif *cif, int flags, void *rvalue,
+ void **avalue, char *argp)
{
- register unsigned int i;
- register void **p_argv;
- register char *argp;
- register ffi_type **p_arg;
- argp = stack;
+ ffi_type **arg_types = cif->arg_types;
+ int i, n;
- if (ecif->cif->flags == FFI_TYPE_STRUCT)
+ if (flags == ARM_TYPE_STRUCT)
{
- *(void **) argp = ecif->rvalue;
+ *(void **) argp = rvalue;
argp += 4;
}
- p_argv = ecif->avalue;
-
- for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
- (i != 0); i--, p_arg++, p_argv++)
+ for (i = 0, n = cif->nargs; i < n; i++)
{
- argp = ffi_align (*p_arg, argp);
- argp += ffi_put_arg (*p_arg, *p_argv, argp);
+ ffi_type *ty = arg_types[i];
+ argp = ffi_align (ty, argp);
+ argp += ffi_put_arg (ty, avalue[i], argp);
}
-
- return 0;
}
-int
-ffi_prep_args_VFP (char *stack, extended_cif * ecif, float *vfp_space)
+static void
+ffi_prep_args_VFP (ffi_cif *cif, int flags, void *rvalue,
+ void **avalue, char *stack, char *vfp_space)
{
- register unsigned int i, vi = 0;
- register void **p_argv;
- register char *argp, *regp, *eo_regp;
- register ffi_type **p_arg;
+ ffi_type **arg_types = cif->arg_types;
+ int i, n, vi = 0;
+ char *argp, *regp, *eo_regp;
char stack_used = 0;
char done_with_regs = 0;
- /* Make sure we are using FFI_VFP. */
- FFI_ASSERT (ecif->cif->abi == FFI_VFP);
-
/* The first 4 words on the stack are used for values
passed in core registers. */
regp = stack;
@@ -152,37 +140,36 @@ ffi_prep_args_VFP (char *stack, extended_cif * ecif, float *vfp_space)
/* If the function returns an FFI_TYPE_STRUCT in memory,
that address is passed in r0 to the function. */
- if (ecif->cif->flags == FFI_TYPE_STRUCT)
+ if (flags == ARM_TYPE_STRUCT)
{
- *(void **) regp = ecif->rvalue;
+ *(void **) regp = rvalue;
regp += 4;
}
- p_argv = ecif->avalue;
-
- for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
- (i != 0); i--, p_arg++, p_argv++)
+ for (i = 0, n = cif->nargs; i < n; i++)
{
- int is_vfp_type = vfp_type_p (*p_arg);
+ ffi_type *ty = arg_types[i];
+ void *a = avalue[i];
+ int is_vfp_type = vfp_type_p (ty);
/* Allocated in VFP registers. */
- if (vi < ecif->cif->vfp_nargs && is_vfp_type)
+ if (vi < cif->vfp_nargs && is_vfp_type)
{
- char *vfp_slot = (char *) (vfp_space + ecif->cif->vfp_args[vi++]);
- ffi_put_arg (*p_arg, *p_argv, vfp_slot);
+ char *vfp_slot = vfp_space + cif->vfp_args[vi++] * 4;
+ ffi_put_arg (ty, a, vfp_slot);
continue;
}
/* Try allocating in core registers. */
else if (!done_with_regs && !is_vfp_type)
{
- char *tregp = ffi_align (*p_arg, regp);
- size_t size = (*p_arg)->size;
+ char *tregp = ffi_align (ty, regp);
+ size_t size = ty->size;
size = (size < 4) ? 4 : size; // pad
/* Check if there is space left in the aligned register
area to place the argument. */
if (tregp + size <= eo_regp)
{
- regp = tregp + ffi_put_arg (*p_arg, *p_argv, tregp);
+ regp = tregp + ffi_put_arg (ty, a, tregp);
done_with_regs = (regp == argp);
// ensure we did not write into the stack area
FFI_ASSERT (regp <= argp);
@@ -195,87 +182,97 @@ ffi_prep_args_VFP (char *stack, extended_cif * ecif, float *vfp_space)
{
stack_used = 1;
done_with_regs = 1;
- argp = tregp + ffi_put_arg (*p_arg, *p_argv, tregp);
+ argp = tregp + ffi_put_arg (ty, a, tregp);
FFI_ASSERT (eo_regp < argp);
continue;
}
}
/* Base case, arguments are passed on the stack */
stack_used = 1;
- argp = ffi_align (*p_arg, argp);
- argp += ffi_put_arg (*p_arg, *p_argv, argp);
+ argp = ffi_align (ty, argp);
+ argp += ffi_put_arg (ty, a, argp);
}
- /* Indicate the VFP registers used. */
- return ecif->cif->vfp_used;
}
/* Perform machine dependent cif processing */
ffi_status
-ffi_prep_cif_machdep (ffi_cif * cif)
+ffi_prep_cif_machdep (ffi_cif *cif)
{
+ int flags = 0, cabi = cif->abi;
+ size_t bytes;
+
/* Round the stack up to a multiple of 8 bytes. This isn't needed
everywhere, but it is on some platforms, and it doesn't harm anything
when it isn't needed. */
- cif->bytes = (cif->bytes + 7) & ~7;
+ bytes = ALIGN (cif->bytes, 8);
+
+ /* Minimum stack space is the 4 register arguments that we pop. */
+ if (bytes < 4*4)
+ bytes = 4*4;
+ cif->bytes = bytes;
+
+ /* Map out the register placements of VFP register args. The VFP
+ hard-float calling conventions are slightly more sophisticated
+ than the base calling conventions, so we do it here instead of
+ in ffi_prep_args(). */
+ if (cabi == FFI_VFP)
+ layout_vfp_args (cif);
/* Set the return type flag */
switch (cif->rtype->type)
{
case FFI_TYPE_VOID:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- cif->flags = (unsigned) cif->rtype->type;
+ flags = ARM_TYPE_VOID;
+ break;
+
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_POINTER:
+ flags = ARM_TYPE_INT;
break;
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
- cif->flags = (unsigned) FFI_TYPE_SINT64;
+ flags = ARM_TYPE_INT64;
+ break;
+
+ case FFI_TYPE_FLOAT:
+ flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_S : ARM_TYPE_INT);
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_D : ARM_TYPE_INT64);
break;
case FFI_TYPE_STRUCT:
- if (cif->abi == FFI_VFP)
+ if (cabi == FFI_VFP)
{
int h = vfp_type_p (cif->rtype);
- if (h)
- {
- int ele_count = h >> 8;
- int type_code = h & 0xff;
- if (ele_count > 1)
- {
- if (type_code == FFI_TYPE_FLOAT)
- type_code = FFI_TYPE_STRUCT_VFP_FLOAT;
- else
- type_code = FFI_TYPE_STRUCT_VFP_DOUBLE;
- }
- cif->flags = type_code;
+
+ flags = ARM_TYPE_VFP_N;
+ if (h == 0x100 + FFI_TYPE_FLOAT)
+ flags = ARM_TYPE_VFP_S;
+ if (h == 0x100 + FFI_TYPE_DOUBLE)
+ flags = ARM_TYPE_VFP_D;
+ if (h != 0)
break;
- }
- }
- if (cif->rtype->size <= 4)
- {
- /* A Composite Type not larger than 4 bytes is returned in r0. */
- cif->flags = (unsigned) FFI_TYPE_INT;
- }
- else
- {
- /* A Composite Type larger than 4 bytes, or whose size cannot
- be determined statically ... is stored in memory at an
- address passed [in r0]. */
- cif->flags = (unsigned) FFI_TYPE_STRUCT;
}
+
+ /* A Composite Type not larger than 4 bytes is returned in r0.
+ A Composite Type larger than 4 bytes, or whose size cannot
+ be determined statically ... is stored in memory at an
+ address passed [in r0]. */
+ flags = (cif->rtype->size <= 4 ? ARM_TYPE_INT : ARM_TYPE_STRUCT);
break;
default:
- cif->flags = FFI_TYPE_INT;
- break;
+ abort();
}
-
- /* Map out the register placements of VFP register args. The VFP
- hard-float calling conventions are slightly more sophisticated
- than the base calling conventions, so we do it here instead of
- in ffi_prep_args(). */
- if (cif->abi == FFI_VFP)
- layout_vfp_args (cif);
+ cif->flags = flags;
return FFI_OK;
}
@@ -293,69 +290,83 @@ ffi_prep_cif_machdep_var (ffi_cif * cif,
}
/* Prototypes for assembly functions, in sysv.S. */
-extern void ffi_call_SYSV (void (*fn) (void), extended_cif *, unsigned,
- unsigned, unsigned *);
-extern void ffi_call_VFP (void (*fn) (void), extended_cif *, unsigned,
- unsigned, unsigned *);
-void
-ffi_call (ffi_cif * cif, void (*fn) (void), void *rvalue, void **avalue)
+struct call_frame
{
- extended_cif ecif;
-
- int small_struct = (cif->flags == FFI_TYPE_INT
- && cif->rtype->type == FFI_TYPE_STRUCT);
- int vfp_struct = (cif->flags == FFI_TYPE_STRUCT_VFP_FLOAT
- || cif->flags == FFI_TYPE_STRUCT_VFP_DOUBLE);
-
- unsigned int temp;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
+ void *fp;
+ void *lr;
+ void *rvalue;
+ int flags;
+};
- /* If the return value is a struct and we don't have a return
- value address then we need to make one. */
+extern void ffi_call_SYSV (void *stack, struct call_frame *,
+ void (*fn) (void)) FFI_HIDDEN;
+extern void ffi_call_VFP (void *vfp_space, struct call_frame *,
+ void (*fn) (void), unsigned vfp_used) FFI_HIDDEN;
- if ((rvalue == NULL) && (cif->flags == FFI_TYPE_STRUCT))
+void
+ffi_call (ffi_cif * cif, void (*fn) (void), void *rvalue, void **avalue)
+{
+ int flags = cif->flags;
+ ffi_type *rtype = cif->rtype;
+ size_t bytes, rsize, vfp_size;
+ char *stack, *vfp_space, *new_rvalue;
+ struct call_frame *frame;
+
+ rsize = 0;
+ if (rvalue == NULL)
{
- ecif.rvalue = alloca (cif->rtype->size);
+ /* If the return value is a struct and we don't have a return
+ value address then we need to make one. Otherwise the return
+ value is in registers and we can ignore it. */
+ if (flags == ARM_TYPE_STRUCT)
+ rsize = rtype->size;
+ else
+ flags = ARM_TYPE_VOID;
}
- else if (small_struct)
- ecif.rvalue = &temp;
- else if (vfp_struct)
+ else if (flags == ARM_TYPE_VFP_N)
{
/* Largest case is double x 4. */
- ecif.rvalue = alloca (32);
+ rsize = 32;
}
- else
- ecif.rvalue = rvalue;
+ else if (flags == ARM_TYPE_INT && rtype->type == FFI_TYPE_STRUCT)
+ rsize = 4;
- switch (cif->abi)
- {
- case FFI_SYSV:
- ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
- break;
+ /* Largest case. */
+ vfp_size = (cif->abi == FFI_VFP && cif->vfp_used ? 8*8: 0);
- case FFI_VFP:
-#ifdef __ARM_EABI__
- ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
- break;
-#endif
+ bytes = cif->bytes;
+ stack = alloca (vfp_size + bytes + sizeof(struct call_frame) + rsize);
- default:
- FFI_ASSERT (0);
- break;
+ vfp_space = NULL;
+ if (vfp_size)
+ {
+ vfp_space = stack;
+ stack += vfp_size;
}
- if (small_struct)
+
+ frame = (struct call_frame *)(stack + bytes);
+
+ new_rvalue = rvalue;
+ if (rsize)
+ new_rvalue = (void *)(frame + 1);
+
+ frame->rvalue = new_rvalue;
+ frame->flags = flags;
+
+ if (vfp_space)
{
- FFI_ASSERT (rvalue != NULL);
- memcpy (rvalue, &temp, cif->rtype->size);
+ ffi_prep_args_VFP (cif, flags, new_rvalue, avalue, stack, vfp_space);
+ ffi_call_VFP (vfp_space, frame, fn, cif->vfp_used);
}
- else if (vfp_struct)
+ else
{
- FFI_ASSERT (rvalue != NULL);
- memcpy (rvalue, ecif.rvalue, cif->rtype->size);
+ ffi_prep_args_SYSV (cif, flags, new_rvalue, avalue, stack);
+ ffi_call_SYSV (stack, frame, fn);
}
+
+ if (rvalue && rvalue != new_rvalue)
+ memcpy (rvalue, new_rvalue, rtype->size);
}
/** private members **/
diff --git a/src/arm/ffitarget.h b/src/arm/ffitarget.h
index 26d494d..6355904 100644
--- a/src/arm/ffitarget.h
+++ b/src/arm/ffitarget.h
@@ -53,7 +53,7 @@ typedef enum ffi_abi {
#define FFI_EXTRA_CIF_FIELDS \
int vfp_used; \
- short vfp_reg_free, vfp_nargs; \
+ unsigned short vfp_reg_free, vfp_nargs; \
signed char vfp_args[16] \
/* Internally used. */
diff --git a/src/arm/internal.h b/src/arm/internal.h
new file mode 100644
index 0000000..6cf0b2a
--- /dev/null
+++ b/src/arm/internal.h
@@ -0,0 +1,7 @@
+#define ARM_TYPE_VFP_S 0
+#define ARM_TYPE_VFP_D 1
+#define ARM_TYPE_VFP_N 2
+#define ARM_TYPE_INT64 3
+#define ARM_TYPE_INT 4
+#define ARM_TYPE_VOID 5
+#define ARM_TYPE_STRUCT 6
diff --git a/src/arm/sysv.S b/src/arm/sysv.S
index 541bbe9..b967d97 100644
--- a/src/arm/sysv.S
+++ b/src/arm/sysv.S
@@ -1,8 +1,8 @@
/* -----------------------------------------------------------------------
sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc.
Copyright (c) 2011 Plausible Labs Cooperative, Inc.
-
- ARM Foreign Function Interface
+
+ ARM Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -28,219 +28,155 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
-#ifdef HAVE_MACHINE_ASM_H
-#include <machine/asm.h>
-#else
-#ifdef __USER_LABEL_PREFIX__
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels. */
-#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
-#else
-#define CNAME(x) x
-#endif
-#ifdef __APPLE__
-#define ENTRY(x) .globl _##x; _##x:
-#else
-#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x):
-#endif /* __APPLE__ */
-#endif
-
-#ifdef __ELF__
-#define LSYM(x) .x
-#else
-#define LSYM(x) x
-#endif
-
-/* Use the SOFTFP return value ABI on Mac OS X, as per the iOS ABI
- Function Call Guide */
-#ifdef __APPLE__
-#define __SOFTFP__
-#endif
-
-/* We need a better way of testing for this, but for now, this is all
- we can do. */
-@ This selects the minimum architecture level required.
-#define __ARM_ARCH__ 3
-
-#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
-# undef __ARM_ARCH__
-# define __ARM_ARCH__ 4
-#endif
-
-#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
- || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
- || defined(__ARM_ARCH_5TEJ__)
-# undef __ARM_ARCH__
-# define __ARM_ARCH__ 5
-#endif
-
-#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+#include <ffi_cfi.h>
+#include "internal.h"
+
+/* GCC 4.8 provides __ARM_ARCH; construct it otherwise. */
+#ifndef __ARM_ARCH
+# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH 7
+# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
|| defined(__ARM_ARCH_6M__)
-# undef __ARM_ARCH__
-# define __ARM_ARCH__ 6
-#endif
-
-#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__)
-# undef __ARM_ARCH__
-# define __ARM_ARCH__ 7
-#endif
-
-#if __ARM_ARCH__ >= 5
-# define call_reg(x) blx x
-#elif defined (__ARM_ARCH_4T__)
-# define call_reg(x) mov lr, pc ; bx x
-# if defined(__thumb__) || defined(__THUMB_INTERWORK__)
-# define __INTERWORKING__
+# define __ARM_ARCH 6
+# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH 5
+# else
+# define __ARM_ARCH 4
# endif
-#else
-# define call_reg(x) mov lr, pc ; mov pc, x
#endif
/* Conditionally compile unwinder directives. */
+.macro UNWIND text:vararg
#ifdef __ARM_EABI__
-#define UNWIND
-#else
-#define UNWIND @
+ \text
#endif
+.endm
+#if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__ARM_EABI__)
+ .cfi_sections .debug_frame
+#endif
-.syntax unified
-
-#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
-#define ARM_FUNC_START(name) \
- .text; \
- .align 2; \
- .thumb; \
- .thumb_func; \
- ENTRY(name); \
- bx pc; \
- nop; \
- .arm; \
- UNWIND .fnstart; \
-_L__##name:
+#define CONCAT(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+#ifdef __USER_LABEL_PREFIX__
+# define CNAME(X) CONCAT (__USER_LABEL_PREFIX__, X)
#else
-#define ARM_FUNC_START(name) \
- .text; \
- .align 2; \
- .arm; \
- ENTRY(name); \
- UNWIND .fnstart
+# define CNAME(X) X
#endif
-
-.macro RETLDM regs=, cond=, dirn=ia
-#if defined (__INTERWORKING__)
- .ifc "\regs",""
- ldr\cond lr, [sp], #4
- .else
- ldm\cond\dirn sp!, {\regs, lr}
- .endif
- bx\cond lr
+#ifdef __ELF__
+# define SIZE(X) .size CNAME(X), . - CNAME(X)
+# define TYPE(X, Y) .type CNAME(X), Y
#else
- .ifc "\regs",""
- ldr\cond pc, [sp], #4
- .else
- ldm\cond\dirn sp!, {\regs, pc}
- .endif
+# define SIZE(X)
+# define TYPE(X, Y)
#endif
-.endm
-
- @ r0: ffi_prep_args
- @ r1: &ecif
- @ r2: cif->bytes
- @ r3: fig->flags
- @ sp+0: ecif.rvalue
-
- @ This assumes we are using gas.
-ARM_FUNC_START(ffi_call_SYSV)
- @ Save registers
- stmfd sp!, {r0-r3, fp, lr}
- UNWIND .save {r0-r3, fp, lr}
- mov fp, sp
-
- UNWIND .setfp fp, sp
-
- @ Make room for all of the new args.
- sub sp, fp, r2
-
- @ Place all of the ffi_prep_args in position
- mov r0, sp
- @ r1 already set
- @ Call ffi_prep_args(stack, &ecif)
- bl CNAME(ffi_prep_args_SYSV)
+#define ARM_FUNC_START(name, gl) \
+ .align 3; \
+ .ifne gl; .globl CNAME(name); FFI_HIDDEN(CNAME(name)); .endif; \
+ TYPE(name, %function); \
+ CNAME(name):
- @ move first 4 parameters in registers
- ldmia sp, {r0-r3}
+#define ARM_FUNC_END(name) \
+ SIZE(name)
- @ and adjust stack
- sub lr, fp, sp @ cif->bytes == fp - sp
- ldr ip, [fp] @ load fn() in advance
- cmp lr, #16
- movhs lr, #16
- add sp, sp, lr
+/* Aid in defining a jump table with 8 bytes between entries. */
+.macro E index
+ .if . - 0b - 8*\index
+ .error "type table out of sync"
+ .endif
+.endm
- @ call (fn) (...)
- call_reg(ip)
-
- @ Remove the space we pushed for the args
- mov sp, fp
+ .text
+ .syntax unified
+ .arm
+
+ /* We require interworking on LDM, which implies ARMv5T,
+ which implies the existence of BLX. */
+ .arch armv5t
+
+ /* Note that we use STC and LDC to encode VFP instructions,
+ so that we do not need ".fpu vfp", nor get that added to
+ the object file attributes. These will not be executed
+ unless the FFI_VFP abi is used. */
+
+ @ r0: stack
+ @ r1: frame
+ @ r2: fn
+ @ r3: vfp_used
+
+ARM_FUNC_START(ffi_call_VFP, 1)
+ UNWIND .fnstart
+ cfi_startproc
+
+ cmp r3, #3 @ load only d0 if possible
+ ldcle p11, cr0, [r0] @ vldrle d0, [r0]
+ ldcgt p11, cr0, [r0], {16} @ vldmgt r0, {d0-d7}
+ add r0, r0, #64 @ discard the vfp register args
+ /* FALLTHRU */
+ARM_FUNC_END(ffi_call_VFP)
+
+ARM_FUNC_START(ffi_call_SYSV, 1)
+ stm r1, {fp, lr}
+ mov fp, r1
+
+ @ This is a bit of a lie wrt the origin of the unwind info, but
+ @ now we've got the usual frame pointer and two saved registers.
+ UNWIND .save {fp,lr}
+ UNWIND .setfp fp, sp
+ cfi_def_cfa(fp, 8)
+ cfi_rel_offset(fp, 0)
+ cfi_rel_offset(lr, 4)
+
+ mov sp, r0 @ install the stack pointer
+ mov lr, r2 @ move the fn pointer out of the way
+ ldmia sp!, {r0-r3} @ move first 4 parameters in registers.
+ blx lr @ call fn
@ Load r2 with the pointer to storage for the return value
- ldr r2, [sp, #24]
-
- @ Load r3 with the return type code
- ldr r3, [sp, #12]
-
- @ If the return value pointer is NULL, assume no return value.
- cmp r2, #0
- beq LSYM(Lepilogue)
+ @ Load r3 with the return type code
+ ldr r2, [fp, #8]
+ ldr r3, [fp, #12]
-@ return INT
- cmp r3, #FFI_TYPE_INT
-#if defined(__SOFTFP__) || defined(__ARM_EABI__)
- cmpne r3, #FFI_TYPE_FLOAT
-#endif
- streq r0, [r2]
- beq LSYM(Lepilogue)
-
- @ return INT64
- cmp r3, #FFI_TYPE_SINT64
-#if defined(__SOFTFP__) || defined(__ARM_EABI__)
- cmpne r3, #FFI_TYPE_DOUBLE
-#endif
- stmiaeq r2, {r0, r1}
-
-#if !defined(__SOFTFP__) && !defined(__ARM_EABI__)
- beq LSYM(Lepilogue)
-
-@ return FLOAT
- cmp r3, #FFI_TYPE_FLOAT
- stfeqs f0, [r2]
- beq LSYM(Lepilogue)
-
-@ return DOUBLE or LONGDOUBLE
- cmp r3, #FFI_TYPE_DOUBLE
- stfeqd f0, [r2]
-#endif
-
-LSYM(Lepilogue):
-#if defined (__INTERWORKING__)
- ldmia sp!, {r0-r3,fp, lr}
- bx lr
-#else
- ldmia sp!, {r0-r3,fp, pc}
-#endif
-
-.ffi_call_SYSV_end:
- UNWIND .fnend
-#ifdef __ELF__
- .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV)
-#endif
+ @ Deallocate the stack with the arguments.
+ mov sp, fp
+ cfi_def_cfa_register(sp)
+
+ @ Store values returned in registers.
+ .align 3
+ add pc, pc, r3, lsl #3
+ nop
+0:
+E ARM_TYPE_VFP_S
+ stc p10, cr0, [r2] @ vstr s0, [r2]
+ pop {fp,pc}
+E ARM_TYPE_VFP_D
+ stc p11, cr0, [r2] @ vstr d0, [r2]
+ pop {fp,pc}
+E ARM_TYPE_VFP_N
+ stc p11, cr0, [r2], {8} @ vstm r2, {d0-d3}
+ pop {fp,pc}
+E ARM_TYPE_INT64
+ str r1, [r2, #4]
+ nop
+E ARM_TYPE_INT
+ str r0, [r2]
+ pop {fp,pc}
+E ARM_TYPE_VOID
+ pop {fp,pc}
+ nop
+E ARM_TYPE_STRUCT
+ pop {fp,pc}
+
+ cfi_endproc
+ UNWIND .fnend
+ARM_FUNC_END(ffi_call_SYSV)
/*
@@ -251,7 +187,8 @@ LSYM(Lepilogue):
void *args;
*/
-ARM_FUNC_START(ffi_closure_SYSV)
+ARM_FUNC_START(ffi_closure_SYSV, 1)
+ UNWIND .fnstart
UNWIND .pad #16
add ip, sp, #16
stmfd sp!, {ip, lr}
@@ -310,116 +247,16 @@ ARM_FUNC_START(ffi_closure_SYSV)
ldfd f0, [sp]
b .Lclosure_epilogue
#endif
-
-.ffi_closure_SYSV_end:
UNWIND .fnend
-#ifdef __ELF__
- .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV)
-#endif
+ARM_FUNC_END(ffi_closure_SYSV)
/* Below are VFP hard-float ABI call and closure implementations.
Add VFP FPU directive here. This is only compiled into the library
under EABI. */
#ifdef __ARM_EABI__
- .fpu vfp
-
- @ r0: fn
- @ r1: &ecif
- @ r2: cif->bytes
- @ r3: fig->flags
- @ sp+0: ecif.rvalue
-
-ARM_FUNC_START(ffi_call_VFP)
- @ Save registers
- stmfd sp!, {r0-r3, fp, lr}
- UNWIND .save {r0-r3, fp, lr}
- mov fp, sp
- UNWIND .setfp fp, sp
-
- @ Make room for all of the new args.
- sub sp, sp, r2
-
- @ Make room for loading VFP args
- sub sp, sp, #64
-
- @ Place all of the ffi_prep_args in position
- mov r0, sp
- @ r1 already set
- sub r2, fp, #64 @ VFP scratch space
-
- @ Call ffi_prep_args(stack, &ecif, vfp_space)
- bl CNAME(ffi_prep_args_VFP)
-
- @ Load VFP register args if needed
- cmp r0, #0
- mov ip, fp
- beq LSYM(Lbase_args)
-
- @ Load only d0 if possible
- cmp r0, #3
- sub ip, fp, #64
- flddle d0, [ip]
- fldmiadgt ip, {d0-d7}
-
-LSYM(Lbase_args):
- @ move first 4 parameters in registers
- ldmia sp, {r0-r3}
-
- @ and adjust stack
- sub lr, ip, sp @ cif->bytes == (fp - 64) - sp
- ldr ip, [fp] @ load fn() in advance
- cmp lr, #16
- movhs lr, #16
- add sp, sp, lr
-
- @ call (fn) (...)
- call_reg(ip)
-
- @ Remove the space we pushed for the args
- mov sp, fp
-
- @ Load r2 with the pointer to storage for
- @ the return value
- ldr r2, [sp, #24]
-
- @ Load r3 with the return type code
- ldr r3, [sp, #12]
-
- @ If the return value pointer is NULL,
- @ assume no return value.
- cmp r2, #0
- beq LSYM(Lepilogue_vfp)
-
- cmp r3, #FFI_TYPE_INT
- streq r0, [r2]
- beq LSYM(Lepilogue_vfp)
-
- cmp r3, #FFI_TYPE_SINT64
- stmeqia r2, {r0, r1}
- beq LSYM(Lepilogue_vfp)
-
- cmp r3, #FFI_TYPE_FLOAT
- fstseq s0, [r2]
- beq LSYM(Lepilogue_vfp)
-
- cmp r3, #FFI_TYPE_DOUBLE
- fstdeq d0, [r2]
- beq LSYM(Lepilogue_vfp)
-
- cmp r3, #FFI_TYPE_STRUCT_VFP_FLOAT
- cmpne r3, #FFI_TYPE_STRUCT_VFP_DOUBLE
- fstmiadeq r2, {d0-d3}
-
-LSYM(Lepilogue_vfp):
- RETLDM "r0-r3,fp"
-
-.ffi_call_VFP_end:
- UNWIND .fnend
- .size CNAME(ffi_call_VFP),.ffi_call_VFP_end-CNAME(ffi_call_VFP)
-
-
-ARM_FUNC_START(ffi_closure_VFP)
+ARM_FUNC_START(ffi_closure_VFP, 1)
+ UNWIND .fnstart
fstmfdd sp!, {d0-d7}
@ r0-r3, then d0-d7
UNWIND .pad #80
@@ -475,16 +312,15 @@ ARM_FUNC_START(ffi_closure_VFP)
.Lretdouble_struct_vfp:
fldmiad sp, {d0-d3}
b .Lclosure_epilogue_vfp
-
-.ffi_closure_VFP_end:
UNWIND .fnend
- .size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP)
+ARM_FUNC_END(ffi_closure_VFP)
#endif
-ENTRY(ffi_arm_trampoline)
+ARM_FUNC_START(ffi_arm_trampoline, 1)
stmfd sp!, {r0-r3}
ldr r0, [pc]
ldr pc, [pc]
+ARM_FUNC_END(ffi_arm_trampoline)
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",%progbits
--
1.9.3