This is the mail archive of the libffi-discuss@sourceware.org mailing list for the libffi project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 09/16] aarch64: Merge prep_args with ffi_call


From: Richard Henderson <rth@redhat.com>

Use the alloca trick to allocate the stack frame for ffi_call_SYSV
within ffi_call itself.
---
 src/aarch64/ffi.c  | 193 ++++++++++++++++++++++++-----------------------------
 src/aarch64/sysv.S | 192 ++++++++++++++++------------------------------------
 2 files changed, 144 insertions(+), 241 deletions(-)

diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index d19384b..a067303 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -72,14 +72,6 @@ ffi_clear_cache (void *start, void *end)
 }
 
 extern void
-ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
-			    extended_cif *),
-               struct call_context *context,
-               extended_cif *,
-               size_t,
-               void (*fn)(void));
-
-extern void
 ffi_closure_SYSV (ffi_closure *);
 
 /* Test for an FFI floating point representation.  */
@@ -311,12 +303,11 @@ struct arg_state
 
 /* Initialize a procedure call argument marshalling state.  */
 static void
-arg_init (struct arg_state *state, size_t call_frame_size)
+arg_init (struct arg_state *state)
 {
   state->ngrn = 0;
   state->nsrn = 0;
   state->nsaa = 0;
-
 #if defined (__APPLE__)
   state->allocating_variadic = 0;
 #endif
@@ -529,27 +520,88 @@ allocate_int_to_reg_or_stack (struct call_context *context,
   return allocate_to_stack (state, stack, size, size);
 }
 
-/* Marshall the arguments from FFI representation to procedure call
-   context and stack.  */
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  /* Round the stack up to a multiple of the stack alignment requirement. */
+  cif->bytes = ALIGN(cif->bytes, 16);
 
-static unsigned
-aarch64_prep_args (struct call_context *context, unsigned char *stack,
-		   extended_cif *ecif)
+  /* Initialize our flags. We are interested if this CIF will touch a
+     vector register, if so we will enable context save and load to
+     those registers, otherwise not. This is intended to be friendly
+     to lazy float context switching in the kernel.  */
+  cif->aarch64_flags = 0;
+
+  if (is_v_register_candidate (cif->rtype))
+    {
+      cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
+    }
+  else
+    {
+      int i;
+      for (i = 0; i < cif->nargs; i++)
+        if (is_v_register_candidate (cif->arg_types[i]))
+          {
+            cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
+            break;
+          }
+    }
+
+#if defined (__APPLE__)
+  cif->aarch64_nfixedargs = 0;
+#endif
+
+  return FFI_OK;
+}
+
+#if defined (__APPLE__)
+
+/* Perform Apple-specific cif processing for variadic calls */
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+				    unsigned int nfixedargs,
+				    unsigned int ntotalargs)
 {
-  ffi_cif *cif = ecif->cif;
-  void **avalue = ecif->avalue;
-  int i, nargs = cif->nargs;
+  ffi_status status;
+
+  status = ffi_prep_cif_machdep (cif);
+
+  cif->aarch64_nfixedargs = nfixedargs;
+
+  return status;
+}
+
+#endif
+
+extern void ffi_call_SYSV (void *stack, void *frame,
+			   void (*fn)(void), int flags) FFI_HIDDEN;
+
+/* Call a function with the provided arguments and capture the return
+   value.  */
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  struct call_context *context;
+  void *stack, *frame;
   struct arg_state state;
+  size_t stack_bytes;
+  int i, nargs = cif->nargs;
+  int h, t;
+  ffi_type *rtype;
 
-  arg_init (&state, cif->bytes);
+  /* Allocate consecutive stack for everything we'll need.  */
+  stack_bytes = cif->bytes;
+  stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
+  frame = stack + stack_bytes;
+  context = frame + 32;
 
+  arg_init (&state);
   for (i = 0; i < nargs; i++)
     {
       ffi_type *ty = cif->arg_types[i];
       size_t s = ty->size;
-      int h, t = ty->type;
       void *a = avalue[i];
 
+      t = ty->type;
       switch (t)
 	{
 	case FFI_TYPE_VOID:
@@ -665,83 +717,12 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack,
 #endif
     }
 
-  return cif->aarch64_flags;
-}
-
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
-  /* Round the stack up to a multiple of the stack alignment requirement. */
-  cif->bytes = ALIGN(cif->bytes, 16);
-
-  /* Initialize our flags. We are interested if this CIF will touch a
-     vector register, if so we will enable context save and load to
-     those registers, otherwise not. This is intended to be friendly
-     to lazy float context switching in the kernel.  */
-  cif->aarch64_flags = 0;
-
-  if (is_v_register_candidate (cif->rtype))
-    {
-      cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
-    }
-  else
-    {
-      int i;
-      for (i = 0; i < cif->nargs; i++)
-        if (is_v_register_candidate (cif->arg_types[i]))
-          {
-            cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
-            break;
-          }
-    }
-
-#if defined (__APPLE__)
-  cif->aarch64_nfixedargs = 0;
-#endif
-
-  return FFI_OK;
-}
-
-#if defined (__APPLE__)
-
-/* Perform Apple-specific cif processing for variadic calls */
-ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
-				    unsigned int nfixedargs,
-				    unsigned int ntotalargs)
-{
-  ffi_status status;
-
-  status = ffi_prep_cif_machdep (cif);
-
-  cif->aarch64_nfixedargs = nfixedargs;
-
-  return status;
-}
-
-#endif
-
-/* Call a function with the provided arguments and capture the return
-   value.  */
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
-  extended_cif ecif;
-  struct call_context context;
-  size_t stack_bytes;
-  int h, t;
-
-  ecif.cif = cif;
-  ecif.avalue = avalue;
-  ecif.rvalue = rvalue;
-
-  stack_bytes = cif->bytes;
-
-  memset (&context, 0, sizeof (context));
-  if (is_register_candidate (cif->rtype))
+  rtype = cif->rtype;
+  if (is_register_candidate (rtype))
     {
-      ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+      ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
 
-      t = cif->rtype->type;
+      t = rtype->type;
       switch (t)
 	{
 	case FFI_TYPE_INT:
@@ -754,33 +735,35 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
 	case FFI_TYPE_POINTER:
 	case FFI_TYPE_UINT64:
 	case FFI_TYPE_SINT64:
-	  *(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t);
+	  *(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
 	  break;
 
 	case FFI_TYPE_FLOAT:
 	case FFI_TYPE_DOUBLE:
 	case FFI_TYPE_LONGDOUBLE:
-	  compress_hfa_type (rvalue, &context.v[0], 0x100 + t);
+	  compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
 	  break;
 
 	case FFI_TYPE_STRUCT:
 	  h = is_hfa (cif->rtype);
 	  if (h)
-	    compress_hfa_type (rvalue, &context.v[0], h);
-	  else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
-	    memcpy (rvalue, &context.x[0], cif->rtype->size);
+	    compress_hfa_type (rvalue, &context->v[0], h);
 	  else
-	    abort();
+	    {
+	      FFI_ASSERT (rtype->size <= 16);
+	      memcpy (rvalue, &context->x[0], rtype->size);
+	    }
 	  break;
 
 	default:
-	  abort();
+	  FFI_ASSERT (0);
+	  break;
 	}
     }
   else
     {
-      context.x8 = (uintptr_t)rvalue;
-      ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+      context->x8 = (uintptr_t)rvalue;
+      ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
     }
 }
 
@@ -851,7 +834,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
   struct arg_state state;
   ffi_type *rtype;
 
-  arg_init (&state, ALIGN(cif->bytes, 16));
+  arg_init (&state);
 
   for (i = 0; i < nargs; i++)
     {
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index fa7ff5b..a5f636a 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
+#include <ffi_cfi.h>
 #include "internal.h"
 
 #ifdef HAVE_MACHINE_ASM_H
@@ -38,158 +39,77 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #endif
 #endif
 
-#define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#define cfi_restore(reg)		.cfi_restore reg
-#define cfi_def_cfa_register(reg)	.cfi_def_cfa_register reg
+	.text
+	.align 2
 
-        .text
-        .align 2
-
-        .globl CNAME(ffi_call_SYSV)
+	.globl CNAME(ffi_call_SYSV)
 #ifdef __ELF__
-        .type CNAME(ffi_call_SYSV), #function
+	.type	CNAME(ffi_call_SYSV), #function
+	.hidden	CNAME(ffi_call_SYSV)
 #endif
 
-/* ffi_call_SYSV()
-
-   Create a stack frame, setup an argument context, call the callee
-   and extract the result.
-
-   The maximum required argument stack size is provided,
-   ffi_call_SYSV() allocates that stack space then calls the
-   prepare_fn to populate register context and stack.  The
-   argument passing registers are loaded from the register
-   context and the callee called, on return the register passing
-   register are saved back to the context.  Our caller will
-   extract the return value from the final state of the saved
-   register context.
-
-   Prototype:
-
-   extern unsigned
-   ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
-			   extended_cif *),
-                  struct call_context *context,
-                  extended_cif *,
-                  size_t required_stack_size,
-                  void (*fn)(void));
+/* ffi_call_SYSV
+   extern void ffi_call_SYSV (void *stack, void *frame,
+			      void (*fn)(void), int flags);
 
    Therefore on entry we have:
 
-   x0 prepare_fn
-   x1 &context
-   x2 &ecif
-   x3 bytes
-   x4 fn
-
-   This function uses the following stack frame layout:
+   x0 stack
+   x1 frame
+   x2 fn
+   x3 flags
+*/
 
-   ==
-                saved x30(lr)
-   x29(fp)->    saved x29(fp)
-                saved x24
-                saved x23
-                saved x22
-   sp'    ->    saved x21
-                ...
-   sp     ->    (constructed callee stack arguments)
-   ==
-
-   Voila! */
-
-#define ffi_call_SYSV_FS (8 * 4)
-
-        .cfi_startproc
+	cfi_startproc
 CNAME(ffi_call_SYSV):
-        stp     x29, x30, [sp, #-16]!
-	cfi_adjust_cfa_offset (16)
-        cfi_rel_offset (x29, 0)
-        cfi_rel_offset (x30, 8)
-
-        mov     x29, sp
-	cfi_def_cfa_register (x29)
-        sub     sp, sp, #ffi_call_SYSV_FS
-
-        stp     x21, x22, [sp, #0]
-        cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
-        cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
-
-        stp     x23, x24, [sp, #16]
-        cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
-        cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
-
-        mov     x21, x1
-        mov     x22, x2
-        mov     x24, x4
-
-        /* Allocate the stack space for the actual arguments, many
-           arguments will be passed in registers, but we assume
-           worst case and allocate sufficient stack for ALL of
-           the arguments.  */
-        sub     sp, sp, x3
-
-        /* unsigned (*prepare_fn) (struct call_context *context,
-				   unsigned char *stack, extended_cif *ecif);
-	 */
-        mov     x23, x0
-        mov     x0, x1
-        mov     x1, sp
-        /* x2 already in place */
-        blr     x23
-
-        /* Preserve the flags returned.  */
-        mov     x23, x0
-
-        /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
-
-        /* Load the vector argument passing registers.  */
-        ldp     q0, q1, [x21, #0]
-        ldp     q2, q3, [x21, #32]
-        ldp     q4, q5, [x21, #64]
-        ldp     q6, q7, [x21, #96]
+	/* Use a stack frame allocated by our caller.  */
+	cfi_def_cfa(x1, 32);
+	stp	x29, x30, [x1]
+	mov	x29, x1
+	mov	sp, x0
+	cfi_def_cfa_register(x29)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+
+	str	w3, [x29, #16]		/* save flags */
+	mov	x9, x2			/* save fn */
+
+	/* Load the vector argument passing registers, if necessary.  */
+	tbz	w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+	ldp     q0, q1, [x29, #32 + 0]
+	ldp     q2, q3, [x29, #32 + 32]
+	ldp     q4, q5, [x29, #32 + 64]
+	ldp     q6, q7, [x29, #32 + 96]
 1:
-        /* Load the core argument passing registers, including
+	/* Load the core argument passing registers, including
 	   the structure return pointer.  */
-        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
-        ldp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
-        ldp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
-        ldp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
-        ldr     x8,     [x21, #16*N_V_ARG_REG + 64]
-
-        blr     x24
+	ldp     x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
+	ldp     x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
+	ldp     x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
+	ldp     x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
+	ldr     x8,     [x29, #32 + 16*N_V_ARG_REG + 64]
 
-        /* Save the core return registers.  */
-        stp     x0, x1, [x21, #16*N_V_ARG_REG]
+	blr     x9			/* call fn */
 
-        /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FLAG_ARG_V_BIT, 1f
+	ldr	w3, [x29, #16]		/* reload flags */
 
-        /* Save the vector return registers.  */
-        stp     q0, q1, [x21, #0]
-        stp     q2, q3, [x21, #32]
-1:
-        /* All done, unwind our stack frame.  */
-        ldp     x21, x22, [x29,  # - ffi_call_SYSV_FS]
-        cfi_restore (x21)
-        cfi_restore (x22)
-
-        ldp     x23, x24, [x29,  # - ffi_call_SYSV_FS + 16]
-        cfi_restore (x23)
-        cfi_restore (x24)
-
-        mov     sp, x29
+	/* Partially deconstruct the stack frame.  */
+	mov     sp, x29
 	cfi_def_cfa_register (sp)
+	ldp     x29, x30, [x29]
 
-        ldp     x29, x30, [sp], #16
-	cfi_adjust_cfa_offset (-16)
-        cfi_restore (x29)
-        cfi_restore (x30)
+	/* Save the core return registers.  */
+	stp     x0, x1, [sp, #32 + 16*N_V_ARG_REG]
 
-        ret
+	/* Save the vector return registers, if necessary.  */
+	tbz     w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+	stp     q0, q1, [sp, #32 + 0]
+	stp     q2, q3, [sp, #32 + 32]
+1:
+	/* All done.  */
+	ret
 
-        .cfi_endproc
+	cfi_endproc
 #ifdef __ELF__
         .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
 #endif
@@ -237,7 +157,7 @@ CNAME(ffi_call_SYSV):
         .align 2
 
         .globl CNAME(ffi_closure_SYSV)
-        .cfi_startproc
+        cfi_startproc
 CNAME(ffi_closure_SYSV):
         stp     x29, x30, [sp, #-16]!
 	cfi_adjust_cfa_offset (16)
@@ -310,7 +230,7 @@ CNAME(ffi_closure_SYSV):
         cfi_restore (x30)
 
         ret
-        .cfi_endproc
+	cfi_endproc
 #ifdef __ELF__
         .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
 #endif
-- 
1.9.3


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]