This is the mail archive of the libffi-discuss@sourceware.org mailing list for the libffi project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 11/16] aarch64: Move return value handling into ffi_closure_SYSV


From: Richard Henderson <rth@redhat.com>

As with the change to ffi_call_SYSV, this avoids copying data
into a temporary buffer.
---
 src/aarch64/ffi.c       | 196 +++++++------------------------------
 src/aarch64/ffitarget.h |   2 +-
 src/aarch64/sysv.S      | 249 +++++++++++++++++++++++++++---------------------
 3 files changed, 176 insertions(+), 271 deletions(-)

diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index ffa1363..c5a429a 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -71,9 +71,6 @@ ffi_clear_cache (void *start, void *end)
 #endif
 }
 
-extern void
-ffi_closure_SYSV (ffi_closure *);
-
 /* Test for an FFI floating point representation.  */
 
 static unsigned
@@ -211,69 +208,6 @@ is_hfa(const ffi_type *ty)
   return (ele_count << 8) | candidate;
 }
 
-/* Test if an ffi_type is a candidate for passing in a register.
-
-   This test does not check that sufficient registers of the
-   appropriate class are actually available, merely that IFF
-   sufficient registers are available then the argument will be passed
-   in register(s).
-
-   Note that an ffi_type that is deemed to be a register candidate
-   will always be returned in registers.
-
-   Returns 1 if a register candidate else 0.  */
-
-static int
-is_register_candidate (ffi_type *ty)
-{
-  switch (ty->type)
-    {
-    case FFI_TYPE_VOID:
-      return 0;
-    case FFI_TYPE_FLOAT:
-    case FFI_TYPE_DOUBLE:
-    case FFI_TYPE_LONGDOUBLE:
-    case FFI_TYPE_UINT8:
-    case FFI_TYPE_UINT16:
-    case FFI_TYPE_UINT32:
-    case FFI_TYPE_UINT64:
-    case FFI_TYPE_POINTER:
-    case FFI_TYPE_SINT8:
-    case FFI_TYPE_SINT16:
-    case FFI_TYPE_SINT32:
-    case FFI_TYPE_INT:
-    case FFI_TYPE_SINT64:
-      return 1;
-
-    case FFI_TYPE_STRUCT:
-      if (is_hfa (ty))
-        {
-          return 1;
-        }
-      else if (ty->size > 16)
-        {
-          /* Too large. Will be replaced with a pointer to memory. The
-             pointer MAY be passed in a register, but the value will
-             not. This test specifically fails since the argument will
-             never be passed by value in registers. */
-          return 0;
-        }
-      else
-        {
-          /* Might be passed in registers depending on the number of
-             registers required. */
-          return (ty->size + 7) / 8 < N_X_ARG_REG;
-        }
-      break;
-
-    default:
-      FFI_ASSERT (0);
-      break;
-    }
-
-  return 0;
-}
-
 /* Test if an ffi_type argument or result is a candidate for a vector
    register.  */
 
@@ -797,42 +731,42 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
     memcpy (orig_rvalue, rvalue, rtype_size);
 }
 
-static unsigned char trampoline [] =
-{ 0x70, 0x00, 0x00, 0x58,	/* ldr	x16, 1f	*/
-  0x91, 0x00, 0x00, 0x10,	/* adr	x17, 2f	*/
-  0x00, 0x02, 0x1f, 0xd6	/* br	x16	*/
-};
-
 /* Build a trampoline.  */
 
-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)			\
-  ({unsigned char *__tramp = (unsigned char*)(TRAMP);			\
-    UINT64  __fun = (UINT64)(FUN);					\
-    UINT64  __ctx = (UINT64)(CTX);					\
-    UINT64  __flags = (UINT64)(FLAGS);					\
-    memcpy (__tramp, trampoline, sizeof (trampoline));			\
-    memcpy (__tramp + 12, &__fun, sizeof (__fun));			\
-    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));			\
-    memcpy (__tramp + 28, &__flags, sizeof (__flags));			\
-    ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);		\
-  })
+extern void ffi_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
 
 ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
+ffi_prep_closure_loc (ffi_closure *closure,
                       ffi_cif* cif,
                       void (*fun)(ffi_cif*,void*,void**,void*),
                       void *user_data,
                       void *codeloc)
 {
+  static const unsigned char trampoline[16] = {
+    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
+    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
+    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
+  };
+  char *tramp = closure->tramp;
+  void (*start)(void);
+
   if (cif->abi != FFI_SYSV)
     return FFI_BAD_ABI;
 
-  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
-		       cif->aarch64_flags);
-
-  closure->cif  = cif;
+  closure->cif = cif;
+  closure->fun = fun;
   closure->user_data = user_data;
-  closure->fun  = fun;
+
+  memcpy (tramp, trampoline, sizeof(trampoline));
+
+  if (cif->flags & AARCH64_FLAG_ARG_V)
+    start = ffi_closure_SYSV_V;
+  else
+    start = ffi_closure_SYSV;
+  *(UINT64 *)(tramp + 16) = (uintptr_t)start;
+
+  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
 
   return FFI_OK;
 }
@@ -853,20 +787,20 @@ ffi_prep_closure_loc (ffi_closure* closure,
    descriptors, invokes the wrapped function, then marshalls the return
    value back into the call context.  */
 
-void FFI_HIDDEN
-ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
-			void *stack)
+int FFI_HIDDEN
+ffi_closure_SYSV_inner (ffi_cif *cif,
+			void (*fun)(ffi_cif*,void*,void**,void*),
+			void *user_data,
+			struct call_context *context,
+			void *stack, void *rvalue)
 {
-  ffi_cif *cif = closure->cif;
   void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
-  void *rvalue = NULL;
-  int i, h, nargs = cif->nargs;
+  int i, h, nargs, flags;
   struct arg_state state;
-  ffi_type *rtype;
 
   arg_init (&state);
 
-  for (i = 0; i < nargs; i++)
+  for (i = 0, nargs = cif->nargs; i < nargs; i++)
     {
       ffi_type *ty = cif->arg_types[i];
       int t = ty->type;
@@ -955,69 +889,11 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
 	}
     }
 
-  /* Figure out where the return value will be passed, either in registers
-     or in a memory block allocated by the caller and passed in x8.  */
-  rtype = cif->rtype;
-  if (is_register_candidate (rtype))
-    {
-      size_t s = rtype->size;
-      int t;
-
-      /* Register candidates are *always* returned in registers. */
-
-      /* Allocate a scratchpad for the return value, we will let the
-         callee scrible the result into the scratch pad then move the
-         contents into the appropriate return value location for the
-         call convention.  */
-      rvalue = alloca (s);
-      (closure->fun) (cif, rvalue, avalue, closure->user_data);
-
-      /* Copy the return value into the call context so that it is returned
-         as expected to our caller.  */
-      t = rtype->type;
-      switch (t)
-        {
-        case FFI_TYPE_VOID:
-          break;
-
-        case FFI_TYPE_INT:
-        case FFI_TYPE_UINT8:
-        case FFI_TYPE_UINT16:
-        case FFI_TYPE_UINT32:
-        case FFI_TYPE_UINT64:
-        case FFI_TYPE_SINT8:
-        case FFI_TYPE_SINT16:
-        case FFI_TYPE_SINT32:
-        case FFI_TYPE_SINT64:
-        case FFI_TYPE_POINTER:
-	  context->x[0] = extend_integer_type (rvalue, t);
-          break;
-
-        case FFI_TYPE_FLOAT:
-        case FFI_TYPE_DOUBLE:
-        case FFI_TYPE_LONGDOUBLE:
-	  extend_hfa_type (&context->v[0], rvalue, 0x100 + t);
-	  break;
+  flags = cif->flags;
+  if (flags & AARCH64_RET_IN_MEM)
+    rvalue = (void *)(uintptr_t)context->x8;
 
-        case FFI_TYPE_STRUCT:
-	  h = is_hfa (cif->rtype);
-          if (h)
-	    extend_hfa_type (&context->v[0], rvalue, h);
-          else
-	    {
-	      FFI_ASSERT (s <= 16);
-              memcpy (&context->x[0], rvalue, s);
-            }
-          break;
+  fun (cif, rvalue, avalue, user_data);
 
-        default:
-          abort();
-        }
-    }
-  else
-    {
-      rvalue = (void *)(uintptr_t)context->x8;
-      (closure->fun) (cif, rvalue, avalue, closure->user_data);
-    }
+  return flags;
 }
-
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
index 336f28a..b488bbe 100644
--- a/src/aarch64/ffitarget.h
+++ b/src/aarch64/ffitarget.h
@@ -42,7 +42,7 @@ typedef enum ffi_abi
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
-#define FFI_TRAMPOLINE_SIZE 36
+#define FFI_TRAMPOLINE_SIZE 24
 #define FFI_NATIVE_RAW_API 0
 
 /* ---- Internal ---- */
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index ba15663..abd848d 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -39,15 +39,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #endif
 #endif
 
+#ifdef __AARCH64EB__
+# define BE(X)	X
+#else
+# define BE(X)	0
+#endif
+
 	.text
 	.align 4
 
-	.globl	CNAME(ffi_call_SYSV)
-#ifdef __ELF__
-	.type	CNAME(ffi_call_SYSV), #function
-	.hidden	CNAME(ffi_call_SYSV)
-#endif
-
 /* ffi_call_SYSV
    extern void ffi_call_SYSV (void *stack, void *frame,
 			      void (*fn)(void), void *rvalue, int flags);
@@ -179,131 +179,160 @@ CNAME(ffi_call_SYSV):
 	nop
 
 	cfi_endproc
+
+	.globl	CNAME(ffi_call_SYSV)
 #ifdef __ELF__
-        .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
+	.type	CNAME(ffi_call_SYSV), #function
+	.hidden	CNAME(ffi_call_SYSV)
+	.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
 #endif
 
-#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
-
 /* ffi_closure_SYSV
 
    Closure invocation glue. This is the low level code invoked directly by
    the closure trampoline to setup and call a closure.
 
-   On entry x17 points to a struct trampoline_data, x16 has been clobbered
+   On entry x17 points to a struct ffi_closure and x16 has been clobbered;
    all other registers are preserved.
 
    We allocate a call context and save the argument passing registers,
    then invoked the generic C ffi_closure_SYSV_inner() function to do all
    the real work, on return we load the result passing registers back from
    the call context.
+*/
 
-   On entry
-
-   extern void
-   ffi_closure_SYSV (struct trampoline_data *);
-
-   struct trampoline_data
-   {
-        UINT64 *ffi_closure;
-        UINT64 flags;
-   };
-
-   This function uses the following stack frame layout:
-
-   ==
-                saved x30(lr)
-   x29(fp)->    saved x29(fp)
-                saved x22
-                saved x21
-                ...
-   sp     ->    call_context
-   ==
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
 
-   Voila!  */
+	.align 4
+CNAME(ffi_closure_SYSV_V):	/* closure entry chosen when cif->flags has AARCH64_FLAG_ARG_V */
+	cfi_startproc
+	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!	/* allocate frame, save fp/lr at its base */
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
 
+	/* Save the argument passing vector registers into the call context at sp+16.  */
+	stp     q0, q1, [sp, #16 + 0]
+	stp     q2, q3, [sp, #16 + 32]
+	stp     q4, q5, [sp, #16 + 64]
+	stp     q6, q7, [sp, #16 + 96]
+	b	0f	/* continue at label 0 in ffi_closure_SYSV, past its identical prologue */
+	cfi_endproc
 
-        .globl	CNAME(ffi_closure_SYSV)
+	.globl	CNAME(ffi_closure_SYSV_V)
 #ifdef __ELF__
-	.type	CNAME(ffi_closure_SYSV), #function
-	.hidden	CNAME(ffi_closure_SYSV)
+	.type	CNAME(ffi_closure_SYSV_V), #function
+	.hidden	CNAME(ffi_closure_SYSV_V)
+	.size	CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
 #endif
-        cfi_startproc
-CNAME(ffi_closure_SYSV):
-        stp     x29, x30, [sp, #-16]!
-	cfi_adjust_cfa_offset (16)
-        cfi_rel_offset (x29, 0)
-        cfi_rel_offset (x30, 8)
-
-        mov     x29, sp
-        cfi_def_cfa_register (x29)
-
-        sub     sp, sp, #ffi_closure_SYSV_FS
-
-        stp     x21, x22, [x29, #-16]
-        cfi_rel_offset (x21, -16)
-        cfi_rel_offset (x22, -8)
-
-        /* Load x21 with &call_context.  */
-        mov     x21, sp
-        /* Preserve our struct trampoline_data *  */
-        mov     x22, x17
-
-        /* Save the rest of the argument passing registers, including
-	   the structure return pointer.  */
-        stp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
-        stp     x2, x3, [x21, #16*N_V_ARG_REG + 16]
-        stp     x4, x5, [x21, #16*N_V_ARG_REG + 32]
-        stp     x6, x7, [x21, #16*N_V_ARG_REG + 48]
-        str     x8,     [x21, #16*N_V_ARG_REG + 64]
-
-        /* Figure out if we should touch the vector registers.  */
-        ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FLAG_ARG_V_BIT, 1f
-
-        /* Save the argument passing vector registers.  */
-        stp     q0, q1, [x21, #0]
-        stp     q2, q3, [x21, #32]
-        stp     q4, q5, [x21, #64]
-        stp     q6, q7, [x21, #96]
-1:
-        /* Load &ffi_closure..  */
-        ldr     x0, [x22, #0]
-        mov     x1, x21
-        /* Compute the location of the stack at the point that the
-           trampoline was called.  */
-        add     x2, x29, #16
-
-        bl      CNAME(ffi_closure_SYSV_inner)
-
-        /* Figure out if we should touch the vector registers.  */
-        ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FLAG_ARG_V_BIT, 1f
-
-        /* Load the result passing vector registers.  */
-        ldp     q0, q1, [x21, #0]
-        ldp     q2, q3, [x21, #32]
-1:
-        /* Load the result passing core registers.  */
-        ldp     x0, x1, [x21, #16*N_V_ARG_REG + 0]
-
-        /* We are done, unwind our frame.  */
-        ldp     x21, x22, [x29,  #-16]
-        cfi_restore (x21)
-        cfi_restore (x22)
 
-        mov     sp, x29
-        cfi_def_cfa_register (sp)
-
-        ldp     x29, x30, [sp], #16
-	cfi_adjust_cfa_offset (-16)
-        cfi_restore (x29)
-        cfi_restore (x30)
-
-        ret
+	.align	4
+	cfi_startproc
+CNAME(ffi_closure_SYSV):	/* closure entry when no vector argument registers need saving */
+	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+0:	/* ffi_closure_SYSV_V joins here after saving q0-q7 */
+	mov     x29, sp
+
+	/* Save the argument passing core registers.  */
+	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+	str     x8,     [sp, #16 + 16*N_V_ARG_REG + 64]
+
+	/* Load ffi_closure_SYSV_inner arguments.  */
+	ldp	x0, x1, [x17, #FFI_TRAMPOLINE_SIZE]	/* load cif, fn */
+	ldr	x2, [x17, #FFI_TRAMPOLINE_SIZE+16]	/* load user_data */
+	add	x3, sp, #16				/* load context */
+	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */
+	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue */
+	bl      CNAME(ffi_closure_SYSV_inner)
+
+	/* Load the return value as directed by the flags returned in w0.  */
+	adr	x1, 0f
+	and	w0, w0, #AARCH64_RET_MASK
+	add	x1, x1, x0, lsl #3
+	add	x3, sp, #16+CALL_CONTEXT_SIZE		/* x3 = rvalue buffer; x2-x5 did not survive the call */
+	br	x1
+
+	/* Note that each table entry is 2 insns, and thus 8 bytes.  */
+	.align	4
+0:	b	99f			/* VOID */
+	nop
+1:	ldr	x0, [x3]		/* INT64 */
+	b	99f
+2:	ldp	x0, x1, [x3]		/* INT128 */
+	b	99f
+3:	brk	#1000			/* UNUSED */
+	nop
+4:	brk	#1000			/* UNUSED */
+	nop
+5:	brk	#1000			/* UNUSED */
+	nop
+6:	brk	#1000			/* UNUSED */
+	nop
+7:	brk	#1000			/* UNUSED */
+	nop
+8:	ldr	s3, [x3, #12]		/* S4 */
+	nop				/* fall through to S3 */
+9:	ldr	s2, [x3, #8]		/* S3 */
+	nop				/* fall through to S2 */
+10:	ldp	s0, s1, [x3]		/* S2 */
+	b	99f
+11:	ldr	s0, [x3]		/* S1 */
+	b	99f
+12:	ldr	d3, [x3, #24]		/* D4 */
+	nop				/* fall through to D3 */
+13:	ldr	d2, [x3, #16]		/* D3 */
+	nop				/* fall through to D2 */
+14:	ldp	d0, d1, [x3]		/* D2 */
+	b	99f
+15:	ldr	d0, [x3]		/* D1 */
+	b	99f
+16:	ldr	q3, [x3, #48]		/* Q4 */
+	nop				/* fall through to Q3 */
+17:	ldr	q2, [x3, #32]		/* Q3 */
+	nop				/* fall through to Q2 */
+18:	ldp	q0, q1, [x3]		/* Q2 */
+	b	99f
+19:	ldr	q0, [x3]		/* Q1 */
+	b	99f
+20:	ldrb	w0, [x3, #BE(7)]	/* UINT8 */
+	b	99f
+21:	brk	#1000			/* reserved */
+	nop
+22:	ldrh	w0, [x3, #BE(6)]	/* UINT16 */
+	b	99f
+23:	brk	#1000			/* reserved */
+	nop
+24:	ldr	w0, [x3, #BE(4)]	/* UINT32 */
+	b	99f
+25:	brk	#1000			/* reserved */
+	nop
+26:	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */
+	b	99f
+27:	brk	#1000			/* reserved */
+	nop
+28:	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */
+	b	99f
+29:	brk	#1000			/* reserved */
+	nop
+30:	ldrsw	x0, [x3, #BE(4)]	/* SINT32 */
+	nop				/* fall through to the epilogue */
+31:					/* reserved */
+99:	ldp     x29, x30, [sp], #ffi_closure_SYSV_FS
+	cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+	cfi_restore (x29)
+	cfi_restore (x30)
+	ret
 	cfi_endproc
+
+	.globl	CNAME(ffi_closure_SYSV)
 #ifdef __ELF__
-        .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
+	.type	CNAME(ffi_closure_SYSV), #function
+	.hidden	CNAME(ffi_closure_SYSV)
+	.size	CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
 #endif
-- 
1.9.3


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]