This is the mail archive of the
libffi-discuss@sourceware.org
mailing list for the libffi project.
[PATCH 02/16] aarch64: Improve is_hfa
- From: Richard Henderson <rth at twiddle dot net>
- To: libffi-discuss at sourceware dot org
- Cc: Richard Henderson <rth at redhat dot com>
- Date: Tue, 28 Oct 2014 11:52:59 -0700
- Subject: [PATCH 02/16] aarch64: Improve is_hfa
- Authentication-results: sourceware.org; auth=none
- References: <1414522393-19169-1-git-send-email-rth at twiddle dot net>
From: Richard Henderson <rth@redhat.com>
The set of functions get_homogeneous_type, element_count, and is_hfa
are all intertwined and recompute data. Return a compound quantity
from is_hfa that contains all the data and avoids the recomputation.
---
src/aarch64/ffi.c | 212 +++++++++++++++++++++++++++++++++---------------------
1 file changed, 131 insertions(+), 81 deletions(-)
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index cdb7816..0834614 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -242,88 +242,132 @@ is_floating_type (unsigned short type)
|| type == FFI_TYPE_LONGDOUBLE);
}
-/* Test for a homogeneous structure. */
+/* A subroutine of is_hfa. Given a structure type, return the type code
+ of the first non-structure element. Recurse for structure elements.
+ Return -1 if the structure is in fact empty, i.e. no nested elements. */
-static unsigned short
-get_homogeneous_type (ffi_type *ty)
+static int
+is_hfa0 (const ffi_type *ty)
{
- if (ty->type == FFI_TYPE_STRUCT && ty->elements)
- {
- unsigned i;
- unsigned short candidate_type
- = get_homogeneous_type (ty->elements[0]);
- for (i =1; ty->elements[i]; i++)
- {
- unsigned short iteration_type = 0;
- /* If we have a nested struct, we must find its homogeneous type.
- If that fits with our candidate type, we are still
- homogeneous. */
- if (ty->elements[i]->type == FFI_TYPE_STRUCT
- && ty->elements[i]->elements)
- {
- iteration_type = get_homogeneous_type (ty->elements[i]);
- }
- else
- {
- iteration_type = ty->elements[i]->type;
- }
+ ffi_type **elements = ty->elements;
+ int i, ret = -1;
- /* If we are not homogeneous, return FFI_TYPE_STRUCT. */
- if (candidate_type != iteration_type)
- return FFI_TYPE_STRUCT;
- }
- return candidate_type;
- }
+ if (elements != NULL)
+ for (i = 0; elements[i]; ++i)
+ {
+ ret = elements[i]->type;
+ if (ret == FFI_TYPE_STRUCT)
+ {
+ ret = is_hfa0 (elements[i]);
+ if (ret < 0)
+ continue;
+ }
+ break;
+ }
- /* Base case, we have no more levels of nesting, so we
- are a basic type, and so, trivially homogeneous in that type. */
- return ty->type;
+ return ret;
}
-/* Determine the number of elements within a STRUCT.
+/* A subroutine of is_hfa. Given a structure type, return true if all
+ of the non-structure elements are the same as CANDIDATE. */
- Note, we must handle nested structs.
+static int
+is_hfa1 (const ffi_type *ty, int candidate)
+{
+ ffi_type **elements = ty->elements;
+ int i;
- If ty is not a STRUCT this function will return 0. */
+ if (elements != NULL)
+ for (i = 0; elements[i]; ++i)
+ {
+ int t = elements[i]->type;
+ if (t == FFI_TYPE_STRUCT)
+ {
+ if (!is_hfa1 (elements[i], candidate))
+ return 0;
+ }
+ else if (t != candidate)
+ return 0;
+ }
-static unsigned
-element_count (ffi_type *ty)
-{
- if (ty->type == FFI_TYPE_STRUCT && ty->elements)
- {
- unsigned n;
- unsigned elems = 0;
- for (n = 0; ty->elements[n]; n++)
- {
- if (ty->elements[n]->type == FFI_TYPE_STRUCT
- && ty->elements[n]->elements)
- elems += element_count (ty->elements[n]);
- else
- elems++;
- }
- return elems;
- }
- return 0;
+ return 1;
}
-/* Test for a homogeneous floating point aggregate.
+/* Determine if TY is a homogeneous floating point aggregate (HFA).
+ That is, a structure consisting of 1 to 4 members of all the same type,
+ where that type is a floating point scalar.
- A homogeneous floating point aggregate is a homogeneous aggregate of
- a half- single- or double- precision floating point type with one
- to four elements. Note that this includes nested structs of the
- basic type. */
+ Returns non-zero iff TY is an HFA. The result is an encoded value where
+ bits 0-7 contain the type code, and bits 8-10 contain the element count. */
static int
-is_hfa (ffi_type *ty)
+is_hfa(const ffi_type *ty)
{
- if (ty->type == FFI_TYPE_STRUCT
- && ty->elements[0]
- && is_floating_type (get_homogeneous_type (ty)))
+ ffi_type **elements;
+ int candidate, i;
+ size_t size, ele_count;
+
+ /* Quickest tests first. */
+ if (ty->type != FFI_TYPE_STRUCT)
+ return 0;
+
+ /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
+ size = ty->size;
+ if (size < 4 || size > 64)
+ return 0;
+
+ /* Find the type of the first non-structure member. */
+ elements = ty->elements;
+ candidate = elements[0]->type;
+ if (candidate == FFI_TYPE_STRUCT)
{
- unsigned n = element_count (ty);
- return n >= 1 && n <= 4;
+ for (i = 0; ; ++i)
+ {
+ candidate = is_hfa0 (elements[i]);
+ if (candidate >= 0)
+ break;
+ }
}
- return 0;
+
+ /* If the first member is not a floating point type, it's not an HFA.
+ Also quickly re-check the size of the structure. */
+ switch (candidate)
+ {
+ case FFI_TYPE_FLOAT:
+ ele_count = size / sizeof(float);
+ if (size != ele_count * sizeof(float))
+ return 0;
+ break;
+ case FFI_TYPE_DOUBLE:
+ ele_count = size / sizeof(double);
+ if (size != ele_count * sizeof(double))
+ return 0;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ ele_count = size / sizeof(long double);
+ if (size != ele_count * sizeof(long double))
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ if (ele_count > 4)
+ return 0;
+
+ /* Finally, make sure that all scalar elements are the same type. */
+ for (i = 0; elements[i]; ++i)
+ {
+ if (elements[i]->type == FFI_TYPE_STRUCT)
+ {
+ if (!is_hfa1 (elements[i], candidate))
+ return 0;
+ }
+ else if (elements[i]->type != candidate)
+ return 0;
+ }
+
+ /* All tests succeeded. Encode the result. */
+ return (ele_count << 8) | candidate;
}
/* Test if an ffi_type is a candidate for passing in a register.
@@ -559,7 +603,10 @@ copy_hfa_to_reg_or_stack (void *memory,
unsigned char *stack,
struct arg_state *state)
{
- unsigned elems = element_count (ty);
+ int h = is_hfa (ty);
+ int type = h & 0xff;
+ unsigned elems = h >> 8;
+
if (available_v (state) < elems)
{
/* There are insufficient V registers. Further V register allocations
@@ -573,7 +620,6 @@ copy_hfa_to_reg_or_stack (void *memory,
else
{
int i;
- unsigned short type = get_homogeneous_type (ty);
for (i = 0; i < elems; i++)
{
void *reg = allocate_to_v (context, state);
@@ -813,6 +859,7 @@ void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
extended_cif ecif;
+ int h;
ecif.cif = cif;
ecif.avalue = avalue;
@@ -861,11 +908,12 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
case FFI_TYPE_STRUCT:
- if (is_hfa (cif->rtype))
+ h = is_hfa (cif->rtype);
+ if (h)
{
int j;
- unsigned short type = get_homogeneous_type (cif->rtype);
- unsigned elems = element_count (cif->rtype);
+ int type = h & 0xff;
+ int elems = h >> 8;
for (j = 0; j < elems; j++)
{
void *reg = get_basic_type_addr (type, &context, j);
@@ -967,7 +1015,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
ffi_cif *cif = closure->cif;
void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
void *rvalue = NULL;
- int i;
+ int i, h;
struct arg_state state;
arg_init (&state, ALIGN(cif->bytes, 16));
@@ -1002,9 +1050,10 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
#endif
case FFI_TYPE_STRUCT:
- if (is_hfa (ty))
+ h = is_hfa (ty);
+ if (h)
{
- unsigned n = element_count (ty);
+ unsigned n = h >> 8;
if (available_v (&state) < n)
{
state.nsrn = N_V_ARG_REG;
@@ -1013,7 +1062,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
}
else
{
- switch (get_homogeneous_type (ty))
+ switch (h & 0xff)
{
case FFI_TYPE_FLOAT:
{
@@ -1027,9 +1076,9 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
correctly. The fake can be tossed once the
closure function has returned hence alloca()
is sufficient. */
- int j;
+ unsigned j;
UINT32 *p = avalue[i] = alloca (ty->size);
- for (j = 0; j < element_count (ty); j++)
+ for (j = 0; j < n; j++)
memcpy (&p[j],
allocate_to_s (context, &state),
sizeof (*p));
@@ -1048,9 +1097,9 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
correctly. The fake can be tossed once the
closure function has returned hence alloca()
is sufficient. */
- int j;
+ unsigned j;
UINT64 *p = avalue[i] = alloca (ty->size);
- for (j = 0; j < element_count (ty); j++)
+ for (j = 0; j < n; j++)
memcpy (&p[j],
allocate_to_d (context, &state),
sizeof (*p));
@@ -1143,11 +1192,12 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
break;
}
case FFI_TYPE_STRUCT:
- if (is_hfa (cif->rtype))
+ h = is_hfa (cif->rtype);
+ if (h)
{
int j;
- unsigned short type = get_homogeneous_type (cif->rtype);
- unsigned elems = element_count (cif->rtype);
+ int type = h & 0xff;
+ int elems = h >> 8;
for (j = 0; j < elems; j++)
{
void *reg = get_basic_type_addr (type, context, j);
--
1.9.3