This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] malloc: initial numa support
- From: Joern Engel <joern at purestorage dot com>
- To: "GNU C. Library" <libc-alpha at sourceware dot org>
- Cc: Siddhesh Poyarekar <siddhesh dot poyarekar at gmail dot com>, Joern Engel <joern at purestorage dot org>
- Date: Mon, 25 Jan 2016 16:24:54 -0800
- Subject: [PATCH] malloc: initial numa support
- Authentication-results: sourceware.org; auth=none
- References: <1453767942-19369-1-git-send-email-joern at purestorage dot com>
From: Joern Engel <joern@purestorage.org>
Deliberately unoptimized. We don't explicitly set the numa policy for
our heaps, relying on memory being allocated locally. And we do a
syscall for every malloc, which is exceedingly expensive.
See how this one fares, then refine the code.
JIRA: PURE-27597
---
tpc/malloc2.13/arena.h | 47 +++++++++++++++++++++++++++++------------------
tpc/malloc2.13/malloc.c | 18 ++++++++++++++----
2 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/tpc/malloc2.13/arena.h b/tpc/malloc2.13/arena.h
index 118563003c8d..b8fc5c99a1cd 100644
--- a/tpc/malloc2.13/arena.h
+++ b/tpc/malloc2.13/arena.h
@@ -94,6 +94,17 @@ static int __malloc_initialized = -1;
/**************************************************************************/
+/*
+ * Return the NUMA node the calling thread is currently running on, for use
+ * as an index into numa_arena[].
+ *
+ * This version issues a getcpu() syscall on every call, which is too
+ * expensive for the allocation path; the syscall could later be turned into
+ * a pointer dereference to a kernel shared memory page.
+ *
+ * Returns 0 if the syscall fails, or if the reported node id is not below
+ * MAX_NUMA_NODES, so that callers never index numa_arena[] out of bounds.
+ */
+#include <sys/syscall.h>
+static inline int getnode(void)
+{
+	/* getcpu() stores the node id through an unsigned pointer. */
+	unsigned int node = 0;
+	long ret;
+
+	ret = syscall(SYS_getcpu, NULL, &node, NULL);
+	if (ret == -1 || node >= MAX_NUMA_NODES)
+		return 0;
+	return (int)node;
+}
/* arena_get() acquires an arena and locks the corresponding mutex.
First, try the one last locked successfully by this thread. (This
@@ -110,7 +121,10 @@ static int __malloc_initialized = -1;
#define arena_lookup(ptr) do { \
Void_t *vptr = NULL; \
+ int node = getnode(); /* node id reported by getnode() for the calling thread */ \
ptr = (struct malloc_state *)tsd_getspecific(arena_key, vptr); \
+ if (!ptr || ptr->numa_node != node) /* nothing stored under arena_key, or that arena is tagged with another node */ \
+ ptr = numa_arena[node]; /* NOTE(review): node is not range-checked here; relies on getnode() staying below MAX_NUMA_NODES - confirm */ \
} while(0)
#define arena_lock(ptr, size) do { \
@@ -330,12 +344,12 @@ ptmalloc_init_minimal (void)
mp_.pagesize = malloc_getpagesize;
}
-
+static struct malloc_state *_int_new_arena(size_t size, int numa_node);
static void ptmalloc_init(void)
{
const char *s;
- int secure = 0;
+ int i, secure = 0;
if (__malloc_initialized >= 0)
return;
@@ -358,7 +372,12 @@ static void ptmalloc_init(void)
#endif /* !defined NO_THREADS */
mutex_init(&main_arena.mutex);
main_arena.next = &main_arena;
-
+ numa_arena[0] = &main_arena;
+ for (i = 1; i < MAX_NUMA_NODES; i++) {
+ numa_arena[i] = _int_new_arena(0, i);
+ numa_arena[i]->local_next = numa_arena[i];
+ (void)mutex_unlock(&numa_arena[i]->mutex);
+ }
mutex_init(&list_lock);
tsd_key_create(&arena_key, NULL);
@@ -633,7 +652,7 @@ heap_trim(heap_info *heap, size_t pad)
/* Create a new arena with initial size "size". */
-static struct malloc_state *_int_new_arena(size_t size)
+static struct malloc_state *_int_new_arena(size_t size, int numa_node)
{
struct malloc_state *a;
heap_info *h;
@@ -647,7 +666,7 @@ static struct malloc_state *_int_new_arena(size_t size)
to deal with the large request via mmap_chunk(). */
h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad);
if (!h)
- return 0;
+ abort();
}
a = h->ar_ptr = (struct malloc_state *)(h + 1);
malloc_init_state(a);
@@ -676,7 +695,7 @@ static struct malloc_state *_int_new_arena(size_t size)
a->next = main_arena.next;
atomic_write_barrier();
main_arena.next = a;
-
+ a->numa_node = numa_node;
THREAD_STAT(++(a->stat_lock_loop));
@@ -689,17 +708,9 @@ static struct malloc_state *internal_function arena_get2(struct malloc_state *a_
{
struct malloc_state *a;
- if (!a_tsd)
- a = a_tsd = &main_arena;
- else {
- a = a_tsd->next;
- if (!a) {
- /* This can only happen while initializing the new arena. */
- (void)mutex_lock(&main_arena.mutex);
- THREAD_STAT(++(main_arena.stat_lock_wait));
- return &main_arena;
- }
- }
+ a = a_tsd->next;
+ if (!a)
+ abort();
/* Check the global, circularly linked list for available arenas. */
bool retried = false;
@@ -730,7 +741,7 @@ static struct malloc_state *internal_function arena_get2(struct malloc_state *a_
}
/* Nothing immediately available, so generate a new arena. */
- a = _int_new_arena(size);
+ a = _int_new_arena(size, a_tsd->numa_node);
(void)mutex_unlock(&list_lock);
return a;
diff --git a/tpc/malloc2.13/malloc.c b/tpc/malloc2.13/malloc.c
index a420ef278e68..4bc6247d910e 100644
--- a/tpc/malloc2.13/malloc.c
+++ b/tpc/malloc2.13/malloc.c
@@ -2154,6 +2154,14 @@ struct malloc_state {
/* Serialize access. */
mutex_t mutex;
+ /* NUMA-local linked list */
+ struct malloc_state *local_next;
+
+ /* Linked list */
+ struct malloc_state *next;
+
+ int numa_node;
+
/* Flags (formerly in max_fast). */
int flags;
@@ -2177,10 +2185,6 @@ struct malloc_state {
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];
- /* Linked list */
- struct malloc_state *next;
-
-
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
@@ -2223,6 +2227,12 @@ struct malloc_par {
static struct malloc_state main_arena;
+/*
+ * For numa locality, have a per-node list of arenas.
+ *
+ * numa_arena[] is indexed by NUMA node id and has MAX_NUMA_NODES slots.
+ * ptmalloc_init() points slot 0 at main_arena and creates one new arena
+ * for every other slot.
+ */
+#define MAX_NUMA_NODES 2
+static struct malloc_state *numa_arena[MAX_NUMA_NODES];
+
/* There is only one instance of the malloc parameters. */
static struct malloc_par mp_;
--
2.7.0.rc3