This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


[PATCH] malloc: initial numa support


From: Joern Engel <joern@purestorage.org>

Deliberately unoptimized.  We don't explicitly set a numa policy for
our heaps and instead rely on the kernel's default first-touch placement
to keep memory on the local node.  We also make a getcpu() syscall for
every malloc, which is exceedingly expensive.

See how this one fares, then refine the code.

JIRA: PURE-27597
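
One possible shape of that refinement, sketched here only as an
illustration: glibc's sched_getcpu() is served from the vDSO on x86-64,
so it avoids the kernel entry that the raw SYS_getcpu call below costs
us.  The cpu_to_node[] table is hypothetical and would have to be filled
once at init time, e.g. during ptmalloc_init() from the sysfs topology
files.

#define _GNU_SOURCE
#include <sched.h>

/* Hypothetical cpu-to-node cache, filled once at startup from
   /sys/devices/system/cpu/cpu<N>/node<M>.  */
static unsigned char cpu_to_node[1024];

static inline int getnode_fast(void)
{
	int cpu = sched_getcpu();	/* vDSO call, no kernel entry */
	return (cpu < 0) ? 0 : cpu_to_node[cpu];
}
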
---
 tpc/malloc2.13/arena.h  | 47 +++++++++++++++++++++++++++++------------------
 tpc/malloc2.13/malloc.c | 18 ++++++++++++++----
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/tpc/malloc2.13/arena.h b/tpc/malloc2.13/arena.h
index 118563003c8d..b8fc5c99a1cd 100644
--- a/tpc/malloc2.13/arena.h
+++ b/tpc/malloc2.13/arena.h
@@ -94,6 +94,17 @@ static int __malloc_initialized = -1;
 
 /**************************************************************************/
 
+/*
+ * Calling getcpu() for every allocation is too expensive - but we can turn
+ * the syscall into a pointer dereference to a kernel shared memory page.
+ */
+#include <sys/syscall.h>
+static inline int getnode(void)
+{
+	int node, ret;
+	ret = syscall(SYS_getcpu, NULL, &node, NULL);
+	return (ret == -1) ? 0 : node;
+}
 
 /* arena_get() acquires an arena and locks the corresponding mutex.
    First, try the one last locked successfully by this thread.  (This
@@ -110,7 +121,10 @@ static int __malloc_initialized = -1;
 
 #define arena_lookup(ptr) do { \
 	Void_t *vptr = NULL; \
+	int node = getnode(); \
 	ptr = (struct malloc_state *)tsd_getspecific(arena_key, vptr); \
+	if (!ptr || ptr->numa_node != node) \
+		ptr = numa_arena[node]; \
 } while(0)
 
 #define arena_lock(ptr, size) do { \
@@ -330,12 +344,12 @@ ptmalloc_init_minimal (void)
   mp_.pagesize       = malloc_getpagesize;
 }
 
-
+static struct malloc_state *_int_new_arena(size_t size, int numa_node);
 
 static void ptmalloc_init(void)
 {
 	const char *s;
-	int secure = 0;
+	int i, secure = 0;
 
 	if (__malloc_initialized >= 0)
 		return;
@@ -358,7 +372,12 @@ static void ptmalloc_init(void)
 #endif				/* !defined NO_THREADS */
 	mutex_init(&main_arena.mutex);
 	main_arena.next = &main_arena;
-
+	numa_arena[0] = &main_arena;
+	for (i = 1; i < MAX_NUMA_NODES; i++) {
+		numa_arena[i] = _int_new_arena(0, i);
+		numa_arena[i]->local_next = numa_arena[i];
+		(void)mutex_unlock(&numa_arena[i]->mutex);
+	}
 
 	mutex_init(&list_lock);
 	tsd_key_create(&arena_key, NULL);
@@ -633,7 +652,7 @@ heap_trim(heap_info *heap, size_t pad)
 
 /* Create a new arena with initial size "size".  */
 
-static struct malloc_state *_int_new_arena(size_t size)
+static struct malloc_state *_int_new_arena(size_t size, int numa_node)
 {
 	struct malloc_state *a;
 	heap_info *h;
@@ -647,7 +666,7 @@ static struct malloc_state *_int_new_arena(size_t size)
 		   to deal with the large request via mmap_chunk().  */
 		h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad);
 		if (!h)
-			return 0;
+			abort();
 	}
 	a = h->ar_ptr = (struct malloc_state *)(h + 1);
 	malloc_init_state(a);
@@ -676,7 +695,7 @@ static struct malloc_state *_int_new_arena(size_t size)
 	a->next = main_arena.next;
 	atomic_write_barrier();
 	main_arena.next = a;
-
+	a->numa_node = numa_node;
 
 	THREAD_STAT(++(a->stat_lock_loop));
 
@@ -689,17 +708,9 @@ static struct malloc_state *internal_function arena_get2(struct malloc_state *a_
 {
 	struct malloc_state *a;
 
-	if (!a_tsd)
-		a = a_tsd = &main_arena;
-	else {
-		a = a_tsd->next;
-		if (!a) {
-			/* This can only happen while initializing the new arena. */
-			(void)mutex_lock(&main_arena.mutex);
-			THREAD_STAT(++(main_arena.stat_lock_wait));
-			return &main_arena;
-		}
-	}
+	a = a_tsd->next;
+	if (!a)
+		abort();
 
 	/* Check the global, circularly linked list for available arenas. */
 	bool retried = false;
@@ -730,7 +741,7 @@ static struct malloc_state *internal_function arena_get2(struct malloc_state *a_
 	}
 
 	/* Nothing immediately available, so generate a new arena.  */
-	a = _int_new_arena(size);
+	a = _int_new_arena(size, a_tsd->numa_node);
 	(void)mutex_unlock(&list_lock);
 
 	return a;
diff --git a/tpc/malloc2.13/malloc.c b/tpc/malloc2.13/malloc.c
index a420ef278e68..4bc6247d910e 100644
--- a/tpc/malloc2.13/malloc.c
+++ b/tpc/malloc2.13/malloc.c
@@ -2154,6 +2154,14 @@ struct malloc_state {
 	/* Serialize access.  */
 	mutex_t mutex;
 
+	/* NUMA-local linked list */
+	struct malloc_state *local_next;
+
+	/* Linked list */
+	struct malloc_state *next;
+
+	int numa_node;
+
 	/* Flags (formerly in max_fast).  */
 	int flags;
 
@@ -2177,10 +2185,6 @@ struct malloc_state {
 	/* Bitmap of bins */
 	unsigned int binmap[BINMAPSIZE];
 
-	/* Linked list */
-	struct malloc_state *next;
-
-
 	/* Memory allocated from the system in this arena.  */
 	INTERNAL_SIZE_T system_mem;
 	INTERNAL_SIZE_T max_system_mem;
@@ -2223,6 +2227,12 @@ struct malloc_par {
 
 static struct malloc_state main_arena;
 
+/*
+ * For numa locality, have a per-node list of arenas.
+ */
+#define MAX_NUMA_NODES 2
+static struct malloc_state *numa_arena[MAX_NUMA_NODES];
+
 /* There is only one instance of the malloc parameters.  */
 
 static struct malloc_par mp_;
-- 
2.7.0.rc3
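
Since the patch leans on default first-touch placement rather than an
explicit mempolicy, here is a small, self-contained check (not part of
the patch) for verifying that assumption on a test machine: the raw
get_mempolicy syscall with MPOL_F_NODE | MPOL_F_ADDR reports which node
actually backs a given page.

#include <linux/mempolicy.h>	/* MPOL_F_NODE, MPOL_F_ADDR */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* Ask the kernel which node backs the page containing addr. */
static int node_of(void *addr)
{
	int node = -1;
	if (syscall(SYS_get_mempolicy, &node, NULL, 0, addr,
		    MPOL_F_NODE | MPOL_F_ADDR) == -1)
		return -1;
	return node;
}

int main(void)
{
	size_t len = 1 << 20;
	char *p = malloc(len);

	if (!p)
		return 1;
	memset(p, 0xaa, len);		/* first touch faults the pages in */
	printf("buffer backed by node %d\n", node_of(p));
	free(p);
	return 0;
}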

