This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.



[PATCH] malloc: use bitmap to conserve hot bins


From: Joern Engel <joern@purestorage.org>

tcache_gc needs a heuristic to decide which memory will remain useful
in the future and which memory can be returned to the allocator.  This
patch uses the same heuristic as LRU: bins that have been used in the
recent past are preserved entirely, while bins that have not been used
are emptied.

JIRA: PURE-27597
---
 tpc/malloc2.13/tcache.h | 88 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 71 insertions(+), 17 deletions(-)
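
As a rough illustration of the heuristic, independent of the patch
itself: a toy cache with eight bins and a single-word accessed bitmap.
The names (toy_cache, toy_gc, toy_mark_accessed) are invented for the
example; the real code below keeps a multi-word accessed_map sized by
CACHE_BITMAP_SIZE and frees chunks back to their arena instead of
zeroing a counter.

#include <stdio.h>

/* Toy stand-ins: eight "bins", each tracked only by an object count. */
#define TOY_BINS 8

struct toy_cache {
	unsigned long accessed_map;	/* one bit per bin; a single word suffices here */
	int bin_count[TOY_BINS];	/* number of cached objects per bin */
};

/* Called on allocation from a bin: remember that the bin is hot. */
static void toy_mark_accessed(struct toy_cache *c, int bin)
{
	c->accessed_map |= 1UL << bin;
}

/*
 * GC pass: empty every bin that was not touched since the last pass,
 * then clear the marks so the next interval starts fresh.
 */
static void toy_gc(struct toy_cache *c)
{
	for (int i = 0; i < TOY_BINS; i++) {
		if (c->accessed_map & (1UL << i))
			continue;	/* hot bin - keep it */
		c->bin_count[i] = 0;	/* cold bin - give it back */
	}
	c->accessed_map = 0;
}

int main(void)
{
	struct toy_cache c = { .bin_count = { 4, 4, 4, 4, 4, 4, 4, 4 } };

	/* Pretend only bins 1 and 5 were used since the last GC. */
	toy_mark_accessed(&c, 1);
	toy_mark_accessed(&c, 5);
	toy_gc(&c);

	for (int i = 0; i < TOY_BINS; i++)
		printf("bin %d: %d cached objects\n", i, c.bin_count[i]);
	return 0;
}

Bins 1 and 5 keep their objects after the pass; the other six are
emptied, which is the "preserve hot, drop cold" behaviour the patch
applies per tcache bin.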

diff --git a/tpc/malloc2.13/tcache.h b/tpc/malloc2.13/tcache.h
index 7cf6b316456f..1829cd4ebb9a 100644
--- a/tpc/malloc2.13/tcache.h
+++ b/tpc/malloc2.13/tcache.h
@@ -24,9 +24,12 @@ static inline int fls(int x)
  * arena.  On free we keep the freed object in hope of reusing it in
  * future allocations.
  */
-#define CACHE_SIZE		(1 << 16)
-#define MAX_CACHED_SIZE		(CACHE_SIZE >> 3)
-#define MAX_PREFETCH_SIZE	(CACHE_SIZE >> 6)
+#define CACHE_SIZE_BITS		(16)
+#define CACHE_SIZE		(1 << CACHE_SIZE_BITS)
+#define MAX_CACHED_SIZE_BITS	(CACHE_SIZE_BITS - 3)
+#define MAX_CACHED_SIZE		(1 << MAX_CACHED_SIZE_BITS)
+#define MAX_PREFETCH_SIZE_BITS	(CACHE_SIZE_BITS - 6)
+#define MAX_PREFETCH_SIZE	(1 << MAX_PREFETCH_SIZE_BITS)
 #define NO_PREFETCH		(1 << 3)
 
 /*
@@ -40,8 +43,8 @@ static inline int fls(int x)
  */
 #define ALIGN_BITS	(4)
 #define ALIGNMENT	(1 << ALIGN_BITS)
-#define ALIGN_DOWN(x)   ((x) & ~(ALIGNMENT - 1))
-#define ALIGN(x)	(((x) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
+#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
+#define ALIGN(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))
 #define SUBBIN_BITS	(4)
 #define SUBBINS		(1 << SUBBIN_BITS)
 
@@ -57,7 +60,27 @@ static int cache_bin(int size)
 	return SUBBINS * (power - SUBBIN_BITS - 1) + subbin;
 }
 
-#define CACHE_NO_BINS 97
+#define CACHE_NO_BINS	(SUBBINS * (MAX_CACHED_SIZE_BITS - ALIGN_BITS - SUBBIN_BITS + 1) + 1)
+
+#define CACHE_BITMAP_SIZE	(ALIGN(CACHE_NO_BINS, __WORDSIZE) / __WORDSIZE)
+
+#define BITMAP_WORD(i)	((i) / __WORDSIZE)
+#define BITMAP_BIT(i)	(1UL << ((i) & (__WORDSIZE - 1)))
+
+static inline void clear_bit(unsigned long *bitmap, int i)
+{
+	bitmap[BITMAP_WORD(i)] &= ~BITMAP_BIT(i);
+}
+
+static inline void set_bit(unsigned long *bitmap, int i)
+{
+	bitmap[BITMAP_WORD(i)] |= BITMAP_BIT(i);
+}
+
+static inline int get_bit(unsigned long *bitmap, int i)
+{
+	return !!(bitmap[BITMAP_WORD(i)] & BITMAP_BIT(i));
+}
 
 struct thread_cache {
 	/* Bytes in cache */
@@ -66,21 +89,32 @@ struct thread_cache {
 	/* Objects in cache */
 	uint32_t tc_count;
 
+	unsigned long accessed_map[CACHE_BITMAP_SIZE];
+
 	struct malloc_chunk *tc_bin[CACHE_NO_BINS];
 };
 
-/*
- * XXX Completely unoptimized - lots of low-hanging fruit
- */
-static void cache_gc(struct thread_cache *cache)
+static inline void set_accessed(struct thread_cache *cache, int bin)
+{
+	set_bit(cache->accessed_map, bin);
+}
+
+static inline int is_accessed(struct thread_cache *cache, int bin)
+{
+	return get_bit(cache->accessed_map, bin);
+}
+
+static void tcache_gc(struct thread_cache *cache)
 {
 	struct malloc_chunk *victim, *next;
 	struct malloc_state *arena;
-	int i;
+	int i, did_repeat = 0;
 
+repeat:
 	for (i = 0; i < CACHE_NO_BINS; i++) {
 		victim = cache->tc_bin[i];
-		if (!victim)
+		/* accessed bins get skipped - they are useful */
+		if (is_accessed(cache, i) || !victim)
 			continue;
 		cache->tc_bin[i] = NULL;
 		while (victim) {
@@ -88,14 +122,33 @@ static void cache_gc(struct thread_cache *cache)
 			cache->tc_size -= chunksize(victim);
 			cache->tc_count--;
 			arena = arena_for_chunk(victim);
+			/* TODO: use atomic bins instead */
 			mutex_lock(&arena->mutex);
 			_int_free(arena, victim);
 			mutex_unlock(&arena->mutex);
 			victim = next;
 		}
 	}
-	assert(cache->tc_size == 0);
-	assert(cache->tc_count == 0);
+	memset(cache->accessed_map, 0, sizeof(cache->accessed_map));
+
+	if (cache->tc_size > CACHE_SIZE) {
+		/*
+		 * Since we skip accessed bins we can run into
+		 * pathological cases where all bins are empty or
+		 * accessed and we made no progress.  In those cases
+		 * we retry after clearing the accessed bits, freeing
+		 * the entire cache.  Should be rare.
+		 */
+		did_repeat = 1;
+		goto repeat;
+	} else if (did_repeat) {
+		/*
+		 * Since we freed the entire cache, we can verify the
+		 * counters are consistent.
+		 */
+		assert(cache->tc_size == 0);
+		assert(cache->tc_count == 0);
+	}
 }
 
 static void *tcache_malloc(size_t size)
@@ -127,6 +180,7 @@ static void *tcache_malloc(size_t size)
 
 	bin_no = cache_bin(nb);
 	assert(bin_no < CACHE_NO_BINS);
+	set_accessed(cache, bin_no);
 
 	bin = &cache->tc_bin[bin_no];
 	victim = *bin;
@@ -150,11 +204,11 @@ static void *tcache_malloc(size_t size)
 	/*
 	 * GC the cache before prefetching, not after.  The last thing
 	 * we want is to spend effort prefetching, then release all
-	 * those objects via cache_gc.  Also do it before taking the
+	 * those objects via tcache_gc.  Also do it before taking the
 	 * lock, to minimize hold times.
 	 */
 	if (nb <= MAX_PREFETCH_SIZE && (cache->tc_size + nb * 8) > CACHE_SIZE )
-		cache_gc(cache);
+		tcache_gc(cache);
 
 	arena = arena_get(size);
 	if (!arena)
@@ -226,6 +280,6 @@ static int tcache_free(mchunkptr p)
 	p->fd = *bin;
 	*bin = p;
 	if (cache->tc_size > CACHE_SIZE)
-		cache_gc(cache);
+		tcache_gc(cache);
 	return 1;
 }
-- 
2.7.0.rc3
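
A quick standalone check, assuming a 64-bit target (WORDSIZE below
stands in for glibc's __WORDSIZE), that the new CACHE_NO_BINS formula
still evaluates to the old hard-coded 97 and that the accessed bitmap
therefore needs two words:

#include <assert.h>
#include <stdio.h>

/* Constants copied from the patch; WORDSIZE assumed to be 64. */
#define WORDSIZE		64
#define ALIGN_BITS		4
#define SUBBIN_BITS		4
#define SUBBINS			(1 << SUBBIN_BITS)
#define CACHE_SIZE_BITS		16
#define MAX_CACHED_SIZE_BITS	(CACHE_SIZE_BITS - 3)
#define ALIGN(x, a)		(((x) + ((a) - 1)) & ~((a) - 1))

#define CACHE_NO_BINS	(SUBBINS * (MAX_CACHED_SIZE_BITS - ALIGN_BITS - SUBBIN_BITS + 1) + 1)
#define CACHE_BITMAP_SIZE	(ALIGN(CACHE_NO_BINS, WORDSIZE) / WORDSIZE)

int main(void)
{
	/* The removed line hard-coded CACHE_NO_BINS as 97. */
	assert(CACHE_NO_BINS == 97);
	printf("CACHE_NO_BINS = %d, CACHE_BITMAP_SIZE = %d words\n",
	       CACHE_NO_BINS, CACHE_BITMAP_SIZE);
	return 0;
}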

