This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] malloc: prefetch for tcache_malloc
- From: Joern Engel <joern at purestorage dot com>
- To: "GNU C. Library" <libc-alpha at sourceware dot org>
- Cc: Siddhesh Poyarekar <siddhesh dot poyarekar at gmail dot com>, Joern Engel <joern at purestorage dot org>
- Date: Mon, 25 Jan 2016 16:25:08 -0800
- Subject: [PATCH] malloc: prefetch for tcache_malloc
- Authentication-results: sourceware.org; auth=none
- References: <1453767942-19369-1-git-send-email-joern at purestorage dot com>
From: Joern Engel <joern@purestorage.org>
We also cycle through the entire bin if the first object doesn't fit.
This can be somewhat expensive, but we will either find a match or
prefetch objects, so a match is found on one of the next few attempts.
JIRA: PURE-27597
---
tpc/malloc2.13/malloc.c | 5 +++-
tpc/malloc2.13/tcache.h | 68 +++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/tpc/malloc2.13/malloc.c b/tpc/malloc2.13/malloc.c
index 40f6aa578c6f..1ee563bb299e 100644
--- a/tpc/malloc2.13/malloc.c
+++ b/tpc/malloc2.13/malloc.c
@@ -3267,7 +3267,7 @@ Void_t *public_mALLOc(size_t bytes)
victim = tcache_malloc(bytes);
if (victim)
- return victim;
+ goto out;
ar_ptr = arena_get(bytes);
if (!ar_ptr)
@@ -3279,6 +3279,9 @@ Void_t *public_mALLOc(size_t bytes)
}
arena_unlock(ar_ptr);
assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || ar_ptr == arena_for_chunk(mem2chunk(victim)));
+out:
+ if (perturb_byte)
+ alloc_perturb(victim, bytes);
return victim;
}
diff --git a/tpc/malloc2.13/tcache.h b/tpc/malloc2.13/tcache.h
index 62d58cc77475..7cf6b316456f 100644
--- a/tpc/malloc2.13/tcache.h
+++ b/tpc/malloc2.13/tcache.h
@@ -27,7 +27,7 @@ static inline int fls(int x)
#define CACHE_SIZE (1 << 16)
#define MAX_CACHED_SIZE (CACHE_SIZE >> 3)
#define MAX_PREFETCH_SIZE (CACHE_SIZE >> 6)
-#define NO_PREFECT (1 << 3)
+#define NO_PREFETCH (1 << 3)
/*
* Binning is done as a subdivided buddy allocator. A buddy allocator
@@ -102,9 +102,9 @@ static void *tcache_malloc(size_t size)
{
struct thread_cache *cache;
struct malloc_state *arena;
- struct malloc_chunk **bin, *victim;
+ struct malloc_chunk **bin, *victim, *prefetch;
size_t nb;
- int bin_no;
+ int bin_no, i;
checked_request2size(size, nb);
if (nb > MAX_CACHED_SIZE)
@@ -114,6 +114,10 @@ static void *tcache_malloc(size_t size)
if (!cache) {
arena = arena_get(sizeof(*cache));
cache = _int_malloc(arena, sizeof(*cache));
+ if (!cache) {
+ arena = get_backup_arena(arena, sizeof(*cache));
+ cache = _int_malloc(arena, sizeof(*cache));
+ }
arena_unlock(arena);
if (!cache)
return NULL;
@@ -126,23 +130,67 @@ static void *tcache_malloc(size_t size)
bin = &cache->tc_bin[bin_no];
victim = *bin;
- if (victim) {
- if (chunksize(victim) < nb)
- return NULL;
+ while (victim) {
+ if (chunksize(victim) < nb) {
+ bin = &victim->fd;
+ victim = *bin;
+ continue;
+ }
if (cache_bin(chunksize(*bin)) != bin_no) {
malloc_printerr(check_action, "invalid tcache entry", victim);
return NULL;
}
*bin = victim->fd;
void *p = chunk2mem(victim);
- if (perturb_byte)
- alloc_perturb(p, size);
cache->tc_size -= chunksize(victim);
cache->tc_count--;
return p;
}
- /* TODO: prefetch objects */
- return NULL;
+
+ /*
+ * GC the cache before prefetching, not after. The last thing
+ * we want is to spend effort prefetching, then release all
+ * those objects via cache_gc. Also do it before taking the
+ * lock, to minimize hold times.
+ */
+ if (nb <= MAX_PREFETCH_SIZE && (cache->tc_size + nb * 8) > CACHE_SIZE )
+ cache_gc(cache);
+
+ arena = arena_get(size);
+ if (!arena)
+ return NULL;
+ /* TODO: _int_malloc does checked_request2size() again, which is silly */
+ victim = _int_malloc(arena, size);
+ if (!victim) {
+ arena = get_backup_arena(arena, size);
+ victim = _int_malloc(arena, size);
+ }
+ if (victim && nb <= MAX_PREFETCH_SIZE) {
+ /* Prefetch some more while we hold the lock */
+ for (i = 0; i < NO_PREFETCH; i++) {
+ prefetch = _int_malloc(arena, size);
+ if (!prefetch)
+ break;
+ prefetch = mem2chunk(prefetch);
+ if (cache_bin(chunksize(prefetch)) > bin_no) {
+ /*
+ * If _int_malloc() returns bigger chunks,
+ * we assume that prefetching won't buy us
+ * any benefits.
+ */
+ _int_free(arena, prefetch);
+ break;
+ }
+ assert(cache_bin(chunksize(prefetch)) == bin_no);
+ cache->tc_size += chunksize(prefetch);
+ cache->tc_count++;
+ prefetch->fd = *bin;
+ *bin = prefetch;
+ }
+ }
+ arena_unlock(arena);
+ assert(!victim || arena == arena_for_chunk(mem2chunk(victim)));
+ return victim;
}
/*
--
2.7.0.rc3