This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] Remove atomic operations from malloc.c
- From: Will Newton <will dot newton at linaro dot org>
- To: Leonhard Holz <leonhard dot holz at web dot de>
- Cc: libc-alpha <libc-alpha at sourceware dot org>
- Date: Wed, 11 Feb 2015 08:37:22 +0000
- Subject: Re: [PATCH] Remove atomic operations from malloc.c
- Authentication-results: sourceware.org; auth=none
- References: <54DB130F dot 9070300 at web dot de>
On 11 February 2015 at 08:30, Leonhard Holz <leonhard.holz@web.de> wrote:
> The fastbin path of malloc/free features atomic operations, which were
> probably an attempt at lock-free code. But there is an ABA problem in
> malloc's
>
> while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
> != victim);
>
> because even if *fd == victim, victim->fd might have changed in the
> meantime. There is no easy way to fix this; an old comment mentions the need
> for a CAS2 operation with tagged pointers. Consequently malloc is right now
> wrapped by a lock to avoid problems.
>
> Anyhow, while the atomic ops avoid a lock in free they are costly too.
> Especially at high contention the compare_and_exchange may fail multiple
> times (see
> http://blog.memsql.com/common-pitfalls-in-writing-lock-free-algorithms/ for
> a discussion). So I measured how removing atomics and installing a lock
> around free affects performance and it turns out (at least in my case with 2
> cores) that it has no effect:
>
> Current implementation
> threads time per iteration
> 1 116.709
> 8 705.080
> 16 1394.74
> 32 2923.03
>
> Without atomics
> threads time per iteration
> 1 112.541
> 8 715.897
> 16 1403.67
> 32 2881.30
Did you try running the malloc benchtest?
> So I propose to remove the atomic ops because it simplifies the code and
> reactivates a security check (size of top fastbin chunk in free).
>
>
> Remove atomic operations from malloc's fastbin path.
> * malloc/malloc.c (_int_malloc): Remove compare_and_exchange.
> (_int_free): Remove compare_and_exchange and lock arena if needed.
> (malloc_consolidate): Remove atomic_exchange.
>
>
> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index aa7edbf..f43c14c 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -219,7 +219,6 @@
> #include <malloc-machine.h>
> #include <malloc-sysdep.h>
>
> -#include <atomic.h>
> #include <_itoa.h>
> #include <bits/wordsize.h>
> #include <sys/sysinfo.h>
> @@ -3332,15 +3331,7 @@ _int_malloc (mstate av, size_t bytes)
> {
> idx = fastbin_index (nb);
> mfastbinptr *fb = &fastbin (av, idx);
> - mchunkptr pp = *fb;
> - do
> - {
> - victim = pp;
> - if (victim == NULL)
> - break;
> - }
> - while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd,
> victim))
> - != victim);
> + victim = *fb;
> if (victim != 0)
> {
> if (__builtin_expect (fastbin_index (chunksize (victim)) != idx,
> 0))
> @@ -3350,6 +3341,7 @@ _int_malloc (mstate av, size_t bytes)
> malloc_printerr (check_action, errstr, chunk2mem (victim));
> return NULL;
> }
> + *fb = victim->fd;
> check_remalloced_chunk (av, victim, nb);
> void *p = chunk2mem (victim);
> alloc_perturb (p, bytes);
> @@ -3857,29 +3849,18 @@ _int_free (mstate av, mchunkptr p, int have_lock)
> #endif
> ) {
>
> + if (!have_lock)
> + {
> + (void) mutex_lock (&av->mutex);
> + locked = 1;
> + }
> +
> if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ,
> 0)
> || __builtin_expect (chunksize (chunk_at_offset (p, size))
> >= av->system_mem, 0))
> {
> - /* We might not have a lock at this point and concurrent
> modifications
> - of system_mem might have let to a false positive. Redo the test
> - after getting the lock. */
> - if (have_lock
> - || ({ assert (locked == 0);
> - mutex_lock(&av->mutex);
> - locked = 1;
> - chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
> - || chunksize (chunk_at_offset (p, size)) >=
> av->system_mem;
> - }))
> - {
> - errstr = "free(): invalid next size (fast)";
> - goto errout;
> - }
> - if (! have_lock)
> - {
> - (void)mutex_unlock(&av->mutex);
> - locked = 0;
> - }
> + errstr = "free(): invalid next size (fast)";
> + goto errout;
> }
>
> free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
> @@ -3887,35 +3868,34 @@ _int_free (mstate av, mchunkptr p, int have_lock)
> set_fastchunks(av);
> unsigned int idx = fastbin_index(size);
> fb = &fastbin (av, idx);
> + mchunkptr old = *fb;
>
> - /* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
> - mchunkptr old = *fb, old2;
> - unsigned int old_idx = ~0u;
> - do
> + /* Check that the top of the bin is not the record we are going to add
> + (i.e., double free). */
> + if (__builtin_expect (old == p, 0))
> {
> - /* Check that the top of the bin is not the record we are going to
> add
> - (i.e., double free). */
> - if (__builtin_expect (old == p, 0))
> - {
> - errstr = "double free or corruption (fasttop)";
> - goto errout;
> - }
> - /* Check that size of fastbin chunk at the top is the same as
> - size of the chunk that we are adding. We can dereference OLD
> - only if we have the lock, otherwise it might have already been
> - deallocated. See use of OLD_IDX below for the actual check. */
> - if (have_lock && old != NULL)
> - old_idx = fastbin_index(chunksize(old));
> - p->fd = old2 = old;
> + errstr = "double free or corruption (fasttop)";
> + goto errout;
> }
> - while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) !=
> old2);
>
> - if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
> + /* Check that size of fastbin chunk at the top is the same as
> + size of the chunk that we are adding. */
> + if (old != NULL && __builtin_expect (fastbin_index(chunksize(old)) !=
> idx, 0))
> {
> errstr = "invalid fastbin entry (free)";
> goto errout;
> }
> - }
> +
> + /* Link P to its fastbin: P->FD = *FB; *FB = P; */
> + p->fd = old;
> + *fb = p;
> +
> + if (!have_lock)
> + {
> + (void) mutex_unlock(&av->mutex);
> + locked = 0;
> + }
> + }
>
> /*
> Consolidate other non-mmapped chunks as they arrive.
> @@ -4121,7 +4101,8 @@ static void malloc_consolidate(mstate av)
> maxfb = &fastbin (av, NFASTBINS - 1);
> fb = &fastbin (av, 0);
> do {
> - p = atomic_exchange_acq (fb, 0);
> + p = *fb;
> + *fb = NULL;
> if (p != 0) {
> do {
> check_inuse_chunk(av, p);
--
Will Newton
Toolchain Working Group, Linaro