This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] elf: Do not use memalign for TCB/TLS blocks allocation [BZ #17730]
- From: Carlos O'Donell <carlos at redhat dot com>
- To: Florian Weimer <fweimer at redhat dot com>, libc-alpha at sourceware dot org
- Date: Sun, 10 Jul 2016 23:12:36 -0400
- Subject: Re: [PATCH] elf: Do not use memalign for TCB/TLS blocks allocation [BZ #17730]
- Authentication-results: sourceware.org; auth=none
- References: <20160621113822.0A6F04016363B@oldenburg.str.redhat.com>
On 06/21/2016 07:38 AM, Florian Weimer wrote:
> Instead, call malloc and explicitly align the pointer.
>
> There is no external location to store the original (unaligned)
> pointer, and this commit increases the allocation size to store
> the pointer at a fixed location relative to the TCB pointer.
>
> The manual alignment means that some space goes unused which
> was previously made available for subsequent allocations.
> However, in the TLS_DTV_AT_TP case, the manual alignment code
> avoids aligning the pre-TCB to the TLS block alignment. (Even
> while using memalign, the allocation had some unused padding
> in front.)
>
> This concludes the removal of memalign calls from the TLS code,
> and the new tst-tls3-malloc test verifies that only core malloc
> routines are used.
This looks good to me.
Is the the test case 100% reliable? It seems to me that it should be,
but I haven't scanned all of the paths through the TLS startup.
> 2016-06-21 Florian Weimer <fweimer@redhat.com>
>
> [BZ #17730]
> Avoid using memalign for TCB allocations.
> * elf/dl-tls.c (tcb_to_pointer_to_free_location): New.
> (_dl_allocate_tls_storage): Use malloc and manual alignment.
> Avoid alignment gap in the TLS_DTV_AT_TP case.
> (_dl_deallocate_tls): Use tcb_to_pointer_to_free_location to
> determine the pointer to free.
> * nptl/tst-tls3-malloc.c: New test.
> * nptl/Makefile (tests): Add it.
> (tst-tls3-malloc): Link with libdl, libpthread.
> (LDFLAGS-tst-tls3-malloc): Set.
> (tst-tls3-malloc.out): Depend on DSO used in test.
>
> diff --git a/elf/dl-tls.c b/elf/dl-tls.c
> index be6a3c7..17567ad 100644
> --- a/elf/dl-tls.c
> +++ b/elf/dl-tls.c
> @@ -347,6 +347,22 @@ _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
> *alignp = GL(dl_tls_static_align);
> }
>
> +/* Derive the location of the pointer to the start of the original
> + allocation (before alignment) from the pointer to the TCB. */
> +static inline void **
> +tcb_to_pointer_to_free_location (void *tcb)
> +{
> +#if TLS_TCB_AT_TP
> + /* The TCB follows the TLS blocks, and the pointer to the front
> + follows the TCB. */
> + void **original_pointer_location = tcb + TLS_TCB_SIZE;
> +#elif TLS_DTV_AT_TP
> + /* The TCB comes first, preceded by the pre-TCB, and the pointer is
> + before that. */
> + void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
> +#endif
> + return original_pointer_location;
> +}
OK.
>
> void *
> internal_function
> @@ -359,39 +375,50 @@ _dl_allocate_tls_storage (void)
> /* Memory layout is:
> [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
> ^ This should be returned. */
> - size += (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
> - & ~(GL(dl_tls_static_align) - 1);
> + size += TLS_PRE_TCB_SIZE;
OK.
> #endif
>
> - /* Allocate a correctly aligned chunk of memory. */
> - result = __libc_memalign (GL(dl_tls_static_align), size);
> - if (__builtin_expect (result != NULL, 1))
> - {
> - /* Allocate the DTV. */
> - void *allocated = result;
> + /* Perform the allocation. Reserve space for the required alignment
> + and the pointer to the original allocation. */
> + size_t alignment = GL(dl_tls_static_align);
> + void *allocated = malloc (size + alignment + sizeof (void *));
> + if (__glibc_unlikely (allocated == NULL))
> + return NULL;
OK.
>
> + /* Perform alignment and allocate the DTV. */
> #if TLS_TCB_AT_TP
> - /* The TCB follows the TLS blocks. */
> - result = (char *) result + size - TLS_TCB_SIZE;
> + /* The TCB follows the TLS blocks, which determine the alignment.
> + (TCB alignment requirements have been taken into account when
> + calculating GL(dl_tls_static_align).) */
> + void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
> + result = aligned + size - TLS_TCB_SIZE;
>
> - /* Clear the TCB data structure. We can't ask the caller (i.e.
> - libpthread) to do it, because we will initialize the DTV et al. */
> - memset (result, '\0', TLS_TCB_SIZE);
> + /* Clear the TCB data structure. We can't ask the caller (i.e.
> + libpthread) to do it, because we will initialize the DTV et al. */
> + memset (result, '\0', TLS_TCB_SIZE);
> #elif TLS_DTV_AT_TP
> - result = (char *) result + size - GL(dl_tls_static_size);
> + /* Pre-TCB and TCB come before the TLS blocks. The layout computed
> + in _dl_determine_tlsoffset assumes that the TCB is aligned to the
> + TLS block alignment, and not just the TLS blocks after it. This
> + can leave an unused alignment gap between the TCB and the TLS
> + blocks. */
> + result = (void *) roundup
> + (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
> + alignment);
OK.
>
> - /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
> - We can't ask the caller (i.e. libpthread) to do it, because we will
> - initialize the DTV et al. */
> - memset ((char *) result - TLS_PRE_TCB_SIZE, '\0',
> - TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
> + /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
> + it. We can't ask the caller (i.e. libpthread) to do it, because
> + we will initialize the DTV et al. */
> + memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
OK.
> #endif
>
> - result = allocate_dtv (result);
> - if (result == NULL)
> - free (allocated);
> - }
> + /* Record the value of the original pointer for later
> + deallocation. */
> + *tcb_to_pointer_to_free_location (result) = allocated;
OK.
>
> + result = allocate_dtv (result);
> + if (result == NULL)
> + free (allocated);
> return result;
> }
>
> @@ -558,17 +585,7 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
> free (dtv - 1);
>
> if (dealloc_tcb)
> - {
> -#if TLS_TCB_AT_TP
> - /* The TCB follows the TLS blocks. Back up to free the whole block. */
> - tcb -= GL(dl_tls_static_size) - TLS_TCB_SIZE;
> -#elif TLS_DTV_AT_TP
> - /* Back up the TLS_PRE_TCB_SIZE bytes. */
> - tcb -= (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
> - & ~(GL(dl_tls_static_align) - 1);
> -#endif
> - free (tcb);
> - }
> + free (*tcb_to_pointer_to_free_location (tcb));
OK.
> }
> rtld_hidden_def (_dl_deallocate_tls)
>
> diff --git a/nptl/Makefile b/nptl/Makefile
> index e0bc1b7..5bbe8cf 100644
> --- a/nptl/Makefile
> +++ b/nptl/Makefile
> @@ -321,8 +321,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
> tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \
> tst-oncex3 tst-oncex4
> ifeq ($(build-shared),yes)
> -tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-_res1 tst-fini1 \
> - tst-stackguard1
> +tests += tst-atfork2 tst-tls3 tst-tls3-malloc tst-tls4 tst-tls5 tst-_res1 \
OK.
> + tst-fini1 tst-stackguard1
> tests-nolibpthread += tst-fini1
> ifeq ($(have-z-execstack),yes)
> tests += tst-execstack
> @@ -527,6 +527,10 @@ LDFLAGS-tst-tls3 = -rdynamic
> $(objpfx)tst-tls3.out: $(objpfx)tst-tls3mod.so
> $(objpfx)tst-tls3mod.so: $(shared-thread-library)
>
> +$(objpfx)tst-tls3-malloc: $(libdl) $(shared-thread-library)
> +LDFLAGS-tst-tls3-malloc = -rdynamic
> +$(objpfx)tst-tls3-malloc.out: $(objpfx)tst-tls3mod.so
> +
> $(objpfx)tst-tls4: $(libdl) $(shared-thread-library)
> $(objpfx)tst-tls4.out: $(objpfx)tst-tls4moda.so $(objpfx)tst-tls4modb.so
>
> diff --git a/nptl/tst-tls3-malloc.c b/nptl/tst-tls3-malloc.c
> new file mode 100644
> index 0000000..5eab3cd
> --- /dev/null
> +++ b/nptl/tst-tls3-malloc.c
> @@ -0,0 +1,176 @@
> +/* Test TLS allocation with an interposed malloc.
> + Copyright (C) 2016 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* Reuse the test. */
> +#include "tst-tls3.c"
> +
> +#include <sys/mman.h>
> +
> +/* Interpose a minimal malloc implementation. This implementation
> + deliberately interposes just a restricted set of symbols, to detect
> + if the TLS code bypasses the interposed malloc. */
> +
> +/* Lock to guard malloc internals. */
> +static pthread_mutex_t malloc_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +/* Information about an allocation chunk. */
> +struct malloc_chunk
> +{
> + /* Start of the allocation. */
> + void *start;
> + /* Size of the allocation. */
> + size_t size;
> +};
> +
> +enum { malloc_chunk_count = 1000 };
> +static struct malloc_chunk chunks[malloc_chunk_count];
> +
> +/* Lock the malloc lock. */
> +static void
> +xlock (void)
> +{
> + int ret = pthread_mutex_lock (&malloc_lock);
> + if (ret != 0)
> + {
> + errno = ret;
> + printf ("error: pthread_mutex_lock: %m\n");
> + _exit (1);
> + }
> +}
> +
> +/* Unlock the malloc lock. */
> +static void
> +xunlock (void)
> +{
> + int ret = pthread_mutex_unlock (&malloc_lock);
> + if (ret != 0)
> + {
> + errno = ret;
> + printf ("error: pthread_mutex_unlock: %m\n");
> + _exit (1);
> + }
> +}
> +
> +/* Internal malloc without locking and registration. */
> +static void *
> +malloc_internal (size_t size)
> +{
> + void *result = mmap (NULL, size, PROT_READ | PROT_WRITE,
> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> + if (result == MAP_FAILED)
> + {
> + printf ("error: mmap: %m\n");
> + _exit (1);
> + }
> + return result;
> +}
> +
> +void *
> +malloc (size_t size)
> +{
> + if (size == 0)
> + size = 1;
> + xlock ();
> + void *result = malloc_internal (size);
> + for (int i = 0; i < malloc_chunk_count; ++i)
> + if (chunks[i].start == NULL)
> + {
> + chunks[i].start = result;
> + chunks[i].size = size;
> + xunlock ();
> + return result;
> + }
> + xunlock ();
> + printf ("error: no place to store chunk pointer\n");
> + _exit (1);
> +}
> +
> +void *
> +calloc (size_t a, size_t b)
> +{
> + if (b != 0 && a > SIZE_MAX / b)
> + return NULL;
> + /* malloc uses mmap, which provides zeroed memory. */
> + return malloc (a * b);
> +}
> +
> +static void
> +xunmap (void *ptr, size_t size)
> +{
> + int ret = munmap (ptr, size);
> + if (ret < 0)
> + {
> + printf ("error: munmap (%p, %zu) failed: %m\n", ptr, size);
> + _exit (1);
> + }
> +}
> +
> +void
> +free (void *ptr)
> +{
> + if (ptr == NULL)
> + return;
> +
> + xlock ();
> + for (int i = 0; i < malloc_chunk_count; ++i)
> + if (chunks[i].start == ptr)
> + {
> + xunmap (ptr, chunks[i].size);
> + chunks[i] = (struct malloc_chunk) {};
> + xunlock ();
> + return;
> + }
> + xunlock ();
> + printf ("error: tried to free non-allocated pointer %p\n", ptr);
> + _exit (1);
> +}
> +
> +void *
> +realloc (void *old, size_t size)
> +{
> + if (old != NULL)
> + {
> + xlock ();
> + for (int i = 0; i < malloc_chunk_count; ++i)
> + if (chunks[i].start == old)
> + {
> + size_t old_size = chunks[i].size;
> + void *result;
> + if (old_size < size)
> + {
> + result = malloc_internal (size);
> + /* Reuse the slot for the new allocation. */
> + memcpy (result, old, old_size);
> + xunmap (old, old_size);
> + chunks[i].start = result;
> + chunks[i].size = size;
> + }
> + else
> + /* Old size is not smaller, so reuse the old
> + allocation. */
> + result = old;
> + xunlock ();
> + return result;
> + }
> + xunlock ();
> + printf ("error: tried to realloc non-allocated pointer %p\n", old);
> + _exit (1);
> + }
> + else
> + return malloc (size);
> +}
>
OK.
Cheers,
Carlos.