This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: extend dl-minimal malloc implementation


Florian Weimer <fweimer@redhat.com> writes:
> It will allow us to use dynarrays and scratch buffers in the dynamic
> linker because we get a working realloc.

Yup, but you still won't be able to migrate those buffers across the
minimal/complete switchover once libc.so's malloc is loaded.

> You can use _Static_assert for that.

Done.

>> +static free_chunk free_list = { 0, 0, NULL };
>
> Can we turn this into a single pointer, to save .bss space?

Done, with suitable fixes throughout.

>> +static free_chunk *
>> +find_on_free_list (void *old_pointer, size_t n, int zero_it)
>
> zero_it should be bool (also in other places).

Fixed.

>> +      /* Shrinking - just reuse the old chunk.  */
>> +      if (old_chunk->size >= n)
>> +	return old_pointer;
>
> Can we put the tail on the free list?

We *can* but unless there's a compelling need to, remember that the
design goals here are (1) keep the code simple and easy to review, and
(2) see #1.

What we *don't* want is for the minimal malloc to grow in complexity
until it rivals the full malloc ;-)

>> +  /* If we're realloc'ing the last block, we either extend it (by
>> +     re-allocating it over the old block) or we copy it (because a new
>> +     noncontiguous mapping was needed).  */
>
> What does “last” mean here?

Reworded.  It's the highest addressed block in the current mmap'd area.

>> +  /* else if it's really big we can't store it on the free list.  */
>> +  if (ch->size == TOOBIG)
>> +    return;
>
> I think we should just fail TOOBIG allocations.

Carlos and I talked briefly about this a while back.  The idea here is
that in the RARE case that we need to allocate more than 2G at a time,
at least my patch won't break that.  And hopefully you won't be free'ing
or realloc'ing that memory.

Alternately (i.e. in the future, if it matters ;) we can divert all
TOOBIG allocations to mmap/munmap directly.

> Would it make sense to keep the free list order by increasing addresses
> and attempt to coalesce neighboring chunks?  The free list should be
> really short, and this should help us to reduce fragmentation.

Only if we care about fragmentation that much, and see it happen during
loading.  Likewise, we don't try to unmap anything.  We assume a small
amount of memory might be wasted by un-reused free'd blocks when we
switch to the full malloc, but I don't see any way to guarantee that
amount is zero anyway.

> Another thing that might be worth considering is to keep the free list
> pointer in a parameter.

It wouldn't match the full malloc API at that point, though.  We would
need to be careful to stop calling those functions before libc.so is
linked in.

We'd still need to async-protect the top-of-heap data, I don't think
we'd be buying much.  If we need to, we can make this malloc async-safe
throughout, but that still only "solves" the problem before libc.so is
linked in.


>From 3028f66942b68723a2151ca013d4f9d646fbd697 Mon Sep 17 00:00:00 2001
From: DJ Delorie <dj@delorie.com>
Date: Tue, 20 Jun 2017 12:56:30 -0400
Subject: Extend dl-minimal's malloc() implementation

* elf/dl-minimal.c: Upgrade minimal malloc to allow for reusing
free'd memory.  Migrate malloc/calloc/realloc to morecore(),
add free list, optimize calloc zeroing.
* elf/tst-dl-minimal-malloc.c: New.
* elf/Makefile: Run it.

diff --git a/ChangeLog b/ChangeLog
index ebab4ce..31eb8f2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2017-06-20  DJ Delorie  <dj@delorie.com>
+
+	* elf/dl-minimal.c: Upgrade minimal malloc to allow for reusing
+	free'd memory.  Migrate malloc/calloc/realloc to morecore(),
+	add free list, optimize calloc zeroing.
+	* elf/tst-dl-minimal-malloc.c: New.
+	* elf/Makefile: Run it.
+
 2017-06-20  Wilco Dijkstra  <wdijkstr@arm.com>
 
 	* benchtests/powf-inputs: Add reduced trace from wrf.
diff --git a/elf/Makefile b/elf/Makefile
index 201b328..a926449 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -293,6 +293,7 @@ tests += vismain
 tests-pie += vismain
 CFLAGS-vismain.c = $(PIE-ccflag)
 endif
+tests += tst-dl-minimal-malloc
 modules-execstack-yes = tst-execstack-mod
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))
 
diff --git a/elf/dl-minimal.c b/elf/dl-minimal.c
index 59e159a..c9b29a3 100644
--- a/elf/dl-minimal.c
+++ b/elf/dl-minimal.c
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef TESTING_MALLOC
 #include <errno.h>
 #include <limits.h>
 #include <stdio.h>
@@ -28,6 +29,7 @@
 #include <ldsodefs.h>
 #include <_itoa.h>
 #include <malloc/malloc-internal.h>
+#endif
 
 #include <assert.h>
 
@@ -43,10 +45,190 @@ extern void weak_function free (void *ptr);
 extern void * weak_function realloc (void *ptr, size_t n);
 
 
-/* Allocate an aligned memory block.  */
-void * weak_function
-malloc (size_t n)
+/* For each chunk of memory we work with in this minimal malloc, we
+   store one 32-bit "size" field before it, and reuse the first word
+   of the user-space memory to store a linked list of free chunks when
+   the chunk is freed.  This is similar in concept to the regular
+   malloc's design.
+ */
+typedef struct free_chunk {
+  /* This is just to avoid "packed" */
+  uint32_t filler;
+  /* This is the only variable that takes up space between user chunks.  */
+  uint32_t size;
+  /* The address of "next" is what we return to the user.  */
+  struct free_chunk *next;
+} free_chunk;
+
+/* Convert between user and chunk pointers.  */
+#define ptr2chunk(ptr) (free_chunk *)((char *)ptr - sizeof (uint32_t)*2)
+#define chunk2ptr(ch) (void *)(& ch->next)
+/* Extra bytes we need between chunks.  */
+#define OVERHEAD sizeof(uint32_t)
+#define TOOBIG (uint32_t)(~0L)
+
+/* Debugging - make sure the free_chunk struct doesn't have a 32-bit
+   hole between size and next.  */
+_Static_assert (sizeof(free_chunk) == sizeof(uint32_t) * 2 + sizeof(void *),
+		"free_chunk has holes between fields");
+
+/* We have a singly linked list of free'd chunks.  Newly freed chunks
+   are added at the head end, and chunks are removed from
+   anywhere.  */
+static free_chunk *free_list = NULL;
+
+#ifndef DJTEST
+#define DJTEST 0
+#endif
+
+#if DJTEST
+
+/* Strictly for debugging :-)  */
+static char *
+djhex (char *buf, int64_t v)
+{
+  const char hex[] = "0123456789abcdef";
+  int i;
+  for (i=0; i<16; i++)
+    buf[i] = hex[(v>>(4*(15-i))) & 15];
+  buf[16] = ' ';
+  return buf+17;
+}
+static void
+djprintf (const char *msg, size_t s, void *ptr)
+{
+  char buf[1000], *bp = buf;
+  bp = djhex (bp, (int64_t)s);
+  bp = djhex (bp, (int64_t)ptr);
+  write(2, buf, bp-buf);
+  write (2, msg, strlen(msg));
+}
+static void
+djchunk (const char *msg, free_chunk *ch)
+{
+  char buf[1000], *bp = buf;
+  write (2, "\033[30m", 5);
+  bp = djhex (bp, (int64_t)ch);
+  if (ch) {
+    bp = djhex (bp, (int64_t)ch->size);
+    bp = djhex (bp, (int64_t)ch->next);
+  }
+  write(2, buf, bp-buf);
+  write (2, msg, strlen(msg));
+  write (2, "\033[0m", 4);
+}
+static void
+djnl(void) {
+  write (2, "\n", 1);
+}
+
+#else
+
+#define djprintf(a,b,c)
+#define djchunk(a,b)
+#define djnl()
+
+#endif
+
+static free_chunk *
+find_on_free_list (void *old_pointer, size_t n, bool zero_it)
 {
+  free_chunk *old_chunk, *rv, **rvp, *best, **bestp;
+  size_t best_size;
+
+  rvp = &free_list;
+  best = NULL;
+  best_size = 0;
+
+  djchunk("free_list\n", free_list);
+  for (rv = free_list; rv; rv = rv->next) {
+    djchunk("free list\n", rv);
+    if (rv->size >= n
+	&& (rv->size < best_size
+	    || best == NULL))
+      {
+	best = rv;
+	best_size = rv->size;
+	/* bestp points to the previous chunk->next, or &free_list */
+	bestp = rvp;
+	if (rv->size == n)
+	  /* It doesn't get any bester than this.  */
+	  break;
+      }
+    rvp = &(rv->next);
+  }
+
+  if (best == NULL)
+    return NULL;
+
+  *bestp = best->next;
+  djprintf("malloc - reused\n", n, best);
+  djchunk ("bestp\n", *bestp);
+  djchunk ("best\n", best);
+
+  best->next = NULL;
+
+  if (zero_it)
+    memset (chunk2ptr (best), '\0', n);
+
+  if (old_pointer)
+    {
+      old_chunk = ptr2chunk (old_pointer);
+      memcpy (chunk2ptr (best), old_pointer, old_chunk->size < n ? old_chunk->size : n);
+    }
+
+  return best;
+}
+
+/* Combination malloc/calloc/realloc - allocates memory, possibly
+   extending an existing chunk (realloc), possibly zeroing it if
+   needed (calloc).  You shouldn't ask for both realloc and calloc at
+   the same time, of course.  */
+static void *
+morecore(void *old_pointer, size_t n, bool zero_it)
+{
+  free_chunk *rv;
+
+  djnl();
+  djprintf("morecore\n", zero_it ? -n : n, old_pointer);
+
+  /* We round up the requested size so that we have room for the
+     "size" of the next chunk after this one is properly aligned.
+     This means the smallest chunk is 12 bytes on 64-bit
+     platforms.  */
+  if (n < sizeof(void *))
+    n = sizeof(void *);
+  n = ALIGN_UP (n + OVERHEAD, MALLOC_ALIGNMENT) - OVERHEAD;
+
+  /* Special cases for realloc.  */
+  if (old_pointer)
+    {
+      free_chunk *old_chunk = ptr2chunk (old_pointer);
+
+      /* Shrinking - just reuse the old chunk.  */
+      if (old_chunk->size >= n)
+	return old_pointer;
+
+      /* See if we can realloc using existing space at the end of our
+	 current mapping.  */
+      if (alloc_end != 0
+	  && old_pointer == alloc_last_block
+	  && (char *)alloc_end - (char *)old_pointer < n)
+	{
+	  /* We have space at the end of the current page, use it.  */
+	  old_chunk->size = n;
+	  alloc_ptr = alloc_last_block + n;
+	  return old_pointer;
+	}
+    }
+
+  /* Do a best-fit search of the free list first... */
+  rv = find_on_free_list (old_pointer, n, zero_it);
+  if (rv)
+    return chunk2ptr(rv);
+
+  /* Nothing on the free list, allocate a new chunk.  */
+
   if (alloc_end == 0)
     {
       /* Consume any unused space in the last page of our data segment.  */
@@ -57,9 +239,16 @@ malloc (size_t n)
 				& ~(GLRO(dl_pagesize) - 1));
     }
 
-  /* Make sure the allocation pointer is ideally aligned.  */
-  alloc_ptr = (void *) 0 + (((alloc_ptr - (void *) 0) + MALLOC_ALIGNMENT - 1)
-			    & ~(MALLOC_ALIGNMENT - 1));
+  /* If we're realloc'ing the highest addressed block in our heap, we
+     either extend it (by re-allocating it over the old block) or we
+     copy it (because a new noncontiguous mapping was needed).  */
+  if (old_pointer && old_pointer == alloc_last_block)
+    /* Note we do NOT pad/align here, because we're using the
+       padding/alignment from back when we first allocated it.  */
+    alloc_ptr = alloc_last_block;
+  else
+    /* Make sure the allocation pointer is ideally aligned.  */
+    alloc_ptr = (void *) PTR_ALIGN_UP ((char *) alloc_ptr + OVERHEAD, MALLOC_ALIGNMENT);
 
   if (alloc_ptr + n >= alloc_end || n >= -(uintptr_t) alloc_ptr)
     {
@@ -75,13 +264,46 @@ malloc (size_t n)
       if (page == MAP_FAILED)
 	return NULL;
       if (page != alloc_end)
-	alloc_ptr = page;
+	{
+	  alloc_ptr = page;
+	  /* Make sure the allocation pointer is ideally aligned.  */
+	  alloc_ptr = (void *) PTR_ALIGN_UP ((char *) alloc_ptr + OVERHEAD, MALLOC_ALIGNMENT);
+	}
       alloc_end = page + nup;
     }
 
+  if (old_pointer && alloc_ptr != old_pointer)
+    {
+      free_chunk *old_chunk = ptr2chunk (old_pointer);
+      /* We need to copy the data, since we didn't just extend our mapping.  */
+      memcpy (alloc_ptr, old_pointer, old_chunk->size < n ? old_chunk->size : n);
+    }
+  /* The else case here is that we extended our mapping contiguously,
+     and/or reused existing space, so old_pointer == alloc_ptr (or
+     it's not set) and we don't need to do the copy.  */
+
+  /* Remember the chunk size here.  */
+  rv = ptr2chunk (alloc_ptr);
+  if (n < UINT32_MAX)
+    rv->size = n;
+  else
+    rv->size = TOOBIG;
+
   alloc_last_block = (void *) alloc_ptr;
   alloc_ptr += n;
-  return alloc_last_block;
+  djprintf("\033[32mmalloc\033[0m\n", n, alloc_last_block);
+  djchunk ("returning\n", rv);
+
+  /* We don't need to zero this, even if desired, since it's never
+     been used since we mmap'd it.  */
+  return chunk2ptr (rv);
+}
+
+/* Allocate an aligned memory block.  */
+void * weak_function
+malloc (size_t n)
+{
+  return morecore (NULL, n, 0);
 }
 
 /* We use this function occasionally since the real implementation may
@@ -90,9 +312,6 @@ malloc (size_t n)
 void * weak_function
 calloc (size_t nmemb, size_t size)
 {
-  /* New memory from the trivial malloc above is always already cleared.
-     (We make sure that's true in the rare occasion it might not be,
-     by clearing memory in free, below.)  */
   size_t bytes = nmemb * size;
 
 #define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2))
@@ -100,36 +319,50 @@ calloc (size_t nmemb, size_t size)
       && size != 0 && bytes / size != nmemb)
     return NULL;
 
-  return malloc (bytes);
+  return morecore (NULL, bytes, 1);
+}
+
+/* This is only called with the most recent block returned by malloc.  */
+void * weak_function
+realloc (void *ptr, size_t n)
+{
+  return morecore (ptr, n, 0);
 }
 
 /* This will rarely be called.  */
 void weak_function
 free (void *ptr)
 {
-  /* We can free only the last block allocated.  */
+  free_chunk *ch = ptr2chunk(ptr);
+  djnl();
+  djprintf("\033[31mfree\033[0m\n", ch->size, ptr);
+
+  if (ptr == NULL)
+    return;
+
+  /* paranoia, and debugging - we want a core dump, not a text message.  For testing. */
+  if (ch->size == 0)
+    *(int *)(-1) = 0;
+
+  /* We can easily free the last block in the heap.  */
   if (ptr == alloc_last_block)
     {
-      /* Since this is rare, we clear the freed block here
-	 so that calloc can presume malloc returns cleared memory.  */
-      memset (alloc_last_block, '\0', alloc_ptr - alloc_last_block);
-      alloc_ptr = alloc_last_block;
+      /* Undo the alignment and padding we do in morecore().  */
+      alloc_ptr = (void *) ((char *)alloc_last_block - OVERHEAD);
+      return;
     }
-}
 
-/* This is only called with the most recent block returned by malloc.  */
-void * weak_function
-realloc (void *ptr, size_t n)
-{
-  if (ptr == NULL)
-    return malloc (n);
-  assert (ptr == alloc_last_block);
-  size_t old_size = alloc_ptr - alloc_last_block;
-  alloc_ptr = alloc_last_block;
-  void *new = malloc (n);
-  return new != ptr ? memcpy (new, ptr, old_size) : new;
+  /* else if it's really big we can't store it on the free list.  */
+  if (ch->size == TOOBIG)
+    return;
+
+  /* else store the chunk on the free list.  */
+  ch->next = free_list;
+  free_list = ch;
+  djchunk("free_list\n", free_list);
 }

+#ifndef TESTING_MALLOC
 /* Avoid signal frobnication in setjmp/longjmp.  Keeps things smaller.  */
 
 #include <setjmp.h>
@@ -294,3 +527,5 @@ __strsep (char **stringp, const char *delim)
 }
 weak_alias (__strsep, strsep)
 strong_alias (__strsep, __strsep_g)
+
+#endif /* TESTING_MALLOC */
diff --git a/elf/tst-dl-minimal-malloc.c b/elf/tst-dl-minimal-malloc.c
new file mode 100644
index 0000000..a0b3e26
--- /dev/null
+++ b/elf/tst-dl-minimal-malloc.c
@@ -0,0 +1,163 @@
+/* Minimal testsuite for dl-minimal's malloc/free implementation
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <libc-lock.h>
+#include <stdarg.h>
+#include <libc-pointer-arith.h>
+
+/*----------------------------------------*/
+
+/* All this hackery is just because we're trying to use dl-minimal.os
+   directly to ensure we get the right copy of malloc to test, but
+   dl-minimal.os is designed to be used in ld.so, not other
+   programs.  */
+
+#define internal_function
+#define attribute_hidden
+#define attribute_relro
+#define weak_function
+#define __rtld_lock_define_recursive(a,b)
+#define rtld_hidden_proto(x)
+#define libc_hidden_proto(x)
+#define weak_extern(x)
+#define __libc_enable_secure 1
+
+#define __mmap mmap
+#define MALLOC_ALIGNMENT 16
+#define GLRO(x) x
+
+int dl_pagesize;
+
+/*----------------------------------------*/
+
+#define TESTING_MALLOC 1
+#include "./dl-minimal.c"
+
+/*----------------------------------------*/
+
+#define SZ(p) *(uint32_t *)((p)-4)
+
+static void
+fill(unsigned char *ptr, int count)
+{
+  memset (ptr, '5', count);
+}
+
+static int
+check_calloc (int sz)
+{
+  int i;
+  char *ptr = calloc (sz, 1);
+  for (i=0; i<sz; i++)
+    if (ptr[i] != '\0')
+      {
+	printf("calloc(%d) had nonzero at %d\n", sz, i);
+	return 1;
+      }
+  return 0;
+}
+
+
+static int
+do_test(void)
+{
+  unsigned char *p, *p2, *p3;
+  size_t rest;
+  int rv = 0;
+
+  dl_pagesize = getpagesize();
+
+  p = malloc(4000);
+  if (SZ(p) != 4012)
+    {
+      printf("size of malloc(4000) not 4012\n");
+      return 1;
+    }
+
+  /* Force the next one after this to a new page.  */
+  rest = 4095 - (((size_t) p) & 4095);
+  p2 = realloc (p, rest);
+
+  p = malloc(12);
+  p2 = realloc(p, 12);
+  if (p != p2)
+    {
+      printf("realloc in place: pointer changed\n");
+      return 1;
+    }
+  p2 = realloc(p, 24);
+  if (p != p2)
+    {
+      printf("realloc in place: pointer changed\n");
+      return 1;
+    }
+
+  /* Block more reallocs-in-place.  */
+  p2 = malloc(1);
+
+  p2 = realloc(p, 40);
+  if (p2 == p)
+    {
+      printf("realloc copy: pointer not moved\n");
+      return 1;
+    }
+
+  free (p2);
+  p = malloc(40);
+  if (p != p2)
+    {
+      printf("free list: free'd chunk not reused\n");
+      return 1;
+    }
+
+  p = malloc(30);
+  p2 = malloc(50);
+  p3 = malloc(70);
+
+  fill(p, 30);
+  fill(p2, 50);
+  fill(p3, 70);
+
+  free (p);
+  free (p2);
+  free (p3);
+
+  p = malloc (45);
+  if (p != p2)
+    {
+      printf("free list: best fit not used\n");
+      return 1;
+    }
+
+  rv += check_calloc (30);
+  rv += check_calloc (50);
+  rv += check_calloc (70);
+  rv += check_calloc (90);
+
+  return rv;
+}
+
+#include <support/test-driver.c>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]