This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: PowerPC malloc alignment
- From: Daniel Jacobowitz <drow at false dot org>
- To: libc-alpha at sourceware dot org
- Cc: Roland McGrath <roland at redhat dot com>, Jakub Jelinek <jakub at redhat dot com>, Steven Munroe <sjmunroe at us dot ibm dot com>
- Date: Fri, 30 Nov 2007 12:13:36 -0500
- Subject: Re: PowerPC malloc alignment
- References: <20071031205240.GA30110@caradoc.them.org> <20071031212316.GU2896@sunsite.mff.cuni.cz> <20071031213648.769884D04AE@magilla.localdomain> <20071102194011.GA5520@caradoc.them.org> <20071105180927.GA20539@caradoc.them.org>
On Mon, Nov 05, 2007 at 01:09:27PM -0500, Daniel Jacobowitz wrote:
> Here is a patch which does neither of those extras. Both extras are
> for the sole benefit of existing emacs binaries; probably one of them
> should accompany this patch, but at least this is a starting point.
Here's a revised version. Corey Minyard at MontaVista discovered that
there was another place which assumed MALLOC_ALIGNMENT == 2 * SIZE_SZ;
it should be treated just like the others.
--
Daniel Jacobowitz
CodeSourcery
2007-11-30 Daniel Jacobowitz <dan@codesourcery.com>
* malloc/malloc.c (MALLOC_ALIGNMENT): Use __alignof__ (long double).
(SMALLBIN_CORRECTION): New.
(MIN_LARGE_SIZE, smallbin_index): Use it to handle 16-byte alignment.
(largebin_index_32_big): New.
(largebin_index): Use it for 16-byte alignment.
(sYSMALLOc): Handle MALLOC_ALIGNMENT > 2 * SIZE_SZ.
Index: malloc/malloc.c
===================================================================
RCS file: /cvs/glibc/libc/malloc/malloc.c,v
retrieving revision 1.181
diff -u -p -r1.181 malloc.c
--- malloc/malloc.c 2 Oct 2007 03:52:03 -0000 1.181
+++ malloc/malloc.c 30 Nov 2007 17:10:43 -0000
@@ -378,16 +378,8 @@ extern "C" {
#ifndef MALLOC_ALIGNMENT
-/* XXX This is the correct definition. It differs from 2*SIZE_SZ only on
- powerpc32. For the time being, changing this is causing more
- compatibility problems due to malloc_get_state/malloc_set_state than
- will returning blocks not adequately aligned for long double objects
- under -mlong-double-128.
-
#define MALLOC_ALIGNMENT (2 * SIZE_SZ < __alignof__ (long double) \
? __alignof__ (long double) : 2 * SIZE_SZ)
-*/
-#define MALLOC_ALIGNMENT (2 * SIZE_SZ)
#endif
/* The corresponding bit mask value */
@@ -2121,18 +2113,23 @@ typedef struct malloc_chunk* mbinptr;
The bins top out around 1MB because we expect to service large
requests via mmap.
+
+ Bin 0 does not exist. Bin 1 is the unordered list; if that would be
+ a valid chunk size the small bins are bumped up one.
*/
#define NBINS 128
#define NSMALLBINS 64
#define SMALLBIN_WIDTH MALLOC_ALIGNMENT
-#define MIN_LARGE_SIZE (NSMALLBINS * SMALLBIN_WIDTH)
+#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
+#define MIN_LARGE_SIZE ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
#define in_smallbin_range(sz) \
((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
#define smallbin_index(sz) \
- (SMALLBIN_WIDTH == 16 ? (((unsigned)(sz)) >> 4) : (((unsigned)(sz)) >> 3))
+ ((SMALLBIN_WIDTH == 16 ? (((unsigned)(sz)) >> 4) : (((unsigned)(sz)) >> 3)) \
+ + SMALLBIN_CORRECTION)
#define largebin_index_32(sz) \
(((((unsigned long)(sz)) >> 6) <= 38)? 56 + (((unsigned long)(sz)) >> 6): \
@@ -2142,6 +2139,14 @@ typedef struct malloc_chunk* mbinptr;
((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \
126)
+#define largebin_index_32_big(sz) \
+(((((unsigned long)(sz)) >> 6) <= 45)? 49 + (((unsigned long)(sz)) >> 6): \
+ ((((unsigned long)(sz)) >> 9) <= 20)? 91 + (((unsigned long)(sz)) >> 9): \
+ ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
+ ((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \
+ ((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \
+ 126)
+
// XXX It remains to be seen whether it is good to keep the widths of
// XXX the buckets the same or whether it should be scaled by a factor
// XXX of two as well.
@@ -2154,7 +2159,9 @@ typedef struct malloc_chunk* mbinptr;
126)
#define largebin_index(sz) \
- (SIZE_SZ == 8 ? largebin_index_64 (sz) : largebin_index_32 (sz))
+ (SIZE_SZ == 8 ? largebin_index_64 (sz) \
+ : MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz) \
+ : largebin_index_32 (sz))
#define bin_index(sz) \
((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
@@ -2951,14 +2958,14 @@ static Void_t* sYSMALLOc(nb, av) INTERNA
Round up size to nearest page. For mmapped chunks, the overhead
is one SIZE_SZ unit larger than for normal chunks, because there
is no following chunk whose prev_size field could be used.
+
+ See the front_misalign handling below; for glibc there is no
+ need for further alignments unless we have high alignment.
*/
-#if 1
- /* See the front_misalign handling below, for glibc there is no
- need for further alignments. */
- size = (nb + SIZE_SZ + pagemask) & ~pagemask;
-#else
- size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
-#endif
+ if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+ size = (nb + SIZE_SZ + pagemask) & ~pagemask;
+ else
+ size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
tried_mmap = true;
/* Don't try if size wraps around 0 */
@@ -2976,13 +2983,16 @@ static Void_t* sYSMALLOc(nb, av) INTERNA
address argument for later munmap in free() and realloc().
*/
-#if 1
- /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
- MALLOC_ALIGN_MASK is 2*SIZE_SZ-1. Each mmap'ed area is page
- aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */
- assert (((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0);
-#else
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
+ if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+ {
+ /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
+ MALLOC_ALIGN_MASK is 2*SIZE_SZ-1. Each mmap'ed area is page
+ aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */
+ assert (((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0);
+ front_misalign = 0;
+ }
+ else
+ front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
correction = MALLOC_ALIGNMENT - front_misalign;
p = (mchunkptr)(mm + correction);
@@ -2990,7 +3000,6 @@ static Void_t* sYSMALLOc(nb, av) INTERNA
set_head(p, (size - correction) |IS_MMAPPED);
}
else
-#endif
{
p = (mchunkptr)mm;
set_head(p, size|IS_MMAPPED);
@@ -3278,8 +3287,25 @@ static Void_t* sYSMALLOc(nb, av) INTERNA
/* handle non-contiguous cases */
else {
- /* MORECORE/mmap must correctly align */
- assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0);
+ if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
+ /* MORECORE/mmap must correctly align */
+ assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0);
+ else {
+ front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+ if (front_misalign > 0) {
+
+ /*
+ Skip over some bytes to arrive at an aligned position.
+ We don't need to specially mark these wasted front bytes.
+ They will never be accessed anyway because
+ prev_inuse of av->top (and any chunk created from its start)
+ is always true after initialization.
+ */
+
+ correction = MALLOC_ALIGNMENT - front_misalign;
+ aligned_brk += correction;
+ }
+ }
/* Find out current end of memory */
if (snd_brk == (char*)(MORECORE_FAILURE)) {