This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.



GNU C Library master sources branch release/2.26/master updated. glibc-2.26-84-ga546080


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, release/2.26/master has been updated
       via  a546080d517c8833ce1d6abdb86a9293c8d29bae (commit)
       via  aa5be982ea1f59ee1e479fe9a561aeafd91a2261 (commit)
       via  ade53e0df7a1257c54ce96e01fa659b374db2117 (commit)
      from  77f921dac17c5fa99bd9e926d926c327982895f7 (commit)

The revisions listed above that are new to this repository have not
appeared in any other notification email, so we list them in full below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a546080d517c8833ce1d6abdb86a9293c8d29bae

commit a546080d517c8833ce1d6abdb86a9293c8d29bae
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Oct 18 12:20:55 2017 +0100

    Fix build failure on tilepro due to unsupported atomics
    
            * malloc/malloc.c (malloc_state): Use int for have_fastchunks since
            not all targets support atomics on bool.
    
    (cherry-picked from 2c2245b92ccf6344b324d17d8f94ccd3b8c559c6)
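
For illustration only, and not part of the commit: the underlying issue is
that some targets only provide atomic operations on word-sized types, so a
flag that is logically boolean is declared as int.  A minimal standalone
sketch of the same shape in portable C11 atomics (the names below are
invented for this example):

    #include <stdatomic.h>

    /* Logically a boolean flag.  Declared as int because some targets
       (tilepro, for example) only support atomics on word-sized types,
       so atomic operations on bool may fail to build.  */
    static _Atomic int have_fastchunks_flag;

    void
    mark_fastchunks (void)
    {
      atomic_store_explicit (&have_fastchunks_flag, 1,
                             memory_order_relaxed);
    }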

diff --git a/ChangeLog b/ChangeLog
index 67d3503..d67ad03 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-10-18  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* malloc/malloc.c (malloc_state): Use int for have_fastchunks since
+	not all targets support atomics on bool.
+
 2017-10-17  Wilco Dijkstra  <wdijkstr@arm.com>
 
 	* malloc/malloc.c (FASTCHUNKS_BIT): Remove.
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 37e3c44..dd9f699 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1689,7 +1689,8 @@ struct malloc_state
   int flags;
 
   /* Set if the fastbin chunks contain recently inserted free blocks.  */
-  bool have_fastchunks;
+  /* Note this is a bool but not all targets support atomics on booleans.  */
+  int have_fastchunks;
 
   /* Fastbins */
   mfastbinptr fastbinsY[NFASTBINS];

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=aa5be982ea1f59ee1e479fe9a561aeafd91a2261

commit aa5be982ea1f59ee1e479fe9a561aeafd91a2261
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date:   Thu Nov 16 12:21:27 2017 +0530

    Use relaxed atomics for malloc have_fastchunks
    
    Currently free typically uses two atomic operations per call.  The
    have_fastchunks flag indicates whether there are recently freed blocks
    in the fastbins.  This is purely an optimization to avoid calling
    malloc_consolidate too often and to avoid the overhead of walking all
    fast bins even when they are all empty during a sequence of
    allocations.  However, using catomic_or to update the flag is
    unnecessary, since it can be changed into a simple boolean and
    accessed using relaxed atomics.  There is no change in multi-threaded
    behaviour, given that the flag is already approximate (it may be set
    when there are no blocks in any fast bin, or it may be clear when
    there are free blocks that could be consolidated).

    Performance of malloc/free improves by 27% on a simple benchmark on
    AArch64 (both single- and multi-threaded).  The number of load/store
    exclusive instructions is reduced by 33%, and bench-malloc-thread
    speeds up by ~3% in all cases.
    
    	* malloc/malloc.c (FASTCHUNKS_BIT): Remove.
    	(have_fastchunks): Remove.
    	(clear_fastchunks): Remove.
    	(set_fastchunks): Remove.
    	(malloc_state): Add have_fastchunks.
    	(malloc_init_state): Use have_fastchunks.
    	(do_check_malloc_state): Remove incorrect invariant checks.
    	(_int_malloc): Use have_fastchunks.
    	(_int_free): Likewise.
    	(malloc_consolidate): Likewise.
    
    (cherry-picked from e956075a5a2044d05ce48b905b10270ed4a63e87)
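
A hedged sketch of the pattern this commit adopts, written with portable
C11 atomics rather than glibc's internal atomic_load_relaxed and
atomic_store_relaxed macros (the function names are invented for this
example):

    #include <stdatomic.h>
    #include <stdbool.h>

    static _Atomic bool have_fastchunks;

    /* Free path: record that a chunk entered a fastbin.  Relaxed
       ordering suffices because the flag is only a hint, not a
       synchronization point.  */
    static void
    note_fastbin_insert (void)
    {
      atomic_store_explicit (&have_fastchunks, true, memory_order_relaxed);
    }

    /* Allocation path: consolidate only when the hint says it may help.
       A stale read causes at worst one redundant or one skipped
       consolidation pass; correctness is unaffected.  */
    static void
    maybe_consolidate (void)
    {
      if (atomic_load_explicit (&have_fastchunks, memory_order_relaxed))
        {
          atomic_store_explicit (&have_fastchunks, false,
                                 memory_order_relaxed);
          /* ... walk the fast bins and consolidate chunks ... */
        }
    }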

diff --git a/ChangeLog b/ChangeLog
index 192acbf..67d3503 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2017-10-17  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* malloc/malloc.c (FASTCHUNKS_BIT): Remove.
+	(have_fastchunks): Remove.
+	(clear_fastchunks): Remove.
+	(set_fastchunks): Remove.
+	(malloc_state): Add have_fastchunks.
+	(malloc_init_state): Use have_fastchunks.
+	(do_check_malloc_state): Remove incorrect invariant checks.
+	(_int_malloc): Use have_fastchunks.
+	(_int_free): Likewise.
+	(malloc_consolidate): Likewise.
+
+2017-10-17  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* malloc/malloc.c (tcache_put): Inline.
 	(tcache_get): Inline.
 
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 546579b..37e3c44 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1613,27 +1613,6 @@ typedef struct malloc_chunk *mfastbinptr;
 #define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
 
 /*
-   Since the lowest 2 bits in max_fast don't matter in size comparisons,
-   they are used as flags.
- */
-
-/*
-   FASTCHUNKS_BIT held in max_fast indicates that there are probably
-   some fastbin chunks. It is set true on entering a chunk into any
-   fastbin, and cleared only in malloc_consolidate.
-
-   The truth value is inverted so that have_fastchunks will be true
-   upon startup (since statics are zero-filled), simplifying
-   initialization checks.
- */
-
-#define FASTCHUNKS_BIT        (1U)
-
-#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
-#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
-#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
-
-/*
    NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
    regions.  Otherwise, contiguity is exploited in merging together,
    when possible, results from consecutive MORECORE calls.
@@ -1690,6 +1669,17 @@ get_max_fast (void)
    ----------- Internal state representation and initialization -----------
  */
 
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given its sole purpose is to reduce the number of redundant
+   calls to malloc_consolidate, it does not affect correctness.  As a
+   result we can safely use relaxed atomic accesses.
+ */
+
+
 struct malloc_state
 {
   /* Serialize access.  */
@@ -1698,6 +1688,9 @@ struct malloc_state
   /* Flags (formerly in max_fast).  */
   int flags;
 
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  bool have_fastchunks;
+
   /* Fastbins */
   mfastbinptr fastbinsY[NFASTBINS];
 
@@ -1841,7 +1834,7 @@ malloc_init_state (mstate av)
   set_noncontiguous (av);
   if (av == &main_arena)
     set_max_fast (DEFAULT_MXFAST);
-  av->flags |= FASTCHUNKS_BIT;
+  atomic_store_relaxed (&av->have_fastchunks, false);
 
   av->top = initial_top (av);
 }
@@ -2206,11 +2199,6 @@ do_check_malloc_state (mstate av)
         }
     }
 
-  if (total != 0)
-    assert (have_fastchunks (av));
-  else if (!have_fastchunks (av))
-    assert (total == 0);
-
   /* check normal bins */
   for (i = 1; i < NBINS; ++i)
     {
@@ -3701,7 +3689,7 @@ _int_malloc (mstate av, size_t bytes)
   else
     {
       idx = largebin_index (nb);
-      if (have_fastchunks (av))
+      if (atomic_load_relaxed (&av->have_fastchunks))
         malloc_consolidate (av);
     }
 
@@ -4116,7 +4104,7 @@ _int_malloc (mstate av, size_t bytes)
 
       /* When we are using atomic ops to free fast chunks we can get
          here for all block sizes.  */
-      else if (have_fastchunks (av))
+      else if (atomic_load_relaxed (&av->have_fastchunks))
         {
           malloc_consolidate (av);
           /* restore original bin index */
@@ -4242,7 +4230,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 
     free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
 
-    set_fastchunks(av);
+    atomic_store_relaxed (&av->have_fastchunks, true);
     unsigned int idx = fastbin_index(size);
     fb = &fastbin (av, idx);
 
@@ -4393,7 +4381,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     */
 
     if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
-      if (have_fastchunks(av))
+      if (atomic_load_relaxed (&av->have_fastchunks))
 	malloc_consolidate(av);
 
       if (av == &main_arena) {
@@ -4464,7 +4452,7 @@ static void malloc_consolidate(mstate av)
   */
 
   if (get_max_fast () != 0) {
-    clear_fastchunks(av);
+    atomic_store_relaxed (&av->have_fastchunks, false);
 
     unsorted_bin = unsorted_chunks(av);
 

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ade53e0df7a1257c54ce96e01fa659b374db2117

commit ade53e0df7a1257c54ce96e01fa659b374db2117
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Oct 17 18:25:43 2017 +0100

    Inline tcache functions
    
    The functions tcache_get and tcache_put show up in profiles as they
    are a critical part of the tcache code.  Inline them to give tcache
    a 16% performance gain.  Since this improves multi-threaded cases
    as well, it helps offset any potential performance loss due to adding
    single-threaded fast paths.
    
    	* malloc/malloc.c (tcache_put): Inline.
    	(tcache_get): Inline.
    
    (cherry-picked from commit e4dd4ace56880d2f1064cd787e2bdb96ddacc3c4)
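
A hedged illustration of the annotation outside glibc (the freelist type
and helper below are invented for this example; within glibc,
__always_inline comes from <sys/cdefs.h> and wraps GCC's always_inline
attribute):

    #include <stddef.h>

    struct entry { struct entry *next; };

    /* Forcing inlining of a tiny hot-path helper removes the call
       overhead and lets the compiler fold it into each caller even when
       its own heuristics would decline to inline.  */
    static inline __attribute__ ((__always_inline__)) void *
    freelist_pop (struct entry **head)
    {
      struct entry *e = *head;
      if (e != NULL)
        *head = e->next;
      return e;
    }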

diff --git a/ChangeLog b/ChangeLog
index e17a716..192acbf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-10-17  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* malloc/malloc.c (tcache_put): Inline.
+	(tcache_get): Inline.
+
 2017-10-13  James Clarke  <jrtc27@jrtc27.com>
 
 	* sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela):
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 01ec157..546579b 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -2957,7 +2957,7 @@ static __thread tcache_perthread_struct *tcache = NULL;
 
 /* Caller must ensure that we know tc_idx is valid and there's room
    for more chunks.  */
-static void
+static __always_inline void
 tcache_put (mchunkptr chunk, size_t tc_idx)
 {
   tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
@@ -2969,7 +2969,7 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
 
 /* Caller must ensure that we know tc_idx is valid and there's
    available chunks to remove.  */
-static void *
+static __always_inline void *
 tcache_get (size_t tc_idx)
 {
   tcache_entry *e = tcache->entries[tc_idx];

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog       |   23 ++++++++++++++++++++++
 malloc/malloc.c |   57 ++++++++++++++++++++++--------------------------------
 2 files changed, 46 insertions(+), 34 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources

