This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/2] Single thread optimization for malloc atomics


This patch adds a single-thread optimization for malloc atomic usage: it
first checks whether the process is single-threaded (ST) and, if so, uses
normal loads/stores instead of atomic instructions.

It is a respin of my initial attempt to add the ST optimization on PowerPC.
Now the malloc optimization is arch-agnostic and relies on macros already
defined in GLIBC (SINGLE_THREAD_P).  x86 would probably like an atomic.h
refactor to avoid the double check for '__local_multiple_threads', but that
is not the aim of this patch.

Tested on powerpc32, powerpc64, and x86_64.

--

	* malloc/malloc.c (MALLOC_ATOMIC_OR): New macro: optimize atomic for
	single thread.
	(MALLOC_ATOMIC_AND): Likewise.
	(MALLOC_ATOMIC_CAS_VAL_ACQ): Likewise.
	(MALLOC_ATOMIC_CAS_VAL_REL): Likewise.
	(clear_fastchunks): Use optimized single thread macro.
	(set_fastchunks): Likewise.
	(_int_malloc): Likewise.
	(_int_free): Likewise.

---

diff --git a/malloc/malloc.c b/malloc/malloc.c
index 1120d4d..bb0aa82 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -231,6 +231,7 @@
 #include <errno.h>
 
 #include <shlib-compat.h>
+#include <sysdep-cancel.h>  /* For SINGLE_THREAD_P macro  */
 
 /* For uintptr_t.  */
 #include <stdint.h>
@@ -243,6 +244,57 @@
 
 
 /*
+  Single-thread lock optimization: atomic primitives first check the number of
+  threads and avoid atomic instructions for single-thread case.
+ */
+#define MALLOC_ATOMIC_OR(mem, mask)					      \
+ ({									      \
+   if (!SINGLE_THREAD_P)						      \
+     catomic_or (mem, mask);						      \
+   else									      \
+     *mem |= mask;							      \
+ })
+
+#define MALLOC_ATOMIC_AND(mem, mask)					      \
+ ({									      \
+   if (!SINGLE_THREAD_P)						      \
+     catomic_and (mem, mask);						      \
+   else									      \
+     *mem &= mask;							      \
+ })
+
+#define MALLOC_ATOMIC_CAS_VAL_ACQ(mem, newval, oldval)			      \
+ ({									      \
+    __typeof (*(mem)) __tmp;						      \
+    __typeof (mem) __memp = (mem);					      \
+    if (!SINGLE_THREAD_P)						      \
+      __tmp = catomic_compare_and_exchange_val_acq (mem, newval, oldval);     \
+    else								      \
+      {									      \
+	__tmp = *__memp;						      \
+	if (__tmp == oldval)						      \
+	  *__memp = newval;						      \
+      }									      \
+    __tmp;								      \
+ })
+
+#define MALLOC_ATOMIC_CAS_VAL_REL(mem, newval, oldval)			      \
+ ({									      \
+    __typeof (*(mem)) __tmp;						      \
+    __typeof (mem) __memp = (mem);					      \
+    if (!SINGLE_THREAD_P)						      \
+      __tmp = catomic_compare_and_exchange_val_rel (mem, newval, oldval);     \
+    else								      \
+      {									      \
+	__tmp = *__memp;						      \
+	if (__tmp == oldval)						      \
+	  *__memp = newval;						      \
+      }									      \
+    __tmp;								      \
+ })
+
+
+/*
   Debugging:
 
   Because freed chunks may be overwritten with bookkeeping fields, this
@@ -1632,8 +1684,8 @@ typedef struct malloc_chunk *mfastbinptr;
 #define FASTCHUNKS_BIT        (1U)
 
 #define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
-#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
-#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
+#define clear_fastchunks(M)    MALLOC_ATOMIC_OR (&(M)->flags, FASTCHUNKS_BIT)
+#define set_fastchunks(M)      MALLOC_ATOMIC_AND (&(M)->flags, ~FASTCHUNKS_BIT)
 
 /*
    NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
@@ -3334,7 +3386,7 @@ _int_malloc (mstate av, size_t bytes)
           if (victim == NULL)
             break;
         }
-      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
+      while ((pp = MALLOC_ATOMIC_CAS_VAL_ACQ (fb, victim->fd, victim))
              != victim);
       if (victim != 0)
         {
@@ -3903,7 +3955,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 	  old_idx = fastbin_index(chunksize(old));
 	p->fd = old2 = old;
       }
-    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
+    while ((old = MALLOC_ATOMIC_CAS_VAL_REL (fb, p, old2)) != old2);
 
     if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
       {
-- 
1.8.4.2


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]