This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Clean up BUSY_WAIT_NOP and atomic_delay.


This patch combines BUSY_WAIT_NOP and atomic_delay into a new
atomic_spin_nop function and adjusts all clients.  The new function is
put into atomic.h because what is best done in a spin loop is
architecture-specific, and atomics must be used for spinning.  The
function name is meant to tell users that this has no effect on
synchronization semantics but is a performance aid for spinning.

Roland, this removes the NaCl definitions of spinning because they
should be primarily architecture-dependent.  I don't know whether you're
doing something really advanced with spinning yet that would be NaCl
specific, but then this would be future work anyway and would likely
involve a custom implementation of a whole spin loop and the
accompanying futex wait call.  If so, we can address this later.

Dave, can you check the sparc bits please?  I believe I just moved code
around, but I can't test this easily.

Tested on x86_64-linux.


2015-06-23  Torvald Riegel  <triegel@redhat.com>

	* sysdeps/unix/sysv/linux/i386/lowlevellock.h (BUSY_WAIT_NOP): Remove.
	* sysdeps/unix/sysv/linux/x86_64/lowlevellock.h (BUSY_WAIT_NOP):
	Likewise.
	* sysdeps/i386/i486/bits/atomic.h (atomic_delay): Rename to
	atomic_spin_nop.
	* sysdeps/x86_64/bits/atomic.h: Likewise.
	* sysdeps/unix/sysv/linux/sparc/lowlevellock.h (BUSY_WAIT_NOP): Rename
	to atomic_spin_nop and move ...
	* sysdeps/sparc/sparc32/sparcv9/bits/atomic.h (atomic_spin_nop):
	... here and ...
	* sysdeps/sparc/sparc64/bits/atomic.h: ... here.
	* nptl/pthread_mutex_lock.c (__pthread_mutex_lock): Use
	atomic_spin_nop instead of BUSY_WAIT_NOP.
	* nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock):
	Likewise.
	* sysdeps/nacl/lll_timedwait_tid.c (__lll_timedwait_tid): Likewise.
	* sysdeps/nacl/lowlevellock.h (BUSY_WAIT_NOP): Remove.
	(lll_wait_tid): Use atomic_spin_nop instead of BUSY_WAIT_NOP.
	* nscd/nscd-client.h (__nscd_acquire_maplock): Use atomic_spin_nop
	instead of atomic_delay.
commit 07a8f937ec0e9d23e8e3da4235b216b18dbc1dfe
Author: Torvald Riegel <triegel@redhat.com>
Date:   Tue Jun 23 15:22:25 2015 +0200

    Clean up BUSY_WAIT_NOP and atomic_delay.
    
    This patch combines BUSY_WAIT_NOP and atomic_delay into a new
    atomic_spin_nop function and adjusts all clients.  The new function is
    put into atomic.h because what is best done in a spin loop is
    architecture-specific, and atomics must be used for spinning.  The
    function name is meant to tell users that this has no effect on
    synchronization semantics but is a performance aid for spinning.

diff --git a/include/atomic.h b/include/atomic.h
index 7fd70c4..221bea0 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -754,9 +754,10 @@ void __atomic_link_error (void);
 
 #endif /* !USE_ATOMIC_COMPILER_BUILTINS  */
 
-
-#ifndef atomic_delay
-# define atomic_delay() do { /* nothing */ } while (0)
+/* This operation does not affect synchronization semantics but can be used
+   in the body of a spin loop to potentially improve its efficiency.  */
+#ifndef atomic_spin_nop
+# define atomic_spin_nop() do { /* nothing */ } while (0)
 #endif
 
 #endif	/* atomic.h */
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 9607512..9a3b466 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -23,6 +23,7 @@
 #include <sys/param.h>
 #include <not-cancel.h>
 #include "pthreadP.h"
+#include <atomic.h>
 #include <lowlevellock.h>
 #include <stap-probe.h>
 
@@ -135,10 +136,7 @@ __pthread_mutex_lock (mutex)
 		  LLL_MUTEX_LOCK (mutex);
 		  break;
 		}
-
-#ifdef BUSY_WAIT_NOP
-	      BUSY_WAIT_NOP;
-#endif
+	      atomic_spin_nop ();
 	    }
 	  while (LLL_MUTEX_TRYLOCK (mutex) != 0);
 
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index 109a46a..f0fb03e 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -22,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/time.h>
 #include "pthreadP.h"
+#include <atomic.h>
 #include <lowlevellock.h>
 #include <not-cancel.h>
 
@@ -125,10 +126,7 @@ pthread_mutex_timedlock (mutex, abstime)
 					  PTHREAD_MUTEX_PSHARED (mutex));
 		  break;
 		}
-
-#ifdef BUSY_WAIT_NOP
-	      BUSY_WAIT_NOP;
-#endif
+	      atomic_spin_nop ();
 	    }
 	  while (lll_trylock (mutex->__data.__lock) != 0);
 
diff --git a/nscd/nscd-client.h b/nscd/nscd-client.h
index 43a8c61..740e2f9 100644
--- a/nscd/nscd-client.h
+++ b/nscd/nscd-client.h
@@ -378,7 +378,7 @@ __nscd_acquire_maplock (volatile struct locked_map_ptr *mapptr)
       if (__glibc_unlikely (++cnt > 5))
 	return false;
 
-      atomic_delay ();
+      atomic_spin_nop ();
     }
 
   return true;
diff --git a/sysdeps/i386/i486/bits/atomic.h b/sysdeps/i386/i486/bits/atomic.h
index 59165be..59f3d34 100644
--- a/sysdeps/i386/i486/bits/atomic.h
+++ b/sysdeps/i386/i486/bits/atomic.h
@@ -479,7 +479,7 @@ typedef uintmax_t uatomic_max_t;
      __result; })
 
 
-#define atomic_delay() asm ("rep; nop")
+#define atomic_spin_nop() asm ("rep; nop")
 
 
 #define __arch_and_body(lock, mem, mask) \
diff --git a/sysdeps/nacl/lll_timedwait_tid.c b/sysdeps/nacl/lll_timedwait_tid.c
index ecaf0b1..ef544cf 100644
--- a/sysdeps/nacl/lll_timedwait_tid.c
+++ b/sysdeps/nacl/lll_timedwait_tid.c
@@ -40,7 +40,7 @@ __lll_timedwait_tid (int *tidp, const struct timespec *abstime)
 	   finish quick enough that the timeout doesn't matter.  If any
 	   thread ever stays in this state for long, there is something
 	   catastrophically wrong.  */
-	BUSY_WAIT_NOP;
+	atomic_spin_nop ();
       else
 	{
 	  assert (tid > 0);
diff --git a/sysdeps/nacl/lowlevellock.h b/sysdeps/nacl/lowlevellock.h
index 0b85d8d..3634f19 100644
--- a/sysdeps/nacl/lowlevellock.h
+++ b/sysdeps/nacl/lowlevellock.h
@@ -21,10 +21,6 @@
 /* Everything except the exit handling is the same as the generic code.  */
 # include <sysdeps/nptl/lowlevellock.h>
 
-# ifndef BUSY_WAIT_NOP
-#  define BUSY_WAIT_NOP		__sync_synchronize ()
-# endif
-
 /* See exit-thread.h for details.  */
 # define NACL_EXITING_TID	1
 
@@ -36,7 +32,7 @@
     while ((__tid = atomic_load_relaxed (__tidp)) != 0) \
       {							\
 	if (__tid == NACL_EXITING_TID)			\
-	  BUSY_WAIT_NOP;				\
+	  atomic_spin_nop ();				\
 	else						\
 	  lll_futex_wait (__tidp, __tid, LLL_PRIVATE);	\
       }							\
diff --git a/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h b/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h
index 317be62..2122afb 100644
--- a/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h
+++ b/sysdeps/sparc/sparc32/sparcv9/bits/atomic.h
@@ -100,3 +100,6 @@ typedef uintmax_t uatomic_max_t;
   __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory")
 #define atomic_write_barrier() \
   __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory")
+
+extern void __cpu_relax (void);
+#define atomic_spin_nop () __cpu_relax ()
diff --git a/sysdeps/sparc/sparc64/bits/atomic.h b/sysdeps/sparc/sparc64/bits/atomic.h
index 35804a8..48b7fd6 100644
--- a/sysdeps/sparc/sparc64/bits/atomic.h
+++ b/sysdeps/sparc/sparc64/bits/atomic.h
@@ -121,3 +121,6 @@ typedef uintmax_t uatomic_max_t;
   __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory")
 #define atomic_write_barrier() \
   __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory")
+
+extern void __cpu_relax (void);
+#define atomic_spin_nop () __cpu_relax ()
diff --git a/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/sysdeps/unix/sysv/linux/i386/lowlevellock.h
index f57afc6..58f5638 100644
--- a/sysdeps/unix/sysv/linux/i386/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/i386/lowlevellock.h
@@ -58,10 +58,6 @@
 #define LLL_LOCK_INITIALIZER_WAITERS	(2)
 
 
-/* Delay in spinlock loop.  */
-#define BUSY_WAIT_NOP	asm ("rep; nop")
-
-
 /* NB: in the lll_trylock macro we simply return the value in %eax
    after the cmpxchg instruction.  In case the operation succeded this
    value is zero.  In case the operation failed, the cmpxchg instruction
diff --git a/sysdeps/unix/sysv/linux/sparc/lowlevellock.h b/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
index 9aefd9e..7608c01 100644
--- a/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
@@ -25,12 +25,6 @@
 #include <atomic.h>
 #include <kernel-features.h>
 
-#ifndef __sparc32_atomic_do_lock
-/* Delay in spinlock loop.  */
-extern void __cpu_relax (void);
-#define BUSY_WAIT_NOP	__cpu_relax ()
-#endif
-
 #include <lowlevellock-futex.h>
 
 static inline int
diff --git a/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
index 573b48c..de525cd 100644
--- a/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
@@ -57,9 +57,6 @@
 #define LLL_LOCK_INITIALIZER_LOCKED	(1)
 #define LLL_LOCK_INITIALIZER_WAITERS	(2)
 
-/* Delay in spinlock loop.  */
-#define BUSY_WAIT_NOP	  asm ("rep; nop")
-
 
 /* NB: in the lll_trylock macro we simply return the value in %eax
    after the cmpxchg instruction.  In case the operation succeded this
diff --git a/sysdeps/x86_64/bits/atomic.h b/sysdeps/x86_64/bits/atomic.h
index 203d92c..337b334 100644
--- a/sysdeps/x86_64/bits/atomic.h
+++ b/sysdeps/x86_64/bits/atomic.h
@@ -410,7 +410,7 @@ typedef uintmax_t uatomic_max_t;
      __result; })
 
 
-#define atomic_delay() asm ("rep; nop")
+#define atomic_spin_nop() asm ("rep; nop")
 
 
 #define __arch_and_body(lock, mem, mask) \

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]