This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: PowerPC: libc single-thread lock optimization
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Date: Mon, 28 Apr 2014 18:40:46 -0300
- Subject: Re: PowerPC: libc single-thread lock optimization
- Authentication-results: sourceware.org; auth=none
- References: <5343F8F1 dot 4000400 at linux dot vnet dot ibm dot com>
Ping.
On 08-04-2014 10:26, Adhemerval Zanella wrote:
> This patch adds a single-thread optimization for libc.so locks used
> within the shared objects. For each lock operation it checks if the
> process has already spawned one thread and, if not, uses non-atomic
> operations. Other libraries (libpthread.so for instance) are unaffected
> by this change.
>
> This is similar to x86_64 optimization on locks and atomics by using the
> __libc_multiple_threads variable.
>
> Tested on powerpc32, powerpc64, and powerpc64le.
>
> Note: for the macro code changes I tried to change the current syntax
> as little as possible.
>
> --
>
> * nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
> (__lll_robust_trylock): Add single-thread lock optimization for calls
> within libc.so.
> * sysdeps/powerpc/bits/atomic.h
> (__arch_compare_and_exchange_val_32_acq): Likewise.
> (__arch_compare_and_exchange_val_32_rel): Likewise.
> (__arch_atomic_exchange_32_acq): Likewise.
> (__arch_atomic_exchange_32_rel): Likewise.
> (__arch_atomic_exchange_and_add_32): Likewise.
> (__arch_atomic_increment_val_32): Likewise.
> (__arch_atomic_decrement_val_32): Likewise.
> (__arch_atomic_decrement_if_positive_32): Likewise.
> * sysdeps/powerpc/powerpc32/bits/atomic.h
> (__arch_compare_and_exchange_bool_32_acq): Likewise.
> (__arch_compare_and_exchange_bool_32_rel): Likewise.
> * sysdeps/powerpc/powerpc64/bits/atomic.h
> (__arch_compare_and_exchange_bool_32_acq): Likewise.
> (__arch_compare_and_exchange_bool_32_rel): Likewise.
> (__arch_compare_and_exchange_bool_64_acq): Likewise.
> (__arch_compare_and_exchange_bool_64_rel): Likewise.
> (__arch_compare_and_exchange_val_64_acq): Likewise.
> (__arch_compare_and_exchange_val_64_rel): Likewise.
> (__arch_atomic_exchange_64_acq): Likewise.
> (__arch_atomic_exchange_64_rel): Likewise.
> (__arch_atomic_exchange_and_add_64): Likewise.
> (__arch_atomic_increment_val_64): Likewise.
> (__arch_atomic_decrement_val_64): Likewise.
> (__arch_atomic_decrement_if_positive_64): Likewise.
>
> ---
>
> diff --git a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
> index ab92c3f..419ee2f 100644
> --- a/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
> +++ b/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
> @@ -205,7 +205,9 @@
> /* Set *futex to ID if it is 0, atomically. Returns the old value */
> #define __lll_robust_trylock(futex, id) \
> ({ int __val; \
> - __asm __volatile ("1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> + "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
> " cmpwi 0,%0,0\n" \
> " bne 2f\n" \
> " stwcx. %3,0,%2\n" \
> @@ -214,6 +216,12 @@
> : "=&r" (__val), "=m" (*futex) \
> : "r" (futex), "r" (id), "m" (*futex) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *futex; \
> + if (__val == 0) \
> + *futex = id; \
> + } \
> __val; \
> })
>
> diff --git a/sysdeps/powerpc/bits/atomic.h b/sysdeps/powerpc/bits/atomic.h
> index 2ffba48..2d31411 100644
> --- a/sysdeps/powerpc/bits/atomic.h
> +++ b/sysdeps/powerpc/bits/atomic.h
> @@ -76,6 +76,10 @@ typedef uintmax_t uatomic_max_t;
> # define MUTEX_HINT_REL
> #endif
>
> +/* Note: SINGLE_THREAD_P is defined either in
> + sysdeps/powerpc/powerpc64/bits/atomic.h or
> + sysdeps/powerpc/powerpc32/bits/atomic.h */
> +
> #define atomic_full_barrier() __asm ("sync" ::: "memory")
> #define atomic_write_barrier() __asm ("eieio" ::: "memory")
>
> @@ -83,7 +87,8 @@ typedef uintmax_t uatomic_max_t;
> ({ \
> __typeof (*(mem)) __tmp; \
> __typeof (mem) __memp = (mem); \
> - __asm __volatile ( \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
> " cmpw %0,%2\n" \
> " bne 2f\n" \
> @@ -93,6 +98,12 @@ typedef uintmax_t uatomic_max_t;
> : "=&r" (__tmp) \
> : "b" (__memp), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = *__memp; \
> + if (__tmp == oldval) \
> + *__memp = newval; \
> + } \
> __tmp; \
> })
>
> @@ -100,7 +111,8 @@ typedef uintmax_t uatomic_max_t;
> ({ \
> __typeof (*(mem)) __tmp; \
> __typeof (mem) __memp = (mem); \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: lwarx %0,0,%1" MUTEX_HINT_REL "\n" \
> " cmpw %0,%2\n" \
> " bne 2f\n" \
> @@ -110,13 +122,20 @@ typedef uintmax_t uatomic_max_t;
> : "=&r" (__tmp) \
> : "b" (__memp), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = *__memp; \
> + if (__tmp == oldval) \
> + *__memp = newval; \
> + } \
> __tmp; \
> })
>
> #define __arch_atomic_exchange_32_acq(mem, value) \
> ({ \
> __typeof (*mem) __val; \
> - __asm __volatile ( \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
> " stwcx. %3,0,%2\n" \
> " bne- 1b\n" \
> @@ -124,64 +143,92 @@ typedef uintmax_t uatomic_max_t;
> : "=&r" (__val), "=m" (*mem) \
> : "b" (mem), "r" (value), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *mem; \
> + *mem = value; \
> + } \
> __val; \
> })
>
> #define __arch_atomic_exchange_32_rel(mem, value) \
> ({ \
> __typeof (*mem) __val; \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: lwarx %0,0,%2" MUTEX_HINT_REL "\n" \
> " stwcx. %3,0,%2\n" \
> " bne- 1b" \
> : "=&r" (__val), "=m" (*mem) \
> : "b" (mem), "r" (value), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *mem; \
> + *mem = value; \
> + } \
> __val; \
> })
>
> #define __arch_atomic_exchange_and_add_32(mem, value) \
> ({ \
> __typeof (*mem) __val, __tmp; \
> - __asm __volatile ("1: lwarx %0,0,%3\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> + "1: lwarx %0,0,%3\n" \
> " add %1,%0,%4\n" \
> " stwcx. %1,0,%3\n" \
> " bne- 1b" \
> : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
> : "b" (mem), "r" (value), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *mem; \
> + *mem += value; \
> + } \
> __val; \
> })
>
> #define __arch_atomic_increment_val_32(mem) \
> ({ \
> __typeof (*(mem)) __val; \
> - __asm __volatile ("1: lwarx %0,0,%2\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> + "1: lwarx %0,0,%2\n" \
> " addi %0,%0,1\n" \
> " stwcx. %0,0,%2\n" \
> " bne- 1b" \
> : "=&b" (__val), "=m" (*mem) \
> : "b" (mem), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + __val = ++(*mem); \
> __val; \
> })
>
> #define __arch_atomic_decrement_val_32(mem) \
> ({ \
> __typeof (*(mem)) __val; \
> - __asm __volatile ("1: lwarx %0,0,%2\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> + "1: lwarx %0,0,%2\n" \
> " subi %0,%0,1\n" \
> " stwcx. %0,0,%2\n" \
> " bne- 1b" \
> : "=&b" (__val), "=m" (*mem) \
> : "b" (mem), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + __val = --(*mem); \
> __val; \
> })
>
> #define __arch_atomic_decrement_if_positive_32(mem) \
> ({ int __val, __tmp; \
> - __asm __volatile ("1: lwarx %0,0,%3\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> + "1: lwarx %0,0,%3\n" \
> " cmpwi 0,%0,0\n" \
> " addi %1,%0,-1\n" \
> " ble 2f\n" \
> @@ -191,6 +238,12 @@ typedef uintmax_t uatomic_max_t;
> : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
> : "b" (mem), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = (*mem); \
> + if (__val > 0) \
> + --(*mem); \
> + } \
> __val; \
> })
>
> diff --git a/sysdeps/powerpc/powerpc32/bits/atomic.h b/sysdeps/powerpc/powerpc32/bits/atomic.h
> index 7613bdc..08043a7 100644
> --- a/sysdeps/powerpc/powerpc32/bits/atomic.h
> +++ b/sysdeps/powerpc/powerpc32/bits/atomic.h
> @@ -17,6 +17,8 @@
> License along with the GNU C Library; if not, see
> <http://www.gnu.org/licenses/>. */
>
> +#include <tls.h>
> +
> /* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction.
> This is a hint to the hardware to expect additional updates adjacent
> to the lock word or not. If we are acquiring a Mutex, the hint
> @@ -33,6 +35,14 @@
> # define MUTEX_HINT_REL
> #endif
>
> +/* Check if the process has created a thread. */
> +#ifndef NOT_IN_libc
> +# define SINGLE_THREAD_P \
> + (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)
> +#else
> +# define SINGLE_THREAD_P 0
> +#endif
> +
> /*
> * The 32-bit exchange_bool is different on powerpc64 because the subf
> * does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
> @@ -42,7 +52,8 @@
> #define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
> ({ \
> unsigned int __tmp; \
> - __asm __volatile ( \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
> " subf. %0,%2,%0\n" \
> " bne 2f\n" \
> @@ -52,13 +63,20 @@
> : "=&r" (__tmp) \
> : "b" (mem), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = !(*mem == oldval); \
> + if (!__tmp) \
> + *mem = newval; \
> + } \
> __tmp != 0; \
> })
>
> #define __arch_compare_and_exchange_bool_32_rel(mem, newval, oldval) \
> ({ \
> unsigned int __tmp; \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: lwarx %0,0,%1" MUTEX_HINT_REL "\n" \
> " subf. %0,%2,%0\n" \
> " bne 2f\n" \
> @@ -68,6 +86,12 @@
> : "=&r" (__tmp) \
> : "b" (mem), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = !(*mem == oldval); \
> + if (!__tmp) \
> + *mem = newval; \
> + } \
> __tmp != 0; \
> })
>
> diff --git a/sysdeps/powerpc/powerpc64/bits/atomic.h b/sysdeps/powerpc/powerpc64/bits/atomic.h
> index 527fe7c..0e2fe98 100644
> --- a/sysdeps/powerpc/powerpc64/bits/atomic.h
> +++ b/sysdeps/powerpc/powerpc64/bits/atomic.h
> @@ -17,6 +17,8 @@
> License along with the GNU C Library; if not, see
> <http://www.gnu.org/licenses/>. */
>
> +#include <tls.h>
> +
> /* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction.
> This is a hint to the hardware to expect additional updates adjacent
> to the lock word or not. If we are acquiring a Mutex, the hint
> @@ -33,6 +35,15 @@
> # define MUTEX_HINT_REL
> #endif
>
> +/* Check if the process has created a thread. The lock optimization is only
> + for locks within libc.so. */
> +#ifndef NOT_IN_libc
> +# define SINGLE_THREAD_P \
> + (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)
> +#else
> +# define SINGLE_THREAD_P 0
> +#endif
> +
> /* The 32-bit exchange_bool is different on powerpc64 because the subf
> does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
> (a load word and zero (high 32) form) load.
> @@ -42,7 +53,8 @@
> #define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
> ({ \
> unsigned int __tmp, __tmp2; \
> - __asm __volatile (" clrldi %1,%1,32\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (" clrldi %1,%1,32\n" \
> "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
> " subf. %0,%1,%0\n" \
> " bne 2f\n" \
> @@ -52,13 +64,20 @@
> : "=&r" (__tmp), "=r" (__tmp2) \
> : "b" (mem), "1" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = !(*mem == oldval); \
> + if (!__tmp) \
> + *mem = newval; \
> + } \
> __tmp != 0; \
> })
>
> #define __arch_compare_and_exchange_bool_32_rel(mem, newval, oldval) \
> ({ \
> unsigned int __tmp, __tmp2; \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> " clrldi %1,%1,32\n" \
> "1: lwarx %0,0,%2" MUTEX_HINT_REL "\n" \
> " subf. %0,%1,%0\n" \
> @@ -69,6 +88,12 @@
> : "=&r" (__tmp), "=r" (__tmp2) \
> : "b" (mem), "1" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = !(*mem == oldval); \
> + if (!__tmp) \
> + *mem = newval; \
> + } \
> __tmp != 0; \
> })
>
> @@ -80,7 +105,8 @@
> #define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \
> ({ \
> unsigned long __tmp; \
> - __asm __volatile ( \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
> " subf. %0,%2,%0\n" \
> " bne 2f\n" \
> @@ -90,13 +116,20 @@
> : "=&r" (__tmp) \
> : "b" (mem), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = !(*mem == oldval); \
> + if (!__tmp) \
> + *mem = newval; \
> + } \
> __tmp != 0; \
> })
>
> #define __arch_compare_and_exchange_bool_64_rel(mem, newval, oldval) \
> ({ \
> unsigned long __tmp; \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \
> " subf. %0,%2,%0\n" \
> " bne 2f\n" \
> @@ -106,6 +139,12 @@
> : "=&r" (__tmp) \
> : "b" (mem), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = !(*mem == oldval); \
> + if (!__tmp) \
> + *mem = newval; \
> + } \
> __tmp != 0; \
> })
>
> @@ -113,7 +152,8 @@
> ({ \
> __typeof (*(mem)) __tmp; \
> __typeof (mem) __memp = (mem); \
> - __asm __volatile ( \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ( \
> "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
> " cmpd %0,%2\n" \
> " bne 2f\n" \
> @@ -123,6 +163,12 @@
> : "=&r" (__tmp) \
> : "b" (__memp), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = *__memp; \
> + if (__tmp == oldval) \
> + *__memp = newval; \
> + } \
> __tmp; \
> })
>
> @@ -130,7 +176,8 @@
> ({ \
> __typeof (*(mem)) __tmp; \
> __typeof (mem) __memp = (mem); \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: ldarx %0,0,%1" MUTEX_HINT_REL "\n" \
> " cmpd %0,%2\n" \
> " bne 2f\n" \
> @@ -140,13 +187,20 @@
> : "=&r" (__tmp) \
> : "b" (__memp), "r" (oldval), "r" (newval) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __tmp = *__memp; \
> + if (__tmp == oldval) \
> + *__memp = newval; \
> + } \
> __tmp; \
> })
>
> #define __arch_atomic_exchange_64_acq(mem, value) \
> ({ \
> __typeof (*mem) __val; \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: ldarx %0,0,%2" MUTEX_HINT_ACQ "\n" \
> " stdcx. %3,0,%2\n" \
> " bne- 1b\n" \
> @@ -154,64 +208,88 @@
> : "=&r" (__val), "=m" (*mem) \
> : "b" (mem), "r" (value), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *mem; \
> + *mem = value; \
> + } \
> __val; \
> })
>
> #define __arch_atomic_exchange_64_rel(mem, value) \
> ({ \
> __typeof (*mem) __val; \
> - __asm __volatile (__ARCH_REL_INSTR "\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile (__ARCH_REL_INSTR "\n" \
> "1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \
> " stdcx. %3,0,%2\n" \
> " bne- 1b" \
> : "=&r" (__val), "=m" (*mem) \
> : "b" (mem), "r" (value), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *mem; \
> + *mem = value; \
> + } \
> __val; \
> })
>
> #define __arch_atomic_exchange_and_add_64(mem, value) \
> ({ \
> __typeof (*mem) __val, __tmp; \
> - __asm __volatile ("1: ldarx %0,0,%3\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ("1: ldarx %0,0,%3\n" \
> " add %1,%0,%4\n" \
> " stdcx. %1,0,%3\n" \
> " bne- 1b" \
> : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
> : "b" (mem), "r" (value), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = *mem; \
> + *mem += value; \
> + } \
> __val; \
> })
>
> #define __arch_atomic_increment_val_64(mem) \
> ({ \
> __typeof (*(mem)) __val; \
> - __asm __volatile ("1: ldarx %0,0,%2\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ("1: ldarx %0,0,%2\n" \
> " addi %0,%0,1\n" \
> " stdcx. %0,0,%2\n" \
> " bne- 1b" \
> : "=&b" (__val), "=m" (*mem) \
> : "b" (mem), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + __val = ++(*mem); \
> __val; \
> })
>
> #define __arch_atomic_decrement_val_64(mem) \
> ({ \
> __typeof (*(mem)) __val; \
> - __asm __volatile ("1: ldarx %0,0,%2\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ("1: ldarx %0,0,%2\n" \
> " subi %0,%0,1\n" \
> " stdcx. %0,0,%2\n" \
> " bne- 1b" \
> : "=&b" (__val), "=m" (*mem) \
> : "b" (mem), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + __val = --(*mem); \
> __val; \
> })
>
> #define __arch_atomic_decrement_if_positive_64(mem) \
> ({ int __val, __tmp; \
> - __asm __volatile ("1: ldarx %0,0,%3\n" \
> + if (!SINGLE_THREAD_P) \
> + __asm __volatile ("1: ldarx %0,0,%3\n" \
> " cmpdi 0,%0,0\n" \
> " addi %1,%0,-1\n" \
> " ble 2f\n" \
> @@ -221,6 +299,12 @@
> : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \
> : "b" (mem), "m" (*mem) \
> : "cr0", "memory"); \
> + else \
> + { \
> + __val = (*mem); \
> + if (__val > 0) \
> + --(*mem); \
> + } \
> __val; \
> })
>