This is the mail archive of the
libc-ports@sources.redhat.com
mailing list for the libc-ports project.
Re: [PATCH] Optimize libc_lock_lock for MIPS XLP.
- From: Maxim Kuvyrkov <maxim at codesourcery dot com>
- To: Chris Metcalf <cmetcalf at tilera dot com>, "Joseph S. Myers" <joseph at codesourcery dot com>
- Cc: Tom de Vries <Tom_deVries at mentor dot com>, GLIBC Devel <libc-alpha at sourceware dot org>, <libc-ports at sourceware dot org>, Tom de Vries <vries at codesourcery dot com>
- Date: Wed, 15 Aug 2012 09:30:06 +1200
- Subject: Re: [PATCH] Optimize libc_lock_lock for MIPS XLP.
- References: <FC4EF172-B43E-4298-A2E9-681FA28650DB@mentor.com> <4FD9DB74.8080905@tilera.com> <40CBC472-71CC-4FF3-A452-073B76701215@codesourcery.com> <4FDAA190.3050706@tilera.com> <C8A2E1C0-8B9E-4C07-96F6-3F83C2E88C61@codesourcery.com> <15EB7E17-5692-4221-A1B1-FC16EA236BFF@codesourcery.com> <4FEC94AF.40301@tilera.com> <4FF73F75.6060303@mentor.com> <7CC74175-BA9B-4461-8918-9D99DABEC484@codesourcery.com> <502AA7EC.5020406@tilera.com>
On 15/08/2012, at 7:33 AM, Chris Metcalf wrote:
> On 8/14/2012 12:00 AM, Maxim Kuvyrkov wrote:
>> + atomic_compared_and_exchange_acq take the same time to execute.
>
> Typo.
Fixed.
>
>> + if (__result >= 2) \
>> + __result = (atomic_exchange_acq (__futex, 2) != 0); \
>
> Why not just return the old value in memory here (i.e. omit the "!= 0"), as
> you do with the exchange_and_add above? That seems more parallel in
> structure, and also more efficient.
I think you are right here.
The "!= 0" comes from the pattern of how __lll_trylock, __lll_cond_trylock and __lll_robust_trylock are defined. They all use "atomic_compare_and_exchange_val_acq (futex, <value>, 0) != 0", which seems excessive as well.
I've removed the "!= 0" from __libc_lock_trylock and checked the testsuite. Updated patch attached.
Joseph, as the MIPS maintainer, do you have any comments on this patch?
Thank you,
--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics
Optimize __libc_lock_lock and __libc_lock_trylock for MIPS.
* nptl/sysdeps/pthread/bits/libc-lockP.h (__libc_lock_lock)
(__libc_lock_trylock): Allow pre-existing definitions.
ports/
* sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h (__libc_lock_lock)
(__libc_lock_trylock): Define versions optimized for MIPS.
---
nptl/sysdeps/pthread/bits/libc-lockP.h | 10 ++++-
.../unix/sysv/linux/mips/nptl/lowlevellock.h | 39 +++++++++++++++++++-
2 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/nptl/sysdeps/pthread/bits/libc-lockP.h b/nptl/sysdeps/pthread/bits/libc-lockP.h
index 0ebac91..7adaeb4 100644
--- a/nptl/sysdeps/pthread/bits/libc-lockP.h
+++ b/nptl/sysdeps/pthread/bits/libc-lockP.h
@@ -176,9 +176,12 @@ typedef pthread_key_t __libc_key_t;
/* Lock the named lock variable. */
#if !defined NOT_IN_libc || defined IS_IN_libpthread
-# define __libc_lock_lock(NAME) \
+# ifndef __libc_lock_lock
+# define __libc_lock_lock(NAME) \
({ lll_lock (NAME, LLL_PRIVATE); 0; })
+# endif
#else
+# undef __libc_lock_lock
# define __libc_lock_lock(NAME) \
__libc_maybe_call (__pthread_mutex_lock, (&(NAME)), 0)
#endif
@@ -189,9 +192,12 @@ typedef pthread_key_t __libc_key_t;
/* Try to lock the named lock variable. */
#if !defined NOT_IN_libc || defined IS_IN_libpthread
-# define __libc_lock_trylock(NAME) \
+# ifndef __libc_lock_trylock
+# define __libc_lock_trylock(NAME) \
lll_trylock (NAME)
+# endif
#else
+# undef __libc_lock_trylock
# define __libc_lock_trylock(NAME) \
__libc_maybe_call (__pthread_mutex_trylock, (&(NAME)), 0)
#endif
diff --git a/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h b/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h
index 88b601e..d368ae1 100644
--- a/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h
+++ b/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h
@@ -1,5 +1,4 @@
-/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008,
- 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -291,4 +290,40 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
__res; \
})
+/* Implement __libc_lock_lock using exchange_and_add, which expands into
+ a single instruction on XLP processors. We enable this for all MIPS
+ processors as atomic_exchange_and_add_acq and
+ atomic_compare_and_exchange_acq take the same time to execute.
+ This is a simplified expansion of ({ lll_lock (NAME, LLL_PRIVATE); 0; }).
+
+ Note: __lll_lock_wait_private() resets lock value to '2', which prevents
+ unbounded increase of the lock value and [with billions of threads]
+ overflow. */
+#define __libc_lock_lock(NAME) \
+ ({ \
+ int *__futex = &(NAME); \
+ if (__builtin_expect (atomic_exchange_and_add_acq (__futex, 1), 0)) \
+ __lll_lock_wait_private (__futex); \
+ 0; \
+ })
+
+#ifdef _MIPS_ARCH_XLP
+/* The generic version using a single atomic_compare_and_exchange_acq takes
+ less time for non-XLP processors, so we use the version below for XLP only. */
+# define __libc_lock_trylock(NAME) \
+ ({ \
+ int *__futex = &(NAME); \
+ int __result = atomic_exchange_and_add_acq (__futex, 1); \
+ /* If __result == 0, we succeeded in acquiring the lock. \
+ If __result == 1, we switched the lock to 'contended' state, which \
+ will cause a [possibly unnecessary] call to lll_futex_wait. This is \
+ unlikely, so we accept the possible inefficiency. \
+ If __result >= 2, we need to set the lock to 'contended' state to avoid \
+ unbounded increase from subsequent trylocks. */ \
+ if (__result >= 2) \
+ __result = atomic_exchange_acq (__futex, 2); \
+ __result; \
+ })
+#endif
+
#endif /* lowlevellock.h */
--
1.7.4.1