This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
pthread_barrier_wait hangs with NPTL
- From: Sebastien Decugis <sebastien dot decugis at ext dot bull dot net>
- To: libc-alpha at sources dot redhat dot com
- Date: Wed, 18 Feb 2004 12:02:37 +0100
- Subject: pthread_barrier_wait hangs with NPTL
I have also a hang case with NPTL and not with Linuxthreads, with
barriers. It happens at least on i686 and ia64.
This happens with the following sequence:
-----------------
thread_A:
pthread_barrier_init(&b, NULL, 2)
pthread_create(pth, NULL, thread_B, NULL);
pthread_barrier_wait(&b);
pthread_barrier_destroy(&b);
pthread_barrier_init(&b, NULL, 3);
...
thread_B:
pthread_barrier_wait(&b); /* this function never returns with NPTL */
------------------
I have a complete example if someone wants to try it.
After some researchs, it appeared to me that the pthread_barrier_destroy
can return with success error code, while the other threads still need
to read data from the barrier (namely, the curr_event field).
This case happens because:
* thread_A inits the barrier, then enters pthread_create.
* thread_B starts running, and stops in FUTEX_WAIT inside
pthread_barrier_wait.
* thread_A resumes, in enters pthread_barrier_wait, calls FUTEX_WAKE,
but at this point on a uniprocessor, thread_B 'can' resume execution but
in fact thread_A goes on. It leaves pthread_barrier_wait and enters
pthread_barrier_destroy, which succeeds as the barrier seems to be not
used. The the thread_A can dispose of memory used by the barrier, as the
destroy call was successfull. Here we only reinitialize it.
* thread_B resumes, the futex was awaken so it tests for the CURR_EVENT
value, which is garbage, so it can loop and re-enter FUTEX_WAIT. The
thread hangs.
I attached to this email a proposal for a patch I wrote, but this is
incomplete since I only take care of i486 arch. Basically, it consists
of adding another field saying 'busy' in the pthread_barrier_t data.
NOTE: I previously posted this mail on the phil-list mailing list but
got no answer at all...
--
Sébastien DECUGIS
Bull S.A.
Tel: 04 76 29 74 93
diff -Nur libc/nptl/ChangeLog libcnew/nptl/ChangeLog
--- libc/nptl/ChangeLog 2004-02-13 18:03:54.000000000 +0100
+++ libcnew/nptl/ChangeLog 2004-02-13 18:00:10.000000000 +0100
@@ -1,3 +1,17 @@
+2004-02-13 Sebastien Decugis <sebastien.decugis@bull.net>
+
+ * pthread_barrier_init.c: Added busy initialization.
+
+ * pthread_barrier_destroy.c: loop while busy. busy is true
+ as long as there are waiters testing for curr_event value...
+
+ * sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S:
+ Added busy increment and decrement.
+
+ * sysdeps/unix/sysv/linux/internaltypes.h: busy field declaration.
+
+ * sysdeps/unix/sysv/linux/lowlevelbarrier.sym: added BUSY.
+
2004-02-13 Ulrich Drepper <drepper@redhat.com>
* sysdeps/pthread/pthread_cond_timedwait.c
diff -Nur libc/nptl/pthread_barrier_destroy.c libcnew/nptl/pthread_barrier_destroy.c
--- libc/nptl/pthread_barrier_destroy.c 2004-02-13 18:03:54.000000000 +0100
+++ libcnew/nptl/pthread_barrier_destroy.c 2004-02-16 10:12:12.209430792 +0100
@@ -27,18 +27,34 @@
pthread_barrier_t *barrier;
{
struct pthread_barrier *ibarrier;
- int result = EBUSY;
+ int result = -1;
ibarrier = (struct pthread_barrier *) barrier;
+ while (result == -1)
+ {
lll_lock (ibarrier->lock);
if (__builtin_expect (ibarrier->left == ibarrier->init_count, 1))
+ {
/* The barrier is not used anymore. */
- result = 0;
+ if (ibarrier->busy)
+ {
+ lll_unlock (ibarrier->lock);
+ pthread_yield();
+ }
+ else
+ {
+ result = 0;
+ }
+ }
else
+ {
/* Still used, return with an error. */
lll_unlock (ibarrier->lock);
-
+ result = EBUSY;
+ }
+ }
return result;
}
+
diff -Nur libc/nptl/pthread_barrier_init.c libcnew/nptl/pthread_barrier_init.c
--- libc/nptl/pthread_barrier_init.c 2004-02-13 18:03:54.000000000 +0100
+++ libcnew/nptl/pthread_barrier_init.c 2004-02-13 17:54:51.000000000 +0100
@@ -52,6 +52,7 @@
ibarrier->left = count;
ibarrier->init_count = count;
ibarrier->curr_event = 0;
+ ibarrier->busy = 0;
return 0;
}
diff -Nur libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S libcnew/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S
--- libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S 2004-02-13 18:04:20.000000000 +0100
+++ libcnew/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_barrier_wait.S 2004-02-13 17:47:31.000000000 +0100
@@ -50,7 +50,12 @@
/* One less waiter. If this was the last one needed wake
everybody. */
-2: subl $1, LEFT(%ebx)
+#if BUSY == 0
+2: addl $1, (%ebx)
+#else
+2: addl $1, BUSY(%ebx)
+#endif
+ subl $1, LEFT(%ebx)
je 3f
/* There are more threads to come. */
@@ -84,6 +89,29 @@
#endif
je,pn 8b
+ /* acquire the lock before changing BUSY */
+ /* save %edx */
+ pushl %edx
+ /* get the mutex */
+ movl $1, %edx
+ xorl %eax, %eax
+ LOCK
+ cmpxchgl %edx, MUTEX(%ebx)
+ jnz 9f
+ /* now we have the lock, decrease busy */
+#if BUSY == 0
+10: subl $1, (%ebx)
+#else
+10: subl $1, BUSY(%ebx)
+#endif
+ /* and finally unlock */
+ LOCK
+ subl $1, MUTEX(%ebx)
+ jne 11f
+ /* then restore %edx */
+12: popl %edx
+ /* and go on exiting */
+
/* Note: %esi is still zero. */
movl %esi, %eax /* != PTHREAD_BARRIER_SERIAL_THREAD */
@@ -110,6 +138,12 @@
/* Release the mutex. We cannot release the lock before
waking the waiting threads since otherwise a new thread might
arrive and gets waken up, too. */
+#if BUSY == 0
+ subl $1, (%ebx)
+#else
+ subl $1, BUSY(%ebx)
+#endif
+
LOCK
subl $1, MUTEX(%ebx)
jne 4f
@@ -123,10 +157,18 @@
call __lll_mutex_lock_wait
jmp 2b
+9: leal MUTEX(%ebx), %ecx
+ call __lll_mutex_lock_wait
+ jmp 10b
+
4: leal MUTEX(%ebx), %eax
call __lll_mutex_unlock_wake
jmp 5b
+11: leal MUTEX(%ebx), %eax
+ call __lll_mutex_unlock_wake
+ jmp 12b
+
6: leal MUTEX(%ebx), %eax
call __lll_mutex_unlock_wake
jmp 7b
diff -Nur libc/nptl/sysdeps/unix/sysv/linux/internaltypes.h libcnew/nptl/sysdeps/unix/sysv/linux/internaltypes.h
--- libc/nptl/sysdeps/unix/sysv/linux/internaltypes.h 2004-02-13 18:06:03.000000000 +0100
+++ libcnew/nptl/sysdeps/unix/sysv/linux/internaltypes.h 2004-02-13 15:55:05.000000000 +0100
@@ -91,6 +91,7 @@
int lock;
unsigned int left;
unsigned int init_count;
+ unsigned int busy;
};
diff -Nur libc/nptl/sysdeps/unix/sysv/linux/lowlevelbarrier.sym libcnew/nptl/sysdeps/unix/sysv/linux/lowlevelbarrier.sym
--- libc/nptl/sysdeps/unix/sysv/linux/lowlevelbarrier.sym 2004-02-13 18:06:03.000000000 +0100
+++ libcnew/nptl/sysdeps/unix/sysv/linux/lowlevelbarrier.sym 2004-02-13 16:09:46.000000000 +0100
@@ -9,3 +9,4 @@
MUTEX offsetof (struct pthread_barrier, lock)
LEFT offsetof (struct pthread_barrier, left)
INIT_COUNT offsetof (struct pthread_barrier, init_count)
+BUSY offsetof (struct pthread_barrier, busy)