[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]
NPTL: pthread_cond_timedwait hang or mutex_lock hang
- From: "srikrish" <srikrish in ibm com>
- To: <phil-list redhat com>
- Subject: NPTL: pthread_cond_timedwait hang or mutex_lock hang
- Date: Fri, 31 Oct 2003 19:27:56 +0530
__lll_mutex_lock_wait () from /lib/tls/libpthread.so.0 never returns.
Hardware Environment: IA32 bit machine, Architecture:i686
Software Environment:
RHEL 3.0 GOLD. Kernel:2.4.21-4.ELsmp, glibc-2.3.2-95.3, nptl
version: /lib/tls/libpthread-0.60.so
This problem happens on all three RHEL 3.0 variants (AS/WS/ES), but most
frequently on AS.
Steps to Reproduce:
1. The main thread creates 2 threads (waitThread and sleepThread), cancels
them, and then calls pthread_cond_timedwait() to be woken up by them.
2. waitThread also uses a pthread condition, but a different condition variable.
waitThread calls pthread_cond_wait() until it gets cancelled.
Once it is cancelled, the waitThread_cleanup routine is invoked.
In the cleanup routine, it calls pthread_cond_broadcast() to wake up the main
thread.
3. sleepThread sleeps until it gets cancelled.
Once it is cancelled, the sleepThread_cleanup routine is invoked.
In the cleanup routine, it also calls pthread_cond_broadcast() to wake up the
main thread.
Actual Results: Two different symptoms
1) The main thread hangs because pthread_cond_timedwait() is never woken up.
This is gdb stack trace
(gdb) thread 2
[Switching to thread 2 (Thread -1220404304 (LWP 3538))]#0 0xb75ebc32 in _dl_sysinfo_int80 () from /lib/ld-linux.so.2
(gdb) where
#0 0xb75ebc32 in _dl_sysinfo_int80 () from /lib/ld-linux.so.2
#1 0xb75d067b in __lll_mutex_lock_wait () from /lib/tls/libpthread.so.0
#2 0x00000dd2 in ?? ()
#3 0x0804b6e0 in ?? ()
#4 0x0804b3fc in sleepcnt ()
#5 0xb75d23a8 in _L_mutex_cond_lock_28 () from /lib/tls/libpthread.so.0
#6 0x0804b6e0 in ?? ()
2) Or, all threads hang while trying to get a lock.
Expected Results:
The main thread is woken up and ends normally.
It works on all other Linux distros, including RH9, for more than 100 loops.
It also works on RHEL 3 in the old LinuxThreads mode (which is selected by
setting LD_ASSUME_KERNEL).
Additional Information:
This is a test program (mtcond).
It's compiled with gcc 2.95.3.
There are 3 command-line options to run mtcond.
usage: mtcond -l<loopcnt> [-c] [-s sleepcnt]
loopcnt >= 1 : Since the problem sometimes doesn't happen in the 1st loop,
the test needs to run in a loop.
-c : causes waitThread to wait in pthread_cond_wait().
Without the -c option, waitThread just sleeps, and there is no problem.
-s : you may ignore this option.
The typical usage of mtcond to regenerate this problem is :
> mtcond -l100 -c
/***************************************************************************
* FILENAME mtcond.c
***************************************************************************/
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <assert.h>
/*
 * Lifecycle states stored in thread_status_t.status:
 *   INIT     - set by loop() before the thread is created
 *   CREATED  - set by the thread itself when it starts
 *   CANCELED - set by loop() right after pthread_cancel()
 *   ENDED    - set by the thread's cleanup handler
 */
#define THREAD2_INIT 0
#define THREAD2_CREATED 1
#define THREAD2_CANCELED 2
#define THREAD2_ENDED 3
/* Set to 1 by the -c option: waitThread blocks in pthread_cond_wait() instead of sleeping. */
static int condmode = 0;
/* Number of create/cancel/join iterations run by loop() (-l option). */
static int loopcnt = 10;
/* Extra seconds loop() sleeps after its broadcast to waitThread (-s option); 0 means none. */
static int sleepcnt = 0;
/* Mutex taken by loop(), waitThread and both cleanup handlers around status updates and condition waits. */
static pthread_mutex_t thread2_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast by the cleanup handlers; loop() waits on it with pthread_cond_timedwait(). Initialised in loop(). */
static pthread_cond_t thread_end_cv;
/*
 * Bookkeeping record shared between loop() and one worker thread
 * (waitThread or sleepThread) and passed to the worker's cleanup handler.
 */
typedef struct thread_status
{
    int status;           /* one of the THREAD2_* lifecycle values */
    pthread_t tid;        /* stored by the worker itself from pthread_self() */
    pthread_cond_t cond;  /* condition variable waitThread blocks on in -c mode */
    int flag;             /* written by loop() (1 = wait, 0 = sleep); never read in this file */
    int locked;           /* non-zero while the worker records itself as holding thread2_mutex */
} thread_status_t;
/*
 * Cancellation cleanup handler for waitThread.
 *
 * ptr points at the thread_status_t of the cancelled thread.  The handler
 * marks the thread THREAD2_ENDED while holding thread2_mutex and broadcasts
 * thread_end_cv so that loop() can leave its pthread_cond_timedwait() loop.
 *
 * If the `locked` flag says the thread owned thread2_mutex when it was
 * cancelled, the mutex is released first and then taken again.
 * NOTE(review): the unlock-then-relock looks redundant, but this program is
 * a reproducer and the exact pthread call sequence may matter; do not
 * simplify it without re-validating the test.
 */
void waitThread_cleanup(void *ptr)
{
    int rc;
    thread_status_t *pStatus = (thread_status_t *)ptr;
    /*
     * pthread_t is not an int, so printing it with %d is a format/argument
     * mismatch.  The cast assumes pthread_t is an integer type, as it is on
     * the glibc/NPTL systems this test targets.
     */
    unsigned long id = (unsigned long)pStatus->tid;

    fprintf(stderr, " [WAIT_CLEANUP][%lu]: Clean up is called\n", id);
    if (pStatus->locked)
    {
        fprintf(stderr, " [WAIT_CLEANUP][%lu] releasing previous lock\n", id);
        pStatus->locked = 0;
        rc = pthread_mutex_unlock(&thread2_mutex);
        assert(rc == 0);
        fprintf(stderr, " [WAIT_CLEANUP][%lu] released previous lock\n", id);
    }

    fprintf(stderr, " [WAIT_CLEANUP][%lu] waiting lock\n", id);
    rc = pthread_mutex_lock(&thread2_mutex);
    assert(rc == 0);
    pStatus->locked = 1;
    fprintf(stderr, " [WAIT_CLEANUP][%lu] got_lock\n", id);

    /* Publish the final state under the mutex, then wake loop(). */
    pStatus->status = THREAD2_ENDED;
    fprintf(stderr, " [WAIT_CLEANUP][%lu] sending cond_broadcast\n", id);
    rc = pthread_cond_broadcast(&thread_end_cv);
    assert(rc == 0);

    fprintf(stderr, " [WAIT_CLEANUP][%lu] releasing lock\n", id);
    rc = pthread_mutex_unlock(&thread2_mutex);
    assert(rc == 0);
    pStatus->locked = 0;
    fprintf(stderr, " [WAIT_CLEANUP][%lu] released lock\n", id);
    fprintf(stderr, " [WAIT_CLEANUP][%lu] Clean up is done\n", id);
}
/*
 * Cancellation cleanup handler for sleepThread.
 *
 * ptr points at the thread_status_t of the cancelled thread.  The handler
 * takes thread2_mutex (unless the `locked` flag says it is already held),
 * marks the thread THREAD2_ENDED, broadcasts thread_end_cv so that loop()
 * can leave its pthread_cond_timedwait() loop, and releases the mutex.
 */
void sleepThread_cleanup(void *ptr)
{
    int rc;
    thread_status_t *pStatus = (thread_status_t *)ptr;
    /*
     * pthread_t is not an int, so printing it with %d is a format/argument
     * mismatch.  The cast assumes pthread_t is an integer type, as it is on
     * the glibc/NPTL systems this test targets.
     */
    unsigned long id = (unsigned long)pStatus->tid;

    fprintf(stderr, " [SLEEP_CLEANUP][%lu]: Clean up is called\n", id);
    if (!pStatus->locked)
    {
        fprintf(stderr, " [SLEEP_CLEANUP][%lu] waiting lock\n", id);
        rc = pthread_mutex_lock(&thread2_mutex);
        assert(rc == 0);
        pStatus->locked = 1;
        fprintf(stderr, " [SLEEP_CLEANUP][%lu] got_lock\n", id);
    }

    /* Publish the final state under the mutex, then wake loop(). */
    pStatus->status = THREAD2_ENDED;
    fprintf(stderr, " [SLEEP_CLEANUP][%lu] sending cond_broadcast\n", id);
    rc = pthread_cond_broadcast(&thread_end_cv);
    assert(rc == 0);

    fprintf(stderr, " [SLEEP_CLEANUP][%lu] releasing lock\n", id);
    rc = pthread_mutex_unlock(&thread2_mutex);
    assert(rc == 0);
    pStatus->locked = 0;
    fprintf(stderr, " [SLEEP_CLEANUP][%lu] released lock\n", id);
    fprintf(stderr, " [SLEEP_CLEANUP][%lu]: Clean up is done\n", id);
}
/*
 * Thread start routine: sleeps in one-second steps until it is cancelled.
 *
 * status points at this thread's thread_status_t.  The thread records its
 * own id, marks itself THREAD2_CREATED and installs sleepThread_cleanup as
 * its cancellation cleanup handler.  The loop never ends on its own, so the
 * trailing pop/return only satisfy the push/pop pairing and the signature.
 */
void * sleepThread (void *status)
{
    thread_status_t *tstatus = (thread_status_t *)status;

    tstatus->tid = pthread_self ();
    /*
     * pthread_t is not an int, so it must not be printed with %d.  The cast
     * assumes pthread_t is an integer type, as on glibc/NPTL.
     */
    fprintf (stderr, " [SLEEP][%lu]: sleepThread startup \n",
             (unsigned long)tstatus->tid);
    tstatus->status = THREAD2_CREATED;

    pthread_cleanup_push(sleepThread_cleanup, tstatus);
    for (;;)
    {
        sleep(1);
        /* Explicit cancellation point after each sleep. */
        pthread_testcancel();
    }
    pthread_cleanup_pop(0);
    return status;
}
/*
 * Thread start routine: waits until it is cancelled.
 *
 * status points at this thread's thread_status_t.  The thread records its
 * own id, marks itself THREAD2_CREATED and installs waitThread_cleanup as
 * its cancellation cleanup handler.
 *
 * With condmode set it repeatedly takes thread2_mutex, blocks in
 * pthread_cond_wait() on its own condition variable and releases the mutex
 * after each wake-up.  The `locked` field is kept in step with mutex
 * ownership so the cleanup handler can tell whether the mutex is held.
 * Without condmode it just sleeps in one-second steps.
 *
 * The loop never ends on its own, so the trailing pop/return only satisfy
 * the push/pop pairing and the signature.
 */
void * waitThread (void *status)
{
    int rc;
    thread_status_t *tstatus = (thread_status_t *)status;
    unsigned long id;

    tstatus->tid = pthread_self ();
    /*
     * pthread_t is not an int, so it must not be printed with %d.  The cast
     * assumes pthread_t is an integer type, as on glibc/NPTL.
     */
    id = (unsigned long)tstatus->tid;
    fprintf (stderr, " [WAIT][%lu]: waitThread startup \n", id);
    tstatus->status = THREAD2_CREATED;

    pthread_cleanup_push(waitThread_cleanup, tstatus);
    for (;;)
    {
        if (condmode)
        {
            fprintf(stderr, " [WAIT][%lu] waiting lock\n", id);
            rc = pthread_mutex_lock(&thread2_mutex);
            assert(rc == 0);
            tstatus->locked = 1;
            fprintf(stderr, " [WAIT][%lu] got_lock\n", id);

            fprintf(stderr, " [WAIT][%lu]:call cond_wait\n", id);
            rc = pthread_cond_wait(&tstatus->cond, &thread2_mutex);
            assert(rc == 0);
            fprintf(stderr, " [WAIT][%lu]:cond wake up\n", id);

            /* Clear the flag before unlocking so it never claims a lock we gave up. */
            tstatus->locked = 0;
            rc = pthread_mutex_unlock(&thread2_mutex);
            assert(rc == 0);
            fprintf(stderr, " WAIT[%lu] release_lock\n", id);
        }
        else
        {
            sleep(1);
            /* Explicit cancellation point after each sleep. */
            pthread_testcancel();
        }
    }
    pthread_cleanup_pop(0);
    return status;
}
#define WAIT_TIME_SECONDS 1
static void
loop ()
{
int i,j, rc;
pthread_t tid;
pthread_t wait_tid, sleep_tid;
void *str[2];
int old_state; /* Former thread cancellation state
*/
thread_status_t wait_status, sleep_status;
struct timespec ts;
struct timeval tp;
pthread_attr_t thread_attr;
tid = pthread_self ();
fprintf (stderr, "[%d]: loopThread startup \n", tid);
sleep(1);
rc = pthread_cond_init(&thread_end_cv, NULL);
assert(rc==0);
for (i = 0; i < loopcnt; i++)
{
fprintf(stderr, "###################################\n");
fprintf(stderr, "# %d-th loop start\n", i);
fprintf(stderr, "###################################\n");
rc = pthread_attr_init (&thread_attr);
assert(rc==0);
wait_status.status = THREAD2_INIT;
wait_status.flag = 1;
wait_status.locked = 0;
rc = pthread_cond_init( &wait_status.cond, NULL);
assert(rc==0);
rc = pthread_create (&wait_tid, &thread_attr, waitThread,
&wait_status);
if (rc)
{
fprintf (stderr, "[%d]: pthread_create fail (errno=%d)\n",
tid,errno);
break;
}
sleep(1);
sleep_status.status = THREAD2_INIT;
sleep_status.flag = 0;
sleep_status.locked = 0;
rc = pthread_create (&sleep_tid, &thread_attr, sleepThread,
&sleep_status);
if (rc)
{
fprintf (stderr, "[%d]: pthread_create fail (errno=%d)\n",
tid,errno);
break;
}
(void) pthread_attr_destroy(&thread_attr);
sleep (2);
if(condmode)
{
fprintf(stderr, "[LOOP][%d] calling cond_broadcast to
WAIT_THREAD\n", tid);
rc = pthread_cond_broadcast(&wait_status.cond);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] called cond_broadcast to WAIT_THREAD\n",
tid);
if(sleepcnt)
sleep(sleepcnt);
}
while(sleep_status.status != THREAD2_CREATED) sleep(1);
fprintf(stderr, "[LOOP][%d] waiting lock before cancen
sleepThread\n",tid);
rc = pthread_mutex_lock(&thread2_mutex);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] got_lock\n", tid);
fprintf (stderr, "[LOOP]canceling SLEEP_THREAD [%d] \n", sleep_tid);
rc = pthread_cancel (sleep_tid);
assert(rc == 0);
sleep_status.status = THREAD2_CANCELED;
fprintf(stderr, "[LOOP][%d] releasing lock after cancel
sleepThread\n", tid);
rc = pthread_mutex_unlock(&thread2_mutex);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] released lock\n", tid);
fprintf(stderr, "[LOOP][%d] waiting lock before cancen waitThread\n",
tid);
rc = pthread_mutex_lock(&thread2_mutex);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] got_lock\n", tid);
while(wait_status.status != THREAD2_CREATED) sleep(1);
fprintf (stderr, "[LOOP]canceling WAIT_THREAD [%d] \n", wait_tid);
rc = pthread_cancel (wait_tid);
assert(rc == 0);
wait_status.status = THREAD2_CANCELED;
fprintf(stderr, "[LOOP][%d] releasing lock after cancel waitThread\n",
tid);
rc = pthread_mutex_unlock(&thread2_mutex);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] released lock\n", tid);
fprintf(stderr, "[LOOP][%d] waiting lock before go into timedwait
loop\n", tid);
rc = pthread_mutex_lock(&thread2_mutex);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] got_lock\n", tid);
while(wait_status.status == THREAD2_CANCELED || sleep_status.status ==
THREAD2_CANCELED)
{
/* Usually worker threads will loop on these operations */
rc = gettimeofday(&tp, NULL);
/* Convert from timeval to timespec */
ts.tv_sec = tp.tv_sec;
ts.tv_nsec = tp.tv_usec * 1000;
ts.tv_sec += WAIT_TIME_SECONDS;
fprintf(stderr, "[LOOP]waiting in cond_timewait\n");
rc = pthread_cond_timedwait(&thread_end_cv, &thread2_mutex,
&ts);
fprintf(stderr, "[LOOP]cond_timewait return (rc=%d)\n", rc);
}
fprintf(stderr, "[LOOP][%d] releasing lock\n", tid);
rc = pthread_mutex_unlock(&thread2_mutex);
assert(rc==0);
fprintf(stderr, "[LOOP][%d] released lock\n", tid);
sleep (1);
rc = pthread_join (sleep_tid, &str[1]);
if (rc)
{
fprintf (stderr, "[%d]: pthread_join fail (errno=%d)\n", tid,
errno);
break;
}
fprintf(stderr, "SLEEP_THREAD is joined\n");
rc = pthread_join (wait_tid, &str[0]);
if (rc)
{
fprintf (stderr, "[%d]: pthread_join fail (errno=%d)\n", tid,
errno);
break;
}
fprintf(stderr, "WAIT_THREAD is joined\n");
rc = pthread_cond_destroy( &wait_status.cond);
assert(rc == 0);
sleep(1);
}
rc = pthread_cond_destroy( &thread_end_cv);
return;
}
/* Print the command-line synopsis to stderr. */
static void
usage (const char *prog)
{
    fprintf (stderr, "usage: %s -l<loopcnt> [-c] [-s sleepcnt]\n", prog);
}

/*
 * Parse a decimal option argument.
 * Stores the value in *out and returns 0 when text is a complete number in
 * [min_value, INT_MAX]; returns -1 (leaving *out untouched) otherwise.
 */
static int
parse_count (const char *text, int min_value, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol (text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE
        || value < min_value || value > INT_MAX)
        return -1;
    *out = (int) value;
    return 0;
}

/*
 * Entry point: parses the options and runs the test loop.
 *   -l<loopcnt>  number of iterations, at least 1 (default 10)
 *   -c           make waitThread block in pthread_cond_wait()
 *   -s sleepcnt  seconds to sleep after the broadcast to waitThread
 * Returns EXIT_SUCCESS after loop() finishes, EXIT_FAILURE on a bad option.
 */
int
main (int argc, char *argv[])
{
    int c;

    /* POSIX getopt() signals the end of the options with -1. */
    while ((c = getopt (argc, argv, "l:cs:")) != -1)
    {
        switch (c)
        {
        case 's':
            /* A negative value would turn into a huge unsigned sleep(). */
            if (parse_count (optarg, 0, &sleepcnt) != 0)
            {
                usage (argv[0]);
                return EXIT_FAILURE;
            }
            break;
        case 'c':
            condmode = 1;
            break;
        case 'l':
            if (parse_count (optarg, 1, &loopcnt) != 0)
            {
                usage (argv[0]);
                return EXIT_FAILURE;
            }
            break;
        default:
            /* Unknown option or missing argument; getopt already printed why. */
            usage (argv[0]);
            return EXIT_FAILURE;
        }
    }
    loop();
    return EXIT_SUCCESS;
}
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]