[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

NPTL: pthread_cond_timedwait hang or mutex_lock hang



__lll_mutex_lock_wait () from /lib/tls/libpthread.so.0 never returns.
Hardware Environment: IA32 bit machine, Architecture:i686
Software Environment:
RHEL 3.0 GOLD. Kernel:2.4.21-4.ELsmp, glibc-2.3.2-95.3, nptl
version: /lib/tls/libpthread-0.60.so
This problem happens on all 3 RHEL 3.0 (AS/WS/ES), but most frequently on
AS.
Steps to Reproduce:
1. Create 2 threads (waitThread and sleepThread), cancel them, and have the
main thread call pthread_cond_timedwait() to be woken up when they end.
2. waitThread also uses a pthread condition, but on a different condition
   variable: it calls pthread_cond_wait() until it gets cancelled.
   Once it is cancelled, the waitThread_cleanup routine is invoked.
   In the cleanup routine, it calls pthread_cond_broadcast() to wake up the
   main thread.
3. sleepThread sleeps until it gets cancelled.
   Once it is cancelled, the sleepThread_cleanup routine is invoked.
   In the cleanup routine, it also calls pthread_cond_broadcast() to wake up
   the main thread.

Actual Results: Two different symptoms
1) The main thread hangs because pthread_cond_timedwait() is never woken up.
 This is the gdb stack trace:
(gdb) thread 2
[Switching to thread 2 (Thread -1220404304 (LWP 3538))]#0  0xb75ebc32 in _dl_sysinfo_int80 () from /lib/ld-linux.so.2
(gdb) where
#0  0xb75ebc32 in _dl_sysinfo_int80 () from /lib/ld-linux.so.2
#1  0xb75d067b in __lll_mutex_lock_wait () from /lib/tls/libpthread.so.0
#2  0x00000dd2 in ?? ()
#3  0x0804b6e0 in ?? ()
#4  0x0804b3fc in sleepcnt ()
#5  0xb75d23a8 in _L_mutex_cond_lock_28 () from /lib/tls/libpthread.so.0
#6  0x0804b6e0 in ?? ()

2) Or all threads hang trying to acquire a lock.

Expected Results:
The main thread is woken up and ends normally.
It works on all other Linux distros, including RH9, for more than 100 loops.
It also works on RHEL 3, but only in the old LinuxThreads mode (which is
selected by setting LD_ASSUME_KERNEL).

Additional Information:
This is a test program (mtcond).
It's compiled with gcc 2.95.3.
There are 3 command line options to run mtcond.
usage: mtcond -l<loopcnt> [-c] [-s sleepcnt]
       loopcnt >=1 : Since the problem sometimes doesn't happen in the 1st
                     loop, the test needs to run in a loop
       -c : makes waitThread wait in pthread_cond_wait();
            without the -c option, waitThread just sleeps, and there is no problem.
       -s : you may ignore this option.

The typical usage of mtcond to reproduce this problem is:
> mtcond -l100 -c

/***************************************************************************
 * FILENAME   mtcond.c

***************************************************************************/
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <assert.h>


/*
 * Lifecycle states stored in thread_status_t.status:
 *   THREAD2_INIT     - written by loop() before the thread is created
 *   THREAD2_CREATED  - written by the worker thread itself at startup
 *   THREAD2_CANCELED - written by loop() right after pthread_cancel()
 *   THREAD2_ENDED    - written by the worker's cleanup handler
 */
#define THREAD2_INIT     0
#define THREAD2_CREATED  1
#define THREAD2_CANCELED 2
#define THREAD2_ENDED    3

/* Set to 1 by the -c option; waitThread then blocks in pthread_cond_wait(). */
static int condmode = 0;
/* Number of iterations executed by loop(); overridden by -l. */
static int loopcnt = 10;
/* Extra seconds loop() sleeps after its broadcast in condmode; set by -s. */
static int sleepcnt = 0;
/* Mutex shared by the main thread, both workers and both cleanup handlers. */
static pthread_mutex_t thread2_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast by the cleanup handlers; loop() waits on it with
   pthread_cond_timedwait().  Initialized and destroyed inside loop(). */
static pthread_cond_t thread_end_cv;
/* Per-worker bookkeeping, passed as the thread start argument. */
typedef struct thread_status {
    int status;            /* one of the THREAD2_* states above */
    pthread_t tid;         /* filled in by the worker with pthread_self() */
    pthread_cond_t cond;   /* waited on by waitThread in condmode; loop() only
                              initializes it for the wait thread */
    int flag;              /* set by loop() (1 = wait thread, 0 = sleep thread);
                              not read by the code shown here */
    int locked;            /* nonzero while the worker records that it holds
                              thread2_mutex */
}thread_status_t;

/*
 * Cancellation cleanup handler installed by waitThread.
 *
 * ptr points to the thread_status_t of the cancelled thread.
 *
 * If the thread has recorded that it holds thread2_mutex (pStatus->locked,
 * e.g. when it was cancelled while blocked in pthread_cond_wait(), which
 * re-acquires the mutex before cleanup handlers run), the mutex is released
 * first.  The handler then takes the mutex again, marks the thread as
 * THREAD2_ENDED, broadcasts thread_end_cv so the main thread can leave its
 * timed wait, and finally unlocks.  The unlock/relock sequence is kept
 * exactly as in the original reproducer.
 */
void waitThread_cleanup(void *ptr)
{
    int rc;
    thread_status_t *pStatus = (thread_status_t *)ptr;
    /* pthread_t is not an int, so it must not be passed to %d; print it
       through an explicit unsigned long conversion instead. */
    unsigned long tid = (unsigned long)pStatus->tid;

    fprintf(stderr, "   [WAIT_CLEANUP][%lu]: Clean up is called\n", tid);
    if(pStatus->locked)
    {
        fprintf(stderr, "    [WAIT_CLEANUP][%lu] releasing previous lock\n", tid);
        pStatus->locked = 0;
        rc = pthread_mutex_unlock(&thread2_mutex);
        assert(rc==0);
        fprintf(stderr, "    [WAIT_CLEANUP][%lu] released previous lock\n", tid);
    }
    fprintf(stderr, "    [WAIT_CLEANUP][%lu] waiting lock\n", tid);
    rc = pthread_mutex_lock(&thread2_mutex);
    assert(rc==0);
    pStatus->locked = 1;
    fprintf(stderr, "    [WAIT_CLEANUP][%lu] got_lock\n", tid);
    /* Status is updated under the mutex so the main thread's predicate
       check in its timed-wait loop sees a consistent value. */
    pStatus->status = THREAD2_ENDED;
    fprintf(stderr, "    [WAIT_CLEANUP][%lu] sending cond_broadcast\n", tid);
    rc = pthread_cond_broadcast(&thread_end_cv);
    assert(rc==0);
    fprintf(stderr, "    [WAIT_CLEANUP][%lu] releasing lock\n", tid);
    rc = pthread_mutex_unlock(&thread2_mutex);
    assert(rc==0);
    pStatus->locked = 0;
    fprintf(stderr, "    [WAIT_CLEANUP][%lu] released lock\n", tid);
    fprintf(stderr, "    [WAIT_CLEANUP][%lu] Clean up is done\n", tid);
}
/*
 * Cancellation cleanup handler installed by sleepThread.
 *
 * ptr points to the thread_status_t of the cancelled thread.
 *
 * Acquires thread2_mutex unless the thread has recorded that it already
 * holds it (pStatus->locked), marks the thread as THREAD2_ENDED, broadcasts
 * thread_end_cv so the main thread can leave its timed wait, and unlocks.
 */
void sleepThread_cleanup(void *ptr)
{
    int rc;
    thread_status_t *pStatus = (thread_status_t *)ptr;
    /* pthread_t is not an int, so it must not be passed to %d; print it
       through an explicit unsigned long conversion instead. */
    unsigned long tid = (unsigned long)pStatus->tid;

    fprintf(stderr, "    [SLEEP_CLEANUP][%lu]: Clean up is called\n", tid);
    if(!pStatus->locked)
    {
        fprintf(stderr, "    [SLEEP_CLEANUP][%lu] waiting lock\n", tid);
        rc = pthread_mutex_lock(&thread2_mutex);
        assert(rc==0);
        pStatus->locked = 1;
        fprintf(stderr, "    [SLEEP_CLEANUP][%lu] got_lock\n", tid);
    }
    /* Status is updated under the mutex so the main thread's predicate
       check in its timed-wait loop sees a consistent value. */
    pStatus->status = THREAD2_ENDED;
    fprintf(stderr, "    [SLEEP_CLEANUP][%lu] sending cond_broadcast\n", tid);
    rc = pthread_cond_broadcast(&thread_end_cv);
    assert(rc==0);
    fprintf(stderr, "    [SLEEP_CLEANUP][%lu] releasing lock\n", tid);
    rc = pthread_mutex_unlock(&thread2_mutex);
    assert(rc==0);
    pStatus->locked = 0;
    fprintf(stderr, "    [SLEEP_CLEANUP][%lu] released lock\n", tid);
    fprintf(stderr, "    [SLEEP_CLEANUP][%lu]: Clean up is done\n", tid);
}

/*
 * Thread start routine: sleeps in one-second steps until it is cancelled.
 *
 * status points to this thread's thread_status_t.  The routine records its
 * own thread id, marks itself THREAD2_CREATED and installs
 * sleepThread_cleanup as cancellation cleanup handler.  The loop never ends
 * on its own; the return statement only exists to satisfy the signature.
 */
void * sleepThread (void *status)
{
  thread_status_t *tstatus = (thread_status_t *)status;

  tstatus->tid = pthread_self ();
  /* pthread_t is not an int; convert explicitly instead of using %d. */
  fprintf (stderr, "    [SLEEP][%lu]: sleepThread startup \n",
           (unsigned long)tstatus->tid);
  tstatus->status = THREAD2_CREATED;
  pthread_cleanup_push(sleepThread_cleanup, tstatus);
  while(1)
  {
      sleep(1);
      pthread_testcancel();
  }
  /* Required lexically to pair with pthread_cleanup_push() above. */
  pthread_cleanup_pop(0);
  return status;
}
/*
 * Thread start routine for the "wait" worker.
 *
 * status points to this thread's thread_status_t.  The routine records its
 * own thread id, marks itself THREAD2_CREATED and installs
 * waitThread_cleanup as cancellation cleanup handler.
 *
 * With condmode set it repeatedly locks thread2_mutex, blocks in
 * pthread_cond_wait() on its own condition variable and unlocks again,
 * keeping tstatus->locked in step so the cleanup handler knows whether the
 * mutex is held.  Without condmode it just sleeps and polls for
 * cancellation.  The loop never ends on its own; the return statement only
 * exists to satisfy the signature.
 */
void * waitThread (void *status)
{
  int rc;
  thread_status_t *tstatus = (thread_status_t *)status;
  unsigned long tid;

  tstatus->tid = pthread_self ();
  /* pthread_t is not an int; convert explicitly instead of using %d. */
  tid = (unsigned long)tstatus->tid;
  fprintf (stderr, "    [WAIT][%lu]: waitThread startup \n", tid);
  tstatus->status = THREAD2_CREATED;
  pthread_cleanup_push(waitThread_cleanup, tstatus);

  while(1)
  {
    if(condmode)
    {
        fprintf(stderr, "    [WAIT][%lu] waiting lock\n", tid);
        rc = pthread_mutex_lock(&thread2_mutex);
        assert(rc==0);
        tstatus->locked = 1;
        fprintf(stderr, "    [WAIT][%lu] got_lock\n", tid);
        fprintf(stderr, "    [WAIT][%lu]:call cond_wait\n", tid);
        rc = pthread_cond_wait(&tstatus->cond, &thread2_mutex);
        assert(rc==0);
        fprintf(stderr, "    [WAIT][%lu]:cond wake up\n", tid);
        tstatus->locked = 0;
        rc = pthread_mutex_unlock(&thread2_mutex);
        assert(rc==0);
        fprintf(stderr, "    WAIT[%lu] release_lock\n", tid);
    } else
    {
        sleep(1);
        pthread_testcancel();
    }
  }

  /* Required lexically to pair with pthread_cleanup_push() above. */
  pthread_cleanup_pop(0);
  return status;
}
/* Seconds added to the current time to form the absolute deadline of each
   pthread_cond_timedwait() call in loop(). */
#define WAIT_TIME_SECONDS 1

static void
loop ()
{
  int i,j, rc;
  pthread_t tid;
  pthread_t wait_tid, sleep_tid;
  void *str[2];
  int             old_state;      /* Former thread cancellation state
*/
  thread_status_t wait_status, sleep_status;
    struct timespec   ts;
    struct timeval    tp;

  pthread_attr_t thread_attr;

  tid = pthread_self ();
  fprintf (stderr, "[%d]: loopThread startup \n", tid);
  sleep(1);
  rc = pthread_cond_init(&thread_end_cv, NULL);
    assert(rc==0);
  for (i = 0; i < loopcnt; i++)
  {
      fprintf(stderr, "###################################\n");
      fprintf(stderr, "#  %d-th loop start\n", i);
      fprintf(stderr, "###################################\n");
      rc = pthread_attr_init (&thread_attr);
        assert(rc==0);

      wait_status.status = THREAD2_INIT;
      wait_status.flag = 1;
      wait_status.locked = 0;
      rc = pthread_cond_init( &wait_status.cond, NULL);
        assert(rc==0);
      rc = pthread_create (&wait_tid, &thread_attr, waitThread,
&wait_status);
      if (rc)
      {
        fprintf (stderr, "[%d]: pthread_create fail (errno=%d)\n",
tid,errno);
        break;
      }
      sleep(1);
      sleep_status.status = THREAD2_INIT;
      sleep_status.flag = 0;
      sleep_status.locked = 0;
      rc = pthread_create (&sleep_tid, &thread_attr, sleepThread,
&sleep_status);
      if (rc)
      {
        fprintf (stderr, "[%d]: pthread_create fail (errno=%d)\n",
tid,errno);
        break;
      }
      (void) pthread_attr_destroy(&thread_attr);
      sleep (2);
      if(condmode)
      {
        fprintf(stderr, "[LOOP][%d] calling cond_broadcast to
WAIT_THREAD\n", tid);
        rc = pthread_cond_broadcast(&wait_status.cond);
        assert(rc==0);
        fprintf(stderr, "[LOOP][%d] called cond_broadcast to WAIT_THREAD\n",
tid);
        if(sleepcnt)
            sleep(sleepcnt);
      }
      while(sleep_status.status != THREAD2_CREATED) sleep(1);
      fprintf(stderr, "[LOOP][%d] waiting lock before cancen
sleepThread\n",tid);
      rc = pthread_mutex_lock(&thread2_mutex);
        assert(rc==0);
      fprintf(stderr, "[LOOP][%d] got_lock\n", tid);
      fprintf (stderr, "[LOOP]canceling SLEEP_THREAD [%d] \n", sleep_tid);
      rc = pthread_cancel (sleep_tid);
      assert(rc == 0);
      sleep_status.status = THREAD2_CANCELED;
      fprintf(stderr, "[LOOP][%d] releasing lock after cancel
sleepThread\n", tid);
      rc = pthread_mutex_unlock(&thread2_mutex);
      assert(rc==0);
      fprintf(stderr, "[LOOP][%d] released lock\n", tid);
      fprintf(stderr, "[LOOP][%d] waiting lock before cancen waitThread\n",
tid);
      rc = pthread_mutex_lock(&thread2_mutex);
        assert(rc==0);
      fprintf(stderr, "[LOOP][%d] got_lock\n", tid);
      while(wait_status.status != THREAD2_CREATED) sleep(1);
      fprintf (stderr, "[LOOP]canceling WAIT_THREAD [%d] \n", wait_tid);
      rc = pthread_cancel (wait_tid);
      assert(rc == 0);
      wait_status.status = THREAD2_CANCELED;
      fprintf(stderr, "[LOOP][%d] releasing lock after cancel waitThread\n",
tid);
      rc = pthread_mutex_unlock(&thread2_mutex);
      assert(rc==0);
      fprintf(stderr, "[LOOP][%d] released lock\n", tid);
      fprintf(stderr, "[LOOP][%d] waiting lock before go into timedwait
loop\n", tid);
      rc = pthread_mutex_lock(&thread2_mutex);
        assert(rc==0);
      fprintf(stderr, "[LOOP][%d] got_lock\n", tid);

      while(wait_status.status == THREAD2_CANCELED || sleep_status.status ==
THREAD2_CANCELED)
      {
        /* Usually worker threads will loop on these operations */
            rc =  gettimeofday(&tp, NULL);
            /* Convert from timeval to timespec */
            ts.tv_sec  = tp.tv_sec;
            ts.tv_nsec = tp.tv_usec * 1000;
            ts.tv_sec += WAIT_TIME_SECONDS;
            fprintf(stderr, "[LOOP]waiting in cond_timewait\n");
            rc = pthread_cond_timedwait(&thread_end_cv, &thread2_mutex,
&ts);
            fprintf(stderr, "[LOOP]cond_timewait return (rc=%d)\n", rc);
      }

      fprintf(stderr, "[LOOP][%d] releasing lock\n", tid);
      rc = pthread_mutex_unlock(&thread2_mutex);
      assert(rc==0);
      fprintf(stderr, "[LOOP][%d] released lock\n", tid);
      sleep (1);
      rc = pthread_join (sleep_tid, &str[1]);
      if (rc)
      {
        fprintf (stderr, "[%d]: pthread_join fail (errno=%d)\n", tid,
errno);
        break;
      }
      fprintf(stderr, "SLEEP_THREAD is joined\n");
      rc = pthread_join (wait_tid, &str[0]);
      if (rc)
      {
        fprintf (stderr, "[%d]: pthread_join fail (errno=%d)\n", tid,
errno);
        break;
      }
      fprintf(stderr, "WAIT_THREAD is joined\n");
      rc = pthread_cond_destroy( &wait_status.cond);
      assert(rc == 0);
      sleep(1);
  }
  rc = pthread_cond_destroy( &thread_end_cv);
  return;
}

/*
 * Entry point: parses the command line and runs the test loop.
 *
 *   -l <loopcnt>   number of iterations for loop()
 *   -c             enable condmode (waitThread blocks in pthread_cond_wait)
 *   -s <sleepcnt>  extra seconds to sleep after the broadcast in condmode
 *
 * Always exits with status 0 once loop() returns.
 */
int
main (int argc, char *argv[])
{
  int c;

  /* getopt() signals the end of the options with -1. */
  while ((c = getopt (argc, argv, "l:cs:")) != -1)
    {
      switch (c)
        {
        case 's':
          sleepcnt = atoi (optarg);
          break;
        case 'c':
          condmode = 1;
          break;
        case 'l':
          loopcnt = atoi (optarg);
          break;
        default:
          /* getopt() has already reported the problem; as before, the
             option is ignored and the test runs with the current values. */
          break;
        }
    }
  loop();
  return 0;
}




[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]