[dm-devel] [PATCH] Race-condition fix with free_waiter threads during shutdown.
Konrad Rzeszutek
konrad at virtualiron.com
Wed Mar 18 23:38:09 UTC 2009
From: Konrad Rzeszutek <konrad at mars.virtualiron.com>
When we shutdown, the main process locks the mutex, causing
all of the free_waiter functions to pile up on their lock.
Once we unlock in the main process, all of the free_waiters
start working. However the next instruction in the main proces
is to destroy the mutex. The short window is all the free_waiter
threads have to do their cleanup before they attempt to unlock
the mutex - which might have been de-allocated (and set to NULL).
End result can be a seg-fault.
This fix adds a ref-count to the mutex so that during shutdown
we spin and wait until all of the free_waiter functions
have completed and the ref-count is set to zero.
---
libmultipath/lock.c | 4 ++--
libmultipath/lock.h | 23 ++++++++++++++++-------
libmultipath/structs_vec.h | 8 +++++++-
libmultipath/waiter.c | 14 ++++++++++----
multipath/main.c | 1 +
multipathd/main.c | 28 +++++++++++++++++-----------
6 files changed, 53 insertions(+), 25 deletions(-)
diff --git a/libmultipath/lock.c b/libmultipath/lock.c
index 0ca8783..54e2988 100644
--- a/libmultipath/lock.c
+++ b/libmultipath/lock.c
@@ -1,8 +1,8 @@
#include <pthread.h>
#include "lock.h"
-
+#include <stdio.h>
void cleanup_lock (void * data)
{
- unlock((pthread_mutex_t *)data);
+ unlock ((*(struct mutex_lock *)data));
}
diff --git a/libmultipath/lock.h b/libmultipath/lock.h
index 6afecda..f0e0bfa 100644
--- a/libmultipath/lock.h
+++ b/libmultipath/lock.h
@@ -1,19 +1,28 @@
#ifndef _LOCK_H
#define _LOCK_H
+/*
+ * Wrapper for the mutex. Includes a ref-count to keep
+ * track of how many there are out-standing threads blocking
+ * on a mutex. */
+struct mutex_lock {
+ pthread_mutex_t *mutex;
+ int depth;
+};
+
#ifdef LCKDBG
#define lock(a) \
- fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
- pthread_mutex_lock(a)
+ fprintf(stderr, "%s:%s(%i) lock %p depth: %d (%ld)\n", __FILE__, __FUNCTION__, __LINE__, a.mutex, a.depth, pthread_self()); \
+ a.depth++; pthread_mutex_lock(a.mutex)
#define unlock(a) \
- fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
- pthread_mutex_unlock(a)
+ fprintf(stderr, "%s:%s(%i) unlock %p depth: %d (%ld)\n", __FILE__, __FUNCTION__, __LINE__, a.mutex, a.depth, pthread_self()); \
+ a.depth--; pthread_mutex_unlock(a.mutex)
#define lock_cleanup_pop(a) \
- fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
+ fprintf(stderr, "%s:%s(%i) unlock %p depth: %d (%ld)\n", __FILE__, __FUNCTION__, __LINE__, a.mutex, a.depth, pthread_self()); \
pthread_cleanup_pop(1);
#else
-#define lock(a) pthread_mutex_lock(a)
-#define unlock(a) pthread_mutex_unlock(a)
+#define lock(a) a.depth++; pthread_mutex_lock(a.mutex)
+#define unlock(a) a.depth--; pthread_mutex_unlock(a.mutex)
#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
#endif
diff --git a/libmultipath/structs_vec.h b/libmultipath/structs_vec.h
index 19a2387..78e468a 100644
--- a/libmultipath/structs_vec.h
+++ b/libmultipath/structs_vec.h
@@ -1,8 +1,14 @@
#ifndef _STRUCTS_VEC_H
#define _STRUCTS_VEC_H
+#include "lock.h"
+/*
+struct mutex_lock {
+ pthread_mutex_t *mutex;
+ int depth;
+}; */
struct vectors {
- pthread_mutex_t *lock;
+ struct mutex_lock lock; /* defined in lock.h */
vector pathvec;
vector mpvec;
};
diff --git a/libmultipath/waiter.c b/libmultipath/waiter.c
index 54cd19f..e9cde8b 100644
--- a/libmultipath/waiter.c
+++ b/libmultipath/waiter.c
@@ -45,7 +45,10 @@ void free_waiter (void *data)
*/
wp->mpp->waiter = NULL;
else
- condlog(3, "free_waiter, mpp freed before wp=%p,", wp);
+ /*
+ * This is OK condition during shutdown.
+ */
+ condlog(3, "free_waiter, mpp freed before wp=%p (%s).", wp, wp->mapname);
unlock(wp->vecs->lock);
@@ -58,13 +61,16 @@ void free_waiter (void *data)
void stop_waiter_thread (struct multipath *mpp, struct vectors *vecs)
{
struct event_thread *wp = (struct event_thread *)mpp->waiter;
+ pthread_t thread;
if (!wp) {
condlog(3, "%s: no waiter thread", mpp->alias);
return;
}
- condlog(2, "%s: stop event checker thread", wp->mapname);
- pthread_kill((pthread_t)wp->thread, SIGUSR1);
+ thread = wp->thread;
+ condlog(2, "%s: stop event checker thread (%lu)", wp->mapname, thread);
+
+ pthread_kill(thread, SIGUSR1);
}
static sigset_t unblock_signals(void)
@@ -148,7 +154,7 @@ int waiteventloop (struct event_thread *waiter)
* 4) a path reinstate : nothing to do
* 5) a switch group : nothing to do
*/
- pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
+ pthread_cleanup_push(cleanup_lock, &waiter->vecs->lock);
lock(waiter->vecs->lock);
r = update_multipath(waiter->vecs, waiter->mapname);
lock_cleanup_pop(waiter->vecs->lock);
diff --git a/multipath/main.c b/multipath/main.c
index ade858d..dacae1f 100644
--- a/multipath/main.c
+++ b/multipath/main.c
@@ -440,6 +440,7 @@ main (int argc, char *argv[])
condlog(3, "restart multipath configuration process");
out:
+
sysfs_cleanup();
dm_lib_release();
dm_lib_exit();
diff --git a/multipathd/main.c b/multipathd/main.c
index 323ed7f..b7532f1 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -582,7 +582,7 @@ uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
*len = 0;
vecs = (struct vectors *)trigger_data;
- pthread_cleanup_push(cleanup_lock, vecs->lock);
+ pthread_cleanup_push(cleanup_lock, &vecs->lock);
lock(vecs->lock);
r = parse_cmd(str, reply, len, vecs);
@@ -740,9 +740,9 @@ exit_daemon (int status)
condlog(3, "unlink pidfile");
unlink(DEFAULT_PIDFILE);
- lock(&exit_mutex);
+ pthread_mutex_lock(&exit_mutex);
pthread_cond_signal(&exit_cond);
- unlock(&exit_mutex);
+ pthread_mutex_unlock(&exit_mutex);
return status;
}
@@ -1020,7 +1020,7 @@ checkerloop (void *ap)
}
while (1) {
- pthread_cleanup_push(cleanup_lock, vecs->lock);
+ pthread_cleanup_push(cleanup_lock, &vecs->lock);
lock(vecs->lock);
condlog(4, "tick");
@@ -1163,13 +1163,14 @@ init_vecs (void)
if (!vecs)
return NULL;
- vecs->lock =
+ vecs->lock.mutex =
(pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
- if (!vecs->lock)
+ if (!vecs->lock.mutex)
goto out;
- pthread_mutex_init(vecs->lock, NULL);
+ pthread_mutex_init(vecs->lock.mutex, NULL);
+ vecs->lock.depth = 0;
return vecs;
@@ -1341,7 +1342,6 @@ child (void * param)
condlog(0, "failure during configuration");
exit(1);
}
-
/*
* start threads
*/
@@ -1375,9 +1375,15 @@ child (void * param)
free_polls();
unlock(vecs->lock);
- pthread_mutex_destroy(vecs->lock);
- FREE(vecs->lock);
- vecs->lock = NULL;
+ /* Now all the waitevent threads will start rushing in. */
+ while (vecs->lock.depth > 0) {
+ sleep (1); /* This is weak. */
+ condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
+ }
+ pthread_mutex_destroy(vecs->lock.mutex);
+ FREE(vecs->lock.mutex);
+ vecs->lock.depth = 0;
+ vecs->lock.mutex = NULL;
FREE(vecs);
vecs = NULL;
--
1.5.4.1
More information about the dm-devel
mailing list