[dm-devel] [RFC PATCH] libmultipath: prevent DSO unloading with astray checker threads

mwilck at suse.com mwilck at suse.com
Thu Nov 5 11:49:52 UTC 2020


From: Martin Wilck <mwilck at suse.com>

The multipathd tur checker thread is designed to be able to finish at
any time, even after the tur checker itself has been freed. The
multipathd shutdown code makes sure all the checkers have been freed
before freeing the checker_class and calling dlclose() to unload the
DSO, but this doesn't guarantee that the checker threads have finished.
If one hasn't, the DSO will get unloaded while the thread still running
code from it, causing a segfault.

This patch fixes the issue by further incrementing the DSO's refcount
for every running thread. To avoid race conditions leading to segfaults,
the thread's entrypoint must be in libmultipath, not in the DSO itself.
Therefore we add a new optional checker method, libcheck_thread().
Checkers defining this method may create a detached thread with
entrypoint checker_thread_entry(), which will call the DSO's
libcheck_thread and take care of the refcount handling.

Reported-by: Benjamin Marzinski <bmarzins at redhat.com>
Signed-off-by: Martin Wilck <mwilck at suse.com>
---
 libmultipath/checkers.c     | 55 ++++++++++++++++++++++++++++++++-----
 libmultipath/checkers.h     | 17 ++++++++++++
 libmultipath/checkers/tur.c |  7 ++---
 3 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index f7ddd53..7d10f2a 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -3,6 +3,9 @@
 #include <stddef.h>
 #include <dlfcn.h>
 #include <sys/stat.h>
+#include <pthread.h>
+#include <urcu.h>
+#include <urcu/uatomic.h>
 
 #include "debug.h"
 #include "checkers.h"
@@ -19,6 +22,7 @@ struct checker_class {
 	int (*mp_init)(struct checker *);    /* to allocate the mpcontext */
 	void (*free)(struct checker *);      /* to free the context */
 	void (*reset)(void);		     /* to reset the global variables */
+	void *(*thread)(void *);	     /* async thread entry point */
 	const char **msgtable;
 	short msgtable_size;
 };
@@ -54,19 +58,32 @@ static struct checker_class *alloc_checker_class(void)
 	c = MALLOC(sizeof(struct checker_class));
 	if (c) {
 		INIT_LIST_HEAD(&c->node);
-		c->refcount = 1;
+		uatomic_set(&c->refcount, 1);
 	}
 	return c;
 }
 
+/* Use uatomic_{sub,add}_return() to ensure proper memory barriers */
+static int checker_class_ref(struct checker_class *cls)
+{
+	return uatomic_add_return(&cls->refcount, 1);
+}
+
+static int checker_class_unref(struct checker_class *cls)
+{
+	return uatomic_sub_return(&cls->refcount, 1);
+}
+
 void free_checker_class(struct checker_class *c)
 {
+	int cnt;
+
 	if (!c)
 		return;
-	c->refcount--;
-	if (c->refcount) {
-		condlog(4, "%s checker refcount %d",
-			c->name, c->refcount);
+	cnt = checker_class_unref(c);
+	if (cnt != 0) {
+		condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d",
+			c->name, cnt);
 		return;
 	}
 	condlog(3, "unloading %s checker", c->name);
@@ -160,7 +177,8 @@ static struct checker_class *add_checker_class(const char *multipath_dir,
 
 	c->mp_init = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_mp_init");
 	c->reset = (void (*)(void)) dlsym(c->handle, "libcheck_reset");
-	/* These 2 functions can be NULL. call dlerror() to clear out any
+	c->thread = (void *(*)(void*)) dlsym(c->handle, "libcheck_thread");
+	/* These 3 functions can be NULL. call dlerror() to clear out any
 	 * error string */
 	dlerror();
 
@@ -346,6 +364,29 @@ bad_id:
 	return generic_msg[CHECKER_MSGID_NONE];
 }
 
+static void checker_cleanup_thread(void *arg)
+{
+	struct checker_class *cls = arg;
+
+	(void)checker_class_unref(cls);
+	rcu_unregister_thread();
+}
+
+void *checker_thread_entry(void *arg)
+{
+	struct checker *chk = arg;
+	void *rv;
+
+	assert(chk && chk->cls && chk->cls->thread);
+
+	rcu_register_thread();
+	(void)checker_class_ref(chk->cls);
+	pthread_cleanup_push(checker_cleanup_thread, chk->cls);
+	rv = chk->cls->thread(chk->context);
+	pthread_cleanup_pop(1);
+	return rv;
+}
+
 void checker_clear_message (struct checker *c)
 {
 	if (!c)
@@ -370,7 +411,7 @@ void checker_get(const char *multipath_dir, struct checker *dst,
 	if (!src)
 		return;
 
-	src->refcount++;
+	(void)checker_class_ref(dst->cls);
 }
 
 int init_checkers(const char *multipath_dir)
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index 9d5f90b..01af02d 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -148,6 +148,23 @@ void checker_set_async (struct checker *);
 void checker_set_fd (struct checker *, int);
 void checker_enable (struct checker *);
 void checker_disable (struct checker *);
+/*
+ * checker_thread_entry(): entry point for async path checker thread
+ *
+ * Path checkers that do I/O may hang forever. To avoid blocking, some
+ * checkers therefore use asyncronous, detached threads for checking
+ * the paths. These threads may continue hanging if multipathd is stopped.
+ * In this case, we can't unload the checker DSO at exit. In order to
+ * avoid race conditions and crashes, the entry point of the thread
+ * needs to be in libmultipath, not in the DSO itself.
+ * Checker threads must use this function as entry point. It will call
+ * the DSO's "libcheck_thread" function with the checker context as
+ * argument. When libcheck_thread() returns, it will clean up and
+ * decrement the DSO's refcount. See the tur checker for a usage example.
+ *
+ * @param arg: pointer to struct checker, must have non-NULL cls->thread
+ */
+void *checker_thread_entry (void *);
 int checker_check (struct checker *, int);
 int checker_is_sync(const struct checker *);
 const char *checker_name (const struct checker *);
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index e886fcf..063c419 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -15,7 +15,6 @@
 #include <errno.h>
 #include <sys/time.h>
 #include <pthread.h>
-#include <urcu.h>
 #include <urcu/uatomic.h>
 
 #include "checkers.h"
@@ -204,7 +203,6 @@ static void cleanup_func(void *data)
 	holders = uatomic_sub_return(&ct->holders, 1);
 	if (!holders)
 		cleanup_context(ct);
-	rcu_unregister_thread();
 }
 
 /*
@@ -251,7 +249,7 @@ static void tur_deep_sleep(const struct tur_checker_context *ct)
 #define tur_deep_sleep(x) do {} while (0)
 #endif /* TUR_TEST_MAJOR */
 
-static void *tur_thread(void *ctx)
+void *libcheck_thread(void *ctx)
 {
 	struct tur_checker_context *ct = ctx;
 	int state, running;
@@ -259,7 +257,6 @@ static void *tur_thread(void *ctx)
 
 	/* This thread can be canceled, so setup clean up */
 	tur_thread_cleanup_push(ct);
-	rcu_register_thread();
 
 	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
 		minor(ct->devt));
@@ -394,7 +391,7 @@ int libcheck_check(struct checker * c)
 		uatomic_set(&ct->running, 1);
 		tur_set_async_timeout(c);
 		setup_thread_attr(&attr, 32 * 1024, 1);
-		r = pthread_create(&ct->thread, &attr, tur_thread, ct);
+		r = pthread_create(&ct->thread, &attr, checker_thread_entry, c);
 		pthread_attr_destroy(&attr);
 		if (r) {
 			uatomic_sub(&ct->holders, 1);
-- 
2.29.0





More information about the dm-devel mailing list