[dm-devel] [PATCH 3/7] libmultipath: prevent DSO unloading with astray checker threads

mwilck at suse.com mwilck at suse.com
Thu Dec 17 11:00:14 UTC 2020


From: Martin Wilck <mwilck at suse.com>

The multipathd tur checker thread is designed to be able to finish at
any time, even after the tur checker itself has been freed. The
multipathd shutdown code makes sure all the checkers have been freed
before freeing the checker_class and calling dlclose() to unload the
DSO, but this doesn't guarantee that the checker threads have finished.
If one hasn't, the DSO will get unloaded while the thread still running
code from it, causing a segfault.

This patch fixes the issue by further incrementing the DSO's refcount
for every running thread. To avoid race conditions leading to segfaults,
the thread's entrypoint must be in libmultipath, not in the DSO itself.
Therefore we add a new optional checker method, libcheck_thread().
Checkers defining this method may create a detached thread with
entrypoint checker_thread_entry(), which will call the DSO's
libcheck_thread and take care of the refcount handling.

Reported-by: Benjamin Marzinski <bmarzins at redhat.com>
Signed-off-by: Martin Wilck <mwilck at suse.com>
---
 libmultipath/checkers.c           | 68 +++++++++++++++++++++++++++----
 libmultipath/checkers.h           | 25 ++++++++++++
 libmultipath/checkers/tur.c       | 12 +++---
 libmultipath/libmultipath.version |  5 +++
 4 files changed, 97 insertions(+), 13 deletions(-)

diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index 18b1f5e..2dd9915 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -3,6 +3,8 @@
 #include <stddef.h>
 #include <dlfcn.h>
 #include <sys/stat.h>
+#include <urcu.h>
+#include <urcu/uatomic.h>
 
 #include "debug.h"
 #include "checkers.h"
@@ -20,6 +22,7 @@ struct checker_class {
 	int (*mp_init)(struct checker *);    /* to allocate the mpcontext */
 	void (*free)(struct checker *);      /* to free the context */
 	void (*reset)(void);		     /* to reset the global variables */
+	void *(*thread)(void *);	     /* async thread entry point */
 	const char **msgtable;
 	short msgtable_size;
 };
@@ -55,19 +58,32 @@ static struct checker_class *alloc_checker_class(void)
 	c = MALLOC(sizeof(struct checker_class));
 	if (c) {
 		INIT_LIST_HEAD(&c->node);
-		c->refcount = 1;
+		uatomic_set(&c->refcount, 1);
 	}
 	return c;
 }
 
+/* Use uatomic_{sub,add}_return() to ensure proper memory barriers */
+static int checker_class_ref(struct checker_class *cls)
+{
+	return uatomic_add_return(&cls->refcount, 1);
+}
+
+static int checker_class_unref(struct checker_class *cls)
+{
+	return uatomic_sub_return(&cls->refcount, 1);
+}
+
 void free_checker_class(struct checker_class *c)
 {
+	int cnt;
+
 	if (!c)
 		return;
-	c->refcount--;
-	if (c->refcount) {
-		condlog(4, "%s checker refcount %d",
-			c->name, c->refcount);
+	cnt = checker_class_unref(c);
+	if (cnt != 0) {
+		condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d",
+			c->name, cnt);
 		return;
 	}
 	condlog(3, "unloading %s checker", c->name);
@@ -161,7 +177,8 @@ static struct checker_class *add_checker_class(const char *multipath_dir,
 
 	c->mp_init = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_mp_init");
 	c->reset = (void (*)(void)) dlsym(c->handle, "libcheck_reset");
-	/* These 2 functions can be NULL. call dlerror() to clear out any
+	c->thread = (void *(*)(void*)) dlsym(c->handle, "libcheck_thread");
+	/* These 3 functions can be NULL. call dlerror() to clear out any
 	 * error string */
 	dlerror();
 
@@ -347,6 +364,43 @@ bad_id:
 	return generic_msg[CHECKER_MSGID_NONE];
 }
 
+static void checker_cleanup_thread(void *arg)
+{
+	struct checker_class *cls = arg;
+
+	(void)checker_class_unref(cls);
+	rcu_unregister_thread();
+}
+
+static void *checker_thread_entry(void *arg)
+{
+	struct checker_context *ctx = arg;
+	void *rv;
+
+	rcu_register_thread();
+	pthread_cleanup_push(checker_cleanup_thread, ctx->cls);
+	rv = ctx->cls->thread(ctx);
+	pthread_cleanup_pop(1);
+	return rv;
+}
+
+int start_checker_thread(pthread_t *thread, const pthread_attr_t *attr,
+			 struct checker_context *ctx)
+{
+	int rv;
+
+	assert(ctx && ctx->cls && ctx->cls->thread);
+	/* Take a ref here, lest the class be freed before the thread starts */
+	(void)checker_class_ref(ctx->cls);
+	rv = pthread_create(thread, attr, checker_thread_entry, ctx);
+	if (rv != 0) {
+		condlog(1, "failed to start checker thread for %s: %m",
+			ctx->cls->name);
+		checker_class_unref(ctx->cls);
+	}
+	return rv;
+}
+
 void checker_clear_message (struct checker *c)
 {
 	if (!c)
@@ -371,7 +425,7 @@ void checker_get(const char *multipath_dir, struct checker *dst,
 	if (!src)
 		return;
 
-	src->refcount++;
+	(void)checker_class_ref(dst->cls);
 }
 
 int init_checkers(const char *multipath_dir)
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index 9d5f90b..2fd1d1c 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -1,6 +1,7 @@
 #ifndef _CHECKERS_H
 #define _CHECKERS_H
 
+#include <pthread.h>
 #include "list.h"
 #include "memory.h"
 #include "defaults.h"
@@ -148,6 +149,28 @@ void checker_set_async (struct checker *);
 void checker_set_fd (struct checker *, int);
 void checker_enable (struct checker *);
 void checker_disable (struct checker *);
+/*
+ * start_checker_thread(): start async path checker thread
+ *
+ * This function provides a wrapper around pthread_create().
+ * The created thread will call the DSO's "libcheck_thread" function with the
+ * checker context as argument.
+ *
+ * Rationale:
+ * Path checkers that do I/O may hang forever. To avoid blocking, some
+ * checkers therefore use asyncronous, detached threads for checking
+ * the paths. These threads may continue hanging if multipathd is stopped.
+ * In this case, we can't unload the checker DSO at exit. In order to
+ * avoid race conditions and crashes, the entry point of the thread
+ * needs to be in libmultipath, not in the DSO itself.
+ *
+ * @param arg: pointer to struct checker_context.
+ */
+struct checker_context {
+	struct checker_class *cls;
+};
+int start_checker_thread (pthread_t *thread, const pthread_attr_t *attr,
+			  struct checker_context *ctx);
 int checker_check (struct checker *, int);
 int checker_is_sync(const struct checker *);
 const char *checker_name (const struct checker *);
@@ -164,6 +187,8 @@ void checker_get(const char *, struct checker *, const char *);
 int libcheck_check(struct checker *);
 int libcheck_init(struct checker *);
 void libcheck_free(struct checker *);
+void *libcheck_thread(struct checker_context *ctx);
+
 /*
  * msgid => message map.
  *
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index e886fcf..a4b4a21 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -15,7 +15,6 @@
 #include <errno.h>
 #include <sys/time.h>
 #include <pthread.h>
-#include <urcu.h>
 #include <urcu/uatomic.h>
 
 #include "checkers.h"
@@ -55,6 +54,7 @@ struct tur_checker_context {
 	pthread_cond_t active;
 	int holders; /* uatomic access only */
 	int msgid;
+	struct checker_context ctx;
 };
 
 int libcheck_init (struct checker * c)
@@ -74,6 +74,7 @@ int libcheck_init (struct checker * c)
 	pthread_mutex_init(&ct->lock, NULL);
 	if (fstat(c->fd, &sb) == 0)
 		ct->devt = sb.st_rdev;
+	ct->ctx.cls = c->cls;
 	c->context = ct;
 
 	return 0;
@@ -204,7 +205,6 @@ static void cleanup_func(void *data)
 	holders = uatomic_sub_return(&ct->holders, 1);
 	if (!holders)
 		cleanup_context(ct);
-	rcu_unregister_thread();
 }
 
 /*
@@ -251,15 +251,15 @@ static void tur_deep_sleep(const struct tur_checker_context *ct)
 #define tur_deep_sleep(x) do {} while (0)
 #endif /* TUR_TEST_MAJOR */
 
-static void *tur_thread(void *ctx)
+void *libcheck_thread(struct checker_context *ctx)
 {
-	struct tur_checker_context *ct = ctx;
+	struct tur_checker_context *ct =
+		container_of(ctx, struct tur_checker_context, ctx);
 	int state, running;
 	short msgid;
 
 	/* This thread can be canceled, so setup clean up */
 	tur_thread_cleanup_push(ct);
-	rcu_register_thread();
 
 	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
 		minor(ct->devt));
@@ -394,7 +394,7 @@ int libcheck_check(struct checker * c)
 		uatomic_set(&ct->running, 1);
 		tur_set_async_timeout(c);
 		setup_thread_attr(&attr, 32 * 1024, 1);
-		r = pthread_create(&ct->thread, &attr, tur_thread, ct);
+		r = start_checker_thread(&ct->thread, &attr, &ct->ctx);
 		pthread_attr_destroy(&attr);
 		if (r) {
 			uatomic_sub(&ct->holders, 1);
diff --git a/libmultipath/libmultipath.version b/libmultipath/libmultipath.version
index 2e3583f..751099d 100644
--- a/libmultipath/libmultipath.version
+++ b/libmultipath/libmultipath.version
@@ -270,3 +270,8 @@ global:
 	dm_prereq;
 	skip_libmp_dm_init;
 } LIBMULTIPATH_4.1.0;
+
+LIBMULTIPATH_4.3.0 {
+global:
+	start_checker_thread;
+} LIBMULTIPATH_4.2.0;
-- 
2.29.0





More information about the dm-devel mailing list