[dm-devel] [PATCH] multipath: clean up code for stopping the waiter threads
Christophe Varoqui
christophe.varoqui at gmail.com
Sun May 20 10:34:08 UTC 2012
On sam., 2012-05-19 at 01:37 -0500, Benjamin Marzinski wrote:
> The way multipathd currently stops the waiter threads needs some work.
> Right now they are stopped by being sent the SIGUSR1 signal. However their
> cleanup code assumes that they are being cancelled, just like all the other
> threads are. There's no reason for them to be so unnecessarily
> complicated and different from the other threads
>
> This patch does a couple of things. First, it removes the mutex from
> the event_thread. This wasn't doing anything. It was designed to protect
> the wp->mapname variable, which the waiter threads were checking to see
> if they should quit. However, the mutex was only ever being used by the
> thread itself, and it clearly didn't need to serialize with itself. Also,
> the function to clear the mapname, signal_waiter(), was set with
> pthread_cleanup_push(), which never got called early, since the threads
> weren't being cancelled. Thus, the mapname never got cleared
> until the pthreads were about to shut down.
>
> The patch also rips out all the signal stopping code, and just uses
> pthread_cancel. There already are cancellation points in the waiter
> thread code. Between the cancellation points, both explicit and implicit,
> and the fact that the waiter threads will never be killed except when the
> killer is holding the vecs lock, there shouldn't be any place where the
> waiter thread can access freed data.
>
> To make sure the waiter thread cleans itself up properly, the dmt
> has been moved into the event_thread structure, and is destroyed in
> free_waiter() if necessary.
>
Applied.
Thanks.
> Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
> ---
> libmultipath/waiter.c | 74 +++++++++++---------------------------------------
> libmultipath/waiter.h | 4 +-
> 2 files changed, 19 insertions(+), 59 deletions(-)
>
> Index: multipath-tools-120518/libmultipath/waiter.c
> ===================================================================
> --- multipath-tools-120518.orig/libmultipath/waiter.c
> +++ multipath-tools-120518/libmultipath/waiter.c
> @@ -29,23 +29,17 @@ struct event_thread *alloc_waiter (void)
>
> wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
> memset(wp, 0, sizeof(struct event_thread));
> - pthread_mutex_init(&wp->lock, NULL);
>
> return wp;
> }
>
> -void signal_waiter (void *data)
> +void free_waiter (void *data)
> {
> struct event_thread *wp = (struct event_thread *)data;
>
> - pthread_mutex_lock(&wp->lock);
> - memset(wp->mapname, 0, WWID_SIZE);
> - pthread_mutex_unlock(&wp->lock);
> -}
> + if (wp->dmt)
> + dm_task_destroy(wp->dmt);
>
> -void free_waiter (struct event_thread *wp)
> -{
> - pthread_mutex_destroy(&wp->lock);
> FREE(wp);
> }
>
> @@ -58,83 +52,56 @@ void stop_waiter_thread (struct multipat
> }
> condlog(2, "%s: stop event checker thread (%lu)", mpp->alias,
> mpp->waiter);
> - pthread_kill(mpp->waiter, SIGUSR1);
> + pthread_cancel(mpp->waiter);
> mpp->waiter = (pthread_t)0;
> }
>
> -static sigset_t unblock_signals(void)
> -{
> - sigset_t set, old;
> -
> - sigemptyset(&set);
> - sigaddset(&set, SIGHUP);
> - sigaddset(&set, SIGUSR1);
> - pthread_sigmask(SIG_UNBLOCK, &set, &old);
> - return old;
> -}
> -
> /*
> * returns the reschedule delay
> * negative means *stop*
> */
> int waiteventloop (struct event_thread *waiter)
> {
> - sigset_t set;
> - struct dm_task *dmt = NULL;
> int event_nr;
> int r;
>
> - pthread_mutex_lock(&waiter->lock);
> if (!waiter->event_nr)
> waiter->event_nr = dm_geteventnr(waiter->mapname);
>
> - if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT))) {
> + if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT))) {
> condlog(0, "%s: devmap event #%i dm_task_create error",
> waiter->mapname, waiter->event_nr);
> - pthread_mutex_unlock(&waiter->lock);
> return 1;
> }
>
> - if (!dm_task_set_name(dmt, waiter->mapname)) {
> + if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
> condlog(0, "%s: devmap event #%i dm_task_set_name error",
> waiter->mapname, waiter->event_nr);
> - dm_task_destroy(dmt);
> - pthread_mutex_unlock(&waiter->lock);
> + dm_task_destroy(waiter->dmt);
> + waiter->dmt = NULL;
> return 1;
> }
>
> - if (waiter->event_nr && !dm_task_set_event_nr(dmt,
> + if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
> waiter->event_nr)) {
> condlog(0, "%s: devmap event #%i dm_task_set_event_nr error",
> waiter->mapname, waiter->event_nr);
> - dm_task_destroy(dmt);
> - pthread_mutex_unlock(&waiter->lock);
> + dm_task_destroy(waiter->dmt);
> + waiter->dmt = NULL;
> return 1;
> }
> - pthread_mutex_unlock(&waiter->lock);
>
> - dm_task_no_open_count(dmt);
> -
> - /* accept wait interruption */
> - set = unblock_signals();
> + dm_task_no_open_count(waiter->dmt);
>
> /* wait */
> - r = dm_task_run(dmt);
> -
> - /* wait is over : event or interrupt */
> - pthread_sigmask(SIG_SETMASK, &set, NULL);
> + r = dm_task_run(waiter->dmt);
>
> - dm_task_destroy(dmt);
> + dm_task_destroy(waiter->dmt);
> + waiter->dmt = NULL;
>
> if (!r) /* wait interrupted by signal */
> return -1;
>
> - pthread_mutex_lock(&waiter->lock);
> - if (!strlen(waiter->mapname)) {
> - /* waiter should exit */
> - pthread_mutex_unlock(&waiter->lock);
> - return -1;
> - }
> waiter->event_nr++;
>
> /*
> @@ -164,20 +131,16 @@ int waiteventloop (struct event_thread *
> if (r) {
> condlog(2, "%s: event checker exit",
> waiter->mapname);
> - pthread_mutex_unlock(&waiter->lock);
> return -1; /* stop the thread */
> }
>
> event_nr = dm_geteventnr(waiter->mapname);
>
> - if (waiter->event_nr == event_nr) {
> - pthread_mutex_unlock(&waiter->lock);
> + if (waiter->event_nr == event_nr)
> return 1; /* upon problem reschedule 1s later */
> - }
>
> waiter->event_nr = event_nr;
> }
> - pthread_mutex_unlock(&waiter->lock);
> return -1; /* never reach there */
> }
>
> @@ -189,7 +152,7 @@ void *waitevent (void *et)
> mlockall(MCL_CURRENT | MCL_FUTURE);
>
> waiter = (struct event_thread *)et;
> - pthread_cleanup_push(signal_waiter, et);
> + pthread_cleanup_push(free_waiter, et);
>
> block_signal(SIGUSR1, NULL);
> block_signal(SIGHUP, NULL);
> @@ -203,7 +166,6 @@ void *waitevent (void *et)
> }
>
> pthread_cleanup_pop(1);
> - free_waiter(waiter);
> return NULL;
> }
>
> @@ -219,10 +181,8 @@ int start_waiter_thread (struct multipat
> if (!wp)
> goto out;
>
> - pthread_mutex_lock(&wp->lock);
> strncpy(wp->mapname, mpp->alias, WWID_SIZE);
> wp->vecs = vecs;
> - pthread_mutex_unlock(&wp->lock);
>
> if (pthread_create(&wp->thread, &waiter_attr, waitevent, wp)) {
> condlog(0, "%s: cannot create event checker", wp->mapname);
> Index: multipath-tools-120518/libmultipath/waiter.h
> ===================================================================
> --- multipath-tools-120518.orig/libmultipath/waiter.h
> +++ multipath-tools-120518/libmultipath/waiter.h
> @@ -4,15 +4,15 @@
> extern pthread_attr_t waiter_attr;
>
> struct event_thread {
> + struct dm_task *dmt;
> pthread_t thread;
> - pthread_mutex_t lock;
> int event_nr;
> char mapname[WWID_SIZE];
> struct vectors *vecs;
> };
>
> struct event_thread * alloc_waiter (void);
> -void signal_waiter (void *data);
> +void free_waiter (void *data);
> void stop_waiter_thread (struct multipath *mpp, struct vectors *vecs);
> int start_waiter_thread (struct multipath *mpp, struct vectors *vecs);
> int waiteventloop (struct event_thread *waiter);
More information about the dm-devel
mailing list