[dm-devel] [PATCH 3/3] multipathd: avoid io_err_stat ABBA deadlock
Martin Wilck
martin.wilck at suse.com
Wed Jan 13 11:45:48 UTC 2021
On Tue, 2021-01-12 at 17:52 -0600, Benjamin Marzinski wrote:
> When the checker thread enqueues paths for the io_err_stat thread to
> check, it calls enqueue_io_err_stat_by_path() with the vecs lock
> held.
> start_io_err_stat_thread() is also called with the vecs lock held.
> These two functions both lock io_err_pathvec_lock. When the
> io_err_stat
> thread updates the paths in vecs->pathvec in poll_io_err_stat(), it
> has
> the io_err_pathvec_lock held, and then locks the vecs lock. This can
> cause an ABBA deadlock.
>
> To solve this, service_paths() no longer updates the paths in
> vecs->pathvec with the io_err_pathvec_lock held. It does this by
> moving
> the io_err_stat_path from io_err_pathvec to a local vector when it
> needs
> to update the path. After releasing the io_err_pathvec_lock, it goes
> through this temporary vector, updates the paths with the vecs lock
> held, and then frees everything.
>
> This change fixes a bug in service_paths() where elements were being
> deleted from io_err_pathvec, without the index being decremented,
> causing the loop to skip elements. Also, service_paths() could be
> cancelled while holding the io_err_pathvec_lock, so it should have a
> cleanup handler.
>
> Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
Looks good. Only two nits below.
> ---
> libmultipath/io_err_stat.c | 55 +++++++++++++++++++++---------------
> --
> 1 file changed, 31 insertions(+), 24 deletions(-)
>
> diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
> index 4c6f7f08..a222594e 100644
> --- a/libmultipath/io_err_stat.c
> +++ b/libmultipath/io_err_stat.c
> @@ -385,20 +385,6 @@ recover:
> return 0;
> }
>
> -static int delete_io_err_stat_by_addr(struct io_err_stat_path *p)
> -{
> - int i;
> -
> - i = find_slot(io_err_pathvec, p);
> - if (i != -1)
> - vector_del_slot(io_err_pathvec, i);
> -
> - destroy_directio_ctx(p);
> - free_io_err_stat_path(p);
> -
> - return 0;
> -}
> -
> static void account_async_io_state(struct io_err_stat_path *pp, int
> rc)
> {
> switch (rc) {
> @@ -415,17 +401,26 @@ static void account_async_io_state(struct
> io_err_stat_path *pp, int rc)
> }
> }
>
> -static int poll_io_err_stat(struct vectors *vecs, struct
> io_err_stat_path *pp)
> +static int io_err_stat_time_up(struct io_err_stat_path *pp)
> {
> struct timespec currtime, difftime;
> - struct path *path;
> - double err_rate;
>
> if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0)
> - return 1;
> + return 0;
This can't fail. Please change it to get_monotonic_time().
> timespecsub(&currtime, &pp->start_time, &difftime);
> if (difftime.tv_sec < pp->total_time)
> return 0;
> + return 1;
> +}
> +
> +static void end_io_err_stat(struct io_err_stat_path *pp)
> +{
> + struct timespec currtime;
> + struct path *path;
> + double err_rate;
> +
> + if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0)
> + currtime = pp->start_time;
See above.
>
> io_err_stat_log(4, "%s: check end", pp->devname);
>
> @@ -464,10 +459,6 @@ static int poll_io_err_stat(struct vectors
> *vecs, struct io_err_stat_path *pp)
> pp->devname);
> }
> lock_cleanup_pop(vecs->lock);
> -
> - delete_io_err_stat_by_addr(pp);
> -
> - return 0;
> }
>
> static int send_each_async_io(struct dio_ctx *ct, int fd, char *dev)
> @@ -639,17 +630,33 @@ static void process_async_ios_event(int
> timeout_nsecs, char *dev)
>
> static void service_paths(void)
> {
> + struct _vector _pathvec = {0};
> + /* avoid gcc warnings that &_pathvec will never be NULL in
> vector ops */
> + vector tmp_pathvec = &_pathvec;
> struct io_err_stat_path *pp;
> int i;
>
> pthread_mutex_lock(&io_err_pathvec_lock);
> + pthread_cleanup_push(cleanup_unlock, &io_err_pathvec_lock);
> vector_foreach_slot(io_err_pathvec, pp, i) {
> send_batch_async_ios(pp);
> process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp-
> >devname);
> poll_async_io_timeout();
> - poll_io_err_stat(vecs, pp);
> + if (io_err_stat_time_up(pp)) {
> + if (!vector_alloc_slot(tmp_pathvec))
> + continue;
> + vector_del_slot(io_err_pathvec, i--);
> + vector_set_slot(tmp_pathvec, pp);
> + }
> }
> - pthread_mutex_unlock(&io_err_pathvec_lock);
> + pthread_cleanup_pop(1);
> + vector_foreach_slot_backwards(tmp_pathvec, pp, i) {
> + end_io_err_stat(pp);
> + vector_del_slot(tmp_pathvec, i);
> + destroy_directio_ctx(pp);
> + free_io_err_stat_path(pp);
> + }
> + vector_reset(tmp_pathvec);
> }
>
> static void cleanup_exited(__attribute__((unused)) void *arg)
--
Dr. Martin Wilck <mwilck at suse.com>, Tel. +49 (0)911 74053 2107
SUSE Software Solutions Germany GmbH
HRB 36809, AG Nürnberg GF: Felix Imendörffer
More information about the dm-devel
mailing list