[dm-devel] [RFC PATCH 5/5] multipathd: RFC add new polling dmevents waiter thread
Martin Wilck
mwilck at suse.com
Sat Feb 10 19:55:53 UTC 2018
Hi Ben,
thanks a lot for this. I have only a few minor nitpicks (see below).
I suppose you've tested this already?
Regards
Martin
On Fri, 2018-02-09 at 23:07 -0600, Benjamin Marzinski wrote:
> The current method of waiting for dmevents on multipath devices
> involves
> creating a seperate thread for each device. This can become very
> wasteful when there are large numbers of multipath devices. Also,
> since
> multipathd needs to grab the vecs lock to update the devices, the
> additional threads don't actually provide much parallelism.
>
> The patch adds a new method of updating multipath devices on
> dmevents,
> which uses the new device-mapper event polling interface. This means
> that there is only one dmevent waiting thread which will wait for
> events
> on all of the multipath devices. Currently the code to get the event
> number from the list of device names and to re-arm the polling
> interface
> is not in libdevmapper, so the patch does that work. Obviously, these
> bits need to go into libdevmapper, so that multipathd can use a
> standard
> interface.
>
> I haven't touched any of the existing event waiting code, since event
> polling was only added to device-mapper in version
> 4.37.0. multipathd
> checks this version, and defaults to using the polling code if
> device-mapper supports it. This can be overridden by running
> multipathd
> with "-w", to force it to use the old event waiting code.
Why use a command line option here rather than a config file option?
>
> Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
> ---
> multipathd/Makefile | 3 +-
> multipathd/dmevents.c | 396
> ++++++++++++++++++++++++++++++++++++++++++++++++++
> multipathd/dmevents.h | 13 ++
> multipathd/main.c | 58 +++++++-
> 4 files changed, 461 insertions(+), 9 deletions(-)
> create mode 100644 multipathd/dmevents.c
> create mode 100644 multipathd/dmevents.h
>
> diff --git a/multipathd/Makefile b/multipathd/Makefile
> index 85f29a7..4c438f0 100644
> --- a/multipathd/Makefile
> +++ b/multipathd/Makefile
> @@ -22,7 +22,8 @@ ifdef SYSTEMD
> endif
> endif
>
> -OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o
> waiter.o
> +OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o
> waiter.o \
> + dmevents.o
>
> EXEC = multipathd
>
> diff --git a/multipathd/dmevents.c b/multipathd/dmevents.c
> new file mode 100644
> index 0000000..a56c055
> --- /dev/null
> +++ b/multipathd/dmevents.c
> @@ -0,0 +1,396 @@
> +/*
> + * Copyright (c) 2004, 2005 Christophe Varoqui
> + * Copyright (c) 2005 Kiyoshi Ueda, NEC
> + * Copyright (c) 2005 Edward Goggin, EMC
> + * Copyright (c) 2005, 2018 Benjamin Marzinski, Redhat
> + */
> +#include <unistd.h>
> +#include <libdevmapper.h>
> +#include <sys/mman.h>
> +#include <pthread.h>
> +#include <urcu.h>
> +#include <poll.h>
> +#include <sys/ioctl.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <linux/dm-ioctl.h>
> +#include <errno.h>
> +
> +#include "vector.h"
> +#include "structs.h"
> +#include "structs_vec.h"
> +#include "devmapper.h"
> +#include "debug.h"
> +#include "dmevents.h"
> +
> +#ifndef DM_DEV_ARM_POLL
> +#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD + 1,
> struct dm_ioctl)
> +#endif
> +
> +enum event_actions {
> + EVENT_NOTHING,
> + EVENT_REMOVE,
> + EVENT_UPDATE,
> +};
> +
> +struct dev_event {
> + char name[WWID_SIZE];
> + uint32_t evt_nr;
> + enum event_actions action;
> +};
> +
> +struct dmevent_waiter {
> + int fd;
> + struct vectors *vecs;
> + vector events;
> + pthread_mutex_t events_lock;
> +};
> +
> +static struct dmevent_waiter *waiter;
> +
> +int dmevent_poll_supported(void)
> +{
> + unsigned int minv[3] = {4, 37, 0};
> + unsigned int v[3];
> +
> + if (dm_drv_version(v))
> + return 0;
> +
> + if (VERSION_GE(v, minv))
> + return 1;
> + return 0;
> +}
> +
> +
> +int alloc_dmevent_waiter(struct vectors *vecs)
> +{
> + if (!vecs) {
> + condlog(0, "can't create waiter structure. invalid
> vectors");
> + goto fail;
> + }
> + waiter = (struct dmevent_waiter *)malloc(sizeof(struct
> dmevent_waiter));
> + if (!waiter) {
> + condlog(0, "failed to allocate waiter structure");
> + goto fail;
> + }
> + memset(waiter, 0, sizeof(struct dmevent_waiter));
> + waiter->events = vector_alloc();
> + if (!waiter->events) {
> + condlog(0, "failed to allocate waiter events
> vector");
> + goto fail_waiter;
> + }
> + waiter->fd = open("/dev/mapper/control", O_RDWR);
> + if (waiter->fd < 0) {
> + condlog(0, "failed to open /dev/mapper/control for
> waiter");
> + goto fail_events;
> + }
> + pthread_mutex_init(&waiter->events_lock, NULL);
> + waiter->vecs = vecs;
> +
> + return 0;
> +fail_events:
> + vector_free(waiter->events);
> +fail_waiter:
> + free(waiter);
> +fail:
> + waiter = NULL;
> + return -1;
> +}
Nitpick: conventionally, an "alloc"-type function would return the
pointer, and NULL on failure.
> +
> +void free_dmevent_waiter(void)
> +{
> + struct dev_event *dev_evt;
> + int i;
> +
> + if (!waiter)
> + return;
> + pthread_mutex_destroy(&waiter->events_lock);
> + close(waiter->fd);
> + vector_foreach_slot(waiter->events, dev_evt, i)
> + free(dev_evt);
> + vector_free(waiter->events);
> + free(waiter);
> + waiter = NULL;
> +}
Nitpick: Similarly, a "free" function typically takes the pointer to be
freed as argument.
> +
> +static int arm_dm_event_poll(int fd)
> +{
> + struct dm_ioctl dmi;
> + memset(&dmi, 0, sizeof(dmi));
> + dmi.version[0] = DM_VERSION_MAJOR;
> + dmi.version[1] = DM_VERSION_MINOR;
> + dmi.version[2] = DM_VERSION_PATCHLEVEL;
> + dmi.flags = 0x4;
What's the meaning of this flag? I couldn't find it in dm-ioctl.h
> + dmi.data_start = offsetof(struct dm_ioctl, data);
> + dmi.data_size = sizeof(dmi);
> + return ioctl(fd, DM_DEV_ARM_POLL, &dmi);
> +}
> +
> +/*
> + * As of version 4.37.0 device-mapper stores the event number in the
> + * dm_names structure after the name, when DM_DEVICE_LIST is called
> + */
> +static uint32_t dm_event_nr(struct dm_names *n)
> +{
> + return *(uint32_t *)(((uintptr_t)(strchr(n->name, 0) + 1) +
> 7) & ~7);
> +}
> +
> +static int dm_get_events(void)
> +{
> + struct dm_task *dmt;
> + struct dm_names *names;
> + struct dev_event *dev_evt;
> + int i;
> +
> + if (!(dmt = libmp_dm_task_create(DM_DEVICE_LIST)))
> + return -1;
> +
> + dm_task_no_open_count(dmt);
> +
> + if (!dm_task_run(dmt))
> + goto fail;
> +
> + if (!(names = dm_task_get_names(dmt)))
> + goto fail;
> +
> + pthread_mutex_lock(&waiter->events_lock);
> + vector_foreach_slot(waiter->events, dev_evt, i)
> + dev_evt->action = EVENT_REMOVE;
> + while (names->dev) {
> + uint32_t event_nr;
> +
> + if (!dm_is_mpath(names->name))
> + goto next;
> +
> + event_nr = dm_event_nr(names);
> + vector_foreach_slot(waiter->events, dev_evt, i) {
> + if (!strcmp(dev_evt->name, names->name)) {
> + if (event_nr != dev_evt->evt_nr) {
> + dev_evt->evt_nr = event_nr;
> + dev_evt->action =
> EVENT_UPDATE;
> + } else
> + dev_evt->action =
> EVENT_NOTHING;
> + break;
> + }
> + }
> +next:
> + if (!names->next)
> + break;
> + names = (void *)names + names->next;
> + }
> + pthread_mutex_unlock(&waiter->events_lock);
> + dm_task_destroy(dmt);
> + return 0;
> +
> +fail:
> + dm_task_destroy(dmt);
> + return -1;
> +}
> +
> +/* You must call update_multipath() after calling this function, to
> + * deal with any events that came in before the device was added */
> +int watch_dmevents(char *name)
> +{
> + int event_nr;
> + struct dev_event *dev_evt, *old_dev_evt;
> + int i;
> +
> + if (!dm_is_mpath(name)) {
> + condlog(0, "%s: not a multipath device. can't watch
> events",
> + name);
> + return -1;
> + }
> +
> + if ((event_nr = dm_geteventnr(name)) < 0)
> + return -1;
> +
> + dev_evt = (struct dev_event *)malloc(sizeof(struct
> dev_event));
> + if (!dev_evt) {
> + condlog(0, "%s: can't allocate event waiter
> structure", name);
> + return -1;
> + }
> +
> + strncpy(dev_evt->name, name, WWID_SIZE);
> + dev_evt->name[WWID_SIZE - 1] = 0;
Nitpick: It might be better to use strlcpy or snprintf here.
> + dev_evt->evt_nr = event_nr;
> + dev_evt->action = EVENT_NOTHING;
> +
> + pthread_mutex_lock(&waiter->events_lock);
> + vector_foreach_slot(waiter->events, old_dev_evt, i){
> + if (!strcmp(dev_evt->name, old_dev_evt->name)) {
> + /* caller will be updating this device */
> + old_dev_evt->evt_nr = event_nr;
> + old_dev_evt->action = EVENT_NOTHING;
> + pthread_mutex_unlock(&waiter->events_lock);
> + condlog(2, "%s: already waiting for events
> on device",
> + name);
> + free(dev_evt);
> + return 0;
> + }
> + }
> + if (!vector_alloc_slot(waiter->events)) {
> + pthread_mutex_unlock(&waiter->events_lock);
> + free(dev_evt);
> + return -1;
> + }
> + vector_set_slot(waiter->events, dev_evt);
> + pthread_mutex_unlock(&waiter->events_lock);
> + return 0;
> +}
> +
> +void unwatch_all_dmevents(void)
> +{
> + struct dev_event *dev_evt;
> + int i;
> +
> + pthread_mutex_lock(&waiter->events_lock);
> + vector_foreach_slot(waiter->events, dev_evt, i)
> + free(dev_evt);
> + vector_reset(waiter->events);
> + pthread_mutex_unlock(&waiter->events_lock);
> +}
> +
> +static void unwatch_dmevents(char *name)
> +{
> + struct dev_event *dev_evt;
> + int i;
> +
> + pthread_mutex_lock(&waiter->events_lock);
> + vector_foreach_slot(waiter->events, dev_evt, i) {
> + if (!strcmp(dev_evt->name, name)) {
> + vector_del_slot(waiter->events, i);
> + free(dev_evt);
> + break;
> + }
> + }
> + pthread_mutex_unlock(&waiter->events_lock);
> +}
> +
> +/*
> + * returns the reschedule delay
> + * negative means *stop*
> + */
> +
> +/* poll, arm, update, return */
> +static int dmevent_loop (void)
> +{
> + int r, i = 0;
> + struct pollfd pfd;
> + struct dev_event *dev_evt;
> +
> + pfd.fd = waiter->fd;
> + pfd.events = POLLIN;
> + r = poll(&pfd, 1, -1);
> + if (r <= 0) {
> + condlog(0, "failed polling for dm events: %s",
> strerror(errno));
> + /* sleep 1s and hope things get better */
> + return 1;
> + }
> +
> + if (arm_dm_event_poll(waiter->fd) != 0) {
> + condlog(0, "Cannot re-arm event polling: %s",
> strerror(errno));
> + /* sleep 1s and hope things get better */
> + return 1;
> + }
> +
> + if (dm_get_events() != 0) {
> + condlog(0, "failed getting dm events: %s",
> strerror(errno));
> + /* sleep 1s and hope things get better */
> + return 1;
> + }
> +
> + /*
> + * upon event ...
> + */
> +
> + while (1) {
> + int done = 1;
> + struct dev_event curr_dev;
> + struct multipath *mpp;
> +
> + pthread_mutex_lock(&waiter->events_lock);
> + vector_foreach_slot(waiter->events, dev_evt, i) {
> + if (dev_evt->action != EVENT_NOTHING) {
> + curr_dev = *dev_evt;
> + if (dev_evt->action == EVENT_REMOVE)
> {
> + vector_del_slot(waiter-
> >events, i);
> + free(dev_evt);
> + } else
> + dev_evt->action =
> EVENT_NOTHING;
> + done = 0;
> + break;
> + }
> + }
> + pthread_mutex_unlock(&waiter->events_lock);
> + if (done)
> + return 1;
> +
> + condlog(3, "%s: devmap event #%i", curr_dev.name,
> + curr_dev.evt_nr);
> +
> + /*
> + * event might be :
> + *
> + * 1) a table reload, which means our mpp structure
> is
> + * obsolete : refresh it through
> update_multipath()
> + * 2) a path failed by DM : mark as such through
> + * update_multipath()
> + * 3) map has gone away : stop the thread.
> + * 4) a path reinstate : nothing to do
> + * 5) a switch group : nothing to do
> + */
> + pthread_cleanup_push(cleanup_lock, &waiter->vecs-
> >lock);
> + lock(&waiter->vecs->lock);
> + pthread_testcancel();
> + r = 0;
> + if (curr_dev.action == EVENT_REMOVE) {
> + mpp = find_mp_by_alias(waiter->vecs->mpvec,
> + curr_dev.name);
> + if (mpp)
> + remove_map(mpp, waiter->vecs, 1);
> + } else
> + r = update_multipath(waiter->vecs,
> curr_dev.name, 1);
> + lock_cleanup_pop(&waiter->vecs->lock);
> +
> + if (r) {
> + condlog(2, "%s: stopped watching dmevents",
> + curr_dev.name);
> + unwatch_dmevents(curr_dev.name);
> + }
> + }
> + condlog(0, "dmevent waiter thread unexpectedly quit");
> + return -1; /* never reach there */
> +}
> +
> +static void rcu_unregister(void *param)
> +{
> + rcu_unregister_thread();
> +}
> +
> +void *wait_dmevents (void *unused)
> +{
> + int r;
> +
> +
> + if (!waiter) {
> + condlog(0, "dmevents waiter not intialized");
> + return NULL;
> + }
> +
> + pthread_cleanup_push(rcu_unregister, NULL);
> + rcu_register_thread();
> + mlockall(MCL_CURRENT | MCL_FUTURE);
> +
> + while (1) {
> + r = dmevent_loop();
> +
> + if (r < 0)
> + break;
> +
> + sleep(r);
> + }
> +
> + pthread_cleanup_pop(1);
> + return NULL;
> +}
> diff --git a/multipathd/dmevents.h b/multipathd/dmevents.h
> new file mode 100644
> index 0000000..569e855
> --- /dev/null
> +++ b/multipathd/dmevents.h
> @@ -0,0 +1,13 @@
> +#ifndef _DMEVENTS_H
> +#define _DMEVENTS_H
> +
> +#include "structs_vec.h"
> +
> +int dmevent_poll_supported(void);
> +int alloc_dmevent_waiter(struct vectors *vecs);
> +void free_dmevent_waiter(void);
> +int watch_dmevents(char *name);
> +void unwatch_all_dmevents(void);
> +void *wait_dmevents (void *unused);
> +
> +#endif /* _DMEVENTS_H */
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 2963bde..6dabf2c 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -82,6 +82,7 @@ static int use_watchdog;
> #include "cli_handlers.h"
> #include "lock.h"
> #include "waiter.h"
> +#include "dmevents.h"
> #include "io_err_stat.h"
> #include "wwids.h"
> #include "../third-party/valgrind/drd.h"
> @@ -108,6 +109,7 @@ int uxsock_timeout;
> int verbosity;
> int bindings_read_only;
> int ignore_new_devs;
> +int poll_dmevents = 1;
> enum daemon_status running_state = DAEMON_INIT;
> pid_t daemon_pid;
> pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
> @@ -288,11 +290,23 @@ switch_pathgroup (struct multipath * mpp)
> mpp->alias, mpp->bestpg);
> }
>
> +static int
> +wait_for_events(struct multipath *mpp, struct vectors *vecs)
> +{
> + if (poll_dmevents)
> + return watch_dmevents(mpp->alias);
> + else
> + return start_waiter_thread(mpp, vecs);
> +}
> +
> static void
> remove_map_and_stop_waiter(struct multipath *mpp, struct vectors
> *vecs,
> int purge_vec)
> {
> - stop_waiter_thread(mpp, vecs);
> + /* devices are automatically removed by the dmevent polling
> code,
> + * so they don't need to be manually removed here */
> + if (!poll_dmevents)
> + stop_waiter_thread(mpp, vecs);
> remove_map(mpp, vecs, purge_vec);
> }
>
> @@ -305,8 +319,12 @@ remove_maps_and_stop_waiters(struct vectors
> *vecs)
> if (!vecs)
> return;
>
> - vector_foreach_slot(vecs->mpvec, mpp, i)
> - stop_waiter_thread(mpp, vecs);
> + if (!poll_dmevents) {
> + vector_foreach_slot(vecs->mpvec, mpp, i)
> + stop_waiter_thread(mpp, vecs);
> + }
> + else
> + unwatch_all_dmevents();
>
> remove_maps(vecs);
> }
> @@ -351,7 +369,7 @@ retry:
> dm_lib_release();
>
> fail:
> - if (new_map && (retries < 0 || start_waiter_thread(mpp,
> vecs))) {
> + if (new_map && (retries < 0 || wait_for_events(mpp, vecs)))
> {
> condlog(0, "%s: failed to create new map", mpp-
> >alias);
> remove_map(mpp, vecs, 1);
> return 1;
> @@ -870,7 +888,7 @@ retry:
>
> if ((mpp->action == ACT_CREATE ||
> (mpp->action == ACT_NOTHING && start_waiter && !mpp-
> >waiter)) &&
> - start_waiter_thread(mpp, vecs))
> + wait_for_events(mpp, vecs))
> goto fail_map;
>
> /*
> @@ -2173,7 +2191,7 @@ configure (struct vectors * vecs)
> * start dm event waiter threads for these new maps
> */
> vector_foreach_slot(vecs->mpvec, mpp, i) {
> - if (start_waiter_thread(mpp, vecs)) {
> + if (wait_for_events(mpp, vecs)) {
> remove_map(mpp, vecs, 1);
> i--;
> continue;
> @@ -2414,7 +2432,7 @@ set_oom_adj (void)
> static int
> child (void * param)
> {
> - pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
> + pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr,
> dmevent_thr;
> pthread_attr_t log_attr, misc_attr, uevent_attr;
> struct vectors * vecs;
> struct multipath * mpp;
> @@ -2476,6 +2494,8 @@ child (void * param)
> goto failed;
> }
>
> + if (poll_dmevents)
> + poll_dmevents = dmevent_poll_supported();
> setlogmask(LOG_UPTO(conf->verbosity + 3));
>
> envp = getenv("LimitNOFILE");
> @@ -2542,6 +2562,19 @@ child (void * param)
>
> init_path_check_interval(vecs);
>
> + if (poll_dmevents) {
> + if (alloc_dmevent_waiter(vecs)) {
> + condlog(0, "failed to allocate dmevents
> waiter info");
> + goto failed;
> + }
> + if ((rc = pthread_create(&dmevent_thr, &misc_attr,
> + wait_dmevents, NULL))) {
> + condlog(0, "failed to create dmevent waiter
> thread: %d",
> + rc);
> + goto failed;
> + }
> + }
> +
> /*
> * Start uevent listener early to catch events
> */
> @@ -2615,11 +2648,15 @@ child (void * param)
> pthread_cancel(uevent_thr);
> pthread_cancel(uxlsnr_thr);
> pthread_cancel(uevq_thr);
> + if (poll_dmevents)
> + pthread_cancel(dmevent_thr);
>
> pthread_join(check_thr, NULL);
> pthread_join(uevent_thr, NULL);
> pthread_join(uxlsnr_thr, NULL);
> pthread_join(uevq_thr, NULL);
> + if (poll_dmevents)
> + pthread_join(dmevent_thr, NULL);
>
> stop_io_err_stat_thread();
>
> @@ -2634,6 +2671,8 @@ child (void * param)
>
> cleanup_checkers();
> cleanup_prio();
> + if (poll_dmevents)
> + free_dmevent_waiter();
>
> dm_lib_release();
> dm_lib_exit();
> @@ -2765,7 +2804,7 @@ main (int argc, char *argv[])
> udev = udev_new();
> libmp_udev_set_sync_support(0);
>
> - while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
> + while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) {
> switch(arg) {
> case 'd':
> foreground = 1;
> @@ -2799,6 +2838,9 @@ main (int argc, char *argv[])
> case 'n':
> ignore_new_devs = 1;
> break;
> + case 'w':
> + poll_dmevents = 0;
> + break;
> default:
> fprintf(stderr, "Invalid argument '-%c'\n",
> optopt);
--
Dr. Martin Wilck <mwilck at suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
More information about the dm-devel
mailing list