[libvirt] [PATCH 10/10] Add a plugin for the 'sanlock' project

Daniel P. Berrange berrange at redhat.com
Fri May 27 09:42:59 UTC 2011


On Fri, May 27, 2011 at 05:37:51PM +0800, Daniel Veillard wrote:
> On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote:
> > Sanlock is a project that implements a disk-paxos locking
> > algorithm. This is suitable for cluster deployments with
> > shared storage.
> > 
> > * src/Makefile.am: Add dlopen plugin for sanlock
> > * src/locking/lock_driver_sanlock.c: Sanlock driver
> > ---
> >  libvirt.spec.in                   |   11 +
> >  po/POTFILES.in                    |    1 +
> >  src/Makefile.am                   |   12 +
> >  src/libvirt_private.syms          |    1 +
> >  src/locking/lock_driver_sanlock.c |  413 +++++++++++++++++++++++++++++++++++++
> >  5 files changed, 438 insertions(+), 0 deletions(-)
> >  create mode 100644 src/locking/lock_driver_sanlock.c
> > 
> > diff --git a/libvirt.spec.in b/libvirt.spec.in
> > index e85f68f..73213ea 100644
> > --- a/libvirt.spec.in
> > +++ b/libvirt.spec.in
> > @@ -77,6 +77,7 @@
> >  %define with_dtrace        0%{!?_without_dtrace:0}
> >  %define with_cgconfig      0%{!?_without_cgconfig:0}
> >  %define with_referential   0%{!?_without_referential:1}
> > +%define with_sanlock       0%{!?_without_sanlock:0}
> >  
> >  # Non-server/HV driver defaults which are always enabled
> >  %define with_python        0%{!?_without_python:1}
> > @@ -180,6 +181,7 @@
> >  
> >  %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6
> >  %define with_dtrace 1
> > +%define with_sanlock 1
> >  %endif
> >  
> >  # Pull in cgroups config system
> > @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel
> >  %if %{with_referential}
> >  BuildRequires: referential-devel
> >  %endif
> > +%if %{with_sanlock}
> > +BuildRequires: sanlock-devel
> > +%endif
> 
>   Hum ... weird
> 
>   [root at paphio ~]# yum install sanlock-devel
>   ..
>   No package sanlock-devel available.
>   Error: Nothing to do
>   [root at paphio ~]# cat /etc/fedora-release 
>   Fedora release 14 (Laughlin)
>   [root at paphio ~]# 
> 
> are you sure about the dep ?
> 
> >  %if %{with_storage_fs}
> >  # For mount/umount in FS driver
> > @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
> >  rm -f $RPM_BUILD_ROOT%{_libdir}/*.a
> >  rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la
> >  rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a
> > +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la
> > +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a
> >  
> >  %if %{with_network}
> >  install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/
> > @@ -1004,6 +1011,10 @@ fi
> >  %attr(0755, root, root) %{_libexecdir}/libvirt_lxc
> >  %endif
> >  
> > +%if %{with_sanlock}
> > +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so
> > +%endif
> > +
> >  %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper
> >  %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper
> >  %attr(0755, root, root) %{_sbindir}/libvirtd
> > diff --git a/po/POTFILES.in b/po/POTFILES.in
> > index 9c3d287..c3b45f9 100644
> > --- a/po/POTFILES.in
> > +++ b/po/POTFILES.in
> > @@ -31,6 +31,7 @@ src/fdstream.c
> >  src/interface/netcf_driver.c
> >  src/internal.h
> >  src/libvirt.c
> > +src/locking/lock_driver_sanlock.c
> >  src/locking/lock_manager.c
> >  src/lxc/lxc_container.c
> >  src/lxc/lxc_conf.c
> > diff --git a/src/Makefile.am b/src/Makefile.am
> > index 1e5a72e..edf017d 100644
> > --- a/src/Makefile.am
> > +++ b/src/Makefile.am
> > @@ -99,6 +99,9 @@ DRIVER_SOURCES =						\
> >  		locking/lock_driver_nop.h locking/lock_driver_nop.c \
> >  		locking/domain_lock.h locking/domain_lock.c
> >  
> > +LOCK_DRIVER_SANLOCK_SOURCES = \
> > +		locking/lock_driver_sanlock.c
> > +
> >  
> >  # XML configuration format handling sources
> >  # Domain driver generic impl APIs
> > @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS)
> >  libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD)
> >  EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE)
> >  
> > +
> > +lockdriverdir = $(libdir)/libvirt/lock-driver
> > +lockdriver_LTLIBRARIES = sanlock.la
> > +
> > +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES)
> > +sanlock_la_CFLAGS = $(AM_CLFAGS)
> > +sanlock_la_LDFLAGS = -module -avoid-version
> > +sanlock_la_LIBADD = -lsanlock
> > +
> >  libexec_PROGRAMS =
> >  
> >  if WITH_LIBVIRTD
> > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> > index a3fe2f1..e61ea13 100644
> > --- a/src/libvirt_private.syms
> > +++ b/src/libvirt_private.syms
> > @@ -650,6 +650,7 @@ virVMOperationTypeToString;
> >  # memory.h
> >  virAlloc;
> >  virAllocN;
> > +virAllocVar;
> >  virExpandN;
> >  virFree;
> >  virReallocN;
> > diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c
> > new file mode 100644
> > index 0000000..6a31fdf
> > --- /dev/null
> > +++ b/src/locking/lock_driver_sanlock.c
> > @@ -0,0 +1,413 @@
> > +/*
> > + * lock_driver_sanlock.c: A lock driver for Sanlock
> > + *
> > + * Copyright (C) 2010-2011 Red Hat, Inc.
> > + *
> > + * This library is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * This library is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with this library; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
> 
> 
>   * Author: Daniel P. Berrange <berrange at redhat.com>
> 
> > + */
> > +
> > +#include <config.h>
> > +
> > +#include <stdlib.h>
> > +#include <stdint.h>
> > +#include <unistd.h>
> > +#include <string.h>
> > +#include <stdio.h>
> > +#include <errno.h>
> > +#include <sys/types.h>
> > +
> > +#include <sanlock.h>
> > +#include <sanlock_resource.h>
> > +
> > +#include "lock_driver.h"
> > +#include "logging.h"
> > +#include "virterror_internal.h"
> > +#include "memory.h"
> > +#include "util.h"
> > +#include "files.h"
> > +
> > +#define VIR_FROM_THIS VIR_FROM_LOCKING
> > +
> > +#define virLockError(code, ...)                                     \
> > +    virReportErrorHelper(VIR_FROM_THIS, code, __FILE__,             \
> > +                         __FUNCTION__, __LINE__, __VA_ARGS__)
> > +
> > +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate;
> > +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr;
> > +
> > +struct _virLockManagerSanlockPrivate {
> > +    char vm_name[SANLK_NAME_LEN];
> > +    char vm_uuid[VIR_UUID_BUFLEN];
> > +    unsigned int vm_id;
> > +    unsigned int vm_pid;
> > +    unsigned int flags;
> > +    bool hasRWDisks;
> > +    int res_count;
> > +    struct sanlk_resource *res_args[SANLK_MAX_RESOURCES];
> > +};
> > +
> > +/*
> > + * sanlock plugin for the libvirt virLockManager API
> > + */
> > +
> > +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED,
> > +                                     unsigned int flags)
> > +{
> > +    virCheckFlags(0, -1);
> > +    return 0;
> > +}
> > +
> > +static int virLockManagerSanlockDeinit(void)
> > +{
> > +    virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
> > +                 _("Unloading sanlock plugin is forbidden"));
> > +    return -1;
> > +}
> > +
> > +static int virLockManagerSanlockNew(virLockManagerPtr lock,
> > +                                    unsigned int type,
> > +                                    size_t nparams,
> > +                                    virLockManagerParamPtr params,
> > +                                    unsigned int flags)
> > +{
> > +    virLockManagerParamPtr param;
> > +    virLockManagerSanlockPrivatePtr priv;
> > +    int i;
> > +
> > +    virCheckFlags(0, -1);
> > +
> > +    if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) {
> > +        virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                     _("Unsupported object type %d"), type);
> > +        return -1;
> > +    }
> > +
> > +    if (VIR_ALLOC(priv) < 0) {
> > +        virReportOOMError();
> > +        return -1;
> > +    }
> > +
> > +    priv->flags = flags;
> > +
> > +    for (i = 0; i < nparams; i++) {
> > +        param = &params[i];
> > +
> > +        if (STREQ(param->key, "uuid")) {
> > +            memcpy(priv->vm_uuid, param->value.uuid, 16);
> > +        } else if (STREQ(param->key, "name")) {
> > +            if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) {
> > +                virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                             _("Domain name '%s' exceeded %d characters"),
> > +                             param->value.str, SANLK_NAME_LEN);
> > +                goto error;
> > +            }
> > +        } else if (STREQ(param->key, "pid")) {
> > +            priv->vm_pid = param->value.ui;
> > +        } else if (STREQ(param->key, "id")) {
> > +            priv->vm_id = param->value.ui;
> > +        }
> > +    }
> > +
> > +    lock->privateData = priv;
> > +    return 0;
> > +
> > +error:
> > +    VIR_FREE(priv);
> > +    return -1;
> > +}
> > +
> > +static void virLockManagerSanlockFree(virLockManagerPtr lock)
> > +{
> > +    virLockManagerSanlockPrivatePtr priv = lock->privateData;
> > +    int i;
> > +
> > +    if (!priv)
> > +        return;
> > +
> > +    for (i = 0; i < priv->res_count; i++)
> > +        VIR_FREE(priv->res_args[i]);
> > +    VIR_FREE(priv);
> > +    lock->privateData = NULL;
> > +}
> > +
> > +static int virLockManagerSanlockAddResource(virLockManagerPtr lock,
> > +                                            unsigned int type,
> > +                                            const char *name,
> > +                                            size_t nparams,
> > +                                            virLockManagerParamPtr params,
> > +                                            unsigned int flags)
> > +{
> > +    virLockManagerSanlockPrivatePtr priv = lock->privateData;
> > +    struct sanlk_resource *res;
> > +    int i;
> > +
> > +    virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY |
> > +                  VIR_LOCK_MANAGER_RESOURCE_SHARED, -1);
> > +
> > +    if (priv->res_count == SANLK_MAX_RESOURCES) {
> > +        virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                     _("Too many resources %d for object"),
> > +                     SANLK_MAX_RESOURCES);
> > +        return -1;
> > +    }
> > +
> > +    if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) {
> > +        if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED |
> > +                       VIR_LOCK_MANAGER_RESOURCE_READONLY)))
> > +            priv->hasRWDisks = true;
> > +        return 0;
> > +    }
> > +
> > +    if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE)
> > +        return 0;
> > +
> > +    if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) {
> > +        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> > +                     _("Readonly leases are not supported"));
> > +        return -1;
> > +    }
> > +    if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) {
> > +        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> > +                     _("Sharable leases are not supported"));
> > +        return -1;
> > +    }
> > +
> > +    if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) {
> > +        virReportOOMError();
> > +        return -1;
> > +    }
> > +
> > +    res->num_disks = 1;
> > +    if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) {
> > +        virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                     _("Resource name '%s' exceeds %d characters"),
> > +                     name, SANLK_NAME_LEN);
> > +        goto error;
> > +    }
> > +
> > +    for (i = 0; i < nparams; i++) {
> > +        if (STREQ(params[i].key, "path")) {
> > +            if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) {
> > +                virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                             _("Lease path '%s' exceeds %d characters"),
> > +                             params[i].value.str, SANLK_PATH_LEN);
> > +                goto error;
> > +            }
> > +        } else if (STREQ(params[i].key, "offset")) {
> > +            res->disks[0].offset = params[i].value.ul;
> > +        } else if (STREQ(params[i].key, "lockspace")) {
> > +            if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) {
> > +                virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                             _("Resource lockspace '%s' exceeds %d characters"),
> > +                             params[i].value.str, SANLK_NAME_LEN);
> > +                goto error;
> > +            }
> > +        }
> > +    }
> > +
> > +    priv->res_args[priv->res_count] = res;
> > +    priv->res_count++;
> > +    return 0;
> > +
> > +error:
> > +    VIR_FREE(res);
> > +    return -1;
> > +}
> > +
> > +static int virLockManagerSanlockAcquire(virLockManagerPtr lock,
> > +                                        const char *state,
> > +                                        unsigned int flags)
> > +{
> > +    virLockManagerSanlockPrivatePtr priv = lock->privateData;
> > +    struct sanlk_options *opt;
> > +    struct sanlk_resource **res_args;
> > +    int res_count;
> > +    bool res_free = false;
> > +    int sock = -1;
> > +    int rv;
> > +    int i;
> > +
> > +    virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
> > +
> > +    if (priv->res_count == 0 &&
> > +        priv->hasRWDisks) {
> > +        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> > +                     _("Read/write, exclusive access, disks were present, but no leases specified"));
> > +        return -1;
> > +    }
> > +
> > +    if (VIR_ALLOC(opt) < 0) {
> > +        virReportOOMError();
> > +        return -1;
> > +    }
> > +
> > +    if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) {
> > +        virLockError(VIR_ERR_INTERNAL_ERROR,
> > +                     _("Domain name '%s' exceeded %d characters"),
> > +                     priv->vm_name, SANLK_NAME_LEN);
> > +        goto error;
> > +    }
> > +
> > +    if (state && STRNEQ(state, "") && 0) {
> > +        if ((rv = sanlock_state_to_args((char *)state,
> > +                                        &res_count,
> > +                                        &res_args)) < 0) {
> > +            virReportSystemError(-rv,
> > +                                 _("Unable to parse lock state %s"),
> > +                                 state);
> > +            goto error;
> > +        }
> > +        res_free = true;
> > +    } else {
> > +        res_args = priv->res_args;
> > +        res_count = priv->res_count;
> > +    }
> > +
> > +    VIR_DEBUG("Register sanlock %d", flags);
> > +    /* We only initialize 'sock' if we are in the real
> > +     * child process and we need it to be inherited
> > +     *
> > +     * If sock==-1, then sanlock auto-open/closes a
> > +     * temporary sock
> > +     */
> > +    if (priv->vm_pid == getpid() &&
> > +        (sock = sanlock_register()) < 0) {
> > +        virReportSystemError(-sock, "%s",
> > +                             _("Failed to open socket to sanlock daemon"));
> > +        goto error;
> > +    }
> > +
> > +    if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) {
> > +        VIR_DEBUG("Acquiring object %u", priv->res_count);
> > +        if ((rv = sanlock_acquire(sock, priv->vm_pid, 0,
> > +                                  priv->res_count, priv->res_args,
> > +                                  opt)) < 0) {
> 
>   Hum ...
> 
> > +#if 1
> > +            virReportSystemError(-rv, "%s",
> > +                                 _("Failed to acquire lock"));
> > +#else
> > +            virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
> > +                         _("Failed to acquire lock"));
> > +#endif
> 
>   this probably is worth some kind of comment or cleaned up
> 
> > +            goto error;
> > +        }
> > +    }
> > +
> > +    VIR_FREE(opt);
> > +
> > +    /*
> > +     * We are *intentionally* "leaking" sock file descriptor
> > +     * because we want it to be inherited by QEMU. When the
> > +     * sock FD finally closes upon QEMU exit (or crash) then
> > +     * sanlock will notice EOF and release the lock
> > +     */
> > +    if (sock != -1 &&
> > +        virSetInherit(sock, true) < 0)
> > +        goto error;
> > +
> > +    VIR_DEBUG("Acquire completed fd=%d", sock);
> > +
> > +    if (res_free) {
> > +        for (i = 0 ; i < res_count ; i++) {
> > +            VIR_FREE(res_args[i]);
> > +        }
> > +        VIR_FREE(res_args);
> > +    }
> > +
> > +    return 0;
> > +
> > +error:
> > +    if (res_free) {
> > +        for (i = 0 ; i < res_count ; i++) {
> > +            VIR_FREE(res_args[i]);
> > +        }
> > +        VIR_FREE(res_args);
> > +    }
> > +    VIR_FREE(opt);
> > +    VIR_FORCE_CLOSE(sock);
> > +    return -1;
> > +}
> > +
> > +
> > +static int virLockManagerSanlockRelease(virLockManagerPtr lock,
> > +                                        char **state,
> > +                                        unsigned int flags)
> > +{
> > +    virLockManagerSanlockPrivatePtr priv = lock->privateData;
> > +    int res_count;
> > +    int rv;
> > +
> > +    virCheckFlags(0, -1);
> > +
> > +    if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
> > +        virReportSystemError(-rv, "%s",
> > +                             _("Failed to release lock"));
> > +        return -1;
> > +    }
> > +
> > +    if (STREQ(*state, ""))
> > +        VIR_FREE(*state);
> > +
> > +    if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) {
> > +        virReportSystemError(-rv, "%s",
> > +                             _("Failed to release lock"));
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int virLockManagerSanlockInquire(virLockManagerPtr lock,
> > +                                        char **state,
> > +                                        unsigned int flags)
> > +{
> > +    virLockManagerSanlockPrivatePtr priv = lock->privateData;
> > +    int rv, res_count;
> > +
> > +    virCheckFlags(0, -1);
> > +
> > +    VIR_DEBUG("pid=%d", priv->vm_pid);
> > +
> > +    if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
> > +        virReportSystemError(-rv, "%s",
> > +                             _("Failed to inquire lock"));
> > +        return -1;
> > +    }
> > +
> > +    if (STREQ(*state, ""))
> > +        VIR_FREE(*state);
> > +
> > +    return 0;
> > +}
> > +
> > +virLockDriver virLockDriverImpl =
> > +{
> > +    .version = VIR_LOCK_MANAGER_VERSION,
> > +
> > +    .flags = VIR_LOCK_MANAGER_USES_STATE,
> > +
> > +    .drvInit = virLockManagerSanlockInit,
> > +    .drvDeinit = virLockManagerSanlockDeinit,
> > +
> > +    .drvNew = virLockManagerSanlockNew,
> > +    .drvFree = virLockManagerSanlockFree,
> > +
> > +    .drvAddResource = virLockManagerSanlockAddResource,
> > +
> > +    .drvAcquire = virLockManagerSanlockAcquire,
> > +    .drvRelease = virLockManagerSanlockRelease,
> > +    .drvInquire = virLockManagerSanlockInquire,
> > +};
> 
>   I'm a bit puzzled by the new dependancy, and this might prevent me
>   from building rc1 of 0.9.2 if pushed as-is,
> 
>   but ACK in principle.

This is last weeks v4 posting. There is a v5 I posted this week which
addresses the things you mention here.

Regards,
Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|




More information about the libvir-list mailing list