[libvirt] [PATCH 10/10] Add a plugin for the 'sanlock' project

Daniel P. Berrange berrange at redhat.com
Thu May 19 11:24:25 UTC 2011


Sanlock is a project that implements a disk-paxos locking
algorithm. This is suitable for cluster deployments with
shared storage.

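* libvirt.spec.in: Add sanlock-devel build dependency and package
  the sanlock lock driver plugin
* po/POTFILES.in: Add src/locking/lock_driver_sanlock.c
* src/Makefile.am: Add dlopen plugin for sanlock
* src/libvirt_private.syms: Export virAllocVar
* src/locking/lock_driver_sanlock.c: Sanlock driver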
---
 libvirt.spec.in                   |   11 +
 po/POTFILES.in                    |    1 +
 src/Makefile.am                   |   12 +
 src/libvirt_private.syms          |    1 +
 src/locking/lock_driver_sanlock.c |  413 +++++++++++++++++++++++++++++++++++++
 5 files changed, 438 insertions(+), 0 deletions(-)
 create mode 100644 src/locking/lock_driver_sanlock.c

diff --git a/libvirt.spec.in b/libvirt.spec.in
index e85f68f..73213ea 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -77,6 +77,7 @@
 %define with_dtrace        0%{!?_without_dtrace:0}
 %define with_cgconfig      0%{!?_without_cgconfig:0}
 %define with_referential   0%{!?_without_referential:1}
+%define with_sanlock       0%{!?_without_sanlock:0}
 
 # Non-server/HV driver defaults which are always enabled
 %define with_python        0%{!?_without_python:1}
@@ -180,6 +181,7 @@
 
 %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6
 %define with_dtrace 1
+%define with_sanlock 1
 %endif
 
 # Pull in cgroups config system
@@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel
 %if %{with_referential}
 BuildRequires: referential-devel
 %endif
+%if %{with_sanlock}
+BuildRequires: sanlock-devel
+%endif
 
 %if %{with_storage_fs}
 # For mount/umount in FS driver
@@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
 rm -f $RPM_BUILD_ROOT%{_libdir}/*.a
 rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la
 rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a
+rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la
+rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a
 
 %if %{with_network}
 install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/
@@ -1004,6 +1011,10 @@ fi
 %attr(0755, root, root) %{_libexecdir}/libvirt_lxc
 %endif
 
+%if %{with_sanlock}
+%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so
+%endif
+
 %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper
 %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper
 %attr(0755, root, root) %{_sbindir}/libvirtd
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 9c3d287..c3b45f9 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -31,6 +31,7 @@ src/fdstream.c
 src/interface/netcf_driver.c
 src/internal.h
 src/libvirt.c
+src/locking/lock_driver_sanlock.c
 src/locking/lock_manager.c
 src/lxc/lxc_container.c
 src/lxc/lxc_conf.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e5a72e..edf017d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -99,6 +99,9 @@ DRIVER_SOURCES =						\
 		locking/lock_driver_nop.h locking/lock_driver_nop.c \
 		locking/domain_lock.h locking/domain_lock.c
 
+LOCK_DRIVER_SANLOCK_SOURCES = \
+		locking/lock_driver_sanlock.c
+
 
 # XML configuration format handling sources
 # Domain driver generic impl APIs
@@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS)
 libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD)
 EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE)
 
+# sanlock lock-driver plugin: a libtool module installed into lockdriverdir
+lockdriverdir = $(libdir)/libvirt/lock-driver
+lockdriver_LTLIBRARIES = sanlock.la
+
+sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES)
+sanlock_la_CFLAGS = $(AM_CFLAGS)
+sanlock_la_LDFLAGS = -module -avoid-version
+sanlock_la_LIBADD = -lsanlock
+
 libexec_PROGRAMS =
 
 if WITH_LIBVIRTD
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index a3fe2f1..e61ea13 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -650,6 +650,7 @@ virVMOperationTypeToString;
 # memory.h
 virAlloc;
 virAllocN;
+virAllocVar;
 virExpandN;
 virFree;
 virReallocN;
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c
new file mode 100644
index 0000000..6a31fdf
--- /dev/null
+++ b/src/locking/lock_driver_sanlock.c
@@ -0,0 +1,413 @@
+/*
+ * lock_driver_sanlock.c: A lock driver for Sanlock
+ *
+ * Copyright (C) 2010-2011 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include <sanlock.h>
+#include <sanlock_resource.h>
+
+#include "lock_driver.h"
+#include "logging.h"
+#include "virterror_internal.h"
+#include "memory.h"
+#include "util.h"
+#include "files.h"
+
+#define VIR_FROM_THIS VIR_FROM_LOCKING
+/* Report a locking error tagged with the call site's file, function and line. */
+#define virLockError(code, ...)                                     \
+    virReportErrorHelper(VIR_FROM_THIS, code, __FILE__,             \
+                         __FUNCTION__, __LINE__, __VA_ARGS__)
+
+typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate;
+typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr;
+/* Per-lock-object state this driver keeps in virLockManager->privateData. */
+struct _virLockManagerSanlockPrivate {
+    char vm_name[SANLK_NAME_LEN];   /* "name" parameter; copied into the sanlock owner name */
+    char vm_uuid[VIR_UUID_BUFLEN];  /* "uuid" parameter, raw bytes */
+    unsigned int vm_id;             /* "id" parameter */
+    unsigned int vm_pid;            /* "pid" parameter; pid handed to the sanlock calls */
+    unsigned int flags;             /* flags given when the object was created */
+    bool hasRWDisks;                /* set once a disk that is neither shared nor read-only is added */
+    int res_count;                  /* number of valid entries in res_args */
+    struct sanlk_resource *res_args[SANLK_MAX_RESOURCES];  /* leases queued by AddResource */
+};
+
+/*
+ * sanlock plugin for the libvirt virLockManager API
+ */
+
+static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED,
+                                     unsigned int flags)
+{
+    virCheckFlags(0, -1);   /* the supported-flags mask is empty */
+    return 0;               /* no other set-up is performed here */
+}
+
+static int virLockManagerSanlockDeinit(void)   /* installed as .drvDeinit */
+{
+    virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unloading sanlock plugin is forbidden"));
+    return -1;   /* unconditional: this hook never reports success */
+}
+
+/*
+ * Allocate the per-object private state and fill it from the "uuid", "name",
+ * "pid" and "id" parameters (other keys are ignored). Domains only; 0 or -1.
+ */
+static int virLockManagerSanlockNew(virLockManagerPtr lock,
+                                    unsigned int type,
+                                    size_t nparams,
+                                    virLockManagerParamPtr params,
+                                    unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv;
+    size_t i;
+
+    virCheckFlags(0, -1);
+
+    if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Unsupported object type %u"), type);
+        return -1;
+    }
+
+    if (VIR_ALLOC(priv) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+    priv->flags = flags;
+
+    for (i = 0; i < nparams; i++) {
+        if (STREQ(params[i].key, "uuid")) {
+            memcpy(priv->vm_uuid, params[i].value.uuid, VIR_UUID_BUFLEN);
+        } else if (STREQ(params[i].key, "name")) {
+            if (!virStrcpy(priv->vm_name, params[i].value.str, SANLK_NAME_LEN)) {
+                virLockError(VIR_ERR_INTERNAL_ERROR,
+                             _("Domain name '%s' exceeded %d characters"),
+                             params[i].value.str, SANLK_NAME_LEN);
+                goto error;
+            }
+        } else if (STREQ(params[i].key, "pid")) {
+            priv->vm_pid = params[i].value.ui;
+        } else if (STREQ(params[i].key, "id")) {
+            priv->vm_id = params[i].value.ui;
+        }
+    }
+
+    lock->privateData = priv;
+    return 0;
+
+error:
+    VIR_FREE(priv);
+    return -1;
+}
+
+/* Free every queued lease descriptor, then the private state itself. */
+static void virLockManagerSanlockFree(virLockManagerPtr lock)
+{
+    virLockManagerSanlockPrivatePtr state = lock->privateData;
+    int idx;
+
+    if (state != NULL) {
+        for (idx = state->res_count - 1; idx >= 0; idx--)
+            VIR_FREE(state->res_args[idx]);
+        VIR_FREE(state);
+        lock->privateData = NULL;
+    }
+}
+
+/*
+ * Attach one resource to the lock object: a disk that is neither shared nor
+ * read-only only sets hasRWDisks, a lease is copied and queued in res_args.
+ */
+static int virLockManagerSanlockAddResource(virLockManagerPtr lock,
+                                            unsigned int type,
+                                            const char *name,
+                                            size_t nparams,
+                                            virLockManagerParamPtr params,
+                                            unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    struct sanlk_resource *lease;
+    int idx;
+
+    virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY |
+                  VIR_LOCK_MANAGER_RESOURCE_SHARED, -1);
+
+    if (priv->res_count == SANLK_MAX_RESOURCES) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Too many resources %d for object"), SANLK_MAX_RESOURCES);
+        return -1;
+    }
+
+    if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK &&
+        (flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED |
+                  VIR_LOCK_MANAGER_RESOURCE_READONLY)) == 0)
+        priv->hasRWDisks = true;
+    if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE)
+        return 0;
+
+    if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) {
+        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                     _("Readonly leases are not supported"));
+        return -1;
+    }
+    if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) {
+        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                     _("Sharable leases are not supported"));
+        return -1;
+    }
+
+    if (VIR_ALLOC_VAR(lease, struct sanlk_disk, 1) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+    lease->num_disks = 1;
+
+    if (!virStrcpy(lease->name, name, SANLK_NAME_LEN)) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Resource name '%s' exceeds %d characters"),
+                     name, SANLK_NAME_LEN);
+        goto error;
+    }
+
+    for (idx = 0; idx < nparams; idx++) {
+        virLockManagerParamPtr p = &params[idx];
+        if (STREQ(p->key, "path")) {
+            if (!virStrcpy(lease->disks[0].path, p->value.str, SANLK_PATH_LEN)) {
+                virLockError(VIR_ERR_INTERNAL_ERROR,
+                             _("Lease path '%s' exceeds %d characters"),
+                             p->value.str, SANLK_PATH_LEN);
+                goto error;
+            }
+        } else if (STREQ(p->key, "offset")) {
+            lease->disks[0].offset = p->value.ul;
+        } else if (STREQ(p->key, "lockspace")) {
+            if (!virStrcpy(lease->lockspace_name, p->value.str, SANLK_NAME_LEN)) {
+                virLockError(VIR_ERR_INTERNAL_ERROR,
+                             _("Resource lockspace '%s' exceeds %d characters"),
+                             p->value.str, SANLK_NAME_LEN);
+                goto error;
+            }
+        }
+    }
+
+    priv->res_args[priv->res_count++] = lease;
+    return 0;
+
+error:
+    VIR_FREE(lease);
+    return -1;
+}
+
+/*
+ * When called from the VM's own process, register with the sanlock daemon;
+ * then, unless VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY is set, acquire the
+ * selected leases for the VM's pid.
+ * Returns 0 on success, -1 with an error reported on failure. A socket
+ * opened by the registration is deliberately left open on success.
+ */
+static int virLockManagerSanlockAcquire(virLockManagerPtr lock,
+                                        const char *state,
+                                        unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    struct sanlk_options *opt = NULL;
+    struct sanlk_resource **res_args = NULL;
+    int res_count = 0;
+    bool res_free = false;
+    int sock = -1;
+    int ret = -1;
+    int rv;
+    int i;
+
+    virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
+
+    if (priv->res_count == 0 &&
+        priv->hasRWDisks) {
+        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                     _("Read/write, exclusive access, disks were present, but no leases specified"));
+        return -1;
+    }
+
+    if (VIR_ALLOC(opt) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+
+    if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Domain name '%s' exceeded %d characters"),
+                     priv->vm_name, SANLK_NAME_LEN);
+        goto cleanup;
+    }
+
+    /*
+     * NOTE(review): the trailing "&& 0" makes the saved-state branch
+     * unreachable, so the leases queued on the lock object are always
+     * used. Confirm whether restoring from 'state' should stay disabled.
+     */
+    if (state && STRNEQ(state, "") && 0) {
+        if ((rv = sanlock_state_to_args((char *)state,
+                                        &res_count,
+                                        &res_args)) < 0) {
+            virReportSystemError(-rv,
+                                 _("Unable to parse lock state %s"),
+                                 state);
+            goto cleanup;
+        }
+        res_free = true;
+    } else {
+        res_args = priv->res_args;
+        res_count = priv->res_count;
+    }
+
+    VIR_DEBUG("Register sanlock %u", flags);
+    /* We only initialize 'sock' if we are in the real
+     * child process and we need it to be inherited
+     *
+     * If sock==-1, then sanlock auto-open/closes a
+     * temporary sock
+     */
+    if (priv->vm_pid == getpid() &&
+        (sock = sanlock_register()) < 0) {
+        virReportSystemError(-sock, "%s",
+                             _("Failed to open socket to sanlock daemon"));
+        goto cleanup;
+    }
+
+    if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) {
+        /* Pass the lease set selected above rather than priv's copy, so
+         * a parsed 'state' is honoured whenever that branch is taken. */
+        VIR_DEBUG("Acquiring object %d", res_count);
+        if ((rv = sanlock_acquire(sock, priv->vm_pid, 0,
+                                  res_count, res_args, opt)) < 0) {
+            virReportSystemError(-rv, "%s",
+                                 _("Failed to acquire lock"));
+            goto cleanup;
+        }
+    }
+
+    /*
+     * We are *intentionally* "leaking" sock file descriptor
+     * because we want it to be inherited by QEMU. When the
+     * sock FD finally closes upon QEMU exit (or crash) then
+     * sanlock will notice EOF and release the lock
+     */
+    if (sock != -1 &&
+        virSetInherit(sock, true) < 0)
+        goto cleanup;
+
+    VIR_DEBUG("Acquire completed fd=%d", sock);
+    ret = 0;
+
+cleanup:
+    if (res_free) {
+        for (i = 0 ; i < res_count ; i++) {
+            VIR_FREE(res_args[i]);
+        }
+        VIR_FREE(res_args);
+    }
+    VIR_FREE(opt);
+    if (ret < 0)
+        VIR_FORCE_CLOSE(sock);
+    return ret;
+}
+
+/*
+ * Save the VM pid's lease state into *state (empty/missing becomes NULL), then
+ * release all of its leases; on release failure the saved state is freed.
+ */
+static int virLockManagerSanlockRelease(virLockManagerPtr lock,
+                                        char **state,
+                                        unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    int res_count;
+    int rv;
+
+    virCheckFlags(0, -1);
+
+    if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
+        virReportSystemError(-rv, "%s", _("Failed to inquire lock"));
+        return -1;
+    }
+    if (*state && STREQ(*state, ""))
+        VIR_FREE(*state);
+
+    if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) {
+        virReportSystemError(-rv, "%s", _("Failed to release lock"));
+        VIR_FREE(*state);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Fetch the lease state sanlock holds for the VM's pid into *state; a NULL or
+ * empty state string is handed back as NULL. Returns 0 on success, -1 on error.
+ */
+static int virLockManagerSanlockInquire(virLockManagerPtr lock,
+                                        char **state,
+                                        unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    int rv, res_count;
+
+    virCheckFlags(0, -1);
+    VIR_DEBUG("pid=%u", priv->vm_pid);
+
+    if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
+        virReportSystemError(-rv, "%s", _("Failed to inquire lock"));
+        return -1;
+    }
+    if (*state && STREQ(*state, ""))
+        VIR_FREE(*state);
+    return 0;
+}
+
+virLockDriver virLockDriverImpl =   /* non-static table of this plugin's entry points */
+{
+    .version = VIR_LOCK_MANAGER_VERSION,
+    /* NOTE(review): presumably marks that Acquire/Release/Inquire exchange a state string - confirm */
+    .flags = VIR_LOCK_MANAGER_USES_STATE,
+
+    .drvInit = virLockManagerSanlockInit,
+    .drvDeinit = virLockManagerSanlockDeinit,     /* always fails */
+
+    .drvNew = virLockManagerSanlockNew,
+    .drvFree = virLockManagerSanlockFree,
+
+    .drvAddResource = virLockManagerSanlockAddResource,
+
+    .drvAcquire = virLockManagerSanlockAcquire,
+    .drvRelease = virLockManagerSanlockRelease,
+    .drvInquire = virLockManagerSanlockInquire,
+};
-- 
1.7.4.4




More information about the libvir-list mailing list