[libvirt] [PATCH v2] Make LXC container startup/shutdown/I/O more robust

Daniel P. Berrange berrange at redhat.com
Thu Feb 24 13:42:18 UTC 2011


The current LXC I/O controller looks for HUP to detect
when a guest has quit. This isn't reliable as during
initial bootup it is possible that 'init' will close
the console and let mingetty re-open it. The shutdown
of containers was also flakey because it only killed
the libvirt I/O controller and expected container
processes to gracefully follow.

Change the I/O controller such that when it sees HUP
or an I/O error, it uses kill($PID, 0) to see if the
process has really quit.

Change the container shutdown sequence to use the
virCgroupKillPainfully function to ensure every
process really goes away.

This change makes the use of the 'cpuacct', 'devices'
and 'memory' cgroups controllers compulsory with
LXC

* docs/drvlxc.html.in: Document that certain cgroups
  controllers are now mandatory
* src/lxc/lxc_controller.c: Check if PID is still
  alive before quitting on I/O error/HUP
* src/lxc/lxc_driver.c: Use virCgroupKillPainfully
---
 docs/drvlxc.html.in      |   18 +++++
 src/lxc/lxc_controller.c |   42 +++++++++---
 src/lxc/lxc_driver.c     |  155 ++++++++++++++++++----------------------------
 3 files changed, 110 insertions(+), 105 deletions(-)

diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in
index 35058c4..3e715b1 100644
--- a/docs/drvlxc.html.in
+++ b/docs/drvlxc.html.in
@@ -9,6 +9,24 @@ light-weight "application container" which does not have it's own root image.  Y
 start it using
 </p>
 
+<h2>Cgroups Requirements</h2>
+
+<p>
+The libvirt LXC driver requires that certain cgroups controllers are
+mounted on the host OS. The minimum required controllers are 'cpuacct',
+'memory' and 'devices', while recommended extra controllers are
+'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf file &amp; cgconfig
+init service are used to mount cgroups at host boot time. To mount them
+manually, use the command shown below. NB, the blkio controller in some
+kernels will not allow creation of nested sub-directories, which will
+prevent correct operation of the libvirt LXC driver. On such kernels the
+blkio controller must not be mounted.
+</p>
+
+<pre>
+ # mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio
+</pre>
+
 <h3>Example config version 1</h3>
 <p></p>
 <pre>
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index d2b113c..61e21c3 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -32,6 +32,7 @@
 #include <sys/personality.h>
 #include <unistd.h>
 #include <paths.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <getopt.h>
@@ -120,12 +121,10 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         virReportSystemError(-rc,
                              _("Unable to set memory limit for domain %s"),
                              def->name);
-        /* Don't fail if we can't set memory due to lack of kernel support */
-        if (rc != -ENOENT)
-            goto cleanup;
+        goto cleanup;
     }
 
-    if(def->mem.hard_limit) {
+    if (def->mem.hard_limit) {
         rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -135,7 +134,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.soft_limit) {
+    if (def->mem.soft_limit) {
         rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -145,7 +144,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.swap_hard_limit) {
+    if (def->mem.swap_hard_limit) {
         rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -324,6 +323,18 @@ ignorable_epoll_accept_errno(int errnum)
           || errnum == EWOULDBLOCK);
 }
 
+static bool
+lxcPidGone(pid_t container)
+{
+    waitpid(container, NULL, WNOHANG);
+
+    if (kill(container, 0) < 0 &&
+        errno == ESRCH)
+        return true;
+
+    return false;
+}
+
 /**
  * lxcControllerMain
  * @monitor: server socket fd to accept client requests
@@ -341,7 +352,8 @@ ignorable_epoll_accept_errno(int errnum)
 static int lxcControllerMain(int monitor,
                              int client,
                              int appPty,
-                             int contPty)
+                             int contPty,
+                             pid_t container)
 {
     int rc = -1;
     int epollFd;
@@ -447,7 +459,13 @@ static int lxcControllerMain(int monitor,
                         ++numActive;
                     }
                 } else if (epollEvent.events & EPOLLHUP) {
-                    VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+                    if (lxcPidGone(container))
+                        goto cleanup;
+                    curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
+                    if (fdArray[curFdOff].active) {
+                        fdArray[curFdOff].active = 0;
+                        --numActive;
+                    }
                     continue;
                 } else {
                     lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -486,7 +504,9 @@ static int lxcControllerMain(int monitor,
                 --numActive;
                 fdArray[curFdOff].active = 0;
             } else if (-1 == rc) {
-                goto cleanup;
+                if (lxcPidGone(container))
+                    goto cleanup;
+                continue;
             }
 
         }
@@ -584,7 +604,7 @@ lxcControllerRun(virDomainDefPtr def,
     int rc = -1;
     int control[2] = { -1, -1};
     int containerPty = -1;
-    char *containerPtyPath;
+    char *containerPtyPath = NULL;
     pid_t container = -1;
     virDomainFSDefPtr root;
     char *devpts = NULL;
@@ -706,7 +726,7 @@ lxcControllerRun(virDomainDefPtr def,
     if (lxcControllerClearCapabilities() < 0)
         goto cleanup;
 
-    rc = lxcControllerMain(monitor, client, appPty, containerPty);
+    rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
 
 cleanup:
     VIR_FREE(devptmx);
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 5b6f784..7aaa5cd 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -952,36 +952,16 @@ cleanup:
  * @driver: pointer to driver structure
  * @vm: pointer to VM to clean up
  *
- * waitpid() on the container process.  kill and wait the tty process
- * This is called by both lxcDomainDestroy and lxcSigHandler when a
- * container exits.
- *
- * Returns 0 on success or -1 in case of error
+ * Clean up resources associated with the now dead VM
+ *
  */
-static int lxcVmCleanup(lxc_driver_t *driver,
+static void lxcVmCleanup(lxc_driver_t *driver,
                         virDomainObjPtr  vm)
 {
-    int rc = 0;
-    int waitRc;
-    int childStatus = -1;
     virCgroupPtr cgroup;
     int i;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
-    while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
-           errno == EINTR)
-        ; /* empty */
-
-    if ((waitRc != vm->pid) && (errno != ECHILD)) {
-        virReportSystemError(errno,
-                             _("waitpid failed to wait for container %d: %d"),
-                             vm->pid, waitRc);
-        rc = -1;
-    } else if (WIFEXITED(childStatus)) {
-        VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus));
-        rc = -1;
-    }
-
     /* now that we know it's stopped call the hook if present */
     if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
         char *xml = virDomainDefFormat(vm->def, 0);
@@ -1021,8 +1001,6 @@ static int lxcVmCleanup(lxc_driver_t *driver,
         vm->def->id = -1;
         vm->newDef = NULL;
     }
-
-    return rc;
 }
 
 /**
@@ -1181,11 +1159,10 @@ error:
 
 
 static int lxcVmTerminate(lxc_driver_t *driver,
-                          virDomainObjPtr vm,
-                          int signum)
+                          virDomainObjPtr vm)
 {
-    if (signum == 0)
-        signum = SIGINT;
+    virCgroupPtr group = NULL;
+    int rc;
 
     if (vm->pid <= 0) {
         lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -1193,18 +1170,29 @@ static int lxcVmTerminate(lxc_driver_t *driver,
         return -1;
     }
 
-    if (kill(vm->pid, signum) < 0) {
-        if (errno != ESRCH) {
-            virReportSystemError(errno,
-                                 _("Failed to kill pid %d"),
-                                 vm->pid);
-            return -1;
-        }
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+        return -1;
+
+    rc = virCgroupKillPainfully(group);
+    if (rc < 0) {
+        virReportSystemError(-rc, "%s",
+                             _("Failed to kill container PIDs"));
+        rc = -1;
+        goto cleanup;
     }
+    if (rc == 1) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Some container PIDs refused to die"));
+        rc = -1;
+        goto cleanup;
+    }
+    lxcVmCleanup(driver, vm);
 
-    vm->state = VIR_DOMAIN_SHUTDOWN;
+    rc = 0;
 
-    return lxcVmCleanup(driver, vm);
+cleanup:
+    virCgroupFree(&group);
+    return rc;
 }
 
 static void lxcMonitorEvent(int watch,
@@ -1228,7 +1216,7 @@ static void lxcMonitorEvent(int watch,
         goto cleanup;
     }
 
-    if (lxcVmTerminate(driver, vm, SIGINT) < 0) {
+    if (lxcVmTerminate(driver, vm) < 0) {
         virEventRemoveHandle(watch);
     } else {
         event = virDomainEventNewFromObj(vm,
@@ -1473,6 +1461,31 @@ static int lxcVmStart(virConnectPtr conn,
     char **veths = NULL;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
+    if (!lxc_driver->cgroup) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("The 'cpuacct', 'devices' & 'memory' cgroups controllers must be mounted"));
+        return -1;
+    }
+
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_CPUACCT)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'cpuacct' cgroups controller mount"));
+        return -1;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_DEVICES)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'devices' cgroups controller mount"));
+        return -1;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_MEMORY)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'memory' cgroups controller mount"));
+        return -1;
+    }
+
     if ((r = virFileMakePath(driver->logDir)) != 0) {
         virReportSystemError(r,
                              _("Cannot create log directory '%s'"),
@@ -1543,7 +1556,7 @@ static int lxcVmStart(virConnectPtr conn,
              VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
              lxcMonitorEvent,
              vm, NULL)) < 0) {
-        lxcVmTerminate(driver, vm, 0);
+        lxcVmTerminate(driver, vm);
         goto cleanup;
     }
 
@@ -1711,55 +1724,6 @@ cleanup:
     return dom;
 }
 
-/**
- * lxcDomainShutdown:
- * @dom: pointer to domain to shutdown
- *
- * Sends SIGINT to container root process to request it to shutdown
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainShutdown(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainObjPtr vm;
-    virDomainEventPtr event = NULL;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-    if (!vm) {
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(dom->uuid, uuidstr);
-        lxcError(VIR_ERR_NO_DOMAIN,
-                 _("No domain with matching uuid '%s'"), uuidstr);
-        goto cleanup;
-    }
-
-    if (!virDomainObjIsActive(vm)) {
-        lxcError(VIR_ERR_OPERATION_INVALID,
-                 "%s", _("Domain is not running"));
-        goto cleanup;
-    }
-
-    ret = lxcVmTerminate(driver, vm, 0);
-    event = virDomainEventNewFromObj(vm,
-                                     VIR_DOMAIN_EVENT_STOPPED,
-                                     VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
-    if (!vm->persistent) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-    }
-
-cleanup:
-    if (vm)
-        virDomainObjUnlock(vm);
-    if (event)
-        lxcDomainEventQueue(driver, event);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
 
 static int
 lxcDomainEventRegister(virConnectPtr conn,
@@ -1927,7 +1891,7 @@ static int lxcDomainDestroy(virDomainPtr dom)
         goto cleanup;
     }
 
-    ret = lxcVmTerminate(driver, vm, SIGKILL);
+    ret = lxcVmTerminate(driver, vm);
     event = virDomainEventNewFromObj(vm,
                                      VIR_DOMAIN_EVENT_STOPPED,
                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -2056,7 +2020,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
                  VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
                  lxcMonitorEvent,
                  vm, NULL)) < 0) {
-            lxcVmTerminate(driver, vm, 0);
+            lxcVmTerminate(driver, vm);
             goto cleanup;
         }
     } else {
@@ -2123,8 +2087,11 @@ static int lxcStartup(int privileged)
     rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
     if (rc < 0) {
         char buf[1024];
-        VIR_WARN("Unable to create cgroup for driver: %s",
-                 virStrerror(-rc, buf, sizeof(buf)));
+        VIR_DEBUG("Unable to create cgroup for LXC driver: %s",
+                  virStrerror(-rc, buf, sizeof(buf)));
+        /* Don't abort startup. We will explicitly report to
+         * the user when they try to start a VM
+         */
     }
 
     /* Call function to load lxc driver configuration information */
@@ -2844,7 +2811,7 @@ static virDriver lxcDriver = {
     lxcDomainLookupByName, /* domainLookupByName */
     lxcDomainSuspend, /* domainSuspend */
     lxcDomainResume, /* domainResume */
-    lxcDomainShutdown, /* domainShutdown */
+    NULL, /* domainShutdown */
     NULL, /* domainReboot */
     lxcDomainDestroy, /* domainDestroy */
     lxcGetOSType, /* domainGetOSType */
-- 
1.7.4




More information about the libvir-list mailing list