[libvirt] [PATCH] Make LXC container startup/shutdown/I/O more robust

Daniel P. Berrange berrange at redhat.com
Tue Feb 22 17:55:39 UTC 2011


The current LXC I/O controller looks for HUP to detect
when a guest has quit. This isn't reliable as during
initial bootup it is possible that 'init' will close
the console and let mingetty re-open it. The shutdown
of containers was also flakey because it only killed
the libvirt I/O controller and expected container
processes to gracefully follow.

Change the I/O controller such that when it see HUP
or an I/O error, it uses kill($PID, 0) to see if the
process has really quit.

Change the container shutdown sequence to use the
virCgroupKillPainfully function to ensure every
really goes away

* docs/drvlxc.html.in: Document that certain cgroups
  controllers are now mandatory
* src/lxc/lxc_controller.c: Check if PID is still
  alive before quitting on I/O error/HUP
* src/lxc/lxc_driver.c: Use virCgroupKillPainfully
---
 docs/drvlxc.html.in      |   18 ++++++
 src/lxc/lxc_controller.c |   42 ++++++++++----
 src/lxc/lxc_driver.c     |  142 ++++++++++++++++------------------------------
 3 files changed, 98 insertions(+), 104 deletions(-)

diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in
index 35058c4..3e715b1 100644
--- a/docs/drvlxc.html.in
+++ b/docs/drvlxc.html.in
@@ -9,6 +9,24 @@ light-weight "application container" which does not have it's own root image.  Y
 start it using
 </p>
 
+<h2>Cgroups Requirements</h2>
+
+<p>
+The libvirt LXC driver requires that certain cgroups controllers are
+mounted on the host OS. The minimum required controllers are 'cpuacct',
+'memory' and 'devices', while recommended extra controllers are
+'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf & cgconfig
+init service used to mount cgroups at host boot time. To manually
+mount them use. NB, the blkio controller in some kernels will not
+allow creation of nested sub-directories which will prevent correct
+operation of the libvirt LXC driver. On such kernels the blkio controller
+must not be mounted.
+</p>
+
+<pre>
+ # mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio
+</pre>
+
 <h3>Example config version 1</h3>
 <p></p>
 <pre>
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index b742a33..0cd7dc4 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -31,6 +31,7 @@
 #include <sys/un.h>
 #include <unistd.h>
 #include <paths.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <getopt.h>
@@ -119,12 +120,10 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         virReportSystemError(-rc,
                              _("Unable to set memory limit for domain %s"),
                              def->name);
-        /* Don't fail if we can't set memory due to lack of kernel support */
-        if (rc != -ENOENT)
-            goto cleanup;
+        goto cleanup;
     }
 
-    if(def->mem.hard_limit) {
+    if (def->mem.hard_limit) {
         rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -134,7 +133,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.soft_limit) {
+    if (def->mem.soft_limit) {
         rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -144,7 +143,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.swap_hard_limit) {
+    if (def->mem.swap_hard_limit) {
         rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -323,6 +322,18 @@ ignorable_epoll_accept_errno(int errnum)
           || errnum == EWOULDBLOCK);
 }
 
+static bool
+lxcPidGone(pid_t container)
+{
+    waitpid(container, NULL, WNOHANG);
+
+    if (kill(container, 0) < 0 &&
+        errno == ESRCH)
+        return true;
+
+    return false;
+}
+
 /**
  * lxcControllerMain
  * @monitor: server socket fd to accept client requests
@@ -340,7 +351,8 @@ ignorable_epoll_accept_errno(int errnum)
 static int lxcControllerMain(int monitor,
                              int client,
                              int appPty,
-                             int contPty)
+                             int contPty,
+                             pid_t container)
 {
     int rc = -1;
     int epollFd;
@@ -446,7 +458,13 @@ static int lxcControllerMain(int monitor,
                         ++numActive;
                     }
                 } else if (epollEvent.events & EPOLLHUP) {
-                    VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+                    if (lxcPidGone(container))
+                        goto cleanup;
+                    curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
+                    if (fdArray[curFdOff].active) {
+                        fdArray[curFdOff].active = 0;
+                        --numActive;
+                    }
                     continue;
                 } else {
                     lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -485,7 +503,9 @@ static int lxcControllerMain(int monitor,
                 --numActive;
                 fdArray[curFdOff].active = 0;
             } else if (-1 == rc) {
-                goto cleanup;
+                if (lxcPidGone(container))
+                    goto cleanup;
+                continue;
             }
 
         }
@@ -564,7 +584,7 @@ lxcControllerRun(virDomainDefPtr def,
     int rc = -1;
     int control[2] = { -1, -1};
     int containerPty = -1;
-    char *containerPtyPath;
+    char *containerPtyPath = NULL;
     pid_t container = -1;
     virDomainFSDefPtr root;
     char *devpts = NULL;
@@ -683,7 +703,7 @@ lxcControllerRun(virDomainDefPtr def,
     if (lxcControllerClearCapabilities() < 0)
         goto cleanup;
 
-    rc = lxcControllerMain(monitor, client, appPty, containerPty);
+    rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
 
 cleanup:
     VIR_FREE(devptmx);
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 625777a..e47c201 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -953,36 +953,16 @@ cleanup:
  * @driver: pointer to driver structure
  * @vm: pointer to VM to clean up
  *
- * waitpid() on the container process.  kill and wait the tty process
- * This is called by both lxcDomainDestroy and lxcSigHandler when a
- * container exits.
- *
- * Returns 0 on success or -1 in case of error
+ * Cleanout resources associated with the now dead VM
+ * 
  */
-static int lxcVmCleanup(lxc_driver_t *driver,
+static void lxcVmCleanup(lxc_driver_t *driver,
                         virDomainObjPtr  vm)
 {
-    int rc = 0;
-    int waitRc;
-    int childStatus = -1;
     virCgroupPtr cgroup;
     int i;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
-    while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
-           errno == EINTR)
-        ; /* empty */
-
-    if ((waitRc != vm->pid) && (errno != ECHILD)) {
-        virReportSystemError(errno,
-                             _("waitpid failed to wait for container %d: %d"),
-                             vm->pid, waitRc);
-        rc = -1;
-    } else if (WIFEXITED(childStatus)) {
-        VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus));
-        rc = -1;
-    }
-
     /* now that we know it's stopped call the hook if present */
     if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
         char *xml = virDomainDefFormat(vm->def, 0);
@@ -1022,8 +1002,6 @@ static int lxcVmCleanup(lxc_driver_t *driver,
         vm->def->id = -1;
         vm->newDef = NULL;
     }
-
-    return rc;
 }
 
 /**
@@ -1182,11 +1160,10 @@ error:
 
 
 static int lxcVmTerminate(lxc_driver_t *driver,
-                          virDomainObjPtr vm,
-                          int signum)
+                          virDomainObjPtr vm)
 {
-    if (signum == 0)
-        signum = SIGINT;
+    virCgroupPtr group = NULL;
+    int rc;
 
     if (vm->pid <= 0) {
         lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -1194,18 +1171,30 @@ static int lxcVmTerminate(lxc_driver_t *driver,
         return -1;
     }
 
-    if (kill(vm->pid, signum) < 0) {
-        if (errno != ESRCH) {
-            virReportSystemError(errno,
-                                 _("Failed to kill pid %d"),
-                                 vm->pid);
-            return -1;
-        }
-    }
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+        return -1;
 
+    rc = virCgroupKillPainfully(group);
+    if (rc < 0) {
+        virReportSystemError(-rc, "%s",
+                             _("Failed to kill container PIDs"));
+        rc = -1;
+        goto cleanup;
+    }
+    if (rc == 1) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Some container PIDs refused to die"));
+        rc = -1;
+        goto cleanup;
+    }
+    lxcVmCleanup(driver, vm);
     vm->state = VIR_DOMAIN_SHUTDOWN;
 
-    return lxcVmCleanup(driver, vm);
+    rc = 0;
+
+cleanup:
+    virCgroupFree(&group);
+    return rc;
 }
 
 static void lxcMonitorEvent(int watch,
@@ -1229,7 +1218,7 @@ static void lxcMonitorEvent(int watch,
         goto cleanup;
     }
 
-    if (lxcVmTerminate(driver, vm, SIGINT) < 0) {
+    if (lxcVmTerminate(driver, vm) < 0) {
         virEventRemoveHandle(watch);
     } else {
         event = virDomainEventNewFromObj(vm,
@@ -1544,7 +1533,7 @@ static int lxcVmStart(virConnectPtr conn,
              VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
              lxcMonitorEvent,
              vm, NULL)) < 0) {
-        lxcVmTerminate(driver, vm, 0);
+        lxcVmTerminate(driver, vm);
         goto cleanup;
     }
 
@@ -1712,55 +1701,6 @@ cleanup:
     return dom;
 }
 
-/**
- * lxcDomainShutdown:
- * @dom: pointer to domain to shutdown
- *
- * Sends SIGINT to container root process to request it to shutdown
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainShutdown(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainObjPtr vm;
-    virDomainEventPtr event = NULL;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-    if (!vm) {
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(dom->uuid, uuidstr);
-        lxcError(VIR_ERR_NO_DOMAIN,
-                 _("No domain with matching uuid '%s'"), uuidstr);
-        goto cleanup;
-    }
-
-    if (!virDomainObjIsActive(vm)) {
-        lxcError(VIR_ERR_OPERATION_INVALID,
-                 "%s", _("Domain is not running"));
-        goto cleanup;
-    }
-
-    ret = lxcVmTerminate(driver, vm, 0);
-    event = virDomainEventNewFromObj(vm,
-                                     VIR_DOMAIN_EVENT_STOPPED,
-                                     VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
-    if (!vm->persistent) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-    }
-
-cleanup:
-    if (vm)
-        virDomainObjUnlock(vm);
-    if (event)
-        lxcDomainEventQueue(driver, event);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
 
 static int
 lxcDomainEventRegister(virConnectPtr conn,
@@ -1928,7 +1868,7 @@ static int lxcDomainDestroy(virDomainPtr dom)
         goto cleanup;
     }
 
-    ret = lxcVmTerminate(driver, vm, SIGKILL);
+    ret = lxcVmTerminate(driver, vm);
     event = virDomainEventNewFromObj(vm,
                                      VIR_DOMAIN_EVENT_STOPPED,
                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -2057,7 +1997,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
                  VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
                  lxcMonitorEvent,
                  vm, NULL)) < 0) {
-            lxcVmTerminate(driver, vm, 0);
+            lxcVmTerminate(driver, vm);
             goto cleanup;
         }
     } else {
@@ -2124,8 +2064,24 @@ static int lxcStartup(int privileged)
     rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
     if (rc < 0) {
         char buf[1024];
-        VIR_WARN("Unable to create cgroup for driver: %s",
+        VIR_WARN("Unable to create cgroup, disabling LXC driver: %s",
                  virStrerror(-rc, buf, sizeof(buf)));
+        goto cleanup;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_CPUACCT)) {
+        VIR_WARN0("Unable to find 'cpuacct' cgroups controller mount");
+        goto cleanup;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_DEVICES)) {
+        VIR_WARN0("Unable to find 'devices' cgroups controller mount");
+        goto cleanup;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_MEMORY)) {
+        VIR_WARN0("Unable to find 'memory' cgroups controller mount");
+        goto cleanup;
     }
 
     /* Call function to load lxc driver configuration information */
@@ -2845,7 +2801,7 @@ static virDriver lxcDriver = {
     lxcDomainLookupByName, /* domainLookupByName */
     lxcDomainSuspend, /* domainSuspend */
     lxcDomainResume, /* domainResume */
-    lxcDomainShutdown, /* domainShutdown */
+    NULL, /* domainShutdown */
     NULL, /* domainReboot */
     lxcDomainDestroy, /* domainDestroy */
     lxcGetOSType, /* domainGetOSType */
-- 
1.7.4




More information about the libvir-list mailing list