[libvirt] [PATCH 17/18] Change default cgroup layout for QEMU/LXC and honour XML config

Daniel P. Berrange berrange at redhat.com
Thu Apr 4 13:40:36 UTC 2013


From: "Daniel P. Berrange" <berrange at redhat.com>

Historically QEMU/LXC guests have been placed in a cgroup layout
that is

   $LOCATION-OF-LIBVIRTD/libvirt/{qemu,lxc}/$VMNAME

This is bad for a number of reasons

 - The cgroup hierarchy gets very deep which seriously
   impacts kernel performance due to cgroups scalability
   limitations.

 - It is hard to set up cgroup policies which apply across
   services and virtual machines, since all VMs are underneath
   the libvirtd service.

To address this the default cgroup location is changed to
be

    /system/$VMNAME.{lxc,qemu}.libvirt

This puts virtual machines at the same level in the hierarchy
as system services, allowing consistent policy to be set up
across all of them.

This also honours the new resource partition location from the
XML configuration, for example

  <resource>
    <partition>/virtualmachines/production</partition>
  </resource>

will result in the VM being placed at

    /virtualmachines/production/$VMNAME.{lxc,qemu}.libvirt

NB, with the exception of the default, /system, path which
is intended to always exist, libvirt will not attempt to
auto-create the partitions in the XML. It is the responsibility
of the admin/app to configure the partitions. Later libvirt
APIs will provide a way to do this.

Signed-off-by: Daniel P. Berrange <berrange at redhat.com>
---
 src/lxc/lxc_cgroup.c    |  91 +++++++++++++++++++++++++++++++-------
 src/lxc/lxc_cgroup.h    |   2 +-
 src/lxc/lxc_process.c   |   4 +-
 src/qemu/qemu_cgroup.c  | 114 +++++++++++++++++++++++++++++++++++++-----------
 src/qemu/qemu_cgroup.h  |   3 +-
 src/qemu/qemu_process.c |   2 +-
 6 files changed, 169 insertions(+), 47 deletions(-)

diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 72940bd..8f19057 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -523,29 +523,88 @@ cleanup:
 }
 
 
-virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def)
+virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup)
 {
-    virCgroupPtr driver = NULL;
-    virCgroupPtr cgroup = NULL;
     int rc;
+    virCgroupPtr parent = NULL;
+    virCgroupPtr cgroup = NULL;
 
-    rc = virCgroupNewDriver("lxc", true, false, -1, &driver);
-    if (rc != 0) {
-        virReportSystemError(-rc, "%s",
-                             _("Unable to get cgroup for driver"));
-        goto cleanup;
+    if (!def->resource && startup) {
+        virDomainResourceDefPtr res;
+
+        if (VIR_ALLOC(res) < 0) {
+            virReportOOMError();
+            goto cleanup;
+        }
+
+        if (!(res->partition = strdup("/system"))) {
+            virReportOOMError();
+            VIR_FREE(res);
+            goto cleanup;
+        }
+
+        def->resource = res;
     }
 
-    rc = virCgroupNewDomainDriver(driver, def->name, true, &cgroup);
-    if (rc != 0) {
-        virReportSystemError(-rc,
-                             _("Unable to create cgroup for domain %s"),
-                             def->name);
-        goto cleanup;
+    if (def->resource &&
+        def->resource->partition) {
+        if (def->resource->partition[0] != '/') {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Resource partition '%s' must start with '/'"),
+                           def->resource->partition);
+            goto cleanup;
+        }
+        /* We only auto-create the default partition. In other
+         * cases we expect the sysadmin/app to have done so */
+        rc = virCgroupNewPartition(def->resource->partition,
+                                   STREQ(def->resource->partition, "/system"),
+                                   -1,
+                                   &parent);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to initialize %s cgroup"),
+                                 def->resource->partition);
+            goto cleanup;
+        }
+
+        rc = virCgroupNewDomainPartition(parent,
+                                         "lxc",
+                                         def->name,
+                                         true,
+                                         &cgroup);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create cgroup for %s"),
+                                 def->name);
+            goto cleanup;
+        }
+    } else {
+        rc = virCgroupNewDriver("lxc",
+                                true,
+                                true,
+                                -1,
+                                &parent);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create cgroup for %s"),
+                                 def->name);
+            goto cleanup;
+        }
+
+        rc = virCgroupNewDomainDriver(parent,
+                                      def->name,
+                                      true,
+                                      &cgroup);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create cgroup for %s"),
+                                 def->name);
+            goto cleanup;
+        }
     }
 
 cleanup:
-    virCgroupFree(&driver);
+    virCgroupFree(&parent);
     return cgroup;
 }
 
@@ -556,7 +615,7 @@ virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def)
     int ret = -1;
     int rc;
 
-    if (!(cgroup = virLXCCgroupCreate(def)))
+    if (!(cgroup = virLXCCgroupCreate(def, true)))
         return NULL;
 
     rc = virCgroupAddTask(cgroup, getpid());
diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h
index 25a427c..f040de2 100644
--- a/src/lxc/lxc_cgroup.h
+++ b/src/lxc/lxc_cgroup.h
@@ -27,7 +27,7 @@
 # include "lxc_fuse.h"
 # include "virusb.h"
 
-virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def);
+virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup);
 virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def);
 int virLXCCgroupSetup(virDomainDefPtr def,
                       virCgroupPtr cgroup,
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 193dd9a..9f42354 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -1049,7 +1049,7 @@ int virLXCProcessStart(virConnectPtr conn,
 
     virCgroupFree(&priv->cgroup);
 
-    if (!(priv->cgroup = virLXCCgroupCreate(vm->def)))
+    if (!(priv->cgroup = virLXCCgroupCreate(vm->def, true)))
         return -1;
 
     if (!virCgroupHasController(priv->cgroup,
@@ -1464,7 +1464,7 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm,
         if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm)))
             goto error;
 
-        if (!(priv->cgroup = virLXCCgroupCreate(vm->def)))
+        if (!(priv->cgroup = virLXCCgroupCreate(vm->def, false)))
             goto error;
 
         if (virLXCUpdateActiveUsbHostdevs(driver, vm->def) < 0)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index cb0faa1..db9aafe 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -188,46 +188,108 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
 
 
 int qemuInitCgroup(virQEMUDriverPtr driver,
-                   virDomainObjPtr vm)
+                   virDomainObjPtr vm,
+                   bool startup)
 {
-    int rc;
+    int rc = -1;
     qemuDomainObjPrivatePtr priv = vm->privateData;
-    virCgroupPtr driverGroup = NULL;
+    virCgroupPtr parent = NULL;
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
 
     virCgroupFree(&priv->cgroup);
 
-    rc = virCgroupNewDriver("qemu",
-                            cfg->privileged,
-                            true,
-                            cfg->cgroupControllers,
-                            &driverGroup);
-    if (rc != 0) {
-        if (rc == -ENXIO ||
-            rc == -EPERM ||
-            rc == -EACCES) { /* No cgroups mounts == success */
-            VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
-            goto done;
+    if (!vm->def->resource && startup) {
+        virDomainResourceDefPtr res;
+
+        if (VIR_ALLOC(res) < 0) {
+            virReportOOMError();
+            goto cleanup;
         }
 
-        virReportSystemError(-rc,
-                             _("Unable to create cgroup for %s"),
-                             vm->def->name);
-        goto cleanup;
+        if (!(res->partition = strdup("/system"))) {
+            virReportOOMError();
+            VIR_FREE(res);
+            goto cleanup;
+        }
+
+        vm->def->resource = res;
     }
 
-    rc = virCgroupNewDomainDriver(driverGroup, vm->def->name, true, &priv->cgroup);
-    if (rc != 0) {
-        virReportSystemError(-rc,
-                             _("Unable to create cgroup for %s"),
-                             vm->def->name);
-        goto cleanup;
+    if (vm->def->resource &&
+        vm->def->resource->partition) {
+        if (vm->def->resource->partition[0] != '/') {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Resource partition '%s' must start with '/'"),
+                           vm->def->resource->partition);
+            goto cleanup;
+        }
+        /* We only auto-create the default partition. In other
+         * cases we expect the sysadmin/app to have done so */
+        rc = virCgroupNewPartition(vm->def->resource->partition,
+                                   STREQ(vm->def->resource->partition, "/system"),
+                                   cfg->cgroupControllers,
+                                   &parent);
+        if (rc != 0) {
+            if (rc == -ENXIO ||
+                rc == -EPERM ||
+                rc == -EACCES) { /* No cgroups mounts == success */
+                VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
+                goto done;
+            }
+
+            virReportSystemError(-rc,
+                                 _("Unable to initialize %s cgroup"),
+                                 vm->def->resource->partition);
+            goto cleanup;
+        }
+
+        rc = virCgroupNewDomainPartition(parent,
+                                         "qemu",
+                                         vm->def->name,
+                                         true,
+                                         &priv->cgroup);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create cgroup for %s"),
+                                 vm->def->name);
+            goto cleanup;
+        }
+    } else {
+        rc = virCgroupNewDriver("qemu",
+                                cfg->privileged,
+                                true,
+                                cfg->cgroupControllers,
+                                &parent);
+        if (rc != 0) {
+            if (rc == -ENXIO ||
+                rc == -EPERM ||
+                rc == -EACCES) { /* No cgroups mounts == success */
+                VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
+                goto done;
+            }
+
+            virReportSystemError(-rc,
+                                 _("Unable to create cgroup for %s"),
+                                 vm->def->name);
+            goto cleanup;
+        }
+
+        rc = virCgroupNewDomainDriver(parent,
+                                      vm->def->name,
+                                      true,
+                                      &priv->cgroup);
+        if (rc != 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create cgroup for %s"),
+                                 vm->def->name);
+            goto cleanup;
+        }
     }
 
 done:
     rc = 0;
 cleanup:
-    virCgroupFree(&driverGroup);
+    virCgroupFree(&parent);
     virObjectUnref(cfg);
     return rc;
 }
@@ -246,7 +308,7 @@ int qemuSetupCgroup(virQEMUDriverPtr driver,
         (const char *const *)cfg->cgroupDeviceACL :
         defaultDeviceACL;
 
-    if (qemuInitCgroup(driver, vm) < 0)
+    if (qemuInitCgroup(driver, vm, true) < 0)
         return -1;
 
     if (!priv->cgroup)
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index 6cbfebc..e63f443 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -37,7 +37,8 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev,
                                  const char *path,
                                  void *opaque);
 int qemuInitCgroup(virQEMUDriverPtr driver,
-                   virDomainObjPtr vm);
+                   virDomainObjPtr vm,
+                   bool startup);
 int qemuSetupCgroup(virQEMUDriverPtr driver,
                     virDomainObjPtr vm,
                     virBitmapPtr nodemask);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index a86e62c..a7f0563 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3005,7 +3005,7 @@ qemuProcessReconnect(void *opaque)
     if (qemuUpdateActiveUsbHostdevs(driver, obj->def) < 0)
         goto error;
 
-    if (qemuInitCgroup(driver, obj) < 0)
+    if (qemuInitCgroup(driver, obj, false) < 0)
         goto error;
 
     /* XXX: Need to change as long as lock is introduced for
-- 
1.8.1.4




More information about the libvir-list mailing list