[libvirt PATCH v4 1/7] qemu, hypervisor: refactor some cgroup mgmt methods

Praveen K Paladugu prapal at linux.microsoft.com
Tue Jan 11 22:43:23 UTC 2022


Refactor some cgroup management methods from qemu into hypervisor.
These methods will be shared with ch driver for cgroup management.

Signed-off-by: Praveen K Paladugu <prapal at linux.microsoft.com>
---
 src/hypervisor/domain_cgroup.c | 457 ++++++++++++++++++++++++++++++++-
 src/hypervisor/domain_cgroup.h |  72 ++++++
 src/libvirt_private.syms       |  14 +-
 src/qemu/qemu_cgroup.c         | 413 +----------------------------
 src/qemu/qemu_cgroup.h         |  11 -
 src/qemu/qemu_driver.c         |  14 +-
 src/qemu/qemu_hotplug.c        |   7 +-
 src/qemu/qemu_process.c        |  24 +-
 8 files changed, 580 insertions(+), 432 deletions(-)

diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c
index 61b54f071c..05c53ff7d1 100644
--- a/src/hypervisor/domain_cgroup.c
+++ b/src/hypervisor/domain_cgroup.c
@@ -22,11 +22,12 @@
 
 #include "domain_cgroup.h"
 #include "domain_driver.h"
-
+#include "util/virnuma.h"
+#include "virlog.h"
 #include "virutil.h"
 
 #define VIR_FROM_THIS VIR_FROM_DOMAIN
-
+VIR_LOG_INIT("domain.cgroup");
 
 int
 virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio)
@@ -269,3 +270,455 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
 
     return 0;
 }
+
+
+int
+virDomainCgroupSetupBlkioCgroup(virDomainObj *vm,
+                                virCgroup *cgroup)
+{
+
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) {
+        if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("Block I/O tuning is not available on this host"));
+            return -1;
+        } else {
+            return 0;
+        }
+    }
+
+    return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio);
+}
+
+
+int
+virDomainCgroupSetupMemoryCgroup(virDomainObj *vm,
+                                 virCgroup *cgroup)
+{
+
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
+        if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
+            virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
+            virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("Memory cgroup is not available on this host"));
+            return -1;
+        } else {
+            return 0;
+        }
+    }
+
+    return virDomainCgroupSetupMemtune(cgroup, vm->def->mem);
+}
+
+
+int
+virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup)
+{
+
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return 0;
+
+    if (virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+int
+virDomainCgroupSetupCpuCgroup(virDomainObj *vm,
+                              virCgroup *cgroup)
+{
+
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+        if (vm->def->cputune.sharesSpecified) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("CPU tuning is not available on this host"));
+            return -1;
+        } else {
+            return 0;
+        }
+    }
+
+    if (vm->def->cputune.sharesSpecified) {
+
+        if (virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0)
+            return -1;
+
+    }
+
+    return 0;
+}
+
+
+int
+virDomainCgroupInitCgroup(const char *prefix,
+                          virDomainObj * vm,
+                          size_t nnicindexes,
+                          int *nicindexes,
+                          virCgroup * cgroup,
+                          int cgroupControllers,
+                          unsigned int maxThreadsPerProc,
+                          bool privileged,
+                          char *machineName)
+{
+    if (!privileged)
+        return 0;
+
+    if (!virCgroupAvailable())
+        return 0;
+
+    virCgroupFree(cgroup);
+    cgroup = NULL;
+
+    if (!vm->def->resource)
+        vm->def->resource = g_new0(virDomainResourceDef, 1);
+
+    if (!vm->def->resource->partition)
+        vm->def->resource->partition = g_strdup("/machine");
+
+    if (!g_path_is_absolute(vm->def->resource->partition)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Resource partition '%s' must start with '/'"),
+                       vm->def->resource->partition);
+        return -1;
+    }
+
+    if (virCgroupNewMachine(machineName,
+                            prefix,
+                            vm->def->uuid,
+                            NULL,
+                            vm->pid,
+                            false,
+                            nnicindexes, nicindexes,
+                            vm->def->resource->partition,
+                            cgroupControllers,
+                            maxThreadsPerProc, &cgroup) < 0) {
+        if (virCgroupNewIgnoreError())
+            return 0;
+
+        return -1;
+    }
+
+    return 0;
+}
+
+
+void
+virDomainCgroupRestoreCgroupState(virDomainObj *vm,
+                                  virCgroup *cgroup)
+{
+    g_autofree char *mem_mask = NULL;
+    size_t i = 0;
+
+    g_autoptr(virBitmap) all_nodes = NULL;
+
+    if (!virNumaIsAvailable() ||
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return;
+
+    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+        goto error;
+
+    if (!(mem_mask = virBitmapFormat(all_nodes)))
+        goto error;
+
+    if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
+        goto error;
+
+    if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
+        goto error;
+
+    for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
+        virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
+
+        if (!vcpu->online)
+            continue;
+
+        if (virDomainCgroupRestoreCgroupThread(cgroup,
+                                               VIR_CGROUP_THREAD_VCPU,
+                                               i) < 0)
+            return;
+    }
+
+    for (i = 0; i < vm->def->niothreadids; i++) {
+        if (virDomainCgroupRestoreCgroupThread(cgroup,
+                                               VIR_CGROUP_THREAD_IOTHREAD,
+                                               vm->def->iothreadids[i]->iothread_id) < 0)
+            return;
+    }
+
+    if (virDomainCgroupRestoreCgroupThread(cgroup,
+                                           VIR_CGROUP_THREAD_EMULATOR,
+                                           0) < 0)
+        return;
+
+    return;
+
+ error:
+    virResetLastError();
+    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
+    return;
+}
+
+
+int
+virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
+                                   virCgroupThreadName thread,
+                                   int id)
+{
+    g_autoptr(virCgroup) cgroup_temp = NULL;
+    g_autofree char *nodeset = NULL;
+
+    if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
+        return -1;
+
+    if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
+        return -1;
+
+    if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
+        return -1;
+
+    if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+int
+virDomainCgroupConnectCgroup(const char *prefix,
+                             virDomainObj *vm,
+                             virCgroup *cgroup,
+                             int cgroupControllers,
+                             bool privileged,
+                             char *machineName)
+{
+    if (privileged)
+        return 0;
+
+    if (!virCgroupAvailable())
+        return 0;
+
+    virCgroupFree(cgroup);
+
+    if (virCgroupNewDetectMachine(vm->def->name,
+                                  prefix,
+                                  vm->pid,
+                                  cgroupControllers, machineName, &cgroup) < 0)
+        return -1;
+
+    virDomainCgroupRestoreCgroupState(vm, cgroup);
+    return 0;
+}
+
+
+int
+virDomainCgroupSetupCgroup(const char *prefix,
+                           virDomainObj * vm,
+                           size_t nnicindexes,
+                           int *nicindexes,
+                           virCgroup * cgroup,
+                           int cgroupControllers,
+                           unsigned int maxThreadsPerProc,
+                           bool privileged,
+                           char *machineName)
+{
+    if (!vm->pid) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Cannot setup cgroups until process is started"));
+        return -1;
+    }
+
+    if (virDomainCgroupInitCgroup(prefix,
+                                  vm,
+                                  nnicindexes,
+                                  nicindexes,
+                                  cgroup,
+                                  cgroupControllers,
+                                  maxThreadsPerProc,
+                                  privileged,
+                                  machineName) < 0)
+        return -1;
+
+    if (!cgroup)
+        return 0;
+
+    if (virDomainCgroupSetupBlkioCgroup(vm, cgroup) < 0)
+        return -1;
+
+    if (virDomainCgroupSetupMemoryCgroup(vm, cgroup) < 0)
+        return -1;
+
+    if (virDomainCgroupSetupCpuCgroup(vm, cgroup) < 0)
+        return -1;
+
+    if (virDomainCgroupSetupCpusetCgroup(cgroup) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+int
+virDomainCgroupSetupVcpuBW(virCgroup *cgroup,
+                           unsigned long long period,
+                           long long quota)
+{
+    return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
+}
+
+
+int
+virDomainCgroupSetupCpusetCpus(virCgroup *cgroup,
+                               virBitmap *cpumask)
+{
+    return virCgroupSetupCpusetCpus(cgroup, cpumask);
+}
+
+
+int
+virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm,
+                                    virCgroup *cgroup,
+                                    virBitmap *autoNodeset)
+{
+    unsigned long long period = vm->def->cputune.global_period;
+    long long quota = vm->def->cputune.global_quota;
+    g_autofree char *mem_mask = NULL;
+    virDomainNumatuneMemMode mem_mode;
+
+    if ((period || quota) &&
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("cgroup cpu is required for scheduler tuning"));
+        return -1;
+    }
+
+    /*
+     * If CPU cgroup controller is not initialized here, then we need
+     * neither period nor quota settings.  And if CPUSET controller is
+     * not initialized either, then there's nothing to do anyway.
+     */
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return 0;
+
+
+    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
+                                            autoNodeset, &mem_mask, -1) < 0)
+        return -1;
+
+    if (period || quota) {
+        if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+int
+virDomainCgroupRemoveCgroup(virDomainObj *vm,
+                            virCgroup *cgroup,
+                            char *machineName)
+{
+    if (cgroup == NULL)
+        return 0;               /* Not supported, so claim success */
+
+    if (virCgroupTerminateMachine(machineName) < 0) {
+        if (!virCgroupNewIgnoreError())
+            VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);
+    }
+
+    return virCgroupRemove(cgroup);
+}
+
+
+void
+virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data)
+{
+    if (!data)
+        return;
+
+    virCgroupFree(data->emulatorCgroup);
+    g_free(data->emulatorMemMask);
+    g_free(data);
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesAllow:
+ * @cgroup: domain cgroup pointer
+ * @retData: filled with structure used to roll back the operation
+ *
+ * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is
+ * necessary when hotplugging cpus since it requires memory allocated in the
+ * DMA region. Afterwards the operation can be reverted by
+ * virDomainCgroupEmulatorAllNodesRestore.
+ *
+ * Returns 0 on success -1 on error
+ */
+int
+virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
+                                     virCgroupEmulatorAllNodesData **retData)
+{
+    virCgroupEmulatorAllNodesData *data = NULL;
+    g_autofree char *all_nodes_str = NULL;
+
+    g_autoptr(virBitmap) all_nodes = NULL;
+    int ret = -1;
+
+    if (!virNumaIsAvailable() ||
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return 0;
+
+    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+        goto cleanup;
+
+    if (!(all_nodes_str = virBitmapFormat(all_nodes)))
+        goto cleanup;
+
+    data = g_new0(virCgroupEmulatorAllNodesData, 1);
+
+    if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
+                           false, &data->emulatorCgroup) < 0)
+        goto cleanup;
+
+    if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0
+        || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
+        goto cleanup;
+
+    *retData = g_steal_pointer(&data);
+    ret = 0;
+
+ cleanup:
+    virDomainCgroupEmulatorAllNodesDataFree(data);
+
+    return ret;
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesRestore:
+ * @data: data structure created by virDomainCgroupEmulatorAllNodesAllow
+ *
+ * Rolls back the setting done by virDomainCgroupEmulatorAllNodesAllow and frees the
+ * associated data.
+ */
+void
+virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data)
+{
+    virError *err;
+
+    if (!data)
+        return;
+
+    virErrorPreserveLast(&err);
+    virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
+    virErrorRestore(&err);
+
+    virDomainCgroupEmulatorAllNodesDataFree(data);
+}
diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h
index f93e5f74fe..dfe7107674 100644
--- a/src/hypervisor/domain_cgroup.h
+++ b/src/hypervisor/domain_cgroup.h
@@ -23,6 +23,11 @@
 #include "vircgroup.h"
 #include "domain_conf.h"
 
+typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData;
+struct _virCgroupEmulatorAllNodesData {
+    virCgroup *emulatorCgroup;
+    char *emulatorMemMask;
+};
 
 int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio);
 int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem);
@@ -36,3 +41,70 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
                                             virDomainDef *persistentDef,
                                             virTypedParameterPtr params,
                                             int nparams);
+int
+virDomainCgroupSetupBlkioCgroup(virDomainObj * vm,
+                                virCgroup *cgroup);
+int
+virDomainCgroupSetupMemoryCgroup(virDomainObj * vm,
+                                 virCgroup *cgroup);
+int
+virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup);
+int
+virDomainCgroupSetupCpuCgroup(virDomainObj * vm,
+                              virCgroup *cgroup);
+int
+virDomainCgroupInitCgroup(const char *prefix,
+                          virDomainObj * vm,
+                          size_t nnicindexes,
+                          int *nicindexes,
+                          virCgroup *cgroup,
+                          int cgroupControllers,
+                          unsigned int maxThreadsPerProc,
+                          bool privileged,
+                          char *machineName);
+void
+virDomainCgroupRestoreCgroupState(virDomainObj * vm,
+                                  virCgroup *cgroup);
+int
+virDomainCgroupConnectCgroup(const char *prefix,
+                             virDomainObj * vm,
+                             virCgroup *cgroup,
+                             int cgroupControllers,
+                             bool privileged,
+                             char *machineName);
+int
+virDomainCgroupSetupCgroup(const char *prefix,
+                           virDomainObj * vm,
+                           size_t nnicindexes,
+                           int *nicindexes,
+                           virCgroup *cgroup,
+                           int cgroupControllers,
+                           unsigned int maxThreadsPerProc,
+                           bool privileged,
+                           char *machineName);
+void
+virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData * data);
+int
+virDomainCgroupEmulatorAllNodesAllow(virCgroup * cgroup,
+                                     virCgroupEmulatorAllNodesData ** retData);
+void
+virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData * data);
+int
+virDomainCgroupSetupVcpuBW(virCgroup * cgroup,
+                           unsigned long long period,
+                           long long quota);
+int
+virDomainCgroupSetupCpusetCpus(virCgroup * cgroup,
+                               virBitmap * cpumask);
+int
+virDomainCgroupSetupGlobalCpuCgroup(virDomainObj * vm,
+                                    virCgroup * cgroup,
+                                    virBitmap *autoNodeset);
+int
+virDomainCgroupRemoveCgroup(virDomainObj * vm,
+                            virCgroup * cgroup,
+                            char *machineName);
+int
+virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
+                                   virCgroupThreadName thread,
+                                   int id);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index ee14b99d88..d9d83d2a99 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1541,11 +1541,23 @@ virSetConnectStorage;
 
 
 # hypervisor/domain_cgroup.h
+virDomainCgroupConnectCgroup;
+virDomainCgroupEmulatorAllNodesAllow;
+virDomainCgroupEmulatorAllNodesRestore;
+virDomainCgroupInitCgroup;
+virDomainCgroupRemoveCgroup;
 virDomainCgroupSetMemoryLimitParameters;
 virDomainCgroupSetupBlkio;
+virDomainCgroupSetupBlkioCgroup;
+virDomainCgroupSetupCgroup;
+virDomainCgroupSetupCpuCgroup;
+virDomainCgroupSetupCpusetCgroup;
+virDomainCgroupSetupCpusetCpus;
 virDomainCgroupSetupDomainBlkioParameters;
+virDomainCgroupSetupGlobalCpuCgroup;
+virDomainCgroupSetupMemoryCgroup;
 virDomainCgroupSetupMemtune;
-
+virDomainCgroupSetupVcpuBW;
 
 # hypervisor/domain_driver.h
 virDomainDriverAddIOThreadCheck;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 308be5b00f..bc0f73cb95 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm,
     return ret;
 }
 
-
-static int
-qemuSetupBlkioCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup,
-                                VIR_CGROUP_CONTROLLER_BLKIO)) {
-        if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
-            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                           _("Block I/O tuning is not available on this host"));
-            return -1;
-        }
-        return 0;
-    }
-
-    return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio);
-}
-
-
-static int
-qemuSetupMemoryCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
-        if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
-            virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
-            virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
-            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                           _("Memory cgroup is not available on this host"));
-            return -1;
-        }
-        return 0;
-    }
-
-    return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem);
-}
-
-
 static int
 qemuSetupFirmwareCgroup(virDomainObj *vm)
 {
@@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
 }
 
 
-static int
-qemuSetupCpusetCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return 0;
-
-    if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
-        return -1;
-
-    return 0;
-}
-
-
-static int
-qemuSetupCpuCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
-       if (vm->def->cputune.sharesSpecified) {
-           virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                          _("CPU tuning is not available on this host"));
-           return -1;
-       }
-       return 0;
-    }
-
-    if (vm->def->cputune.sharesSpecified) {
-        if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0)
-            return -1;
-    }
-
-    return 0;
-}
-
-
 static int
 qemuSetupCgroupAppid(virDomainObj *vm)
 {
@@ -927,166 +849,13 @@ qemuSetupCgroupAppid(virDomainObj *vm)
 }
 
 
-static int
-qemuInitCgroup(virDomainObj *vm,
-               size_t nnicindexes,
-               int *nicindexes)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
-    if (!priv->driver->privileged)
-        return 0;
-
-    if (!virCgroupAvailable())
-        return 0;
-
-    virCgroupFree(priv->cgroup);
-    priv->cgroup = NULL;
-
-    if (!vm->def->resource)
-        vm->def->resource = g_new0(virDomainResourceDef, 1);
-
-    if (!vm->def->resource->partition)
-        vm->def->resource->partition = g_strdup("/machine");
-
-    if (!g_path_is_absolute(vm->def->resource->partition)) {
-        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
-                       _("Resource partition '%s' must start with '/'"),
-                       vm->def->resource->partition);
-        return -1;
-    }
-
-    if (virCgroupNewMachine(priv->machineName,
-                            "qemu",
-                            vm->def->uuid,
-                            NULL,
-                            vm->pid,
-                            false,
-                            nnicindexes, nicindexes,
-                            vm->def->resource->partition,
-                            cfg->cgroupControllers,
-                            cfg->maxThreadsPerProc,
-                            &priv->cgroup) < 0) {
-        if (virCgroupNewIgnoreError())
-            return 0;
-
-        return -1;
-    }
-
-    return 0;
-}
-
-static int
-qemuRestoreCgroupThread(virCgroup *cgroup,
-                        virCgroupThreadName thread,
-                        int id)
-{
-    g_autoptr(virCgroup) cgroup_temp = NULL;
-    g_autofree char *nodeset = NULL;
-
-    if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
-        return -1;
-
-    if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
-        return -1;
-
-    if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
-        return -1;
-
-    if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
-        return -1;
-
-    return 0;
-}
-
-static void
-qemuRestoreCgroupState(virDomainObj *vm)
-{
-    g_autofree char *mem_mask = NULL;
-    qemuDomainObjPrivate *priv = vm->privateData;
-    size_t i = 0;
-    g_autoptr(virBitmap) all_nodes = NULL;
-
-    if (!virNumaIsAvailable() ||
-        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return;
-
-    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
-        goto error;
-
-    if (!(mem_mask = virBitmapFormat(all_nodes)))
-        goto error;
-
-    if (virCgroupHasEmptyTasks(priv->cgroup,
-                               VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
-        goto error;
-
-    if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
-        goto error;
-
-    for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
-        virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
-
-        if (!vcpu->online)
-            continue;
-
-        if (qemuRestoreCgroupThread(priv->cgroup,
-                                    VIR_CGROUP_THREAD_VCPU, i) < 0)
-            return;
-    }
-
-    for (i = 0; i < vm->def->niothreadids; i++) {
-        if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
-                                    vm->def->iothreadids[i]->iothread_id) < 0)
-            return;
-    }
-
-    if (qemuRestoreCgroupThread(priv->cgroup,
-                                VIR_CGROUP_THREAD_EMULATOR, 0) < 0)
-        return;
-
-    return;
-
- error:
-    virResetLastError();
-    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
-    return;
-}
-
-int
-qemuConnectCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
-    if (!priv->driver->privileged)
-        return 0;
-
-    if (!virCgroupAvailable())
-        return 0;
-
-    virCgroupFree(priv->cgroup);
-    priv->cgroup = NULL;
-
-    if (virCgroupNewDetectMachine(vm->def->name,
-                                  "qemu",
-                                  vm->pid,
-                                  cfg->cgroupControllers,
-                                  priv->machineName,
-                                  &priv->cgroup) < 0)
-        return -1;
-
-    qemuRestoreCgroupState(vm);
-    return 0;
-}
-
 int
 qemuSetupCgroup(virDomainObj *vm,
                 size_t nnicindexes,
                 int *nicindexes)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
+    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
 
     if (!vm->pid) {
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
@@ -1094,7 +863,15 @@ qemuSetupCgroup(virDomainObj *vm,
         return -1;
     }
 
-    if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0)
+    if (virDomainCgroupInitCgroup("qemu",
+                                  vm,
+                                  nnicindexes,
+                                  nicindexes,
+                                  priv->cgroup,
+                                  cfg->cgroupControllers,
+                                  cfg->maxThreadsPerProc,
+                                  priv->driver->privileged,
+                                  priv->machineName) < 0)
         return -1;
 
     if (!priv->cgroup)
@@ -1103,16 +880,16 @@ qemuSetupCgroup(virDomainObj *vm,
     if (qemuSetupDevicesCgroup(vm) < 0)
         return -1;
 
-    if (qemuSetupBlkioCgroup(vm) < 0)
+    if (virDomainCgroupSetupBlkioCgroup(vm, priv->cgroup) < 0)
         return -1;
 
-    if (qemuSetupMemoryCgroup(vm) < 0)
+    if (virDomainCgroupSetupMemoryCgroup(vm, priv->cgroup) < 0)
         return -1;
 
-    if (qemuSetupCpuCgroup(vm) < 0)
+    if (virDomainCgroupSetupCpuCgroup(vm, priv->cgroup) < 0)
         return -1;
 
-    if (qemuSetupCpusetCgroup(vm) < 0)
+    if (virDomainCgroupSetupCpusetCgroup(priv->cgroup) < 0)
         return -1;
 
     if (qemuSetupCgroupAppid(vm) < 0)
@@ -1121,23 +898,6 @@ qemuSetupCgroup(virDomainObj *vm,
     return 0;
 }
 
-int
-qemuSetupCgroupVcpuBW(virCgroup *cgroup,
-                      unsigned long long period,
-                      long long quota)
-{
-    return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
-}
-
-
-int
-qemuSetupCgroupCpusetCpus(virCgroup *cgroup,
-                          virBitmap *cpumask)
-{
-    return virCgroupSetupCpusetCpus(cgroup, cpumask);
-}
-
-
 int
 qemuSetupCgroupForExtDevices(virDomainObj *vm,
                              virQEMUDriver *driver)
@@ -1164,148 +924,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm,
 
     return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp);
 }
-
-
-int
-qemuSetupGlobalCpuCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-    unsigned long long period = vm->def->cputune.global_period;
-    long long quota = vm->def->cputune.global_quota;
-    g_autofree char *mem_mask = NULL;
-    virDomainNumatuneMemMode mem_mode;
-
-    if ((period || quota) &&
-        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
-        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                       _("cgroup cpu is required for scheduler tuning"));
-        return -1;
-    }
-
-    /*
-     * If CPU cgroup controller is not initialized here, then we need
-     * neither period nor quota settings.  And if CPUSET controller is
-     * not initialized either, then there's nothing to do anyway.
-     */
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
-        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return 0;
-
-
-    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
-        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
-        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
-                                            priv->autoNodeset,
-                                            &mem_mask, -1) < 0)
-        return -1;
-
-    if (period || quota) {
-        if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0)
-            return -1;
-    }
-
-    return 0;
-}
-
-
-int
-qemuRemoveCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (priv->cgroup == NULL)
-        return 0; /* Not supported, so claim success */
-
-    if (virCgroupTerminateMachine(priv->machineName) < 0) {
-        if (!virCgroupNewIgnoreError())
-            VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);
-    }
-
-    return virCgroupRemove(priv->cgroup);
-}
-
-
-static void
-qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data)
-{
-    if (!data)
-        return;
-
-    virCgroupFree(data->emulatorCgroup);
-    g_free(data->emulatorMemMask);
-    g_free(data);
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesAllow:
- * @cgroup: domain cgroup pointer
- * @retData: filled with structure used to roll back the operation
- *
- * Allows all NUMA nodes for the qemu emulator thread temporarily. This is
- * necessary when hotplugging cpus since it requires memory allocated in the
- * DMA region. Afterwards the operation can be reverted by
- * qemuCgroupEmulatorAllNodesRestore.
- *
- * Returns 0 on success -1 on error
- */
-int
-qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
-                                qemuCgroupEmulatorAllNodesData **retData)
-{
-    qemuCgroupEmulatorAllNodesData *data = NULL;
-    g_autofree char *all_nodes_str = NULL;
-    g_autoptr(virBitmap) all_nodes = NULL;
-    int ret = -1;
-
-    if (!virNumaIsAvailable() ||
-        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return 0;
-
-    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
-        goto cleanup;
-
-    if (!(all_nodes_str = virBitmapFormat(all_nodes)))
-        goto cleanup;
-
-    data = g_new0(qemuCgroupEmulatorAllNodesData, 1);
-
-    if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
-                           false, &data->emulatorCgroup) < 0)
-        goto cleanup;
-
-    if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 ||
-        virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
-        goto cleanup;
-
-    *retData = g_steal_pointer(&data);
-    ret = 0;
-
- cleanup:
-    qemuCgroupEmulatorAllNodesDataFree(data);
-
-    return ret;
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesRestore:
- * @data: data structure created by qemuCgroupEmulatorAllNodesAllow
- *
- * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the
- * associated data.
- */
-void
-qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data)
-{
-    virErrorPtr err;
-
-    if (!data)
-        return;
-
-    virErrorPreserveLast(&err);
-    virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
-    virErrorRestore(&err);
-
-    qemuCgroupEmulatorAllNodesDataFree(data);
-}
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index cd537ebd82..f09134947f 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm,
                            virDomainChrDef *dev);
 int qemuTeardownChardevCgroup(virDomainObj *vm,
                               virDomainChrDef *dev);
-int qemuConnectCgroup(virDomainObj *vm);
 int qemuSetupCgroup(virDomainObj *vm,
                     size_t nnicindexes,
                     int *nicindexes);
-int qemuSetupCgroupVcpuBW(virCgroup *cgroup,
-                          unsigned long long period,
-                          long long quota);
-int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask);
-int qemuSetupGlobalCpuCgroup(virDomainObj *vm);
 int qemuSetupCgroupForExtDevices(virDomainObj *vm,
                                  virQEMUDriver *driver);
-int qemuRemoveCgroup(virDomainObj *vm);
 
 typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData;
 struct _qemuCgroupEmulatorAllNodesData {
@@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData {
     char *emulatorMemMask;
 };
 
-int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
-                                    qemuCgroupEmulatorAllNodesData **data);
-void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data);
-
 extern const char *const defaultDeviceACL[];
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index d3d76c003f..c3d4bc1270 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4424,7 +4424,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm,
             if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu,
                                    false, &cgroup_vcpu) < 0)
                 goto cleanup;
-            if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
+            if (virDomainCgroupSetupCpusetCpus(cgroup_vcpu, cpumap) < 0)
                 goto cleanup;
         }
 
@@ -4633,7 +4633,7 @@ qemuDomainPinEmulator(virDomainPtr dom,
                                    0, false, &cgroup_emulator) < 0)
                 goto endjob;
 
-            if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
+            if (virDomainCgroupSetupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
                 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                                _("failed to set cpuset.cpus in cgroup"
                                  " for emulator threads"));
@@ -5038,7 +5038,7 @@ qemuDomainPinIOThread(virDomainPtr dom,
             if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
                                    iothread_id, false, &cgroup_iothread) < 0)
                 goto endjob;
-            if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
+            if (virDomainCgroupSetupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
                 virReportError(VIR_ERR_OPERATION_INVALID,
                                _("failed to set cpuset.cpus in cgroup"
                                  " for iothread %d"), iothread_id);
@@ -8938,7 +8938,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period,
     if (period == 0 && quota == 0)
         return 0;
 
-    if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+    if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
         return -1;
 
     return 0;
@@ -9133,7 +9133,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup,
                                false, &cgroup_vcpu) < 0)
             return -1;
 
-        if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
+        if (virDomainCgroupSetupVcpuBW(cgroup_vcpu, period, quota) < 0)
             return -1;
     }
 
@@ -9154,7 +9154,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup,
                            false, &cgroup_emulator) < 0)
         return -1;
 
-    if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0)
+    if (virDomainCgroupSetupVcpuBW(cgroup_emulator, period, quota) < 0)
         return -1;
 
     return 0;
@@ -9181,7 +9181,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup,
                                false, &cgroup_iothread) < 0)
             return -1;
 
-        if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
+        if (virDomainCgroupSetupVcpuBW(cgroup_iothread, period, quota) < 0)
             return -1;
     }
 
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 04c6600f26..01d734e294 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -37,6 +37,7 @@
 #include "qemu_snapshot.h"
 #include "qemu_virtiofs.h"
 #include "domain_audit.h"
+#include "domain_cgroup.h"
 #include "netdev_bandwidth_conf.h"
 #include "domain_nwfilter.h"
 #include "virlog.h"
@@ -6551,11 +6552,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
                        bool enable)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
-    qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+    virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
     ssize_t nextvcpu = -1;
     int ret = -1;
 
-    if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+    if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
         goto cleanup;
 
     if (enable) {
@@ -6576,7 +6577,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
     ret = 0;
 
  cleanup:
-    qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+    virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
 
     return ret;
 }
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 5c9ca0fe4f..605fa2efa2 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -73,6 +73,7 @@
 #include "virpidfile.h"
 #include "virhostcpu.h"
 #include "domain_audit.h"
+#include "domain_cgroup.h"
 #include "domain_nwfilter.h"
 #include "domain_validate.h"
 #include "locking/domain_lock.h"
@@ -2686,7 +2687,7 @@ qemuProcessSetupPid(virDomainObj *vm,
 
         if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
             if (use_cpumask &&
-                qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
+                virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0)
                 goto cleanup;
 
             if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
@@ -2695,7 +2696,7 @@ qemuProcessSetupPid(virDomainObj *vm,
         }
 
         if ((period || quota) &&
-            qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+            virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
             goto cleanup;
 
         /* Move the thread to the sub dir */
@@ -5964,7 +5965,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
 {
     unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
     qemuDomainObjPrivate *priv = vm->privateData;
-    qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+    virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
     virDomainVcpuDef *vcpu;
     qemuDomainVcpuPrivate *vcpupriv;
     size_t i;
@@ -5992,7 +5993,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
     qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
           qemuProcessVcpusSortOrder);
 
-    if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+    if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
         goto cleanup;
 
     for (i = 0; i < nbootHotplug; i++) {
@@ -6016,7 +6017,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
     ret = 0;
 
  cleanup:
-    qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+    virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
     return ret;
 }
 
@@ -7006,7 +7007,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver,
     /* Ensure no historical cgroup for this VM is lying around bogus
      * settings */
     VIR_DEBUG("Ensuring no historical cgroup is lying around");
-    qemuRemoveCgroup(vm);
+    virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName);
 
     if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
         virReportSystemError(errno,
@@ -7615,7 +7616,7 @@ qemuProcessLaunch(virConnectPtr conn,
         goto cleanup;
 
     VIR_DEBUG("Setting global CPU cgroup (if required)");
-    if (qemuSetupGlobalCpuCgroup(vm) < 0)
+    if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0)
         goto cleanup;
 
     VIR_DEBUG("Setting vCPU tuning/settings");
@@ -8223,7 +8224,7 @@ void qemuProcessStop(virQEMUDriver *driver,
     }
 
  retry:
-    if ((ret = qemuRemoveCgroup(vm)) < 0) {
+    if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) {
         if (ret == -EBUSY && (retries++ < 5)) {
             g_usleep(200*1000);
             goto retry;
@@ -8782,7 +8783,12 @@ qemuProcessReconnect(void *opaque)
     if (!priv->machineName)
         goto error;
 
-    if (qemuConnectCgroup(obj) < 0)
+    if (virDomainCgroupConnectCgroup("qemu",
+                                     obj,
+                                     priv->cgroup,
+                                     cfg->cgroupControllers,
+                                     priv->driver->privileged,
+                                     priv->machineName) < 0)
         goto error;
 
     if (qemuDomainPerfRestart(obj) < 0)
-- 
2.27.0





More information about the libvir-list mailing list