[PATCH v3] qemu: Don't report spurious errors from vCPU tid validation on hotunplug timeout

Peter Krempa pkrempa at redhat.com
Tue Nov 22 14:08:43 UTC 2022


From: Shaleen Bathla <shaleen.bathla at oracle.com>

Use of qemuDomainValidateVcpuInfo in the helpers for hotplug and unplug
of vCPUs can lead to spurious errors reported such as:

  internal error: qemu didn't report thread id for vcpu 'XX'"

The reason for this is that qemuDomainValidateVcpuInfo validates the
state of all vCPUs against the expected state of vCPUs. If an unplug
operation completed before libvirt was unable to process it yet the
expected state could not reflect the current state.

To avoid spurious errors the qemuDomainHotplugAddVcpu and
qemuDomainRemoveVcpu functions are modified to do localized validation
only for the vCPUs they actually modify.

We also now ensure that the cgroups are modified before bailing out on
error for any vCPUs which passed validation.

Additionally in order for qemuDomainRemoveVcpuAlias to be able to find
the unplugged vCPU we must ensure that qemuDomainRefreshVcpuInfo does
not clear out the alias in case when the vCPU is no longer reported by
qemu.

Co-authored-by: Partha Satapathy <partha.satapathy at oracle.com>
Signed-off-by: Shaleen Bathla <shaleen.bathla at oracle.com>
Signed-off-by: Peter Krempa <pkrempa at redhat.com>
---

v3 addresses my review feedback of the original patch, as well as
rewrites the commit message for more clarity.

 src/qemu/qemu_domain.c  |  6 +++--
 src/qemu/qemu_hotplug.c | 53 ++++++++++++++++++++++++-----------------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index ef1a9c8c74..64ebec626c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9795,8 +9795,10 @@ qemuDomainRefreshVcpuInfo(virDomainObj *vm,
         vcpupriv->vcpus = info[i].vcpus;
         VIR_FREE(vcpupriv->type);
         vcpupriv->type = g_steal_pointer(&info[i].type);
-        VIR_FREE(vcpupriv->alias);
-        vcpupriv->alias = g_steal_pointer(&info[i].alias);
+        if (info[i].alias) {
+            VIR_FREE(vcpupriv->alias);
+            vcpupriv->alias = g_steal_pointer(&info[i].alias);
+        }
         virJSONValueFree(vcpupriv->props);
         vcpupriv->props = g_steal_pointer(&info[i].props);
         vcpupriv->enable_id = info[i].id;
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index da92ced2f4..6e300f547c 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -6088,38 +6088,37 @@ qemuDomainRemoveVcpu(virDomainObj *vm,
     qemuDomainVcpuPrivate *vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpuinfo);
     int oldvcpus = virDomainDefGetVcpus(vm->def);
     unsigned int nvcpus = vcpupriv->vcpus;
-    virErrorPtr save_error = NULL;
     size_t i;
+    ssize_t offlineVcpuWithTid = -1;

     if (qemuDomainRefreshVcpuInfo(vm, VIR_ASYNC_JOB_NONE, false) < 0)
         return -1;

-    /* validation requires us to set the expected state prior to calling it */
     for (i = vcpu; i < vcpu + nvcpus; i++) {
         vcpuinfo = virDomainDefGetVcpu(vm->def, i);
-        vcpuinfo->online = false;
+        vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpuinfo);
+
+        if (vcpupriv->tid == 0) {
+            vcpuinfo->online = false;
+            /* Clear the alias as VCPU is now unplugged */
+            VIR_FREE(vcpupriv->alias);
+            ignore_value(virCgroupDelThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i));
+        } else {
+            if (offlineVcpuWithTid == -1)
+                offlineVcpuWithTid = i;
+        }
     }

-    if (qemuDomainValidateVcpuInfo(vm) < 0) {
-        /* rollback vcpu count if the setting has failed */
+    if (offlineVcpuWithTid != -1) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("qemu reported thread id for inactive vcpu '%zu'"),
+                       offlineVcpuWithTid);
         virDomainAuditVcpu(vm, oldvcpus, oldvcpus - nvcpus, "update", false);
-
-        for (i = vcpu; i < vcpu + nvcpus; i++) {
-            vcpuinfo = virDomainDefGetVcpu(vm->def, i);
-            vcpuinfo->online = true;
-        }
         return -1;
     }

     virDomainAuditVcpu(vm, oldvcpus, oldvcpus - nvcpus, "update", true);

-    virErrorPreserveLast(&save_error);
-
-    for (i = vcpu; i < vcpu + nvcpus; i++)
-        ignore_value(virCgroupDelThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i));
-
-    virErrorRestore(&save_error);
-
     return 0;
 }

@@ -6141,6 +6140,9 @@ qemuDomainRemoveVcpuAlias(virDomainObj *vm,
             return;
         }
     }
+
+    VIR_DEBUG("vcpu '%s' not found in vcpulist of domain '%s'",
+              alias, vm->def->name);
 }


@@ -6209,6 +6211,7 @@ qemuDomainHotplugAddVcpu(virQEMUDriver *driver,
     int rc;
     int oldvcpus = virDomainDefGetVcpus(vm->def);
     size_t i;
+    bool vcpuTidMissing = false;

     if (!qemuDomainSupportsNewVcpuHotplug(vm)) {
         virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
@@ -6238,20 +6241,26 @@ qemuDomainHotplugAddVcpu(virQEMUDriver *driver,
     if (qemuDomainRefreshVcpuInfo(vm, VIR_ASYNC_JOB_NONE, false) < 0)
         return -1;

-    /* validation requires us to set the expected state prior to calling it */
     for (i = vcpu; i < vcpu + nvcpus; i++) {
         vcpuinfo = virDomainDefGetVcpu(vm->def, i);
         vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpuinfo);

         vcpuinfo->online = true;

-        if (vcpupriv->tid > 0 &&
-            qemuProcessSetupVcpu(vm, i, true) < 0)
-            return -1;
+        if (vcpupriv->tid > 0) {
+            if (qemuProcessSetupVcpu(vm, i, true) < 0) {
+                return -1;
+            }
+        } else {
+            vcpuTidMissing = true;
+        }
     }

-    if (qemuDomainValidateVcpuInfo(vm) < 0)
+    if (vcpuTidMissing && qemuDomainHasVcpuPids(vm)) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("qemu didn't report thread id for vcpu '%zu'"), i);
         return -1;
+    }

     qemuDomainVcpuPersistOrder(vm->def);

-- 
2.37.3



More information about the libvir-list mailing list