[libvirt] [PATCH] esx: Improve blocked task detection and fix race condition

Matthias Bolte matthias.bolte at googlemail.com
Mon Jul 26 01:12:25 UTC 2010


esxVI_WaitForTaskCompletion can take a UUID to lookup the
corresponding domain and check if the current task for it
is blocked by a question. It calls another function to do
this: esxVI_LookupAndHandleVirtualMachineQuestion looks up
the VirtualMachine and checks for a question. If there is
a question it calls esxVI_HandleVirtualMachineQuestion to
handle it.

If there was no question or it has been answered the call
to esxVI_LookupAndHandleVirtualMachineQuestion returns 0.
If any error occurred during the lookup and answering
process -1 is returned. The problem with this is, that -1
is also returned when there was no error but the question
could not be answered. So esxVI_WaitForTaskCompletion cannot
distinguish between this two situations and reports that a
question is blocking the task even when there was actually
another problem.

This inherent problem didn't surface until vSphere 4.1 when
you try to define a new domain. The driver tries to lookup
the domain that is just in the process of being registered.
There seems to be some kind of race condition and the driver
manages to issue a lookup command before the ESX server was
able to register the domain. This used to work before.

Due to the return value problem described above the driver
reported a false error message in that case.

To solve this esxVI_WaitForTaskCompletion now takes an
additional occurrence parameter that describes whether or
not to expect the domain to be existent. Also add a new
parameter to esxVI_LookupAndHandleVirtualMachineQuestion
that allows to distinguish if the call returned -1 because
of an actual error or because the question could not be
answered.
---
 src/esx/esx_driver.c |   17 ++++++++++++++-
 src/esx/esx_vi.c     |   53 ++++++++++++++++++++++++++++++++++++-------------
 src/esx/esx_vi.h     |    7 ++++-
 src/esx/esx_vmx.c    |    4 +-
 4 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/src/esx/esx_driver.c b/src/esx/esx_driver.c
index f9affea..eb64556 100644
--- a/src/esx/esx_driver.c
+++ b/src/esx/esx_driver.c
@@ -1409,6 +1409,7 @@ esxDomainSuspend(virDomainPtr domain)
 
     if (esxVI_SuspendVM_Task(priv->primary, virtualMachine->obj, &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -1462,6 +1463,7 @@ esxDomainResume(virDomainPtr domain)
     if (esxVI_PowerOnVM_Task(priv->primary, virtualMachine->obj, NULL,
                              &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -1607,8 +1609,9 @@ esxDomainDestroy(virDomainPtr domain)
     }
 
     if (esxVI_PowerOffVM_Task(ctx, virtualMachine->obj, &task) < 0 ||
-        esxVI_WaitForTaskCompletion(ctx, task, domain->uuid, priv->autoAnswer,
-                                    &taskInfoState) < 0) {
+        esxVI_WaitForTaskCompletion(ctx, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
+                                    priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
 
@@ -1724,6 +1727,7 @@ esxDomainSetMaxMemory(virDomainPtr domain, unsigned long memory)
     if (esxVI_ReconfigVM_Task(priv->primary, virtualMachine->obj, spec,
                               &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -1775,6 +1779,7 @@ esxDomainSetMemory(virDomainPtr domain, unsigned long memory)
     if (esxVI_ReconfigVM_Task(priv->primary, virtualMachine->obj, spec,
                               &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -2104,6 +2109,7 @@ esxDomainSetVcpus(virDomainPtr domain, unsigned int nvcpus)
     if (esxVI_ReconfigVM_Task(priv->primary, virtualMachine->obj, spec,
                               &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -2505,6 +2511,7 @@ esxDomainCreateWithFlags(virDomainPtr domain, unsigned int flags)
     if (esxVI_PowerOnVM_Task(priv->primary, virtualMachine->obj, NULL,
                              &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -2702,6 +2709,7 @@ esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED)
                               datastoreRelatedPath, NULL, esxVI_Boolean_False,
                               resourcePool, hostSystem->obj, &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->host, task, def->uuid,
+                                    esxVI_Occurrence_OptionalItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -3069,6 +3077,7 @@ esxDomainSetSchedulerParameters(virDomainPtr domain,
     if (esxVI_ReconfigVM_Task(priv->primary, virtualMachine->obj, spec,
                               &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -3223,6 +3232,7 @@ esxDomainMigratePerform(virDomainPtr domain,
                              esxVI_VirtualMachinePowerState_Undefined,
                              &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->vCenter, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -3465,6 +3475,7 @@ esxDomainSnapshotCreateXML(virDomainPtr domain, const char *xmlDesc,
                                   esxVI_Boolean_True,
                                   esxVI_Boolean_False, &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -3721,6 +3732,7 @@ esxDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, unsigned int flags)
     if (esxVI_RevertToSnapshot_Task(priv->primary, snapshotTree->snapshot, NULL,
                                     &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, snapshot->domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
@@ -3775,6 +3787,7 @@ esxDomainSnapshotDelete(virDomainSnapshotPtr snapshot, unsigned int flags)
     if (esxVI_RemoveSnapshot_Task(priv->primary, snapshotTree->snapshot,
                                   removeChildren, &task) < 0 ||
         esxVI_WaitForTaskCompletion(priv->primary, task, snapshot->domain->uuid,
+                                    esxVI_Occurrence_RequiredItem,
                                     priv->autoAnswer, &taskInfoState) < 0) {
         goto cleanup;
     }
diff --git a/src/esx/esx_vi.c b/src/esx/esx_vi.c
index 69ba7a9..98b8bcd 100644
--- a/src/esx/esx_vi.c
+++ b/src/esx/esx_vi.c
@@ -2217,6 +2217,7 @@ esxVI_LookupVirtualMachineByUuidAndPrepareForTask
     esxVI_String *completePropertyNameList = NULL;
     esxVI_VirtualMachineQuestionInfo *questionInfo = NULL;
     esxVI_TaskInfo *pendingTaskInfoList = NULL;
+    esxVI_Boolean blocked = esxVI_Boolean_Undefined;
 
     if (esxVI_String_DeepCopyList(&completePropertyNameList,
                                   propertyNameList) < 0 ||
@@ -2235,7 +2236,8 @@ esxVI_LookupVirtualMachineByUuidAndPrepareForTask
 
     if (questionInfo != NULL &&
         esxVI_HandleVirtualMachineQuestion(ctx, (*virtualMachine)->obj,
-                                           questionInfo, autoAnswer) < 0) {
+                                           questionInfo, autoAnswer,
+                                           &blocked) < 0) {
         goto cleanup;
     }
 
@@ -2528,7 +2530,9 @@ esxVI_LookupPendingTaskInfoListByVirtualMachine
 int
 esxVI_LookupAndHandleVirtualMachineQuestion(esxVI_Context *ctx,
                                             const unsigned char *uuid,
-                                            esxVI_Boolean autoAnswer)
+                                            esxVI_Occurrence occurrence,
+                                            esxVI_Boolean autoAnswer,
+                                            esxVI_Boolean *blocked)
 {
     int result = -1;
     esxVI_ObjectContent *virtualMachine = NULL;
@@ -2538,17 +2542,22 @@ esxVI_LookupAndHandleVirtualMachineQuestion(esxVI_Context *ctx,
     if (esxVI_String_AppendValueToList(&propertyNameList,
                                        "runtime.question") < 0 ||
         esxVI_LookupVirtualMachineByUuid(ctx, uuid, propertyNameList,
-                                         &virtualMachine,
-                                         esxVI_Occurrence_RequiredItem) < 0 ||
-        esxVI_GetVirtualMachineQuestionInfo(virtualMachine,
-                                            &questionInfo) < 0) {
+                                         &virtualMachine, occurrence) < 0) {
         goto cleanup;
     }
 
-    if (questionInfo != NULL &&
-        esxVI_HandleVirtualMachineQuestion(ctx, virtualMachine->obj,
-                                           questionInfo, autoAnswer) < 0) {
-        goto cleanup;
+    if (virtualMachine != NULL) {
+        if (esxVI_GetVirtualMachineQuestionInfo(virtualMachine,
+                                                &questionInfo) < 0) {
+            goto cleanup;
+        }
+
+        if (questionInfo != NULL &&
+            esxVI_HandleVirtualMachineQuestion(ctx, virtualMachine->obj,
+                                               questionInfo, autoAnswer,
+                                               blocked) < 0) {
+            goto cleanup;
+        }
     }
 
     result = 0;
@@ -2700,7 +2709,7 @@ int
 esxVI_HandleVirtualMachineQuestion
   (esxVI_Context *ctx, esxVI_ManagedObjectReference *virtualMachine,
    esxVI_VirtualMachineQuestionInfo *questionInfo,
-   esxVI_Boolean autoAnswer)
+   esxVI_Boolean autoAnswer, esxVI_Boolean *blocked)
 {
     int result = -1;
     esxVI_ElementDescription *elementDescription = NULL;
@@ -2709,6 +2718,13 @@ esxVI_HandleVirtualMachineQuestion
     int answerIndex = 0;
     char *possibleAnswers = NULL;
 
+    if (blocked == NULL || *blocked != esxVI_Boolean_Undefined) {
+        ESX_VI_ERROR(VIR_ERR_INTERNAL_ERROR, "%s", _("Invalid argument"));
+        return -1;
+    }
+
+    *blocked = esxVI_Boolean_False;
+
     if (questionInfo->choice->choiceInfo != NULL) {
         for (elementDescription = questionInfo->choice->choiceInfo;
              elementDescription != NULL;
@@ -2742,6 +2758,8 @@ esxVI_HandleVirtualMachineQuestion
                          _("Pending question blocks virtual machine execution, "
                            "question is '%s', no possible answers"),
                          questionInfo->text);
+
+            *blocked = esxVI_Boolean_True;
             goto cleanup;
         } else if (answerChoice == NULL) {
             ESX_VI_ERROR(VIR_ERR_INTERNAL_ERROR,
@@ -2749,6 +2767,8 @@ esxVI_HandleVirtualMachineQuestion
                            "question is '%s', possible answers are %s, but no "
                            "default answer is specified"), questionInfo->text,
                          possibleAnswers);
+
+            *blocked = esxVI_Boolean_True;
             goto cleanup;
         }
 
@@ -2774,6 +2794,7 @@ esxVI_HandleVirtualMachineQuestion
                          questionInfo->text);
         }
 
+        *blocked = esxVI_Boolean_True;
         goto cleanup;
     }
 
@@ -2795,6 +2816,7 @@ int
 esxVI_WaitForTaskCompletion(esxVI_Context *ctx,
                             esxVI_ManagedObjectReference *task,
                             const unsigned char *virtualMachineUuid,
+                            esxVI_Occurrence virtualMachineOccurrence,
                             esxVI_Boolean autoAnswer,
                             esxVI_TaskInfoState *finalState)
 {
@@ -2810,6 +2832,7 @@ esxVI_WaitForTaskCompletion(esxVI_Context *ctx,
     esxVI_PropertyChange *propertyChange = NULL;
     esxVI_AnyType *propertyValue = NULL;
     esxVI_TaskInfoState state = esxVI_TaskInfoState_Undefined;
+    esxVI_Boolean blocked = esxVI_Boolean_Undefined;
     esxVI_TaskInfo *taskInfo = NULL;
 
     version = strdup("");
@@ -2850,7 +2873,8 @@ esxVI_WaitForTaskCompletion(esxVI_Context *ctx,
 
         if (virtualMachineUuid != NULL) {
             if (esxVI_LookupAndHandleVirtualMachineQuestion
-                  (ctx, virtualMachineUuid, autoAnswer) < 0) {
+                  (ctx, virtualMachineUuid, virtualMachineOccurrence,
+                   autoAnswer, &blocked) < 0) {
                 /*
                  * FIXME: Disable error reporting here, so possible errors from
                  *        esxVI_LookupTaskInfoByTask() and esxVI_CancelTask()
@@ -2861,12 +2885,13 @@ esxVI_WaitForTaskCompletion(esxVI_Context *ctx,
                 }
 
                 if (taskInfo->cancelable == esxVI_Boolean_True) {
-                    if (esxVI_CancelTask(ctx, task) < 0) {
+                    if (esxVI_CancelTask(ctx, task) < 0 &&
+                        blocked == esxVI_Boolean_True) {
                         VIR_ERROR0(_("Cancelable task is blocked by an "
                                      "unanswered question but cancelation "
                                      "failed"));
                     }
-                } else {
+                } else if (blocked == esxVI_Boolean_True) {
                     VIR_ERROR0(_("Non-cancelable task is blocked by an "
                                  "unanswered question"));
                 }
diff --git a/src/esx/esx_vi.h b/src/esx/esx_vi.h
index 9eb5094..ef9c6d9 100644
--- a/src/esx/esx_vi.h
+++ b/src/esx/esx_vi.h
@@ -367,7 +367,9 @@ int esxVI_LookupPendingTaskInfoListByVirtualMachine
 
 int esxVI_LookupAndHandleVirtualMachineQuestion(esxVI_Context *ctx,
                                                 const unsigned char *uuid,
-                                                esxVI_Boolean autoAnswer);
+                                                esxVI_Occurrence occurrence,
+                                                esxVI_Boolean autoAnswer,
+                                                esxVI_Boolean *blocked);
 
 int esxVI_LookupRootSnapshotTreeList
       (esxVI_Context *ctx, const unsigned char *virtualMachineUuid,
@@ -382,11 +384,12 @@ int esxVI_HandleVirtualMachineQuestion
       (esxVI_Context *ctx,
        esxVI_ManagedObjectReference *virtualMachine,
        esxVI_VirtualMachineQuestionInfo *questionInfo,
-       esxVI_Boolean autoAnswer);
+       esxVI_Boolean autoAnswer, esxVI_Boolean *blocked);
 
 int esxVI_WaitForTaskCompletion(esxVI_Context *ctx,
                                 esxVI_ManagedObjectReference *task,
                                 const unsigned char *virtualMachineUuid,
+                                esxVI_Occurrence virtualMachineOccurrence,
                                 esxVI_Boolean autoAnswer,
                                 esxVI_TaskInfoState *finalState);
 
diff --git a/src/esx/esx_vmx.c b/src/esx/esx_vmx.c
index c63b159..ee7afb2 100644
--- a/src/esx/esx_vmx.c
+++ b/src/esx/esx_vmx.c
@@ -811,8 +811,8 @@ esxVMX_AutodetectSCSIControllerModel(esxVI_Context *ctx,
     /* Search datastore for file */
     if (esxVI_SearchDatastore_Task(ctx, hostDatastoreBrowser, datastorePath,
                                    searchSpec, &task) < 0 ||
-        esxVI_WaitForTaskCompletion(ctx, task, NULL, esxVI_Boolean_False,
-                                    &taskInfoState) < 0) {
+        esxVI_WaitForTaskCompletion(ctx, task, NULL, esxVI_Occurrence_None,
+                                    esxVI_Boolean_False, &taskInfoState) < 0) {
         goto cleanup;
     }
 
-- 
1.7.0.4




More information about the libvir-list mailing list