[libvirt] [PATCH] qemu: Allow migration to be cancelled at any phase

Michal Privoznik mprivozn at redhat.com
Thu Nov 8 10:00:40 UTC 2012


On 07.11.2012 23:23, Jiri Denemark wrote:
> On Wed, Nov 07, 2012 at 12:03:39 +0100, Michal Privoznik wrote:
>> Currently, if user calls virDomainAbortJob we just issue
>> 'migrate_cancel' and hope for the best. However, if user calls
>> the API in wrong phase when migration hasn't been started yet
>> (perform phase) the cancel request is just ignored. With this
>> patch, the request is remembered and as soon as perform phase
>> starts, migration is cancelled.
>> ---
>>  src/qemu/qemu_domain.c    |   26 ++++++++++++++++++++++++++
>>  src/qemu/qemu_domain.h    |    4 ++++
>>  src/qemu/qemu_driver.c    |   31 +++++++++++++++++++++++++++----
>>  src/qemu/qemu_migration.c |   43 +++++++++++++++++++++++++++++++++++++++++--
>>  4 files changed, 98 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>> index a5592b9..031be5f 100644
>> --- a/src/qemu/qemu_domain.c
>> +++ b/src/qemu/qemu_domain.c
>> @@ -160,6 +160,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
>>      job->mask = DEFAULT_JOB_MASK;
>>      job->start = 0;
>>      job->dump_memory_only = false;
>> +    job->asyncAbort = false;
>>      memset(&job->info, 0, sizeof(job->info));
>>  }
>>  
>> @@ -959,6 +960,31 @@ qemuDomainObjEndAsyncJob(struct qemud_driver *driver, virDomainObjPtr obj)
>>      return virObjectUnref(obj);
>>  }
>>  
>> +void
>> +qemuDomainObjAbortAsyncJob(virDomainObjPtr obj)
>> +{
>> +    qemuDomainObjPrivatePtr priv = obj->privateData;
>> +
>> +    VIR_DEBUG("Requesting abort of async job: %s",
>> +              qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
>> +
>> +    priv->job.asyncAbort = true;
>> +}
>> +
>> +/**
>> + * qemuDomainObjAbortAsyncJobRequested:
>> + * @obj: domain object
>> + *
>> + * Was abort requested? @obj MUST be locked when calling this.
>> + */
>> +bool
>> +qemuDomainObjAbortAsyncJobRequested(virDomainObjPtr obj)
>> +{
>> +    qemuDomainObjPrivatePtr priv = obj->privateData;
>> +
>> +    return priv->job.asyncAbort;
>> +}
>> +
> 
> I don't think we need this qemuDomainObjAbortAsyncJobRequested function. It's
> much easier and shorter if the caller just checks priv->job.asyncAbort
> directly. The current code uses functions only for changing values or for
> more complicated reads.
> 
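
(For illustration, that would turn the check added to qemuMigrationUpdateJobStatus
further down into something like the following; just a sketch, reusing the names
from the patch:)

    -    if (qemuDomainObjAbortAsyncJobRequested(vm)) {
    +    if (priv->job.asyncAbort) {
             VIR_DEBUG("Migration abort requested. Translating "
                       "status to MIGRATION_STATUS_CANCELLED");
             status = QEMU_MONITOR_MIGRATION_STATUS_CANCELLED;
         }
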
>>  static int
>>  qemuDomainObjEnterMonitorInternal(struct qemud_driver *driver,
>>                                    bool driver_locked,
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index 9c2f67c..9a31bbe 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -111,6 +111,7 @@ struct qemuDomainJobObj {
>>      unsigned long long start;           /* When the async job started */
>>      bool dump_memory_only;              /* use dump-guest-memory to do dump */
>>      virDomainJobInfo info;              /* Async job progress data */
>> +    bool asyncAbort;                    /* abort of async job requested */
>>  };
>>  
>>  typedef struct _qemuDomainPCIAddressSet qemuDomainPCIAddressSet;
>> @@ -204,6 +205,9 @@ bool qemuDomainObjEndJob(struct qemud_driver *driver,
>>  bool qemuDomainObjEndAsyncJob(struct qemud_driver *driver,
>>                                virDomainObjPtr obj)
>>      ATTRIBUTE_RETURN_CHECK;
>> +void qemuDomainObjAbortAsyncJob(virDomainObjPtr obj);
>> +bool qemuDomainObjAbortAsyncJobRequested(virDomainObjPtr obj);
>> +
>>  void qemuDomainObjSetJobPhase(struct qemud_driver *driver,
>>                                virDomainObjPtr obj,
>>                                int phase);
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index 7b8eec6..009c2c8 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -10331,6 +10331,8 @@ static int qemuDomainAbortJob(virDomainPtr dom) {
>>      virDomainObjPtr vm;
>>      int ret = -1;
>>      qemuDomainObjPrivatePtr priv;
>> +    /* Poll every 50ms for job termination */
>> +    struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };
>>  
>>      qemuDriverLock(driver);
>>      vm = virDomainFindByUUID(&driver->domains, dom->uuid);
>> @@ -10365,10 +10367,31 @@ static int qemuDomainAbortJob(virDomainPtr dom) {
>>          goto endjob;
>>      }
>>  
>> -    VIR_DEBUG("Cancelling job at client request");
>> -    qemuDomainObjEnterMonitor(driver, vm);
>> -    ret = qemuMonitorMigrateCancel(priv->mon);
>> -    qemuDomainObjExitMonitor(driver, vm);
>> +    qemuDomainObjAbortAsyncJob(vm);
>> +    VIR_DEBUG("Waiting for async job '%s' to finish",
>> +              qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
>> +    while (priv->job.asyncJob) {
>> +        if (qemuDomainObjEndJob(driver, vm) == 0) {
>> +            vm = NULL;
>> +            goto cleanup;
>> +        }
>> +        virDomainObjUnlock(vm);
>> +
>> +        nanosleep(&ts, NULL);
>> +
>> +        virDomainObjLock(vm);
>> +        if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_ABORT) < 0)
>> +            goto cleanup;
>> +
>> +        if (!virDomainObjIsActive(vm)) {
>> +            virReportError(VIR_ERR_OPERATION_INVALID,
>> +                           "%s", _("domain is not running"));
>> +            goto endjob;
>> +        }
>> +
>> +    }
>> +
>> +    ret = 0;
>>  
>>  endjob:
>>      if (qemuDomainObjEndJob(driver, vm) == 0)
> 
> The AbortJob API was never a synchronous one and I don't think we need to
> change that now. Not to mention that this code unlocks and locks vm without
> holding an extra reference to it. And even if it did so, it could not
> guarantee that the job it's checking in the loop is the same one it tried to
> cancel. It's quite unlikely, but the original job could have finished and
> another one could have been started while this code was sleeping.

However, AbortJob is documented as synchronous. If the current implementation
doesn't follow the docs, that's a bug. On the other hand, I don't recall anybody
complaining about it so far. But that means nothing, right?
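
If the synchronous wait were kept, the missing reference could presumably be
handled by taking one explicitly around the unlock/sleep window. A very rough
sketch only; it glosses over the EndJob handling and does not solve the
job-identity problem mentioned above:

    /* sketch only: keep the domain object alive while its lock is dropped */
    virObjectRef(vm);
    virDomainObjUnlock(vm);

    nanosleep(&ts, NULL);

    virDomainObjLock(vm);
    /* ... existing job/liveness checks ... */

    cleanup:
        if (vm) {
            virDomainObjUnlock(vm);
            virObjectUnref(vm);      /* drop the extra reference */
        }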

> 
> In other words, I'd just change to do:
> 
>         VIR_DEBUG("Cancelling job at client request");
> +       qemuDomainObjAbortAsyncJob(vm);
>         qemuDomainObjEnterMonitor(driver, vm);
>         ret = qemuMonitorMigrateCancel(priv->mon);
>         qemuDomainObjExitMonitor(driver, vm);
> 
>> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
>> index 5f8a9c5..c840686 100644
>> --- a/src/qemu/qemu_migration.c
>> +++ b/src/qemu/qemu_migration.c
>> @@ -1172,6 +1172,12 @@ qemuMigrationUpdateJobStatus(struct qemud_driver *driver,
>>      }
>>      priv->job.info.timeElapsed -= priv->job.start;
>>  
>> +    if (qemuDomainObjAbortAsyncJobRequested(vm)) {
>> +        VIR_DEBUG("Migration abort requested. Translating "
>> +                  "status to MIGRATION_STATUS_CANCELLED");
>> +        status = QEMU_MONITOR_MIGRATION_STATUS_CANCELLED;
>> +    }
>> +
> 
> This check seems to be too late and it complicates the code later on. If we
> keep qemuDomainAbortJob calling qemuMonitorMigrateCancel, we can rely on that
> and check priv->job.asyncAbort in qemuMigrationRun before entering the monitor
> to tell QEMU to start migrating. If the abort has not been requested by then,
> it can only happen after we call qemuMonitorMigrateTo*, and in that case the
> migration will be properly aborted by qemuMonitorMigrateCancel.
> 
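
(A rough sketch of where such a check could sit in qemuMigrationRun, right
before QEMU is told to start the migration; the exact error code and the
cleanup label are assumptions, not part of the patch:)

    /* sketch only: honour an abort requested before migration was started */
    if (priv->job.asyncAbort) {
        virReportError(VIR_ERR_OPERATION_ABORTED, "%s",
                       _("migration cancelled by client"));
        goto cleanup;
    }

    /* existing code: enter the monitor and call qemuMonitorMigrateTo* */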

Not really. The issue I've seen was like this:

   Thread A                               Thread B
1. start async migration out job
2.                                        Since job is set, abort it by calling 'migrate_cancel'
3.                                        return to user
4. issue 'migrate' on the monitor

I think your suggestion only makes this race harder to hit; it doesn't really
avoid it. With my code, however, we are guaranteed that 'migrate_cancel' will
take effect.

But I agree that the check can be moved before the 'query-migrate' command so
that we don't have to enter the monitor at all when we already know we are
cancelling the migration.
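
Roughly something like this (just a sketch, keeping the names used in the
patch; the rest of qemuMigrationUpdateJobStatus would stay as it is):

    /* sketch: if an abort was already requested, skip the status query
     * entirely and report the job as cancelled */
    if (priv->job.asyncAbort) {
        VIR_DEBUG("Migration abort requested; skipping status query");
        status = QEMU_MONITOR_MIGRATION_STATUS_CANCELLED;
    } else {
        /* existing code: enter the monitor, run the status query and
         * leave the monitor again */
    }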

> 
>>      ret = -1;
>>      switch (status) {
>>      case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
>> @@ -1214,6 +1220,35 @@ qemuMigrationUpdateJobStatus(struct qemud_driver *driver,
>>      return ret;
>>  }
>>  
>> +static int
>> +qemuMigrationCancel(struct qemud_driver *driver, virDomainObjPtr vm)
>> +{
>> +    qemuDomainObjPrivatePtr priv = vm->privateData;
>> +    int ret = -1;
>> +
>> +    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_ABORT) < 0)
>> +        goto cleanup;
>> +
>> +    if (!virDomainObjIsActive(vm)) {
>> +        virReportError(VIR_ERR_OPERATION_INVALID,
>> +                       "%s", _("domain is not running"));
>> +        goto endjob;
>> +    }
>> +
>> +    qemuDomainObjEnterMonitor(driver, vm);
>> +    ret = qemuMonitorMigrateCancel(priv->mon);
>> +    qemuDomainObjExitMonitor(driver, vm);
>> +
>> +endjob:
>> +    if (qemuDomainObjEndJob(driver, vm) == 0) {
>> +        virReportError(VIR_ERR_OPEN_FAILED, "%s",
>> +                       _("domain unexpectedly died"));
>> +        ret = -1;
>> +    }
>> +
>> +cleanup:
>> +    return ret;
>> +}
>>  
>>  static int
>>  qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm,
>> @@ -1262,10 +1297,14 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm,
>>      }
>>  
>>  cleanup:
>> -    if (priv->job.info.type == VIR_DOMAIN_JOB_COMPLETED)
>> +    if (priv->job.info.type == VIR_DOMAIN_JOB_COMPLETED) {
>>          return 0;
>> -    else
>> +    } else {
>> +        if (priv->job.info.type == VIR_DOMAIN_JOB_CANCELLED &&
>> +            qemuMigrationCancel(driver, vm) < 0)
>> +            VIR_DEBUG("Cancelling job at client request");
>>          return -1;
>> +    }
>>  }
> 
> This hack is unnecessary when we do what I suggested above.
> 
> But in any case, this patch does not actually allow the migration to be
> cancelled at any phase. It just allows the migration to be cancelled during
> the Perform phase or before it. And the cancellation would actually happen
> only during the Perform phase, thus possibly doing an unnecessary Prepare
> phase in case someone tried to cancel the migration at the very beginning.
> However, since even such an improvement in this area is a nice step forward
> and this particular case of aborting migration before it gets to the Perform
> phase is the most visible issue, we can address the rest of the issues later.
> 
> I'm looking forward to a simplified v2 :-)
> 
> Jirka
> 



