[libvirt] [PATCH] qemu: keep pre-migration domain state after failed migration

Martin Kletzander mkletzan at redhat.com
Fri Feb 7 09:29:39 UTC 2014


On Thu, Feb 06, 2014 at 06:23:43PM +0100, Jiri Denemark wrote:
> On Thu, Feb 06, 2014 at 17:33:14 +0100, Martin Kletzander wrote:
> > Couple of codepaths shared the same code which can be moved out to a
> > function and on one of such places, qemuMigrationConfirmPhase(), the
> > domain was resumed even if it wasn't running before the migration
> > started.
> >
> > Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407
> >
> > Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
> > ---
> >  src/qemu/qemu_domain.h    |   3 +-
> >  src/qemu/qemu_migration.c | 117 +++++++++++++++++++++++++---------------------
> >  2 files changed, 66 insertions(+), 54 deletions(-)
> >
> > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> > index 6a92351..84624f9 100644
> > --- a/src/qemu/qemu_domain.h
> > +++ b/src/qemu/qemu_domain.h
> > @@ -1,7 +1,7 @@
> >  /*
> >   * qemu_domain.h: QEMU domain private state
> >   *
> > - * Copyright (C) 2006-2013 Red Hat, Inc.
> > + * Copyright (C) 2006-2014 Red Hat, Inc.
> >   * Copyright (C) 2006 Daniel P. Berrange
> >   *
> >   * This library is free software; you can redistribute it and/or
> > @@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate {
> >      char *origname;
> >      int nbdPort; /* Port used for migration with NBD */
> >      unsigned short migrationPort;
> > +    int preMigrationState;
> >
> >      virChrdevsPtr devs;
> >
> > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> > index 407fb70..1a29efa 100644
> > --- a/src/qemu/qemu_migration.c
> > +++ b/src/qemu/qemu_migration.c
> > @@ -1075,6 +1075,53 @@ error:
> >      return NULL;
> >  }
> >
> > +static void
> > +qemuDomainObjStorePreMigrationState(virDomainObjPtr vm)
> > +{
> > +    qemuDomainObjPrivatePtr priv = vm->privateData;
> > +    priv->preMigrationState = virDomainObjGetState(vm, NULL);
> > +
> > +    VIR_DEBUG("Storing pre-migration state=%d domain=%p",
> > +              priv->preMigrationState, vm);
> > +}
>
> We are in qemu_migration.c file so this function and the one below
> should use qemuMigration prefix.
>

I've found one exception now, but I'm not basing it on that.  I
changed these to qemuMigration{S,Res}toreDomainState().

> > +
> > +/* Returns true if the domain was resumed, false otherwise */
> > +static bool
> > +qemuDomainObjRestorePreMigrationState(virConnectPtr conn, virDomainObjPtr vm)
> > +{
> > +    virQEMUDriverPtr driver = conn->privateData;
> > +    qemuDomainObjPrivatePtr priv = vm->privateData;
> > +    int state = virDomainObjGetState(vm, NULL);
> > +    bool ret = false;
> > +
> > +    VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d",
> > +              driver, vm, priv->preMigrationState, state);
> > +
> > +    if (state == VIR_DOMAIN_PAUSED &&
> > +        priv->preMigrationState == VIR_DOMAIN_RUNNING) {
> > +        /* This is basically the only restore possibility that's safe
> > +         * and we should attemt to do */
> > +

Changed this to attempt.

> > +        VIR_DEBUG("Restoring pre-migration state due to migration error");
> > +
> > +        /* we got here through some sort of failure; start the domain again */
> > +        if (qemuProcessStartCPUs(driver, vm, conn,
> > +                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
> > +                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> > +            /* Hm, we already know we are in error here.  We don't want to
> > +             * overwrite the previous error, though, so we just throw something
> > +             * to the logs and hope for the best */
> > +            VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name);
> > +            goto cleanup;
> > +        }
> > +        ret = true;
> > +    }
> > +
> > + cleanup:
> > +    priv->preMigrationState = VIR_DOMAIN_NOSTATE;
> > +    return ret;
> > +}
> > +
> >  /**
> >   * qemuMigrationStartNBDServer:
> >   * @driver: qemu driver
> > @@ -1952,8 +1999,8 @@ cleanup:
> >
> >
> >  /* The caller is supposed to lock the vm and start a migration job. */
> > -static char
> > -*qemuMigrationBeginPhase(virQEMUDriverPtr driver,
> > +static char *
> > +qemuMigrationBeginPhase(virQEMUDriverPtr driver,
>
> Heh, it took me a while to see what you did :-) However, the following
> lines are not indented correctly now...
>

It shouldn't be in this patch and there are more cleanups to do, so I
left it out of this patch and will post it with some other cleanups
soon.

> >                           virDomainObjPtr vm,
> >                           const char *xmlin,
> >                           const char *dname,
> > @@ -2075,6 +2122,8 @@ qemuMigrationBegin(virConnectPtr conn,
> >          asyncJob = QEMU_ASYNC_JOB_NONE;
> >      }
> >
> > +    qemuDomainObjStorePreMigrationState(vm);
> > +
> >      if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
> >          virReportError(VIR_ERR_OPERATION_INVALID,
> >                         "%s", _("domain is not running"));
> > @@ -2744,22 +2793,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
> >          /* cancel any outstanding NBD jobs */
> >          qemuMigrationCancelDriveMirror(mig, driver, vm);
> >
> > -        /* run 'cont' on the destination, which allows migration on qemu
> > -         * >= 0.10.6 to work properly.  This isn't strictly necessary on
> > -         * older qemu's, but it also doesn't hurt anything there
> > -         */
> > -        if (qemuProcessStartCPUs(driver, vm, conn,
> > -                                 VIR_DOMAIN_RUNNING_MIGRATED,
> > -                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> > -            if (virGetLastError() == NULL)
> > -                virReportError(VIR_ERR_INTERNAL_ERROR,
> > -                               "%s", _("resume operation failed"));
> > -            goto cleanup;
> > +        if (qemuDomainObjRestorePreMigrationState(conn, vm)) {
> > +            event = virDomainEventLifecycleNewFromObj(vm,
> > +                                                      VIR_DOMAIN_EVENT_RESUMED,
> > +                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> >          }
> >
> > -        event = virDomainEventLifecycleNewFromObj(vm,
> > -                                         VIR_DOMAIN_EVENT_RESUMED,
> > -                                         VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> >          if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) {
> >              VIR_WARN("Failed to save status on vm %s", vm->def->name);
> >              goto cleanup;
> > @@ -4065,7 +4104,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
> >  {
> >      virObjectEventPtr event = NULL;
> >      int ret = -1;
> > -    int resume = 0;
> >      virErrorPtr orig_err = NULL;
> >      virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> >      bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
> > @@ -4085,7 +4123,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
> >      if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
> >          goto endjob;
> >
> > -    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
> > +    qemuDomainObjStorePreMigrationState(vm);
> >
> >      if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
> >          ret = doPeer2PeerMigrate(driver, conn, vm, xmlin,
> > @@ -4112,25 +4150,13 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
> >                                           VIR_DOMAIN_EVENT_STOPPED,
> >                                           VIR_DOMAIN_EVENT_STOPPED_MIGRATED);
> >      }
> > -    resume = 0;
> >
> >  endjob:
> >      if (ret < 0)
> >          orig_err = virSaveLastError();
> >
> > -    if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
> > -        /* we got here through some sort of failure; start the domain again */
> > -        if (qemuProcessStartCPUs(driver, vm, conn,
> > -                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
> > -                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> > -            /* Hm, we already know we are in error here.  We don't want to
> > -             * overwrite the previous error, though, so we just throw something
> > -             * to the logs and hope for the best
> > -             */
> > -            VIR_ERROR(_("Failed to resume guest %s after failure"),
> > -                      vm->def->name);
> > -        }
> > -
> > +    if (!v3proto &&
> > +        qemuDomainObjRestorePreMigrationState(conn, vm)) {
>
> You could also call qemuDomainObjRestorePreMigrationState
> unconditionally since you always reset preMigrationState which makes it
> safe to call qemuDomainObjRestorePreMigrationState multiple times but it
> does not really matter.
>

I changed it to unconditional call, I wanted to do that at first
(hence the carefulness inside that function), but rather called it
conditionally because I didn't want to explain that :)

> >          event = virDomainEventLifecycleNewFromObj(vm,
> >                                           VIR_DOMAIN_EVENT_RESUMED,
> >                                           VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> > @@ -4179,7 +4205,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
> >  {
> >      virObjectEventPtr event = NULL;
> >      int ret = -1;
> > -    bool resume;
> >      bool hasrefs;
> >
> >      /* If we didn't start the job in the begin phase, start it now. */
> > @@ -4194,32 +4219,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
> >      virCloseCallbacksUnset(driver->closeCallbacks, vm,
> >                             qemuMigrationCleanup);
> >
> > -    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
> >      ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen,
> >                            cookieout, cookieoutlen,
> >                            flags, resource, NULL, graphicsuri);
> >
> > -    if (ret < 0 && resume &&
> > -        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
> > -        /* we got here through some sort of failure; start the domain again */
> > -        if (qemuProcessStartCPUs(driver, vm, conn,
> > -                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
> > -                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> > -            /* Hm, we already know we are in error here.  We don't want to
> > -             * overwrite the previous error, though, so we just throw something
> > -             * to the logs and hope for the best
> > -             */
> > -            VIR_ERROR(_("Failed to resume guest %s after failure"),
> > -                      vm->def->name);
> > +    if (ret < 0) {
> > +        if (qemuDomainObjRestorePreMigrationState(conn, vm)) {
> > +            event = virDomainEventLifecycleNewFromObj(vm,
> > +                                                      VIR_DOMAIN_EVENT_RESUMED,
> > +                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> >          }
> > -
> > -        event = virDomainEventLifecycleNewFromObj(vm,
> > -                                         VIR_DOMAIN_EVENT_RESUMED,
> > -                                         VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> > -    }
> > -
> > -    if (ret < 0)
> >          goto endjob;
> > +    }
> >
> >      qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE);
> >
>
> ACK with the nits addressed.
>

Pushed with the nits fixed.

> Jirka

Martin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20140207/a6000416/attachment-0001.sig>


More information about the libvir-list mailing list