[libvirt] [PATCH] libxl: Fix lock manager lock ordering
Jim Fehlig
jfehlig at suse.com
Wed Nov 6 19:14:54 UTC 2019
Does anyone have a few moments to review this patch? Thanks!
Regards,
Jim
On 10/14/19 3:39 PM, Jim Fehlig wrote:
> The ordering of lock manager locks in the libxl driver has a flaw that was
> uncovered by a migration error path. In the perform phase of migration, the
> source host calls virDomainLockProcessPause to release the lock before
> sending the VM to the destination host. If the send fails an attempt is made
> to reacquire the lock with virDomainLockProcessResume, but that too can fail
> if the destination host has not finished cleaning up the failed VM and
> releasing the lock it acquired when starting to receive the VM.
>
> This change delays calling virDomainLockProcessResume in libxlDomainStart
> until the VM is successfully created, but before it is unpaused. A similar
> approach is used by the qemu driver, avoiding the need to release the lock
> if VM creation fails. In the migration perform phase, releasing the lock
> with virDomainLockProcessPause is delayed until the VM is successfully
> sent to the destination, which avoids reacquiring the lock if the send
> fails.
>
> Signed-off-by: Jim Fehlig <jfehlig at suse.com>
> ---
> src/libxl/libxl_domain.c | 14 +++++++-------
> src/libxl/libxl_migration.c | 14 +++++---------
> 2 files changed, 12 insertions(+), 16 deletions(-)
>
> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c
> index 4073bf8d46..a830a19b99 100644
> --- a/src/libxl/libxl_domain.c
> +++ b/src/libxl/libxl_domain.c
> @@ -1364,13 +1364,6 @@ libxlDomainStart(libxlDriverPrivatePtr driver,
> NULL) < 0)
> goto cleanup;
>
> - if (virDomainLockProcessResume(driver->lockManager,
> - "xen:///system",
> - vm,
> - priv->lockState) < 0)
> - goto cleanup;
> - VIR_FREE(priv->lockState);
> -
> if (libxlNetworkPrepareDevices(vm->def) < 0)
> goto cleanup_dom;
>
> @@ -1453,6 +1446,13 @@ libxlDomainStart(libxlDriverPrivatePtr driver,
>
> libxlLoggerOpenFile(cfg->logger, domid, vm->def->name, config_json);
>
> + if (virDomainLockProcessResume(driver->lockManager,
> + "xen:///system",
> + vm,
> + priv->lockState) < 0)
> + goto destroy_dom;
> + VIR_FREE(priv->lockState);
> +
> /* Always enable domain death events */
> if (libxl_evenable_domain_death(cfg->ctx, vm->def->id, 0, &priv->deathW))
> goto destroy_dom;
> diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c
> index a1021d499b..8e64dc5d04 100644
> --- a/src/libxl/libxl_migration.c
> +++ b/src/libxl/libxl_migration.c
> @@ -1253,20 +1253,16 @@ libxlDomainMigrationSrcPerform(libxlDriverPrivatePtr driver,
> sockfd = virNetSocketDupFD(sock, true);
> virObjectUnref(sock);
>
> - if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
> - VIR_WARN("Unable to release lease on %s", vm->def->name);
> - VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
> -
> /* suspend vm and send saved data to dst through socket fd */
> virObjectUnlock(vm);
> ret = libxlDoMigrateSrcSend(driver, vm, flags, sockfd);
> virObjectLock(vm);
>
> - if (ret < 0) {
> - virDomainLockProcessResume(driver->lockManager,
> - "xen:///system",
> - vm,
> - priv->lockState);
> + if (ret == 0) {
> + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
> + VIR_WARN("Unable to release lease on %s", vm->def->name);
> + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
> + } else {
> /*
> * Confirm phase will not be executed if perform fails. End the
> * job started in begin phase.
>
More information about the libvir-list
mailing list