[libvirt] [PATCH] qemu: Propagate storage errors on migration

John Ferlan jferlan at redhat.com
Mon Jan 19 21:58:37 UTC 2015



On 01/09/2015 07:22 AM, Michal Privoznik wrote:
> There's one bug that got more visible with my patches that
> automatically precreate storage on migration. The problem is, if
> there's not enough disk space on destination, we successfully finish
> the migration instead of failing.
> 
> Firstly, we instruct qemu to copy the storage. And as it copies the
> data, one write() will return -ENOSPC, eventually, to which qemu
> reacts by emitting BLOCK_JOB_ERROR. The event is successfully ignored
> by us. Then, since the block job has finished, BLOCK_JOB_COMPLETED
> event is emitted too.
> 
> Secondly, we are not checking for the block job completion as we
> should. Currently, we do the check by issuing 'query-block-jobs' and
> then looking in the command's output for the job we are interested in.
> If course, we don't fail if the job is not there, in which case the

s/If/Of

> number of total bytes to be transferred and bytes already transferred,
> well they equal both to zero. This is actually the line causing the

s/well they equal both to zero/will both be equal to zero.

> bug as it sees both numbers equal to each other and gives green light
> to the actual migration.
> 
> The fix consist of two parts:
> 
> In the first, code handling BLOCK_JOB_ERROR is added. It's a monitor
> event and we should have handler for that anyway.
> 
> In the second part, the code doing drive mirroring is rewritten so it
> waits for the events instead of querying on the monitor repeatedly.
> 
> Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
> ---
>  src/conf/domain_conf.c       |  3 ++-
>  src/conf/domain_conf.h       |  1 +
>  src/qemu/qemu_migration.c    | 25 +++++++------------------
>  src/qemu/qemu_monitor_json.c | 10 ++++++++++
>  src/qemu/qemu_process.c      | 14 ++++++++++++++
>  5 files changed, 34 insertions(+), 19 deletions(-)
> 

The concept seems reasonable - of course the code has change since you
first sent this, so a rebase will be necessary...

> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index d1a483a..751a9b5 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -787,7 +787,8 @@ VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST,
>                "none",
>                "yes",
>                "abort",
> -              "pivot")
> +              "pivot",
> +              "error")
>  
>  VIR_ENUM_IMPL(virDomainLoader,
>                VIR_DOMAIN_LOADER_TYPE_LAST,
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index ac1f4f8..f18fa80 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -654,6 +654,7 @@ typedef enum {
>      VIR_DOMAIN_DISK_MIRROR_STATE_READY, /* Job in second phase */
>      VIR_DOMAIN_DISK_MIRROR_STATE_ABORT, /* Job aborted, waiting for event */
>      VIR_DOMAIN_DISK_MIRROR_STATE_PIVOT, /* Job pivoted, waiting for event */
> +    VIR_DOMAIN_DISK_MIRROR_STATE_ERROR, /* Job failed */
>  
>      VIR_DOMAIN_DISK_MIRROR_STATE_LAST
>  } virDomainDiskMirrorState;
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 77e0b35..e2309ea 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -1743,7 +1743,6 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver,
>  
>      for (i = 0; i < vm->def->ndisks; i++) {
>          virDomainDiskDefPtr disk = vm->def->disks[i];
> -        virDomainBlockJobInfo info;
>  
>          /* skip shared, RO and source-less disks */
>          if (disk->src->shared || disk->src->readonly ||
> @@ -1768,6 +1767,7 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver,
>          if (mon_ret < 0)
>              goto error;
>  
> +        disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_COPY;

I assume this is how we "ensure" we get into the else of
qemuProcessHandleBlockJob.  Is setting this outside the context of this
bugfix fixing a bug?  That is should it have been set all along? Curious
mostly.

>          lastGood = i;
>  
>          /* wait for completion */
> @@ -1775,38 +1775,27 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver,
>              /* Poll every 500ms for progress & to allow cancellation */
>              struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000ull };
>  
> -            memset(&info, 0, sizeof(info));
> -
> -            if (qemuDomainObjEnterMonitorAsync(driver, vm,
> -                                               QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> -                goto error;
>              if (priv->job.asyncAbort) {
>                  /* explicitly do this *after* we entered the monitor,
>                   * as this is a critical section so we are guaranteed
>                   * priv->job.asyncAbort will not change */

So, since we're not entering the monitor any more - does the text need
to change? Also, is it 'safe' to look at priv->job.current->type?  Or
even priv->job.asyncAbort since we won't have a monitor lock?

Considering the recent series on ExitMonitor error checks vis-a-vis the
vm dying during some monitor interaction could this end up being an
infinite loop?  That is would the necessary mirrorState be set in the
case where the vm dies?

> -                qemuDomainObjExitMonitor(driver, vm);
>                  priv->job.current->type = VIR_DOMAIN_JOB_CANCELLED;
>                  virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"),
>                                 qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
>                                 _("canceled by client"));

s/canceled/cancelled

(I see there's also another place in qemuMigrationUpdateJobStatus as
well as libvirt-domain and virsh* modules, sigh)

>                  goto error;
>              }
> -            mon_ret = qemuMonitorBlockJobInfo(priv->mon, diskAlias, &info,
> -                                              NULL);
> -            qemuDomainObjExitMonitor(driver, vm);
>  
> -            if (mon_ret < 0)
> -                goto error;
> -
> -            if (info.cur == info.end) {
> +            if (disk->mirrorState == VIR_DOMAIN_DISK_MIRROR_STATE_READY) {
>                  VIR_DEBUG("Drive mirroring of '%s' completed", diskAlias);
>                  break;
> +            } else if (disk->mirrorState == VIR_DOMAIN_DISK_MIRROR_STATE_ERROR) {
> +                virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"),
> +                               qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
> +                               _("canceled by destination"));

s/canceled/cancelled

> +                goto error;
>              }
>  
> -            /* XXX Frankly speaking, we should listen to the events,
> -             * instead of doing this. But this works for now and we
> -             * are doing something similar in migration itself anyway */
> -
>              virObjectUnlock(vm);
>  
>              nanosleep(&ts, NULL);
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index e567aa7..5d6bbca 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -76,6 +76,7 @@ static void qemuMonitorJSONHandlePMWakeup(qemuMonitorPtr mon, virJSONValuePtr da
>  static void qemuMonitorJSONHandlePMSuspend(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandleBlockJobCompleted(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandleBlockJobCanceled(qemuMonitorPtr mon, virJSONValuePtr data);
> +static void qemuMonitorJSONHandleBlockJobError(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandleBlockJobReady(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandleBalloonChange(qemuMonitorPtr mon, virJSONValuePtr data);
>  static void qemuMonitorJSONHandlePMSuspendDisk(qemuMonitorPtr mon, virJSONValuePtr data);
> @@ -94,6 +95,7 @@ static qemuEventHandler eventHandlers[] = {
>      { "BLOCK_IO_ERROR", qemuMonitorJSONHandleIOError, },
>      { "BLOCK_JOB_CANCELLED", qemuMonitorJSONHandleBlockJobCanceled, },
>      { "BLOCK_JOB_COMPLETED", qemuMonitorJSONHandleBlockJobCompleted, },
> +    { "BLOCK_JOB_ERROR", qemuMonitorJSONHandleBlockJobError, },
>      { "BLOCK_JOB_READY", qemuMonitorJSONHandleBlockJobReady, },
>      { "DEVICE_DELETED", qemuMonitorJSONHandleDeviceDeleted, },
>      { "DEVICE_TRAY_MOVED", qemuMonitorJSONHandleTrayChange, },
> @@ -842,6 +844,14 @@ qemuMonitorJSONHandleBlockJobCanceled(qemuMonitorPtr mon,
>  }
>  
>  static void
> +qemuMonitorJSONHandleBlockJobError(qemuMonitorPtr mon,
> +                                   virJSONValuePtr data)
> +{
> +    qemuMonitorJSONHandleBlockJobImpl(mon, data,
> +                                      VIR_DOMAIN_BLOCK_JOB_FAILED);
> +}
> +
> +static void
>  qemuMonitorJSONHandleBlockJobReady(qemuMonitorPtr mon,
>                                     virJSONValuePtr data)
>  {
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index c18204b..b29ecf1 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -1110,6 +1110,20 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
>                  disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN;
>                  save = true;
>              }
> +        } else if (disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY) {
> +            switch ((virConnectDomainEventBlockJobStatus) status) {
> +            case VIR_DOMAIN_BLOCK_JOB_READY:
> +            case VIR_DOMAIN_BLOCK_JOB_COMPLETED:

"theoretically" if status == COMPLETED we shouldn't be here either since
that the first "if" checks that...

> +                disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY;
> +                break;
> +            case VIR_DOMAIN_BLOCK_JOB_CANCELED:
> +            case VIR_DOMAIN_BLOCK_JOB_FAILED:
> +                disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_ERROR;
> +                break;
> +            case VIR_DOMAIN_BLOCK_JOB_LAST:
> +                VIR_DEBUG("should not get here");
> +                break;

Not that we'd get here, but what effect is not setting mirrorState

Is a switch overkill in this instance?

John

> +            }
>          }
>      }
>  
> 




More information about the libvir-list mailing list