[libvirt] [PATCH v3 RESEND 09/12] qemu_migration: Check size prerequisites

Jiri Denemark jdenemar at redhat.com
Wed Feb 20 13:47:11 UTC 2013


On Mon, Feb 18, 2013 at 15:38:42 +0100, Michal Privoznik wrote:
> With new NBD storage migration approach there are several
> requirements that need to be meet for successful use of the

s/meet/met/

> feature. One of them is - the file representing a disk, needs to
> have at least same size as on the source. Hence, we must transfer
> a list of pairs [disk target, size] and check on destination that
> this requirement is met and/or take actions to meet it.
> ---
>  src/qemu/qemu_migration.c | 302 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 288 insertions(+), 14 deletions(-)
> 
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index e2c7804..9b38a0c 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
...
> @@ -525,22 +555,71 @@ qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig,
>  
>  static int
>  qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig,
> -                          virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> +                          virQEMUDriverPtr driver,
>                            virDomainObjPtr vm)
>  {
>      qemuDomainObjPrivatePtr priv = vm->privateData;
> +    int ret = -1;
> +    size_t i;
>  
>      /* It is not a bug if there already is a NBD data */
>      if (!mig->nbd &&
>          VIR_ALLOC(mig->nbd) < 0) {
>          virReportOOMError();
> -        return -1;
> +        return ret;
> +    }
> +
> +    /* in Begin phase add info about disks */
> +    if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 &&
> +        vm->def->ndisks) {
> +        if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) {
> +            virReportOOMError();
> +            goto cleanup;
> +        }
> +
> +        for (i = 0; i < vm->def->ndisks; i++) {
> +            virDomainDiskDefPtr disk = vm->def->disks[i];
> +            virDomainBlockInfo info;
> +            int format = disk->format;
> +
> +            /* Add only non-shared RW disks with source */
> +            if (!disk->src || disk->shared || disk->readonly)
> +                continue;
> +
> +            memset(&info, 0, sizeof(info));
> +
> +            if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0)
> +                goto cleanup;

Hmm, I see solving the need to enter a job for getting disk block info
won't be exactly easy.

> +
> +            if (format != VIR_STORAGE_FILE_RAW &&
> +                format != VIR_STORAGE_FILE_QCOW &&
> +                format != VIR_STORAGE_FILE_QCOW2) {
> +                virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
> +                               _("Cannot pre-create storage of %s format"),
> +                               virStorageFileFormatTypeToString(format));
> +                goto cleanup;
> +            }
> +
> +            if (!(mig->nbd->disk[mig->nbd->ndisks].target = strdup(disk->dst))) {
> +                virReportOOMError();
> +                goto cleanup;
> +            }
> +
> +            mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity;
> +            mig->nbd->ndisks++;
> +        }
>      }
>  
>      mig->nbd->port = priv->nbdPort;
>      mig->flags |= QEMU_MIGRATION_COOKIE_NBD;
>  
> -    return 0;
> +    ret = 0;
> +
> +cleanup:
> +    if (ret < 0)
> +        qemuMigrationCookieNBDFree(mig->nbd);
> +
> +    return ret;
>  }
>  
>  
...
> @@ -946,20 +1034,56 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig,
>          virXPathBoolean("boolean(./nbd)", ctxt)) {
>          char *port;
>  
> +        if (VIR_ALLOC(mig->nbd) < 0) {
> +            virReportOOMError();
> +            goto error;
> +        }
> +
>          port = virXPathString("string(./nbd/@port)", ctxt);
> -        if (port) {
> -            if (VIR_ALLOC(mig->nbd) < 0) {
> +        if (port && virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) {
> +            VIR_FREE(port);
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           _("Malformed nbd port '%s'"),
> +                           port);

Oops, I believe you wanted to swap these two lines :-)

> +            goto error;
> +        }
> +        VIR_FREE(port);
> +
> +        if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) {
> +            xmlNodePtr oldNode = ctxt->node;
> +            if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) {
>                  virReportOOMError();
>                  goto error;
>              }
> -            if (virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) {
> -                VIR_FREE(port);
> -                virReportError(VIR_ERR_INTERNAL_ERROR,
> -                               _("Malformed nbd port '%s'"),
> -                               port);
> -                goto error;
> +            mig->nbd->ndisks = n;
> +
> +            for (i = 0; i < n; i++) {
> +                ctxt->node = nodes[i];

When accessing just attributes, it's usually easier to just call
virXMLPropString(nodes[i], "name") instead of using XPath machinery.

> +
> +                tmp = virXPathString("string(./@target)", ctxt);
> +                if (!tmp) {
> +                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                                   _("Malformed target attribute"));

I think this should rather be "Malformed NBD disk target attribute".

> +                    goto error;
> +                }
> +                mig->nbd->disk[i].target = tmp;
> +                /* deliberately don't free @tmp here, as the
> +                 * cookie has the reference now and it is
> +                 * responsible for freeing it later */
> +
> +                tmp = virXPathString("string(./@size)", ctxt);
> +                if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *)
> +                                     &mig->nbd->disk[i].bytes) < 0) {
> +                    virReportError(VIR_ERR_INTERNAL_ERROR,
> +                                   _("Malformed size attribute '%s'"),

You should mention NBD disk here as well.

> +                                   tmp);
> +                    VIR_FREE(tmp);
> +                    goto error;
> +                }
> +                VIR_FREE(tmp);
>              }
> -            VIR_FREE(port);
> +            VIR_FREE(nodes);
> +            ctxt->node = oldNode;
>          }
>      }
>  
> @@ -1352,6 +1476,152 @@ error:
>      goto cleanup;
>  }
>  
> +static int
> +qemuMigrationPreCreateStorageRaw(virDomainDiskDefPtr disk,
> +                                 size_t bytes)
> +{
> +    int ret = -1;
> +    struct stat sb;
> +    int fd = -1;
> +
> +    if ((fd = virFileOpenAs(disk->src, O_RDWR | O_CREAT, 0660,
> +                            -1, -1, VIR_FILE_OPEN_NOFORK)) < 0) {
> +        virReportSystemError(errno, _("Unable to create '%s'"), disk->src);
> +        goto cleanup;
> +    }
> +
> +    if (fstat(fd, &sb) < 0) {
> +        virReportSystemError(errno, _("Unable to stat '%s'"), disk->src);
> +        goto unlink;
> +    }
> +
> +    VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src, sb.st_size, bytes);
> +    if (sb.st_size < bytes) {
> +        if (ftruncate(fd, bytes) < 0) {
> +            virReportSystemError(errno, _("Unable to ftruncate '%s'"), disk->src);
> +            goto unlink;
> +        }

Shouldn't we rather call posix_fallocate or something similar?

> +
> +        if (fsync(fd) < 0) {
> +            ret = -errno;

No need to pass errno to the caller.

> +            virReportSystemError(errno, _("cannot sync data to file '%s'"),
> +                                 disk->src);
> +            goto unlink;
> +        }
> +    }
> +
> +    ret = 0;
> +
> +cleanup:
> +    VIR_FORCE_CLOSE(fd);

You should really call VIR_CLOSE(fd) here and check for errors.

> +    return ret;
> +
> +unlink:
> +    if (unlink(disk->src) < 0)
> +        VIR_WARN("Unable to unlink '%s': %d", disk->src, errno);
> +    goto cleanup;

What if the file was already there but it was just too small and making
it larger failed. I don't think you want to unlink it in this case.

> +}
> +
> +static int
> +qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver,
> +                                  virDomainDiskDefPtr disk,
> +                                  int format,
> +                                  size_t bytes)
> +{
> +    int ret = -1;
> +    const char *imgBinary = qemuFindQemuImgBinary(driver);
> +    virCommandPtr cmd = NULL;
> +    size_t size_arg;
> +
> +    if (!imgBinary)
> +        return ret;
> +
> +    /* Size in KB */
> +    size_arg = VIR_DIV_UP(bytes, 1024);
> +
> +    cmd = virCommandNewArgList(imgBinary, "create", "-f",
> +                              virStorageFileFormatTypeToString(format),
> +                              "-o", "preallocation=metadata",
> +                              disk->src, NULL);

Wrong indentation.

> +
> +    virCommandAddArgFormat(cmd, "%zuK", size_arg);

Any particular reason why we need to round up the size when we can just
use bytes?

       virCommandAddArgFormat(cmd, "%zu", bytes);

> +
> +    if (virCommandRun(cmd, NULL) < 0)
> +        goto unlink;

What happens if the QCOW is already there?

> +
> +    ret = 0;
> +
> +cleanup:
> +    return ret;
> +
> +unlink:
> +    if (unlink(disk->src) < 0)
> +        VIR_WARN("Unable to unlink '%s': %d", disk->src, errno);
> +    goto cleanup;
> +}

Anyway, I feel like all this storage creation code should be placed in a
storage driver somewhere.

> +
> +static int
> +qemuMigrationPreCreateStorage(virQEMUDriverPtr driver,
> +                              virDomainObjPtr vm,
> +                              qemuMigrationCookiePtr mig)
> +{
> +    int ret = -1;
> +    size_t i = 0;
> +
> +    if (!mig->nbd || !mig->nbd->ndisks) {
> +        /* nothing to do here */
> +        return 0;
> +    }
> +
> +    for (i = 0; i < mig->nbd->ndisks; i++) {
> +        virDomainDiskDefPtr disk;
> +        int format;
> +        size_t bytes;
> +        int index;

This fails to compile:

    error: declaration of 'index' shadows a global declaration

> +
> +        index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false);
> +        if (index < 0) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           _("No such disk '%s"),
> +                           mig->nbd->disk[i].target);
> +            goto cleanup;
> +        }
> +
> +        disk = vm->def->disks[i];

As John already said, I think you want vm->def->disks[index] here.

> +        format = disk->format;
> +        bytes = mig->nbd->disk[i].bytes;
> +
> +        VIR_DEBUG("Checking '%s' of %s format for its size (requested %zuB)",
> +                  disk->src,  virStorageFileFormatTypeToString(format), bytes);
> +        switch (format) {
> +        case VIR_STORAGE_FILE_RAW:
> +            if (qemuMigrationPreCreateStorageRaw(disk, bytes) < 0)
> +                goto cleanup;
> +            break;
> +        case VIR_STORAGE_FILE_QCOW:
> +        case VIR_STORAGE_FILE_QCOW2:
> +            if (qemuMigrationPreCreateStorageQCOW(driver, disk,
> +                                                  format, bytes) < 0)
> +                goto cleanup;
> +            break;
> +        default:
> +            virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
> +                           _("Cannot pre-create disks of %s format"),
> +                           virStorageFileFormatTypeToString(format));
> +            goto cleanup;
> +        }
> +
> +    }
> +
> +    ret = 0;
> +cleanup:
> +    /* free from migration data to prevent
> +     * infinite sending from src to dst and back */
> +    VIR_FREE(mig->nbd->disk);

This leaks all disk targets.

> +    mig->nbd->ndisks = 0;
> +    return ret;
> +}
> +
>  /* Validate whether the domain is safe to migrate.  If vm is NULL,
>   * then this is being run in the v2 Prepare stage on the destination
>   * (where we only have the target xml); if vm is provided, then this
> @@ -2014,6 +2284,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
>                                         QEMU_MIGRATION_COOKIE_NBD)))
>          goto cleanup;
>  
> +    /* pre-create all storage */
> +    if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0)
> +        goto cleanup;
> +

What happens if the disks we are pre-creating exist and are used by
another domain on the destination host? :-P Normally, migration would
fail if locking manager is in use but with your code, we'd just happily
overwrite all disks, right? :-) I'm afraid we should deal with lock
manager somehow to avoid doing that :-/

>      if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
>          goto cleanup;
>      qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE);

Jirka




More information about the libvir-list mailing list