[libvirt] [PATCHv4 00/18] live block migration via virDomainBlockCopy

Eric Blake eblake at redhat.com
Tue Apr 10 04:02:00 UTC 2012


On 04/09/2012 09:52 PM, Eric Blake wrote:
> v3 was here:
> https://www.redhat.com/archives/libvir-list/2012-April/msg00288.html
> 
> changes from v3:
> - split v3 patch 8 into 1/18 and 9/18
> - incorporate Laine's review comments on v3 patches 1 and 2 (here 2 and 3)
> - fix more bugs found during testing
> - upgrade to Paolo's latest build (which dropped 'mode':'no-backing-file'
> and replaced it with 'full':'true')
> - added a new patch 14/18 that lets blockcopy work with SELinux enforcing
> (it still leaks files rather than clean up context on job abort or pivot,
> but that's a harder problem to solve)
> 
> repo.or.cz is acting up on me at the moment, so I won't have a repo
> to point to until I can investigate why tomorrow.  I'll send an interdiff
> in reply to this message.
> 

Interdiff from v3 is attached

 src/conf/domain_conf.c       |    2 +-
 src/libvirt.c                |    9 +++-
 src/qemu/qemu_driver.c       |  102
+++++++++++++++++++++++++++++-------------
 src/qemu/qemu_monitor.c      |    8 ++--
 src/qemu/qemu_monitor.h      |   10 +----
 src/qemu/qemu_monitor_json.c |   15 +++---
 src/qemu/qemu_monitor_json.h |    2 +-
 tools/virsh.c                |    3 +-
 tools/virsh.pod              |   33 +++++++++-----
 9 files changed, 114 insertions(+), 70 deletions(-)

-- 
Eric Blake   eblake at redhat.com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org
-------------- next part --------------
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index e67be42..3aa6861 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -10880,7 +10880,7 @@ virDomainDiskDefFormat(virBufferPtr buf,
         virBufferEscapeString(buf, "      <mirror file='%s'", def->mirror);
         if (def->mirrorFormat)
             virBufferAsprintf(buf, " format='%s'", def->mirrorFormat);
-        virBufferAddLit(buf, ">\n");
+        virBufferAddLit(buf, "/>\n");
     }

     virBufferAsprintf(buf, "      <target dev='%s' bus='%s'",
diff --git a/src/libvirt.c b/src/libvirt.c
index 7a8f7c0..bdde654 100644
--- a/src/libvirt.c
+++ b/src/libvirt.c
@@ -17936,9 +17936,12 @@ error:
  * BlockJob operations may take a long time to complete, and during this time
  * further domain interactions may be unresponsive.  To avoid this problem,
  * pass VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC in the @flags argument to enable
- * asynchronous behavior.  Either way, when the job has been cancelled, a
- * BlockJob event will be emitted, with status VIR_DOMAIN_BLOCK_JOB_CANCELLED.
- * In this usage, @flags must not contain VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT.
+ * asynchronous behavior; this flag fails if asynchronous support is not
+ * possible.  If asynchronous jobs are supported, then when the job has
+ * been canceled, a BlockJob event will be emitted, with status
+ * VIR_DOMAIN_BLOCK_JOB_CANCELED (even if the ABORT_ASYNC flag was not
+ * used); but since events can be missed, it is also possible to use
+ * virDomainBlockJobInfo() to poll if the job is still running.
  *
  * If the current block job for @disk is VIR_DOMAIN_BLOCK_JOB_TYPE_COPY, then
  * the default is to abort the mirroring and revert to the source disk;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 7ba41ee..04dfece 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -11733,6 +11733,16 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
     disk = vm->def->disks[idx];
     priv = vm->privateData;

+    /* Asynchronous job cancellation was introduced at the same time
+     * as drive-mirror jobs, for upstream qemu 1.1.  */
+    if (mode == QEMU_JOB_ABORT &&
+        (flags & VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC) &&
+        !qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DRIVE_MIRROR)) {
+        qemuReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                        _("Asynchronous cancel is not supported with "
+                          "this QEMU binary"));
+        goto cleanup;
+    }
     if (disk->mirror && mode == BLOCK_JOB_PULL) {
         qemuReportError(VIR_ERR_BLOCK_COPY_ACTIVE,
                         _("disk '%s' already in active block copy job"),
@@ -11799,11 +11809,10 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
      */
     if (mode == BLOCK_JOB_ABORT &&
         !(flags & VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC)) {
-        ret = 1;
         while (1) {
             /* Poll every 50ms */
-            struct timespec ts = { .tv_sec = 0,
-                                   .tv_nsec = 50 * 1000 * 1000ull };
+            static struct timespec ts = { .tv_sec = 0,
+                                          .tv_nsec = 50 * 1000 * 1000ull };
             virDomainBlockJobInfo dummy;

             qemuDomainObjEnterMonitorWithDriver(driver, vm);
@@ -11885,8 +11894,12 @@ qemuDomainBlockCopy(virDomainPtr dom, const char *path,
     virDomainDiskDefPtr disk;
     int ret = -1;
     int idx;
-    int mode;
     struct stat st;
+    bool need_unlink = false;
+    char *mirror = NULL;
+    char *mirrorFormat = NULL;
+    char *origsrc = NULL;
+    char *origdriver = NULL;

     /* Preliminaries: find the disk we are editing, sanity checks */
     virCheckFlags(VIR_DOMAIN_BLOCK_COPY_SHALLOW |
@@ -11935,6 +11948,9 @@ qemuDomainBlockCopy(virDomainPtr dom, const char *path,
         goto endjob;
     }

+    /* XXX this is pessimistic; we could use 'query-block' or even
+     * keep track of the backing chain ourselves, rather than assuming
+     * that all non-raw source files have a current backing image */
     if ((flags & VIR_DOMAIN_BLOCK_COPY_SHALLOW) &&
         STREQ_NULLABLE(format, "raw") &&
         STRNEQ_NULLABLE(disk->driverType, "raw")) {
@@ -11943,18 +11959,6 @@ qemuDomainBlockCopy(virDomainPtr dom, const char *path,
                         disk->dst);
         goto endjob;
     }
-    /* XXX this is pessimistic; we could use 'query-block' or even
-     * keep track of the backing chain ourselves, rather than assuming
-     * that all non-raw source files have a current backing image */
-    if ((flags & VIR_DOMAIN_BLOCK_COPY_REUSE_EXT) &&
-        !(flags & VIR_DOMAIN_BLOCK_COPY_SHALLOW) &&
-        STRNEQ_NULLABLE(disk->driverType, "raw")) {
-        qemuReportError(VIR_ERR_ARGUMENT_UNSUPPORTED,
-                        _("non-shallow copy of non-raw disk '%s' to existing "
-                          "destination not supported"),
-                        disk->dst);
-        goto endjob;
-    }

     /* Prepare the destination file.  */
     if (stat(dest, &st) < 0) {
@@ -11977,35 +11981,69 @@ qemuDomainBlockCopy(virDomainPtr dom, const char *path,
         goto endjob;
     }

-    /* XXX We also need to add security labeling, lock manager lease,
-     * and auditing of those events.  */
-    if (!format && !(flags & VIR_DOMAIN_BLOCK_COPY_REUSE_EXT))
-        format = disk->driverType;
-    if ((format && !(disk->mirrorFormat = strdup(format))) ||
-        !(disk->mirror = strdup(dest))) {
+    if (!(flags & VIR_DOMAIN_BLOCK_COPY_REUSE_EXT)) {
+        int fd = qemuOpenFile(driver, dest, O_WRONLY | O_TRUNC | O_CREAT,
+                              &need_unlink, NULL);
+        if (fd < 0)
+            goto endjob;
+        VIR_FORCE_CLOSE(fd);
+        if (!format)
+            format = disk->driverType;
+    }
+    if ((format && !(mirrorFormat = strdup(format))) ||
+        !(mirror = strdup(dest))) {
         virReportOOMError();
         goto endjob;
     }
-    if (flags & VIR_DOMAIN_BLOCK_COPY_REUSE_EXT)
-        mode = QEMU_MONITOR_DRIVE_MIRROR_EXISTING;
-    else if (flags & VIR_DOMAIN_BLOCK_COPY_SHALLOW)
-        mode = QEMU_MONITOR_DRIVE_MIRROR_ABSOLUTE;
-    else
-        mode = QEMU_MONITOR_DRIVE_MIRROR_NO_BACKING;
+
+    /* Manipulate disk in place, in a way that can be reverted on
+     * failure, in order to set up labeling and locking.  */
+    origsrc = disk->src;
+    disk->src = (char *) dest;
+    origdriver = disk->driverType;
+    disk->driverType = (char *) "raw"; /* Don't want to probe backing files */
+
+    if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0)
+        goto endjob;
+    if (virSecurityManagerSetImageLabel(driver->securityManager, vm->def,
+                                        disk) < 0) {
+        if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0)
+            VIR_WARN("Unable to release lock on %s", dest);
+        goto endjob;
+    }
+
+    disk->src = origsrc;
+    origsrc = NULL;
+    disk->driverType = origdriver;
+    origdriver = NULL;

     /* Actually start the mirroring */
     qemuDomainObjEnterMonitorWithDriver(driver, vm);
-    ret = qemuMonitorDriveMirror(priv->mon, NULL, device, dest, format, mode);
+    ret = qemuMonitorDriveMirror(priv->mon, NULL, device, dest, format, flags);
+    virDomainAuditDisk(vm, NULL, dest, "mirror", ret >= 0);
     if (ret == 0 && bandwidth != 0)
         ret = qemuMonitorBlockJob(priv->mon, device, NULL, bandwidth, NULL,
                                   BLOCK_JOB_SPEED_INTERNAL);
     qemuDomainObjExitMonitorWithDriver(driver, vm);
+    if (ret < 0)
+        goto endjob;
+
+    /* Update vm in place to match changes.  */
+    need_unlink = false;
+    disk->mirror = mirror;
+    disk->mirrorFormat = mirrorFormat;
+    mirror = NULL;
+    mirrorFormat = NULL;

 endjob:
-    if (ret < 0) {
-        VIR_FREE(disk->mirror);
-        VIR_FREE(disk->mirrorFormat);
+    if (origsrc) {
+        disk->src = origsrc;
+        disk->driverType = origdriver;
     }
+    if (need_unlink && unlink(dest))
+        VIR_WARN("unable to unlink just-created %s", dest);
+    VIR_FREE(mirror);
+    VIR_FREE(mirrorFormat);
     if (qemuDomainObjEndJob(driver, vm) == 0) {
         vm = NULL;
         goto cleanup;
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 75694b7..969e453 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -2689,12 +2689,12 @@ qemuMonitorDiskSnapshot(qemuMonitorPtr mon, virJSONValuePtr actions,
 int
 qemuMonitorDriveMirror(qemuMonitorPtr mon, virJSONValuePtr actions,
                        const char *device, const char *file,
-                       const char *format, int mode)
+                       const char *format, unsigned int flags)
 {
     int ret = -1;

-    VIR_DEBUG("mon=%p, actions=%p, device=%s, file=%s, format=%s, mode=%o",
-              mon, actions, device, file, format, mode);
+    VIR_DEBUG("mon=%p, actions=%p, device=%s, file=%s, format=%s, flags=%x",
+              mon, actions, device, file, format, flags);

     if (!mon) {
         qemuReportError(VIR_ERR_INVALID_ARG, "%s",
@@ -2704,7 +2704,7 @@ qemuMonitorDriveMirror(qemuMonitorPtr mon, virJSONValuePtr actions,

     if (mon->json)
         ret = qemuMonitorJSONDriveMirror(mon, actions, device, file, format,
-                                         mode);
+                                         flags);
     else
         qemuReportError(VIR_ERR_INVALID_ARG, "%s",
                         _("drive-mirror requires JSON monitor"));
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index a432b6f..0534b4a 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -509,20 +509,12 @@ int qemuMonitorDiskSnapshot(qemuMonitorPtr mon,
                             const char *format,
                             bool reuse);

-typedef enum {
-    QEMU_MONITOR_DRIVE_MIRROR_ABSOLUTE,
-    QEMU_MONITOR_DRIVE_MIRROR_EXISTING,
-    QEMU_MONITOR_DRIVE_MIRROR_NO_BACKING,
-
-    QEMU_MONITOR_DRIVE_MIRROR_LAST
-} qemuMonitorDriveMirrorMode;
-
 int qemuMonitorDriveMirror(qemuMonitorPtr mon,
                            virJSONValuePtr actions,
                            const char *device,
                            const char *file,
                            const char *format,
-                           int mode)
+                           unsigned int flags)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4);
 int qemuMonitorTransaction(qemuMonitorPtr mon, virJSONValuePtr actions)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 2718fef..27bed97 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -797,9 +797,11 @@ qemuMonitorJSONHandleBlockJobImpl(qemuMonitorPtr mon,
             if (offset != len)
                 event = VIR_DOMAIN_BLOCK_JOB_FAILED;
             break;
-        case VIR_DOMAIN_BLOCK_JOB_FAILED:
         case VIR_DOMAIN_BLOCK_JOB_CANCELED:
             break;
+        case VIR_DOMAIN_BLOCK_JOB_FAILED:
+            VIR_DEBUG("should not get here");
+            break;
     }

 out:
@@ -3188,26 +3190,25 @@ cleanup:
     return ret;
 }

-VIR_ENUM_DECL(qemuMonitorDriveMirror)
-VIR_ENUM_IMPL(qemuMonitorDriveMirror, QEMU_MONITOR_DRIVE_MIRROR_LAST,
-              "absolute-paths", "exsisting", "no-backing-file");
-
 int
 qemuMonitorJSONDriveMirror(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                            virJSONValuePtr actions,
                            const char *device, const char *file,
-                           const char *format, int mode)
+                           const char *format, unsigned int flags)
 {
     int ret = -1;
     virJSONValuePtr cmd;
     virJSONValuePtr reply = NULL;
+    bool shallow = (flags & VIR_DOMAIN_BLOCK_REBASE_SHALLOW) != 0;
+    bool reuse = (flags & VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT) != 0;

     cmd = qemuMonitorJSONMakeCommandRaw(actions != NULL,
                                         "__com.redhat_drive-mirror",
                                         "s:device", device,
                                         "s:target", file,
+                                        "b:full", !shallow,
                                         "s:mode",
-                                        qemuMonitorDriveMirrorTypeToString(mode),
+                                        reuse ? "existing" : "absolute-paths",
                                         format ? "s:format" : NULL, format,
                                         NULL);
     if (!cmd)
diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h
index 136c0f3..9cfae8d 100644
--- a/src/qemu/qemu_monitor_json.h
+++ b/src/qemu/qemu_monitor_json.h
@@ -238,7 +238,7 @@ int qemuMonitorJSONDriveMirror(qemuMonitorPtr mon,
                                const char *device,
                                const char *file,
                                const char *format,
-                               int mode)
+                               unsigned int flags)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4);
 int qemuMonitorJSONTransaction(qemuMonitorPtr mon, virJSONValuePtr actions)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
diff --git a/tools/virsh.c b/tools/virsh.c
index e855951..8669a8a 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -7567,7 +7567,6 @@ blockJobImpl(vshControl *ctl, const vshCmd *cmd,
             ret = virDomainBlockPull(dom, path, bandwidth, 0);
         break;
     case VSH_CMD_BLOCK_JOB_COPY:
-        flags |= VIR_DOMAIN_BLOCK_REBASE_COPY;
         if (vshCommandOptBool(cmd, "shallow"))
             flags |= VIR_DOMAIN_BLOCK_COPY_SHALLOW;
         if (vshCommandOptBool(cmd, "reuse-external"))
@@ -7693,7 +7692,7 @@ cmdBlockJob(vshControl *ctl, const vshCmd *cmd)

     if (abortMode + infoMode + bandwidth > 1) {
         vshError(ctl, "%s",
-                 _("One of --abort, --info, or --bandwidth is required"));
+                 _("conflict between --abort, --info, and --bandwidth modes"));
         return false;
     }

diff --git a/tools/virsh.pod b/tools/virsh.pod
index 1e5ce12..65e2429 100644
--- a/tools/virsh.pod
+++ b/tools/virsh.pod
@@ -677,7 +677,10 @@ pulled, the disk no longer depends on that portion of the backing chain.
 It pulls data for the entire disk in the background, the process of the
 operation can be checked with B<blockjob>.

-I<path> specifies fully-qualified path of the disk.
+I<path> specifies fully-qualified path of the disk; it corresponds
+to a unique target name (<target dev='name'/>) or source file (<source
+file='name'/>) for one of the disk devices attached to I<domain> (see
+also B<domblklist> for listing these names).
 I<bandwidth> specifies copying bandwidth limit in Mbps.

 =item B<blkdeviotune> I<domain> I<device>
@@ -714,13 +717,17 @@ Both I<--live> and I<--current> flags may be given, but I<--current> is
 exclusive. If no flag is specified, behavior is different depending
 on hypervisor.

-=item B<blockjob> I<domain> I<path> { [I<--abort>] [I<--async>] [I<--pivot] |
-[I<--info>] | I<bandwidth> }
+=item B<blockjob> I<domain> I<path> { [I<--abort>] [I<--async>] [I<--pivot>] |
+[I<--info>] | [I<bandwidth>] }

-Manage active block operations.  If no mode is chosen, I<--info> is assumed,
-except that I<--async> and I<--pivot> imply I<--abort>.
+Manage active block operations.  There are three modes: I<--info>,
+I<bandwidth>, and I<--abort>; I<--info> is default except that I<--async>
+or I<--pivot> implies I<--abort>.

-I<path> specifies fully-qualified path of the disk.
+I<path> specifies fully-qualified path of the disk; it corresponds
+to a unique target name (<target dev='name'/>) or source file (<source
+file='name'/>) for one of the disk devices attached to I<domain> (see
+also B<domblklist> for listing these names).

 If I<--abort> is specified, the active job on the specified disk will
 be aborted.  If I<--async> is also specified, this command will return
@@ -734,11 +741,15 @@ I<bandwidth> can be used to set bandwidth limit for the active job.
 =item B<blockresize> I<domain> I<path> I<size>

 Resize a block device of domain while the domain is running, I<path>
-specifies the absolute path of the block device, I<size> is a scaled
-integer (see B<NOTES> above) which defaults to KiB (blocks of 1024 bytes)
-if there is no suffix.  You must use a suffix of "B" to get bytes (note
-that for historical reasons, this differs from B<vol-resize> which
-defaults to bytes without a suffix).
+specifies the absolute path of the block device; it corresponds
+to a unique target name (<target dev='name'/>) or source file (<source
+file='name'/>) for one of the disk devices attached to I<domain> (see
+also B<domblklist> for listing these names).
+
+I<size> is a scaled integer (see B<NOTES> above) which defaults to KiB
+(blocks of 1024 bytes) if there is no suffix.  You must use a suffix of
+"B" to get bytes (note that for historical reasons, this differs from
+B<vol-resize> which defaults to bytes without a suffix).

 =item B<dominfo> I<domain-id>

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 620 bytes
Desc: OpenPGP digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20120409/2f9c66ec/attachment-0001.sig>


More information about the libvir-list mailing list