[libvirt] [RFC PATCH v2 3/3] qemu: memory pre-pinning support for RDMA migration

mrhines at linux.vnet.ibm.com mrhines at linux.vnet.ibm.com
Mon Jan 13 06:28:12 UTC 2014


From: "Michael R. Hines" <mrhines at us.ibm.com>

RDMA Live migration requires registering memory with the hardware,
and thus QEMU offers a new 'capability' which supports the ability
to pre-register / mlock() the guest memory in advance for higher
RDMA performance before the migration begins.

This patch exposes this capability with the following example usage:

virsh migrate --live --rdma-pin-all --migrateuri rdma:hostname domain qemu+ssh://hostname/system

This capability is disabled by default; omitting the flag causes QEMU
to register memory with the hardware on demand during the migration.

Signed-off-by: Michael R. Hines <mrhines at us.ibm.com>
---
 include/libvirt/libvirt.h.in |  1 +
 src/qemu/qemu_migration.c    | 64 ++++++++++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_migration.h    |  3 ++-
 src/qemu/qemu_monitor.c      |  2 +-
 src/qemu/qemu_monitor.h      |  1 +
 tools/virsh-domain.c         |  7 +++++
 6 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 5ac2694..476521b 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1192,6 +1192,7 @@ typedef enum {
     VIR_MIGRATE_OFFLINE           = (1 << 10), /* offline migrate */
     VIR_MIGRATE_COMPRESSED        = (1 << 11), /* compress data during migration */
     VIR_MIGRATE_ABORT_ON_ERROR    = (1 << 12), /* abort migration on I/O errors happened during migration */
+    VIR_MIGRATE_RDMA_PIN_ALL      = (1 << 13), /* RDMA memory pinning */
 } virDomainMigrateFlags;
 
 
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 1e0f538..f4358ba 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1566,6 +1566,46 @@ cleanup:
 }
 
 static int
+qemuMigrationSetPinAll(virQEMUDriverPtr driver,
+                            virDomainObjPtr vm,
+                            enum qemuDomainAsyncJob job) /* job: async job used to enter the monitor */
+{ /* Check that QEMU reports the RDMA pin-all migration capability, then set it. */
+    qemuDomainObjPrivatePtr priv = vm->privateData; /* supplies the monitor handle, priv->mon */
+    int ret;
+
+    if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0)
+        return -1; /* monitor was not entered, so the exit call at cleanup is skipped */
+
+    ret = qemuMonitorGetMigrationCapability( /* handled below: <0 error, 0 unsupported, else supported */
+                priv->mon,
+                QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
+
+    if (ret < 0) { /* query failed: hand its negative result back unchanged */
+        goto cleanup;
+    } else if (ret == 0) { /* capability reported as unsupported */
+        if (job == QEMU_ASYNC_JOB_MIGRATION_IN) { /* incoming job: blame the target QEMU */
+            virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+                           _("rdma pinning migration is not supported by "
+                             "target QEMU binary"));
+        } else { /* any other job: blame the source QEMU */
+            virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
+                           _("rdma pinning migration is not supported by "
+                             "source QEMU binary"));
+        }
+        ret = -1; /* turn the 0 "unsupported" answer into a failure for the caller */
+        goto cleanup;
+    }
+
+    ret = qemuMonitorSetMigrationCapability( /* supported: set it; this result is what we return */
+                priv->mon,
+                QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
+
+cleanup: /* reached on every path taken after the monitor was entered */
+    qemuDomainObjExitMonitor(driver, vm);
+    return ret;
+}
+
+static int
 qemuMigrationWaitForSpice(virQEMUDriverPtr driver,
                           virDomainObjPtr vm)
 {
@@ -2395,6 +2435,18 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
                                     QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
         goto stop;
 
+    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
+        qemuMigrationSetPinAll(driver, vm,
+                                    QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
+        goto stop;
+
+    if (strstr(protocol, "rdma")) {
+        unsigned long long memKB = vm->def->mem.hard_limit ?
+                                   vm->def->mem.hard_limit :
+                vm->def->mem.max_balloon + 1024 * 1024;
+                virProcessSetMaxMemLock(vm->pid, memKB * 3);
+    }
+
     if (mig->lockState) {
         VIR_DEBUG("Received lockstate %s", mig->lockState);
         VIR_FREE(priv->lockState);
@@ -3209,6 +3261,11 @@ qemuMigrationRun(virQEMUDriverPtr driver,
                                     QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;
 
+    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
+        qemuMigrationSetPinAll(driver, vm,
+                                    QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
+        goto cleanup;
+
     if (qemuDomainObjEnterMonitorAsync(driver, vm,
                                        QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;
@@ -3238,6 +3295,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
 
     switch (spec->destType) {
     case MIGRATION_DEST_HOST:
+        if (strstr(spec->dest.host.proto, "rdma")) {
+            unsigned long long memKB = vm->def->mem.hard_limit ?
+                                       vm->def->mem.hard_limit :
+                    vm->def->mem.max_balloon + 1024 * 1024;
+                    virProcessSetMaxMemLock(vm->pid, memKB * 3);
+        }
+
         ret = qemuMonitorMigrateToHost(priv->mon, migrate_flags,
                                        spec->dest.host.proto,
                                        spec->dest.host.name,
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index cafa2a2..a76aaef 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -39,7 +39,8 @@
      VIR_MIGRATE_UNSAFE |                       \
      VIR_MIGRATE_OFFLINE |                      \
      VIR_MIGRATE_COMPRESSED |                   \
-     VIR_MIGRATE_ABORT_ON_ERROR)
+     VIR_MIGRATE_ABORT_ON_ERROR |               \
+     VIR_MIGRATE_RDMA_PIN_ALL)
 
 /* All supported migration parameters and their types. */
 # define QEMU_MIGRATION_PARAMETERS                              \
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 5a450e2..86bffaa 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -118,7 +118,7 @@ VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
 
 VIR_ENUM_IMPL(qemuMonitorMigrationCaps,
               QEMU_MONITOR_MIGRATION_CAPS_LAST,
-              "xbzrle")
+              "xbzrle", "rdma-pin-all")
 
 VIR_ENUM_IMPL(qemuMonitorVMStatus,
               QEMU_MONITOR_VM_STATUS_LAST,
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 16b0b77..a8b1cc6 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -452,6 +452,7 @@ int qemuMonitorGetSpiceMigrationStatus(qemuMonitorPtr mon,
 
 typedef enum {
     QEMU_MONITOR_MIGRATION_CAPS_XBZRLE,
+    QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL,
 
     QEMU_MONITOR_MIGRATION_CAPS_LAST
 } qemuMonitorMigrationCaps;
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 1fe138c..31df7f6 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -8532,6 +8532,10 @@ static const vshCmdOptDef opts_migrate[] = {
      .type = VSH_OT_BOOL,
      .help = N_("compress repeated pages during live migration")
     },
+    {.name = "rdma-pin-all",
+     .type = VSH_OT_BOOL,
+     .help = N_("support memory pinning during RDMA live migration")
+    },
     {.name = "abort-on-error",
      .type = VSH_OT_BOOL,
      .help = N_("abort on soft errors during migration")
@@ -8676,6 +8680,9 @@ doMigrate(void *opaque)
     if (vshCommandOptBool(cmd, "compressed"))
         flags |= VIR_MIGRATE_COMPRESSED;
 
+    if (vshCommandOptBool(cmd, "rdma-pin-all"))
+        flags |= VIR_MIGRATE_RDMA_PIN_ALL;
+
     if (vshCommandOptBool(cmd, "offline")) {
         flags |= VIR_MIGRATE_OFFLINE;
     }
-- 
1.8.1.2




More information about the libvir-list mailing list