[libvirt] [RFC PATCH v2 3/3] qemu: memory pre-pinning support for RDMA migration

Jiri Denemark jdenemar at redhat.com
Tue Feb 4 14:56:12 UTC 2014


On Mon, Jan 13, 2014 at 14:28:12 +0800, mrhines at linux.vnet.ibm.com wrote:
> From: "Michael R. Hines" <mrhines at us.ibm.com>
> 
> RDMA Live migration requires registering memory with the hardware,

Hmm, I forgot to ask when I was reviewing the previous patch but does
any of this RDMA migration functionality require special privileges or
is any unprivileged process able to use RDMA?

> and thus QEMU offers a new 'capability' which supports the ability
> to pre-register / mlock() the guest memory in advance for higher
> RDMA performance before the migration begins.
> 
> This patch exposes this capability with the following example usage:
> 
> virsh migrate --live --rdma-pin-all --migrateuri rdma:hostname domain qemu+ssh://hostname/system

The URI should be rdma://hostname

> This capability is disabled by default, and thus ommiting it will
> cause QEMU to register the memory with the hardware in an on-demand basis.
> 
> Signed-off-by: Michael R. Hines <mrhines at us.ibm.com>
> ---
>  include/libvirt/libvirt.h.in |  1 +
>  src/qemu/qemu_migration.c    | 64 ++++++++++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_migration.h    |  3 ++-
>  src/qemu/qemu_monitor.c      |  2 +-
>  src/qemu/qemu_monitor.h      |  1 +
>  tools/virsh-domain.c         |  7 +++++
>  6 files changed, 76 insertions(+), 2 deletions(-)
> 
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index 5ac2694..476521b 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -1192,6 +1192,7 @@ typedef enum {
>      VIR_MIGRATE_OFFLINE           = (1 << 10), /* offline migrate */
>      VIR_MIGRATE_COMPRESSED        = (1 << 11), /* compress data during migration */
>      VIR_MIGRATE_ABORT_ON_ERROR    = (1 << 12), /* abort migration on I/O errors happened during migration */
> +    VIR_MIGRATE_RDMA_PIN_ALL      = (1 << 13), /* RDMA memory pinning */
>  } virDomainMigrateFlags;
>  
>  
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 1e0f538..f4358ba 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -1566,6 +1566,46 @@ cleanup:
>  }
>  
>  static int
> +qemuMigrationSetPinAll(virQEMUDriverPtr driver,
> +                            virDomainObjPtr vm,
> +                            enum qemuDomainAsyncJob job)

Bad indentation.

> +{
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    int ret;
> +
> +    if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0)
> +        return -1;
> +
> +    ret = qemuMonitorGetMigrationCapability(
> +                priv->mon,
> +                QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);

Is this capability always present when QEMU supports RDMA migration? If
so, it could be used when we check if QEMU supports RDMA migration.

> +
> +    if (ret < 0) {
> +        goto cleanup;
> +    } else if (ret == 0) {
> +        if (job == QEMU_ASYNC_JOB_MIGRATION_IN) {
> +            virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
> +                           _("rdma pinning migration is not supported by "
> +                             "target QEMU binary"));
> +        } else {
> +            virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
> +                           _("rdma pinning migration is not supported by "
> +                             "source QEMU binary"));
> +        }
> +        ret = -1;
> +        goto cleanup;
> +    }
> +
> +    ret = qemuMonitorSetMigrationCapability(
> +                priv->mon,
> +                QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
> +
> +cleanup:
> +    qemuDomainObjExitMonitor(driver, vm);
> +    return ret;
> +}
> +
> +static int
>  qemuMigrationWaitForSpice(virQEMUDriverPtr driver,
>                            virDomainObjPtr vm)
>  {
> @@ -2395,6 +2435,18 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
>                                      QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
>          goto stop;
>  
> +    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
> +        qemuMigrationSetPinAll(driver, vm,
> +                                    QEMU_ASYNC_JOB_MIGRATION_IN) < 0)

Indentation. And the flag should be rejected when protocol is not rdma.

> +        goto stop;
> +
> +    if (strstr(protocol, "rdma")) {

I think we don't want to match just a part of the protocol; use STREQ()
instead.

> +        unsigned long long memKB = vm->def->mem.hard_limit ?
> +                                   vm->def->mem.hard_limit :
> +                vm->def->mem.max_balloon + 1024 * 1024;
> +                virProcessSetMaxMemLock(vm->pid, memKB * 3);

Apart from weird indentation of the virProcessSetMaxMemLock line, there
are several issues here:

- this code should be moved inside qemuMigrationSetPinAll and done only
  if VIR_MIGRATE_RDMA_PIN_ALL flag was used.

- virProcessSetMaxMemLock wants the value in bytes, thus memKB should
  rather by multiplied by 1024.

- what memory is going to be mlock()ed with rdma-pin-all? Is it going to
  be all memory allocated by QEMU or just the domain's memory? If it's
  all QEMU memory, we already painfully found out it's impossible to
  automatically compute how much memory QEMU consumes in addition to the
  configured domain's memory and I think a better approach would be to
  just migration with rdma-pin-all unless hard_limit is specified.

> +    }
> +
>      if (mig->lockState) {
>          VIR_DEBUG("Received lockstate %s", mig->lockState);
>          VIR_FREE(priv->lockState);
> @@ -3209,6 +3261,11 @@ qemuMigrationRun(virQEMUDriverPtr driver,
>                                      QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
>          goto cleanup;
>  
> +    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
> +        qemuMigrationSetPinAll(driver, vm,
> +                                    QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> +        goto cleanup;
> +
>      if (qemuDomainObjEnterMonitorAsync(driver, vm,
>                                         QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
>          goto cleanup;

Same comments as for the equivalent hunk in qemuMigrationPrepareAny.

> @@ -3238,6 +3295,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
>  
>      switch (spec->destType) {
>      case MIGRATION_DEST_HOST:
> +        if (strstr(spec->dest.host.proto, "rdma")) {
> +            unsigned long long memKB = vm->def->mem.hard_limit ?
> +                                       vm->def->mem.hard_limit :
> +                    vm->def->mem.max_balloon + 1024 * 1024;
> +                    virProcessSetMaxMemLock(vm->pid, memKB * 3);
> +        }
> +
>          ret = qemuMonitorMigrateToHost(priv->mon, migrate_flags,
>                                         spec->dest.host.proto,
>                                         spec->dest.host.name,

Same comments as for the equivalent hunk in qemuMigrationPrepareAny.

Jirka




More information about the libvir-list mailing list