[libvirt] [RFC PATCH] libxl: add tunnelled migration support

Bob Liu bob.liu at oracle.com
Thu Oct 27 12:34:55 UTC 2016


On 10/26/2016 07:24 PM, Joao Martins wrote:
> On 10/26/2016 07:33 AM, Bob Liu wrote:
>> Tunnelled migration doesn't require any extra network connections besides the
>> libvirt daemon.
>> It's capable of strong encryption and is the default option in openstack-nova.
>>
>> This patch adds the tunnelled migration (Tunnel3params) support to libxl.
>> The data flow in the src side is:
>>  * libxlDoMigrateSend() -> pipe
>>  * libxlTunnel3MigrationFunc() poll pipe out and then write to dest stream.
>>
>> While in the dest side:
>> Stream -> pipe -> 'recvfd of libxlDomainStartRestore'
>>
>> The usage is the same as p2p migration, execpt adding one more '--tunnelled' to
>                                           ^^^^^^ except
>> the libvirt p2p migration command.
>>
>> Signed-off-by: Bob Liu <bob.liu at oracle.com>
> Nice :) Now openstack no longer needs to have tunnelled flag removed on nova to
> get migration working.
> 
> See some comments below; it's a first review, as I would still like to test it.
> 

Will take all of your suggestions.
But let's wait for more reviews before I post an updated version.

Thanks,
Bob

>> ---
>>  src/libxl/libxl_driver.c    |  58 ++++++++++-
>>  src/libxl/libxl_migration.c | 241 +++++++++++++++++++++++++++++++++++++++++---
>>  src/libxl/libxl_migration.h |   9 ++
>>  3 files changed, 292 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
>> index b66cb1f..a01bbff 100644
>> --- a/src/libxl/libxl_driver.c
>> +++ b/src/libxl/libxl_driver.c
>> @@ -5918,6 +5918,61 @@ libxlDomainMigrateBegin3Params(virDomainPtr domain,
>>  }
>>  
>>  static int
>> +libxlDomainMigratePrepareTunnel3Params(virConnectPtr dconn,
>> +                                       virStreamPtr st,
>> +                                       virTypedParameterPtr params,
>> +                                       int nparams,
>> +                                       const char *cookiein,
>> +                                       int cookieinlen,
>> +                                       char **cookieout ATTRIBUTE_UNUSED,
>> +                                       int *cookieoutlen ATTRIBUTE_UNUSED,
>> +                                       unsigned int flags)
>> +{
>> +    libxlDriverPrivatePtr driver = dconn->privateData;
>> +    virDomainDefPtr def = NULL;
>> +    const char *dom_xml = NULL;
>> +    const char *dname = NULL;
>> +    const char *uri_in = NULL;
>> +
>> +#ifdef LIBXL_HAVE_NO_SUSPEND_RESUME
>> +    virReportUnsupportedError();
>> +    return -1;
>> +#endif
>> +
>> +    virCheckFlags(LIBXL_MIGRATION_FLAGS, -1);
>> +    if (virTypedParamsValidate(params, nparams, LIBXL_MIGRATION_PARAMETERS) < 0)
>> +        goto error;
>> +
>> +    if (virTypedParamsGetString(params, nparams,
>> +                                VIR_MIGRATE_PARAM_DEST_XML,
>> +                                &dom_xml) < 0 ||
>> +        virTypedParamsGetString(params, nparams,
>> +                                VIR_MIGRATE_PARAM_DEST_NAME,
>> +                                &dname) < 0 ||
>> +        virTypedParamsGetString(params, nparams,
>> +                                VIR_MIGRATE_PARAM_URI,
>> +                                &uri_in) < 0)
>> +
>> +        goto error;
>> +
>> +    if (!(def = libxlDomainMigrationPrepareDef(driver, dom_xml, dname)))
>> +        goto error;
>> +
>> +    if (virDomainMigratePrepareTunnel3ParamsEnsureACL(dconn, def) < 0)
>> +        goto error;
>> +
>> +    if (libxlDomainMigrationPrepareTunnel3(dconn, st, &def, cookiein,
>> +                                           cookieinlen, flags) < 0)
>> +        goto error;
>> +
>> +    return 0;
>> +
>> + error:
>> +    virDomainDefFree(def);
>> +    return -1;
>> +}
>> +
>> +static int
>>  libxlDomainMigratePrepare3Params(virConnectPtr dconn,
>>                                   virTypedParameterPtr params,
>>                                   int nparams,
>> @@ -6017,7 +6072,7 @@ libxlDomainMigratePerform3Params(virDomainPtr dom,
>>      if (virDomainMigratePerform3ParamsEnsureACL(dom->conn, vm->def) < 0)
>>          goto cleanup;
>>  
>> -    if (flags & VIR_MIGRATE_PEER2PEER) {
>> +    if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
>>          if (libxlDomainMigrationPerformP2P(driver, vm, dom->conn, dom_xml,
>>                                             dconnuri, uri, dname, flags) < 0)
>>              goto cleanup;
>> @@ -6501,6 +6556,7 @@ static virHypervisorDriver libxlHypervisorDriver = {
>>      .nodeDeviceReset = libxlNodeDeviceReset, /* 1.2.3 */
>>      .domainMigrateBegin3Params = libxlDomainMigrateBegin3Params, /* 1.2.6 */
>>      .domainMigratePrepare3Params = libxlDomainMigratePrepare3Params, /* 1.2.6 */
>> +    .domainMigratePrepareTunnel3Params = libxlDomainMigratePrepareTunnel3Params, /* 2.3.1 */
> The version here is incorrect. It should be the next one to be tagged (after the
> ongoing freeze). Which means 2.5.0. Note that the versioning used has changed a
> bit: major number is incremented per year, minor per month and bugfix number for
> -maint releases.
> 
>>      .domainMigratePerform3Params = libxlDomainMigratePerform3Params, /* 1.2.6 */
>>      .domainMigrateFinish3Params = libxlDomainMigrateFinish3Params, /* 1.2.6 */
>>      .domainMigrateConfirm3Params = libxlDomainMigrateConfirm3Params, /* 1.2.6 */
>> diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c
>> index 534abb8..88c9bb8 100644
>> --- a/src/libxl/libxl_migration.c
>> +++ b/src/libxl/libxl_migration.c
>> @@ -44,6 +44,7 @@
>>  #include "libxl_migration.h"
>>  #include "locking/domain_lock.h"
>>  #include "virtypedparam.h"
>> +#include "fdstream.h"
>>  
>>  #define VIR_FROM_THIS VIR_FROM_LIBXL
>>  
>> @@ -484,6 +485,90 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr driver,
>>  }
>>  
>>  int
>> +libxlDomainMigrationPrepareTunnel3(virConnectPtr dconn,
>> +                                   virStreamPtr st,
>> +                                   virDomainDefPtr *def,
>> +                                   const char *cookiein,
>> +                                   int cookieinlen,
>> +                                   unsigned int flags)
>> +{
>> +    libxlMigrationCookiePtr mig = NULL;
>> +    libxlDriverPrivatePtr driver = dconn->privateData;
>> +    virDomainObjPtr vm = NULL;
>> +    libxlMigrationDstArgs *args = NULL;
>> +    virThread thread;
>> +    int dataFD[2] = { -1, -1 };
>> +    int ret = 0;
> The general codestyle I usually see is to have this initialized to -1, and then
> set ret = 0 on success (before the goto done) mentioned further below. In other
> words, assume error and set it to 0 if all good, as you have more error paths
> than the sole success one.
> 
>> +
>> +    if (libxlMigrationEatCookie(cookiein, cookieinlen, &mig) < 0)
>> +        goto error;
>> +
>> +    if (mig->xenMigStreamVer > LIBXL_SAVE_VERSION) {
>> +        virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
>> +                       _("Xen migration stream version '%d' is not supported on this host"),
>> +                       mig->xenMigStreamVer);
>> +        goto error;
>> +    }
>> +
>> +    if (!(vm = virDomainObjListAdd(driver->domains, *def,
>> +                                   driver->xmlopt,
>> +                                   VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
>> +                                   VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
>> +                                   NULL)))
>> +        goto error;
>> +
>> +    /*
>> +     * The data flow of tunnel3 migration in the dest side:
>> +     * stream -> pipe -> recvfd of libxlDomainStartRestore
>> +     */
>> +    if (pipe(dataFD) < 0)
>> +        goto error;
>> +
>> +    /* Stream data will be written to pipeIn */
>> +    if (virFDStreamOpen(st, dataFD[1]) < 0)
>> +        goto error;
> We probably need to add this (same as qemu):
> 
> dataFD[1] = -1; /* 'st' owns the FD now & will close it */
> 
>> +
>> +    if (libxlMigrationDstArgsInitialize() < 0)
>> +        goto error;
>> +
>> +    if (!(args = virObjectNew(libxlMigrationDstArgsClass)))
>> +        goto error;
>> +
>> +    args->conn = dconn;
>> +    args->vm = vm;
>> +    args->flags = flags;
>> +    args->migcookie = mig;
>> +    /* Receive from pipeOut */
>> +    args->recvfd = dataFD[0];
>> +    args->nsocks = 0;
>> +    if (virThreadCreate(&thread, false, libxlDoMigrateReceive, args) < 0) {
>> +        virReportError(VIR_ERR_OPERATION_FAILED, "%s",
>> +                       _("Failed to create thread for receiving migration data"));
>> +        goto error;
>> +    }
>> +
>> +    goto done;
> Have ret = 0 before "goto done".
> 
>> +
>> + error:
>> +    VIR_FORCE_CLOSE(dataFD[1]);
>> +    VIR_FORCE_CLOSE(dataFD[0]);
>> +    virObjectUnref(args);
>> +    /* Remove virDomainObj from domain list */
>> +    if (vm) {
>> +        virDomainObjListRemove(driver->domains, vm);
>> +        vm = NULL;
>> +    }
>> +    ret = -1;
> And remove this one. That said, the three comments about the style are just
> nitpicks and could be ignored if others are ok with it.
> 
>> +
>> + done:
>> +    /* Nobody will close dataFD[1]? */
> virFDStreamOpen(st, dataFD[1]) will set virStream private data which stores this
> file descriptor. A later call to virStreamFinish would result in a call to
> virFDStreamClose(...) which closes the file descriptor. So unless I
> misunderstood, I think it's reasonable to remove this comment here, assuming you
> set dataFD[1] = -1 as commented above.
> 
>> +    if (vm)
>> +        virObjectUnlock(vm);
>> +
>> +    return ret;
>> +}
>> +
>> +int
>>  libxlDomainMigrationPrepare(virConnectPtr dconn,
>>                              virDomainDefPtr *def,
>>                              const char *uri_in,
>> @@ -710,9 +795,90 @@ libxlDomainMigrationPrepare(virConnectPtr dconn,
>>      return ret;
>>  }
>>  
>> -/* This function is a simplification of virDomainMigrateVersion3Full
>> - * excluding tunnel support and restricting it to migration v3
>> - * with params since it was the first to be introduced in libxl.
>> +typedef struct _libxlTunnelMigrationThread libxlTunnelMigrationThread;
>> +struct _libxlTunnelMigrationThread {
>> +    virThread thread;
>> +    virStreamPtr st;
>> +    int srcFD;
>> +};
>> +#define TUNNEL_SEND_BUF_SIZE 65536
>> +
>> +/*
>> + * The data flow of tunnel3 migration in the src side:
>> + * libxlDoMigrateSend() -> pipe
>> + * libxlTunnel3MigrationFunc() polls pipe out and then write to dest stream.
>> + */
>> +static void libxlTunnel3MigrationFunc(void *arg)
>> +{
>> +    libxlTunnelMigrationThread *data = (libxlTunnelMigrationThread *)arg;
>> +    char *buffer = NULL;
>> +    struct pollfd fds[1];
>> +    int timeout = -1;
>> +
>> +    if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) {
>> +        virReportError(errno, "%s", _("poll failed in migration tunnel"));
>> +        return;
>> +    }
>> +
>> +    fds[0].fd = data->srcFD;
>> +    for (;;) {
>> +        int ret;
>> +
>> +        fds[0].events = POLLIN;
>> +        fds[0].revents = 0;
>> +        ret = poll(fds, ARRAY_CARDINALITY(fds), timeout);
>> +        if (ret < 0) {
>> +            if (errno == EAGAIN || errno == EINTR)
>> +                continue;
>> +            virReportError(errno, "%s",
>> +                           _("poll failed in libxlTunnel3MigrationFunc"));
>> +            goto abrt;
>> +        }
>> +
>> +        if (ret == 0) {
>> +            VIR_DEBUG("poll got timeout");
>> +            break;
>> +        }
>> +
>> +        if (fds[0].revents & (POLLIN | POLLERR | POLLHUP)) {
>> +            int nbytes;
>> +
>> +            nbytes = read(data->srcFD, buffer, TUNNEL_SEND_BUF_SIZE);
>> +            if (nbytes > 0) {
>> +                /* Write to dest stream */
>> +                if (virStreamSend(data->st, buffer, nbytes) < 0) {
>> +                    virReportError(errno, "%s",
>> +                                   _("tunnelled migration failed to send to virStream"));
>> +                    goto abrt;
>> +                }
>> +            } else if (nbytes < 0) {
>> +                virReportError(errno, "%s",
>> +                               _("tunnelled migration failed to read from xen side"));
>> +                goto abrt;
>> +            } else {
>> +                /* EOF; transferred all data */
>> +                break;
>> +            }
>> +        }
>> +    }
>> +
>> +    if (virStreamFinish(data->st) < 0)
>> +        virReportError(errno, "%s",
>> +                       _("tunnelled migration failed to finish stream"));
>> +
>> + cleanup:
>> +    VIR_FREE(buffer);
>> +
>> +    return;
>> +
>> + abrt:
> Do you mean 'abort' here? Ahh, but there aren't any 'abort' labels in the
> whole source code, so I assume you used it to be in line with the rest.
> 
>> +    virStreamAbort(data->st);
>> +    goto cleanup;
>> +}
>> +
>> +/* This function is a simplification of virDomainMigrateVersion3Full and
>> + * restricting it to migration v3 with params since it was the first to be
>> + * introduced in libxl.
>>   */
>>  static int
>>  libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>> @@ -737,6 +903,10 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>>      bool cancelled = true;
>>      virErrorPtr orig_err = NULL;
>>      int ret = -1;
>> +    /* For tunnel migration */
>> +    virStreamPtr st = NULL;
>> +    libxlTunnelMigrationThread *libxlTunnelMigationThreadPtr = NULL;
> This variable name could probably be shorter, like "tnlthread". This would
> allow you to avoid both the long name and the typo you have in the variable name.
> 
>> +    int dataFD[2] = { -1, -1 };
>>  
>>      dom_xml = libxlDomainMigrationBegin(sconn, vm, xmlin,
>>                                          &cookieout, &cookieoutlen);
>> @@ -764,29 +934,62 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>>  
>>      VIR_DEBUG("Prepare3");
>>      virObjectUnlock(vm);
>> -    ret = dconn->driver->domainMigratePrepare3Params
>> -        (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags);
>> +    if (flags & VIR_MIGRATE_TUNNELLED) {
>> +        if (!(st = virStreamNew(dconn, 0)))
>> +            goto cleanup;
>> +        ret = dconn->driver->domainMigratePrepareTunnel3Params
>> +            (dconn, st, params, nparams, cookieout, cookieoutlen, NULL, NULL, destflags);
>> +    } else {
>> +        ret = dconn->driver->domainMigratePrepare3Params
>> +            (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags);
>> +    }
>>      virObjectLock(vm);
>>  
>>      if (ret == -1)
>>          goto cleanup;
>>  
>> -    if (uri_out) {
>> -        if (virTypedParamsReplaceString(&params, &nparams,
>> -                                        VIR_MIGRATE_PARAM_URI, uri_out) < 0) {
>> -            orig_err = virSaveLastError();
>> +    if (!(flags & VIR_MIGRATE_TUNNELLED)) {
>> +        if (uri_out) {
>> +            if (virTypedParamsReplaceString(&params, &nparams,
>> +                                            VIR_MIGRATE_PARAM_URI, uri_out) < 0) {
>> +                orig_err = virSaveLastError();
>> +                goto finish;
>> +            }
>> +        } else {
>> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                           _("domainMigratePrepare3 did not set uri"));
>>              goto finish;
>>          }
>> -    } else {
>> -        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> -                       _("domainMigratePrepare3 did not set uri"));
>> -        goto finish;
>>      }
>>  
>>      VIR_DEBUG("Perform3 uri=%s", NULLSTR(uri_out));
>> -    ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL,
>> -                                      uri_out, NULL, flags);
>> +    if (flags & VIR_MIGRATE_TUNNELLED) {
>> +        if (VIR_ALLOC(libxlTunnelMigationThreadPtr) < 0)
>> +            goto cleanup;
>> +        if (pipe(dataFD) < 0) {
>> +            virReportError(errno, "%s", _("Unable to make pipes"));
>> +            goto cleanup;
>> +        }
>> +        /* Read from pipe */
>> +        libxlTunnelMigationThreadPtr->srcFD = dataFD[0];
>> +        /* Write to dest stream */
>> +        libxlTunnelMigationThreadPtr->st = st;
>> +        if (virThreadCreate(&libxlTunnelMigationThreadPtr->thread, true,
>> +                            libxlTunnel3MigrationFunc,
>> +                            libxlTunnelMigationThreadPtr) < 0) {
>> +            virReportError(errno, "%s",
>> +                           _("Unable to create tunnel migration thread"));
>> +            goto cleanup;
>> +        }
> I still wonder how this chunk above inside VIR_MIGRATE_TUNNELLED and ...
> 
>>  
>> +        virObjectUnlock(vm);
>> +        /* Send data to pipe */
>> +        ret = libxlDoMigrateSend(driver, vm, flags, dataFD[1]);
>> +        virObjectLock(vm);
>> +    } else {
>> +        ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL,
>> +                                          uri_out, NULL, flags);
>> +    }
>>      if (ret < 0)
>>          orig_err = virSaveLastError();
>>  
>> @@ -824,6 +1027,14 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>>                   vm->def->name);
>>  
>>   cleanup:
>> +    if (libxlTunnelMigationThreadPtr) {
>> +        virThreadCancel(&libxlTunnelMigationThreadPtr->thread);
>> +        VIR_FREE(libxlTunnelMigationThreadPtr);
>> +    }
>> +    VIR_FORCE_CLOSE(dataFD[0]);
>> +    VIR_FORCE_CLOSE(dataFD[1]);
> ... and this one could be turned into helpers like libxlMigrationStartTunnel and
> libxlMigrationStopTunnel? For the latter you would probably add a dstFD so that
> you can also clean up both descriptors in StopTunnel. I sort of understand that
> the way you propose it is meant to consolidate cleanup. But it would help the
> reader of the P2P migration function (which is considerably big) to move the big
> portions of tunnel-specific codepaths into helpers, such that the migration flow
> is clearer. This is just a suggestion though, unless others feel strongly about it.
> 
> One other issue is that virThreadCancel is asynchronous, and you close the
> file descriptors right after requesting the cancellation, which sounds
> error-prone to me IIUC. A successful return does not guarantee that the thread
> has exited, only that the cancellation request was made successfully. So you
> would probably need a virThreadJoin after that, such that you actually know the
> thread really has been cancelled. Then it's probably safe to close both
> descriptors afterwards. The Qemu driver also seems to write a single byte
> signalling the Tunnel thread to abort/finish.
> 




More information about the libvir-list mailing list