[libvirt] [RFC PATCH] libxl: add tunnelled migration support
Bob Liu
bob.liu at oracle.com
Thu Oct 27 12:34:55 UTC 2016
On 10/26/2016 07:24 PM, Joao Martins wrote:
> On 10/26/2016 07:33 AM, Bob Liu wrote:
>> Tunnelled migration doesn't require any extra network connections besides the
>> libvirt daemon.
>> It's capable of strong encryption and is the default option in openstack-nova.
>>
>> This patch adds the tunnelled migration (Tunnel3params) support to libxl.
>> The data flow in the src side is:
>> * libxlDoMigrateSend() -> pipe
>> * libxlTunnel3MigrationFunc() poll pipe out and then write to dest stream.
>>
>> While in the dest side:
>> Stream -> pipe -> 'recvfd of libxlDomainStartRestore'
>>
>> The usage is the same as p2p migration, execpt adding one more '--tunnelled' to
> ^^^^^^ except
>> the libvirt p2p migration command.
>>
>> Signed-off-by: Bob Liu <bob.liu at oracle.com>
> Nice :) Now openstack no longer needs to have tunnelled flag removed on nova to
> get migration working.
>
> See some comments below; it's a first review, as I would still like to test it.
>
Will take all of your suggestions.
But let's wait for more reviews before I post an updated version.
Thanks,
Bob
>> ---
>> src/libxl/libxl_driver.c | 58 ++++++++++-
>> src/libxl/libxl_migration.c | 241 +++++++++++++++++++++++++++++++++++++++++---
>> src/libxl/libxl_migration.h | 9 ++
>> 3 files changed, 292 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
>> index b66cb1f..a01bbff 100644
>> --- a/src/libxl/libxl_driver.c
>> +++ b/src/libxl/libxl_driver.c
>> @@ -5918,6 +5918,61 @@ libxlDomainMigrateBegin3Params(virDomainPtr domain,
>> }
>>
>> static int
>> +libxlDomainMigratePrepareTunnel3Params(virConnectPtr dconn,
>> + virStreamPtr st,
>> + virTypedParameterPtr params,
>> + int nparams,
>> + const char *cookiein,
>> + int cookieinlen,
>> + char **cookieout ATTRIBUTE_UNUSED,
>> + int *cookieoutlen ATTRIBUTE_UNUSED,
>> + unsigned int flags)
>> +{
>> + libxlDriverPrivatePtr driver = dconn->privateData;
>> + virDomainDefPtr def = NULL;
>> + const char *dom_xml = NULL;
>> + const char *dname = NULL;
>> + const char *uri_in = NULL;
>> +
>> +#ifdef LIBXL_HAVE_NO_SUSPEND_RESUME
>> + virReportUnsupportedError();
>> + return -1;
>> +#endif
>> +
>> + virCheckFlags(LIBXL_MIGRATION_FLAGS, -1);
>> + if (virTypedParamsValidate(params, nparams, LIBXL_MIGRATION_PARAMETERS) < 0)
>> + goto error;
>> +
>> + if (virTypedParamsGetString(params, nparams,
>> + VIR_MIGRATE_PARAM_DEST_XML,
>> + &dom_xml) < 0 ||
>> + virTypedParamsGetString(params, nparams,
>> + VIR_MIGRATE_PARAM_DEST_NAME,
>> + &dname) < 0 ||
>> + virTypedParamsGetString(params, nparams,
>> + VIR_MIGRATE_PARAM_URI,
>> + &uri_in) < 0)
>> +
>> + goto error;
>> +
>> + if (!(def = libxlDomainMigrationPrepareDef(driver, dom_xml, dname)))
>> + goto error;
>> +
>> + if (virDomainMigratePrepareTunnel3ParamsEnsureACL(dconn, def) < 0)
>> + goto error;
>> +
>> + if (libxlDomainMigrationPrepareTunnel3(dconn, st, &def, cookiein,
>> + cookieinlen, flags) < 0)
>> + goto error;
>> +
>> + return 0;
>> +
>> + error:
>> + virDomainDefFree(def);
>> + return -1;
>> +}
>> +
>> +static int
>> libxlDomainMigratePrepare3Params(virConnectPtr dconn,
>> virTypedParameterPtr params,
>> int nparams,
>> @@ -6017,7 +6072,7 @@ libxlDomainMigratePerform3Params(virDomainPtr dom,
>> if (virDomainMigratePerform3ParamsEnsureACL(dom->conn, vm->def) < 0)
>> goto cleanup;
>>
>> - if (flags & VIR_MIGRATE_PEER2PEER) {
>> + if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
>> if (libxlDomainMigrationPerformP2P(driver, vm, dom->conn, dom_xml,
>> dconnuri, uri, dname, flags) < 0)
>> goto cleanup;
>> @@ -6501,6 +6556,7 @@ static virHypervisorDriver libxlHypervisorDriver = {
>> .nodeDeviceReset = libxlNodeDeviceReset, /* 1.2.3 */
>> .domainMigrateBegin3Params = libxlDomainMigrateBegin3Params, /* 1.2.6 */
>> .domainMigratePrepare3Params = libxlDomainMigratePrepare3Params, /* 1.2.6 */
>> + .domainMigratePrepareTunnel3Params = libxlDomainMigratePrepareTunnel3Params, /* 2.3.1 */
> The version here is incorrect. It should be the next one to be tagged (after the
> ongoing freeze). Which means 2.5.0. Note that the versioning used has changed a
> bit: major number is incremented per year, minor per month and bugfix number for
> -maint releases.
>
>> .domainMigratePerform3Params = libxlDomainMigratePerform3Params, /* 1.2.6 */
>> .domainMigrateFinish3Params = libxlDomainMigrateFinish3Params, /* 1.2.6 */
>> .domainMigrateConfirm3Params = libxlDomainMigrateConfirm3Params, /* 1.2.6 */
>> diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c
>> index 534abb8..88c9bb8 100644
>> --- a/src/libxl/libxl_migration.c
>> +++ b/src/libxl/libxl_migration.c
>> @@ -44,6 +44,7 @@
>> #include "libxl_migration.h"
>> #include "locking/domain_lock.h"
>> #include "virtypedparam.h"
>> +#include "fdstream.h"
>>
>> #define VIR_FROM_THIS VIR_FROM_LIBXL
>>
>> @@ -484,6 +485,90 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr driver,
>> }
>>
>> int
>> +libxlDomainMigrationPrepareTunnel3(virConnectPtr dconn,
>> + virStreamPtr st,
>> + virDomainDefPtr *def,
>> + const char *cookiein,
>> + int cookieinlen,
>> + unsigned int flags)
>> +{
>> + libxlMigrationCookiePtr mig = NULL;
>> + libxlDriverPrivatePtr driver = dconn->privateData;
>> + virDomainObjPtr vm = NULL;
>> + libxlMigrationDstArgs *args = NULL;
>> + virThread thread;
>> + int dataFD[2] = { -1, -1 };
>> + int ret = 0;
> The general codestyle I usually see is to have this initialized to -1, and then
> set ret = 0 on success (before the goto done) mentioned further below. In other
> words, assume error and set it to 0 if all good, as you have more error paths
> than the sole success one.
>
>> +
>> + if (libxlMigrationEatCookie(cookiein, cookieinlen, &mig) < 0)
>> + goto error;
>> +
>> + if (mig->xenMigStreamVer > LIBXL_SAVE_VERSION) {
>> + virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
>> + _("Xen migration stream version '%d' is not supported on this host"),
>> + mig->xenMigStreamVer);
>> + goto error;
>> + }
>> +
>> + if (!(vm = virDomainObjListAdd(driver->domains, *def,
>> + driver->xmlopt,
>> + VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
>> + VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
>> + NULL)))
>> + goto error;
>> +
>> + /*
>> + * The data flow of tunnel3 migration in the dest side:
>> + * stream -> pipe -> recvfd of libxlDomainStartRestore
>> + */
>> + if (pipe(dataFD) < 0)
>> + goto error;
>> +
>> + /* Stream data will be written to pipeIn */
>> + if (virFDStreamOpen(st, dataFD[1]) < 0)
>> + goto error;
> We probably need to add this (same as qemu):
>
> dataFD[1] = -1; /* 'st' owns the FD now & will close it */
>
>> +
>> + if (libxlMigrationDstArgsInitialize() < 0)
>> + goto error;
>> +
>> + if (!(args = virObjectNew(libxlMigrationDstArgsClass)))
>> + goto error;
>> +
>> + args->conn = dconn;
>> + args->vm = vm;
>> + args->flags = flags;
>> + args->migcookie = mig;
>> + /* Receive from pipeOut */
>> + args->recvfd = dataFD[0];
>> + args->nsocks = 0;
>> + if (virThreadCreate(&thread, false, libxlDoMigrateReceive, args) < 0) {
>> + virReportError(VIR_ERR_OPERATION_FAILED, "%s",
>> + _("Failed to create thread for receiving migration data"));
>> + goto error;
>> + }
>> +
>> + goto done;
> Have ret = 0 before "goto done".
>
>> +
>> + error:
>> + VIR_FORCE_CLOSE(dataFD[1]);
>> + VIR_FORCE_CLOSE(dataFD[0]);
>> + virObjectUnref(args);
>> + /* Remove virDomainObj from domain list */
>> + if (vm) {
>> + virDomainObjListRemove(driver->domains, vm);
>> + vm = NULL;
>> + }
>> + ret = -1;
> And remove this one. That said, the three comments about style are just
> nitpicks and could be ignored if others are ok with it.
>
>> +
>> + done:
>> + /* Nobody will close dataFD[1]? */
> virFDStreamOpen(st, dataFD[1]) will set virStream private data which stores this
> file descriptor. A later call to virStreamFinish would result in a call to
> virFDStreamClose(...) which closes the file descriptor. So unless I
> misunderstood, I think it's reasonable to remove this comment here, assuming you
> set dataFD[1] = -1 as commented above.
>
>> + if (vm)
>> + virObjectUnlock(vm);
>> +
>> + return ret;
>> +}
>> +
>> +int
>> libxlDomainMigrationPrepare(virConnectPtr dconn,
>> virDomainDefPtr *def,
>> const char *uri_in,
>> @@ -710,9 +795,90 @@ libxlDomainMigrationPrepare(virConnectPtr dconn,
>> return ret;
>> }
>>
>> -/* This function is a simplification of virDomainMigrateVersion3Full
>> - * excluding tunnel support and restricting it to migration v3
>> - * with params since it was the first to be introduced in libxl.
>> +typedef struct _libxlTunnelMigrationThread libxlTunnelMigrationThread;
>> +struct _libxlTunnelMigrationThread {
>> + virThread thread;
>> + virStreamPtr st;
>> + int srcFD;
>> +};
>> +#define TUNNEL_SEND_BUF_SIZE 65536
>> +
>> +/*
>> + * The data flow of tunnel3 migration in the src side:
>> + * libxlDoMigrateSend() -> pipe
>> + * libxlTunnel3MigrationFunc() polls pipe out and then write to dest stream.
>> + */
>> +static void libxlTunnel3MigrationFunc(void *arg)
>> +{
>> + libxlTunnelMigrationThread *data = (libxlTunnelMigrationThread *)arg;
>> + char *buffer = NULL;
>> + struct pollfd fds[1];
>> + int timeout = -1;
>> +
>> + if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) {
>> + virReportError(errno, "%s", _("poll failed in migration tunnel"));
>> + return;
>> + }
>> +
>> + fds[0].fd = data->srcFD;
>> + for (;;) {
>> + int ret;
>> +
>> + fds[0].events = POLLIN;
>> + fds[0].revents = 0;
>> + ret = poll(fds, ARRAY_CARDINALITY(fds), timeout);
>> + if (ret < 0) {
>> + if (errno == EAGAIN || errno == EINTR)
>> + continue;
>> + virReportError(errno, "%s",
>> + _("poll failed in libxlTunnel3MigrationFunc"));
>> + goto abrt;
>> + }
>> +
>> + if (ret == 0) {
>> + VIR_DEBUG("poll got timeout");
>> + break;
>> + }
>> +
>> + if (fds[0].revents & (POLLIN | POLLERR | POLLHUP)) {
>> + int nbytes;
>> +
>> + nbytes = read(data->srcFD, buffer, TUNNEL_SEND_BUF_SIZE);
>> + if (nbytes > 0) {
>> + /* Write to dest stream */
>> + if (virStreamSend(data->st, buffer, nbytes) < 0) {
>> + virReportError(errno, "%s",
>> + _("tunnelled migration failed to send to virStream"));
>> + goto abrt;
>> + }
>> + } else if (nbytes < 0) {
>> + virReportError(errno, "%s",
>> + _("tunnelled migration failed to read from xen side"));
>> + goto abrt;
>> + } else {
>> + /* EOF; transferred all data */
>> + break;
>> + }
>> + }
>> + }
>> +
>> + if (virStreamFinish(data->st) < 0)
>> + virReportError(errno, "%s",
>> + _("tunnelled migration failed to finish stream"));
>> +
>> + cleanup:
>> + VIR_FREE(buffer);
>> +
>> + return;
>> +
>> + abrt:
> Do you mean 'abort' here? Ahh, but there aren't any 'abort' labels in the
> whole source code, so I assume you used it to be in line with the rest.
>
>> + virStreamAbort(data->st);
>> + goto cleanup;
>> +}
>> +
>> +/* This function is a simplification of virDomainMigrateVersion3Full and
>> + * restricting it to migration v3 with params since it was the first to be
>> + * introduced in libxl.
>> */
>> static int
>> libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>> @@ -737,6 +903,10 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>> bool cancelled = true;
>> virErrorPtr orig_err = NULL;
>> int ret = -1;
>> + /* For tunnel migration */
>> + virStreamPtr st = NULL;
>> + libxlTunnelMigrationThread *libxlTunnelMigationThreadPtr = NULL;
> This variable could probably be smaller, like "tnlthread". This would allow you
> to avoid both the long name, plus the typo you have in variable name.
>
>> + int dataFD[2] = { -1, -1 };
>>
>> dom_xml = libxlDomainMigrationBegin(sconn, vm, xmlin,
>> &cookieout, &cookieoutlen);
>> @@ -764,29 +934,62 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>>
>> VIR_DEBUG("Prepare3");
>> virObjectUnlock(vm);
>> - ret = dconn->driver->domainMigratePrepare3Params
>> - (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags);
>> + if (flags & VIR_MIGRATE_TUNNELLED) {
>> + if (!(st = virStreamNew(dconn, 0)))
>> + goto cleanup;
>> + ret = dconn->driver->domainMigratePrepareTunnel3Params
>> + (dconn, st, params, nparams, cookieout, cookieoutlen, NULL, NULL, destflags);
>> + } else {
>> + ret = dconn->driver->domainMigratePrepare3Params
>> + (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags);
>> + }
>> virObjectLock(vm);
>>
>> if (ret == -1)
>> goto cleanup;
>>
>> - if (uri_out) {
>> - if (virTypedParamsReplaceString(&params, &nparams,
>> - VIR_MIGRATE_PARAM_URI, uri_out) < 0) {
>> - orig_err = virSaveLastError();
>> + if (!(flags & VIR_MIGRATE_TUNNELLED)) {
>> + if (uri_out) {
>> + if (virTypedParamsReplaceString(&params, &nparams,
>> + VIR_MIGRATE_PARAM_URI, uri_out) < 0) {
>> + orig_err = virSaveLastError();
>> + goto finish;
>> + }
>> + } else {
>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> + _("domainMigratePrepare3 did not set uri"));
>> goto finish;
>> }
>> - } else {
>> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> - _("domainMigratePrepare3 did not set uri"));
>> - goto finish;
>> }
>>
>> VIR_DEBUG("Perform3 uri=%s", NULLSTR(uri_out));
>> - ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL,
>> - uri_out, NULL, flags);
>> + if (flags & VIR_MIGRATE_TUNNELLED) {
>> + if (VIR_ALLOC(libxlTunnelMigationThreadPtr) < 0)
>> + goto cleanup;
>> + if (pipe(dataFD) < 0) {
>> + virReportError(errno, "%s", _("Unable to make pipes"));
>> + goto cleanup;
>> + }
>> + /* Read from pipe */
>> + libxlTunnelMigationThreadPtr->srcFD = dataFD[0];
>> + /* Write to dest stream */
>> + libxlTunnelMigationThreadPtr->st = st;
>> + if (virThreadCreate(&libxlTunnelMigationThreadPtr->thread, true,
>> + libxlTunnel3MigrationFunc,
>> + libxlTunnelMigationThreadPtr) < 0) {
>> + virReportError(errno, "%s",
>> + _("Unable to create tunnel migration thread"));
>> + goto cleanup;
>> + }
> I still wonder how this chunk above inside VIR_MIGRATE_TUNNELLED and ...
>
>>
>> + virObjectUnlock(vm);
>> + /* Send data to pipe */
>> + ret = libxlDoMigrateSend(driver, vm, flags, dataFD[1]);
>> + virObjectLock(vm);
>> + } else {
>> + ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL,
>> + uri_out, NULL, flags);
>> + }
>> if (ret < 0)
>> orig_err = virSaveLastError();
>>
>> @@ -824,6 +1027,14 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver,
>> vm->def->name);
>>
>> cleanup:
>> + if (libxlTunnelMigationThreadPtr) {
>> + virThreadCancel(&libxlTunnelMigationThreadPtr->thread);
>> + VIR_FREE(libxlTunnelMigationThreadPtr);
>> + }
>> + VIR_FORCE_CLOSE(dataFD[0]);
>> + VIR_FORCE_CLOSE(dataFD[1]);
> ... and this one could be turned into helpers like libxlMigrationStartTunnel and
> libxlMigrationStopTunnel? For the latter you would probably add a dstFD so that you can
> also clean up both descriptors in StopTunnel. I sort of understand that the way you
> propose consolidates cleanup. But it would help the reader of the P2P
> migration function (which is considerably big) to move the big portions of
> tunnel-specific codepaths into helpers, such that the migration flow is clearer.
> This is just a suggestion though, unless others feel strongly about it.
>
> One other issue is that virThreadCancel is asynchronous, and you close the
> file descriptors right after requesting the cancellation, which sounds
> error-prone to me IIUC. A successful return only guarantees that the
> cancellation request was made, not that the thread has actually exited. So you
> would probably need a virThreadJoin after that, such that you can actually know
> the thread has really been cancelled? Then it's probably safe to close both
> descriptors afterwards. The qemu driver also seems to write a single byte to
> signal the tunnel thread to abort/finish.
>
More information about the libvir-list
mailing list