[libvirt] [PATCH 4 of 4] [LXC] Add setup/cleanup of container network interfaces
Daniel Lezcano
dlezcano at fr.ibm.com
Wed Jun 25 20:56:04 UTC 2008
Dan Smith wrote:
> Changes:
> - Remove extraneous "i" variables from various functions
> - Only bring up lo if we have other interfaces (and thus NETNS)
> - Fail setup of interfaces if NETNS support is not present
> - Only add CLONE_NEWNET to start flags if domain has interfaces defined
> - Make lxc_vm_t parameters const in helper functions (where appropriate)
> - Cleanup DomainStart procedure to bail on start/move/continue errors
>
> diff -r bb48967cf19e -r 88267b7327be src/lxc_conf.h
> --- a/src/lxc_conf.h Mon Jun 23 11:53:45 2008 -0700
> +++ b/src/lxc_conf.h Mon Jun 23 11:53:45 2008 -0700
> @@ -35,6 +35,12 @@
> #define LXC_MAX_XML_LENGTH 16384
> #define LXC_MAX_ERROR_LEN 1024
> #define LXC_DOMAIN_TYPE "lxc"
> +#define LXC_PARENT_SOCKET 0
> +#define LXC_CONTAINER_SOCKET 1
> +
> +/* messages between parent and container */
> +typedef char lxc_message_t;
> +#define LXC_CONTINUE_MSG 'c'
>
> /* types of networks for containers */
> enum lxc_net_type {
> @@ -96,6 +102,8 @@
> int parentTty;
> int containerTtyFd;
> char *containerTty;
> +
> + int sockpair[2];
>
> lxc_vm_def_t *def;
>
> diff -r bb48967cf19e -r 88267b7327be src/lxc_container.c
> --- a/src/lxc_container.c Mon Jun 23 11:53:45 2008 -0700
> +++ b/src/lxc_container.c Mon Jun 23 11:53:45 2008 -0700
> @@ -36,6 +36,7 @@
> #include "lxc_conf.h"
> #include "util.h"
> #include "memory.h"
> +#include "veth.h"
>
> #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
> #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
> @@ -159,6 +160,72 @@
> }
>
> /**
> + * lxcWaitForContinue:
> + * @vm: Pointer to vm structure
> + *
> + * This function will wait for the container continue message from the
> + * parent process. The parent sends this message on the socket pair stored
> + * in the vm structure once it has completed the post-clone container setup.
> + *
> + * Returns 0 on success or -1 in case of error
> + */
> +static int lxcWaitForContinue(lxc_vm_t *vm)
> +{
> + int rc = -1;
> + lxc_message_t msg;
> + int readLen;
> +
> + readLen = saferead(vm->sockpair[LXC_CONTAINER_SOCKET], &msg, sizeof(msg));
> + if (readLen != sizeof(msg)) {
> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("Failed to read the container continue message: %s"),
> + strerror(errno));
> + goto error_out;
> + }
> +
> + DEBUG0("Received container continue message");
> +
> + close(vm->sockpair[LXC_PARENT_SOCKET]);
> + vm->sockpair[LXC_PARENT_SOCKET] = -1;
> + close(vm->sockpair[LXC_CONTAINER_SOCKET]);
> + vm->sockpair[LXC_CONTAINER_SOCKET] = -1;
> +
> + rc = 0;
> +
> +error_out:
> + return rc;
> +}
> +
> +/**
> + * lxcEnableInterfaces:
> + * @vm: Pointer to vm structure
> + *
> + * This function will enable the interfaces for this container.
> + *
> + * Returns 0 on success or nonzero in case of error
> + */
> +static int lxcEnableInterfaces(const lxc_vm_t *vm)
> +{
> + int rc = 0;
> + const lxc_net_def_t *net;
> +
> + for (net = vm->def->nets; net; net = net->next) {
> + DEBUG("Enabling %s", net->containerVeth);
> + rc = vethInterfaceUpOrDown(net->containerVeth, 1);
> + if (0 != rc) {
> + goto error_out;
> + }
> + }
> +
> + /* enable lo device only if there were other net devices */
> + if (vm->def->nets)
> + rc = vethInterfaceUpOrDown("lo", 1);
> +
> +error_out:
> + return rc;
> +}
> +
> +/**
> * lxcChild:
> * @argv: Pointer to container arguments
> *
> @@ -210,6 +277,16 @@
> goto cleanup;
> }
>
> + /* Wait for interface devices to show up */
> + if (0 != (rc = lxcWaitForContinue(vm))) {
> + goto cleanup;
> + }
> +
> + /* enable interfaces */
> + if (0 != (rc = lxcEnableInterfaces(vm))) {
> + goto cleanup;
> + }
> +
> rc = lxcExecWithTty(vm);
> /* this function will only return if an error occured */
>
> diff -r bb48967cf19e -r 88267b7327be src/lxc_driver.c
> --- a/src/lxc_driver.c Mon Jun 23 11:53:45 2008 -0700
> +++ b/src/lxc_driver.c Mon Jun 23 11:53:45 2008 -0700
> @@ -44,6 +44,9 @@
> #include "memory.h"
> #include "util.h"
> #include "memory.h"
> +#include "bridge.h"
> +#include "qemu_conf.h"
> +#include "veth.h"
>
> /* debug macros */
> #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
> @@ -395,6 +398,202 @@
> }
>
> /**
> + * lxcSetupInterfaces:
> + * @conn: pointer to connection
> + * @vm: pointer to virtual machine structure
> + *
> + * Sets up the container interfaces by creating the veth device pairs and
> + * attaching the parent end to the appropriate bridge. The container end
> + * will be moved into the container namespace later after clone has been called.
> + *
> + * Returns 0 on success or -1 in case of error
> + */
> +static int lxcSetupInterfaces(virConnectPtr conn,
> + lxc_vm_t *vm)
> +{
> + int rc = -1;
> + lxc_driver_t *driver = conn->privateData;
> + struct qemud_driver *networkDriver =
> + (struct qemud_driver *)(conn->networkPrivateData);
> + lxc_net_def_t *net = vm->def->nets;
> + char* bridge;
> + char parentVeth[PATH_MAX] = "";
> + char containerVeth[PATH_MAX] = "";
> +
> + if ((vm->def->nets != NULL) && (driver->have_netns == 0)) {
> + lxcError(conn, NULL, VIR_ERR_NO_SUPPORT,
> + _("System lacks NETNS support"));
> + return -1;
> + }
> +
> + for (net = vm->def->nets; net; net = net->next) {
> + if (LXC_NET_NETWORK == net->type) {
> + virNetworkPtr network = virNetworkLookupByName(conn, net->txName);
> + if (!network) {
> + goto error_exit;
> + }
> +
> + bridge = virNetworkGetBridgeName(network);
> +
> + virNetworkFree(network);
> +
> + } else {
> + bridge = net->txName;
> + }
> +
> + DEBUG("bridge: %s", bridge);
> + if (NULL == bridge) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to get bridge for interface"));
> + goto error_exit;
> + }
> +
> + DEBUG0("calling vethCreate()");
> + if (NULL != net->parentVeth) {
> + strcpy(parentVeth, net->parentVeth);
> + }
> + if (NULL != net->containerVeth) {
> + strcpy(containerVeth, net->containerVeth);
> + }
> + DEBUG("parentVeth: %s, containerVeth: %s", parentVeth, containerVeth);
> + if (0 != (rc = vethCreate(parentVeth, PATH_MAX, containerVeth, PATH_MAX))) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to create veth device pair: %d"), rc);
> + goto error_exit;
> + }
> + if (NULL == net->parentVeth) {
> + net->parentVeth = strdup(parentVeth);
> + }
> + if (NULL == net->containerVeth) {
> + net->containerVeth = strdup(containerVeth);
> + }
> +
> + if ((NULL == net->parentVeth) || (NULL == net->containerVeth)) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to allocate veth names"));
> + goto error_exit;
> + }
> +
> + if (!(networkDriver->brctl) && (rc = brInit(&(networkDriver->brctl)))) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("cannot initialize bridge support: %s"),
> + strerror(rc));
> + goto error_exit;
> + }
> +
> + if (0 != (rc = brAddInterface(networkDriver->brctl, bridge, parentVeth))) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to add %s device to %s: %s"),
> + parentVeth,
> + bridge,
> + strerror(rc));
> + goto error_exit;
> + }
> +
> + if (0 != (rc = vethInterfaceUpOrDown(parentVeth, 1))) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to enable parent ns veth device: %d"), rc);
> + goto error_exit;
> + }
> +
> + }
> +
> + rc = 0;
> +
> +error_exit:
> + return rc;
> +}
> +
> +/**
> + * lxcMoveInterfacesToNetNs:
> + * @conn: pointer to connection
> + * @vm: pointer to virtual machine structure
> + *
> + * Moves the container end of each veth device pair into the container's namespace
> + *
> + * Returns 0 on success or -1 in case of error
> + */
> +static int lxcMoveInterfacesToNetNs(virConnectPtr conn,
> + const lxc_vm_t *vm)
> +{
> + int rc = -1;
> + lxc_net_def_t *net;
> +
> + for (net = vm->def->nets; net; net = net->next) {
> + if (0 != moveInterfaceToNetNs(net->containerVeth, vm->def->id)) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to move interface %s to ns %d"),
> + net->containerVeth, vm->def->id);
> + goto error_exit;
> + }
> + }
> +
> + rc = 0;
> +
> +error_exit:
> + return rc;
> +}
> +
> +/**
> + * lxcCleanupInterfaces:
> + * @conn: pointer to connection
> + * @vm: pointer to virtual machine structure
> + *
> + * Cleans up the container interfaces by deleting the veth device pairs.
> + *
> + * Always returns 0; a failure to delete a device is reported and cleanup continues
> + */
> +static int lxcCleanupInterfaces(const lxc_vm_t *vm)
> +{
> + int rc = -1;
> + lxc_net_def_t *net;
> +
> + for (net = vm->def->nets; net; net = net->next) {
> + if (0 != (rc = vethDelete(net->parentVeth))) {
> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("failed to delete veth: %s"), net->parentVeth);
> + /* will continue to try to cleanup any other interfaces */
> + }
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * lxcSendContainerContinue:
> + * @vm: pointer to virtual machine structure
> + *
> + * Sends the continue message via the socket pair stored in the vm
> + * structure.
> + *
> + * Returns 0 on success or -1 in case of error
> + */
> +static int lxcSendContainerContinue(const lxc_vm_t *vm)
> +{
> + int rc = -1;
> + lxc_message_t msg = LXC_CONTINUE_MSG;
> + int writeCount = 0;
> +
> + if (NULL == vm) {
> + goto error_out;
> + }
> +
> + writeCount = safewrite(vm->sockpair[LXC_PARENT_SOCKET], &msg,
> + sizeof(msg));
> + if (writeCount != sizeof(msg)) {
> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("unable to send container continue message: %s"),
> + strerror(errno));
> + goto error_out;
> + }
> +
> + rc = 0;
> +
> +error_out:
> + return rc;
> +}
> +
> +/**
> * lxcStartContainer:
> * @conn: pointer to connection
> * @driver: pointer to driver structure
> @@ -422,6 +621,9 @@
> stacktop = stack + stacksize;
>
> flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
> +
> + if (vm->def->nets != NULL)
> + flags |= CLONE_NEWNET;
>
> vm->def->id = clone(lxcChild, stacktop, flags, (void *)vm);
>
> @@ -819,15 +1021,42 @@
> close(vm->parentTty);
> close(vm->containerTtyFd);
>
> - rc = lxcStartContainer(conn, driver, vm);
> -
> - if (rc == 0) {
> - vm->state = VIR_DOMAIN_RUNNING;
> - driver->ninactivevms--;
> - driver->nactivevms++;
> + if (0 != (rc = lxcSetupInterfaces(conn, vm))) {
> + goto cleanup;
> }
>
> + /* create a socket pair to send continue message to the container once */
> + /* we've completed the post clone configuration */
> + if (0 != socketpair(PF_UNIX, SOCK_STREAM, 0, vm->sockpair)) {
> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> + _("sockpair failed: %s"), strerror(errno));
> + goto cleanup;
> + }
> +
> + /* check this rc */
> +
> + rc = lxcStartContainer(conn, driver, vm);
> + if (rc != 0)
> + goto cleanup;
> +
> + rc = lxcMoveInterfacesToNetNs(conn, vm);
> + if (rc != 0)
> + goto cleanup;
> +
> + rc = lxcSendContainerContinue(vm);
> + if (rc != 0)
> + goto cleanup;
> +
> + vm->state = VIR_DOMAIN_RUNNING;
> + driver->ninactivevms--;
> + driver->nactivevms++;
> +
> cleanup:
> + close(vm->sockpair[LXC_PARENT_SOCKET]);
> + vm->sockpair[LXC_PARENT_SOCKET] = -1;
> + close(vm->sockpair[LXC_CONTAINER_SOCKET]);
> + vm->sockpair[LXC_CONTAINER_SOCKET] = -1;
> +
> return rc;
> }
>
> @@ -957,6 +1186,9 @@
> int rc = -1;
> int waitRc;
> int childStatus = -1;
> +
> + /* if this fails, we'll continue. it will report any errors */
> + lxcCleanupInterfaces(vm);
Is this cleanup (lxcCleanupInterfaces) called when the last process of the container dies?
> while (((waitRc = waitpid(vm->def->id, &childStatus, 0)) == -1) &&
> errno == EINTR);
More information about the libvir-list
mailing list