[libvirt] PATCH: 2/5: Re-arrange methods across LXC source files

Daniel P. Berrange berrange at redhat.com
Tue Jul 15 14:12:55 UTC 2008


The lxc_driver.c file contains quite a large amount of code,
serving two reasonably well separated purposes. First there
is the direct implementation of each of the libvirt driver
APIs. Second there is the code to spawn a container and a
controller for forwarding I/O to/from the PTYs. This patch
attempts to re-arrange the code across files to better reflect
the split in functionality. The general idea is thus:

 - lxc_driver.c: implementation of the libvirt driver APIs
 - lxc_container.c: code for creating containers
 - lxc_controller.c: code for managing an active container

So this entails the following re-arrangement:

 - All calls to clone() move into lxc_container.c. In
   particular there is a lxcContainerAvailable() method
   for querying the capabilities of the current kernel, and
   the lxcContainerStart() method for actually starting a
   new container.

 - The I/O forwarding code moves into lxc_controller.c
   with a lxcControllerMain() function containing the
   epoll() event loop.

The container code previously would pass the 'init' string path
for the container's init program to /bin/sh for execution.
This is bad because it assumes /bin/sh exists in the container's
root and, more seriously, does no escaping, so it allows arbitrary
shell code to run. So this patch also switches to passing the 'init'
string directly to execve(). If we want to support passing args
to the container startup program, we need explicit representation
of the args in the XML, so we can safely pass them to execve()
via the argv[]  array.

Aside from that, this patch should have no functional change to
the way containers run.

 b/src/lxc_controller.c |  205 +++++++++++++++++++++++++++++
 b/src/lxc_controller.h |   33 ++++
 src/Makefile.am        |    1 
 src/lxc_container.c    |  219 +++++++++++++++++++++++--------
 src/lxc_container.h    |   19 +-
 src/lxc_driver.c       |  345 ++-----------------------------------------------
 6 files changed, 433 insertions(+), 389 deletions(-)


Daniel


diff -r 985e6f9b7d78 src/Makefile.am
--- a/src/Makefile.am	Mon Jul 14 17:18:28 2008 +0100
+++ b/src/Makefile.am	Mon Jul 14 17:18:49 2008 +0100
@@ -65,6 +65,7 @@
 		openvz_conf.c openvz_conf.h			\
 		openvz_driver.c openvz_driver.h			\
 		lxc_driver.c lxc_driver.h			\
+		lxc_controller.c lxc_controller.h		\
 		lxc_conf.c lxc_conf.h				\
 		lxc_container.c lxc_container.h			\
 		veth.c veth.h			\
diff -r 985e6f9b7d78 src/lxc_container.c
--- a/src/lxc_container.c	Mon Jul 14 17:18:28 2008 +0100
+++ b/src/lxc_container.c	Mon Jul 14 17:18:49 2008 +0100
@@ -30,6 +30,7 @@
 #include <stdlib.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
+#include <sys/wait.h>
 #include <unistd.h>
 
 #include "lxc_container.h"
@@ -40,49 +41,69 @@
 #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
 #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
 
+/*
+ * GLibc headers are behind the kernel, so we define these
+ * constants if they're not present already.
+ */
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID  0x20000000
+#endif
+#ifndef CLONE_NEWUTS
+#define CLONE_NEWUTS  0x04000000
+#endif
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWIPC
+#define CLONE_NEWIPC  0x08000000
+#endif
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET  0x40000000 /* New network namespace */
+#endif
+
+/* messages between parent and container */
+typedef char lxc_message_t;
+#define LXC_CONTINUE_MSG 'c'
+
+typedef struct __lxc_child_argv lxc_child_argv_t;
+struct __lxc_child_argv {
+    lxc_vm_def_t *config;
+    int monitor;
+    char *ttyPath;
+};
+
+
 /**
- * lxcExecContainerInit:
+ * lxcContainerExecInit:
  * @vmDef: Ptr to vm definition structure
  *
- * Exec the container init string.  The container init will replace then
+ * Exec the container init string. The container init will replace then
  * be running in the current process
  *
- * Returns 0 on success or -1 in case of error
+ * Does not return
  */
-static int lxcExecContainerInit(const lxc_vm_def_t *vmDef)
+static int lxcContainerExecInit(const lxc_vm_def_t *vmDef)
 {
-    int rc = -1;
-    char* execString;
-    size_t execStringLen = strlen(vmDef->init) + 1 + 5;
+    const char *const argv[] = {
+        vmDef->init,
+        NULL,
+    };
 
-    if (VIR_ALLOC_N(execString, execStringLen) < 0) {
-        lxcError(NULL, NULL, VIR_ERR_NO_MEMORY,
-                 _("failed to calloc memory for init string: %s"),
-                 strerror(errno));
-        goto error_out;
-    }
-
-    strcpy(execString, "exec ");
-    strcat(execString, vmDef->init);
-
-    execl("/bin/sh", "sh", "-c", execString, (char*)NULL);
-    lxcError(NULL, NULL, VIR_ERR_NO_MEMORY,
-             _("execl failed to exec init: %s"), strerror(errno));
-
-error_out:
-    exit(rc);
+    return execve(argv[0], (char **)argv, NULL);
 }
 
 /**
- * lxcSetContainerStdio:
- * @ttyName: Name of tty to set as the container console
+ * lxcContainerSetStdio:
+ * @control: the control FD
+ * @ttyPath: Name of tty to set as the container console
  *
  * Sets the given tty as the primary conosole for the container as well as
  * stdout, stdin and stderr.
  *
  * Returns 0 on success or -1 in case of error
  */
-static int lxcSetContainerStdio(const char *ttyPath)
+static int lxcContainerSetStdio(int control, const char *ttyPath)
 {
     int rc = -1;
     int ttyfd;
@@ -111,7 +132,7 @@
      * close all FDs before executing the container */
     open_max = sysconf (_SC_OPEN_MAX);
     for (i = 0; i < open_max; i++)
-        if (i != ttyfd)
+        if (i != ttyfd && i != control)
             close(i);
 
     if (dup2(ttyfd, 0) < 0) {
@@ -142,30 +163,38 @@
 }
 
 /**
- * lxcExecWithTty:
- * @vm: Ptr to vm structure
+ * lxcContainerSendContinue:
+ * @control: control FD to child
  *
- * Sets container console and stdio and then execs container init
+ * Sends the continue message via the socket pair stored in the vm
+ * structure.
  *
  * Returns 0 on success or -1 in case of error
  */
-static int lxcExecWithTty(lxc_vm_def_t *vmDef, char *ttyPath)
+int lxcContainerSendContinue(virConnectPtr conn,
+                             int control)
 {
     int rc = -1;
+    lxc_message_t msg = LXC_CONTINUE_MSG;
+    int writeCount = 0;
 
-    if(lxcSetContainerStdio(ttyPath) < 0) {
-        goto exit_with_error;
+    writeCount = safewrite(control, &msg, sizeof(msg));
+    if (writeCount != sizeof(msg)) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("unable to send container continue message: %s"),
+                 strerror(errno));
+        goto error_out;
     }
 
-    lxcExecContainerInit(vmDef);
+    rc = 0;
 
-exit_with_error:
-    exit(rc);
+error_out:
+    return rc;
 }
 
 /**
- * lxcWaitForContinue:
- * @monitor: monitor FD from parent
+ * lxcContainerWaitForContinue:
+ * @control: control FD from parent
  *
  * This function will wait for the container continue message from the
  * parent process.  It will send this message on the socket pair stored in
@@ -173,12 +202,12 @@
  *
  * Returns 0 on success or -1 in case of error
  */
-static int lxcWaitForContinue(int monitor)
+static int lxcContainerWaitForContinue(int control)
 {
     lxc_message_t msg;
     int readLen;
 
-    readLen = saferead(monitor, &msg, sizeof(msg));
+    readLen = saferead(control, &msg, sizeof(msg));
     if (readLen != sizeof(msg) ||
         msg != LXC_CONTINUE_MSG) {
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -186,6 +215,7 @@
                  strerror(errno));
         return -1;
     }
+    close(control);
 
     DEBUG0("Received container continue message");
 
@@ -200,7 +230,7 @@
  *
  * Returns 0 on success or nonzero in case of error
  */
-static int lxcEnableInterfaces(const lxc_vm_def_t *def)
+static int lxcContainerEnableInterfaces(const lxc_vm_def_t *def)
 {
     int rc = 0;
     const lxc_net_def_t *net;
@@ -233,7 +263,7 @@
  *
  * Returns 0 on success or -1 in case of error
  */
-int lxcChild( void *data )
+static int lxcContainerChild( void *data )
 {
     int rc = -1;
     lxc_child_argv_t *argv = data;
@@ -244,7 +274,7 @@
     if (NULL == vmDef) {
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
                  _("lxcChild() passed invalid vm definition"));
-        goto cleanup;
+        return -1;
     }
 
     /* handle the bind mounts first before doing anything else that may */
@@ -260,7 +290,7 @@
             lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
                      _("failed to mount %s at %s for container: %s"),
                      curMount->source, curMount->target, strerror(errno));
-            goto cleanup;
+            return -1;
         }
     }
 
@@ -270,24 +300,105 @@
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
                  _("failed to mount /proc for container: %s"),
                  strerror(errno));
-        goto cleanup;
+        return -1;
     }
 
+    if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0)
+        return -1;
+
     /* Wait for interface devices to show up */
-    if (0 != (rc = lxcWaitForContinue(argv->monitor))) {
-        goto cleanup;
+    if (lxcContainerWaitForContinue(argv->monitor) < 0)
+        return -1;
+
+    /* enable interfaces */
+    if (lxcContainerEnableInterfaces(vmDef) < 0)
+        return -1;
+
+    /* this function will only return if an error occurred */
+    return lxcContainerExecInit(vmDef);
+}
+
+/**
+ * lxcContainerStart:
+ * @conn: pointer to connection
+ * @driver: pointer to driver structure
+ * @vm: pointer to virtual machine structure
+ *
+ * Starts a container process by calling clone() with the namespace flags
+ *
+ * Returns PID of container on success or -1 in case of error
+ */
+int lxcContainerStart(virConnectPtr conn,
+                      lxc_vm_def_t *def,
+                      int control,
+                      char *ttyPath)
+{
+    pid_t pid;
+    int flags;
+    int stacksize = getpagesize() * 4;
+    char *stack, *stacktop;
+    lxc_child_argv_t args = { def, control, ttyPath };
+
+    /* allocate a stack for the container */
+    if (VIR_ALLOC_N(stack, stacksize) < 0) {
+        lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
+                 _("unable to allocate container stack"));
+        return -1;
+    }
+    stacktop = stack + stacksize;
+
+    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
+
+    if (def->nets != NULL)
+        flags |= CLONE_NEWNET;
+
+    pid = clone(lxcContainerChild, stacktop, flags, &args);
+    VIR_FREE(stack);
+    DEBUG("clone() returned, %d", pid);
+
+    if (pid < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("clone() failed, %s"), strerror(errno));
+        return -1;
     }
 
-    /* enable interfaces */
-    if (0 != (rc = lxcEnableInterfaces(vmDef))) {
-        goto cleanup;
+    return pid;
+}
+
+static int lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED)
+{
+    _exit(0);
+}
+
+int lxcContainerAvailable(int features)
+{
+    int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
+        CLONE_NEWIPC|SIGCHLD;
+    int cpid;
+    char *childStack;
+    char *stack;
+    int childStatus;
+
+    if (features & LXC_CONTAINER_FEATURE_NET)
+        flags |= CLONE_NEWNET;
+
+    if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
+        DEBUG0("Unable to allocate stack");
+        return -1;
     }
 
-    rc = lxcExecWithTty(vmDef, argv->ttyPath);
-    /* this function will only return if an error occured */
+    childStack = stack + (getpagesize() * 4);
 
-cleanup:
-    return rc;
+    cpid = clone(lxcContainerDummyChild, childStack, flags, NULL);
+    VIR_FREE(stack);
+    if ((0 > cpid) && (EINVAL == errno)) {
+        DEBUG0("clone call returned EINVAL, container support is not enabled");
+        return -1;
+    } else {
+        waitpid(cpid, &childStatus, 0);
+    }
+
+    return 0;
 }
 
 #endif /* WITH_LXC */
diff -r 985e6f9b7d78 src/lxc_container.h
--- a/src/lxc_container.h	Mon Jul 14 17:18:28 2008 +0100
+++ b/src/lxc_container.h	Mon Jul 14 17:18:49 2008 +0100
@@ -28,20 +28,19 @@
 
 #ifdef WITH_LXC
 
-typedef struct __lxc_child_argv lxc_child_argv_t;
-struct __lxc_child_argv {
-    lxc_vm_def_t *config;
-    int monitor;
-    char *ttyPath;
+enum {
+    LXC_CONTAINER_FEATURE_NET = (1 << 0),
 };
 
-/* messages between parent and container */
-typedef char lxc_message_t;
-#define LXC_CONTINUE_MSG 'c'
+int lxcContainerSendContinue(virConnectPtr conn,
+                             int control);
 
+int lxcContainerStart(virConnectPtr conn,
+                      lxc_vm_def_t *def,
+                      int control,
+                      char *ttyPath);
 
-/* Function declarations */
-int lxcChild( void *argv );
+int lxcContainerAvailable(int features);
 
 #endif /* LXC_DRIVER_H */
 
diff -r 985e6f9b7d78 src/lxc_controller.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lxc_controller.c	Mon Jul 14 17:18:49 2008 +0100
@@ -0,0 +1,205 @@
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_controller.c: linux container process controller
+ *
+ * Authors:
+ *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+
+#ifdef WITH_LXC
+
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "internal.h"
+#include "util.h"
+
+#include "lxc_conf.h"
+#include "lxc_controller.h"
+
+
+#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
+
+/**
+ * lxcFdForward:
+ * @readFd: file descriptor to read
+ * @writeFd: file descriptor to write
+ *
+ * Reads 1 byte of data from readFd and writes to writeFd.
+ *
+ * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error
+ */
+static int lxcFdForward(int readFd, int writeFd)
+{
+    int rc = -1;
+    char buf[2];
+
+    if (1 != (saferead(readFd, buf, 1))) {
+        if (EAGAIN == errno) {
+            rc = EAGAIN;
+            goto cleanup;
+        }
+
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("read of fd %d failed: %s"), readFd, strerror(errno));
+        goto cleanup;
+    }
+
+    if (1 != (safewrite(writeFd, buf, 1))) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("write to fd %d failed: %s"), writeFd, strerror(errno));
+        goto cleanup;
+    }
+
+    rc = 0;
+
+cleanup:
+    return rc;
+}
+
+typedef struct _lxcTtyForwardFd_t {
+    int fd;
+    int active;
+} lxcTtyForwardFd_t;
+
+/**
+ * lxcControllerMain:
+ * @appPty: Open fd for application facing Pty
+ * @contPty: Open fd for container facing Pty
+ *
+ * Forwards traffic between fds.  Data read from appPty will be written to contPty
+ * This process loops forever.
+ * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP
+ * events when the user disconnects the virsh console via ctrl-]
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+int lxcControllerMain(int appPty, int contPty)
+{
+    int rc = -1;
+    int epollFd;
+    struct epoll_event epollEvent;
+    int numEvents;
+    int numActive = 0;
+    lxcTtyForwardFd_t fdArray[2];
+    int timeout = -1;
+    int curFdOff = 0;
+    int writeFdOff = 0;
+
+    fdArray[0].fd = appPty;
+    fdArray[0].active = 0;
+    fdArray[1].fd = contPty;
+    fdArray[1].active = 0;
+
+    /* create the epoll file descriptor */
+    epollFd = epoll_create(2);
+    if (0 > epollFd) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("epoll_create(2) failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
+    /* add the file descriptors to the epoll fd */
+    memset(&epollEvent, 0x00, sizeof(epollEvent));
+    epollEvent.events = EPOLLIN|EPOLLET;    /* edge triggered */
+    epollEvent.data.fd = appPty;
+    epollEvent.data.u32 = 0;                /* fdArray position */
+    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, appPty, &epollEvent)) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("epoll_ctl(appPty) failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+    epollEvent.data.fd = contPty;
+    epollEvent.data.u32 = 1;                /* fdArray position */
+    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, contPty, &epollEvent)) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("epoll_ctl(contPty) failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
+    while (1) {
+        /* if active fd's, return if no events, else wait forever */
+        timeout = (numActive > 0) ? 0 : -1;
+        numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
+        if (0 < numEvents) {
+            if (epollEvent.events & EPOLLIN) {
+                curFdOff = epollEvent.data.u32;
+                if (!fdArray[curFdOff].active) {
+                    fdArray[curFdOff].active = 1;
+                    ++numActive;
+                }
+
+            } else if (epollEvent.events & EPOLLHUP) {
+                DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+                continue;
+            } else {
+                lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                         _("error event %d"), epollEvent.events);
+                goto cleanup;
+            }
+
+        } else if (0 == numEvents) {
+            if (2 == numActive) {
+                /* both fds active, toggle between the two */
+                curFdOff ^= 1;
+            } else {
+                /* only one active, if current is active, use it, else it */
+                /* must be the other one (ie. curFd just went inactive) */
+                curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1;
+            }
+
+        } else  {
+            if (EINTR == errno) {
+                continue;
+            }
+
+            /* error */
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("epoll_wait() failed: %s"), strerror(errno));
+            goto cleanup;
+
+        }
+
+        if (0 < numActive) {
+            writeFdOff = curFdOff ^ 1;
+            rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd);
+
+            if (EAGAIN == rc) {
+                /* this fd no longer has data, set it as inactive */
+                --numActive;
+                fdArray[curFdOff].active = 0;
+            } else if (-1 == rc) {
+                goto cleanup;
+            }
+
+        }
+
+    }
+
+    rc = 0;
+
+cleanup:
+    close(appPty);
+    close(contPty);
+    close(epollFd);
+    return rc;
+}
+
+#endif
diff -r 985e6f9b7d78 src/lxc_controller.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lxc_controller.h	Mon Jul 14 17:18:49 2008 +0100
@@ -0,0 +1,33 @@
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_controller.h: linux container process controller
+ *
+ * Authors:
+ *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LXC_CONTROLLER_H
+#define LXC_CONTROLLER_H
+
+#ifdef WITH_LXC
+
+int lxcControllerMain(int appPty, int contPty);
+
+#endif /* WITH_LXC */
+
+#endif /* LXC_CONTROLLER_H */
diff -r 985e6f9b7d78 src/lxc_driver.c
--- a/src/lxc_driver.c	Mon Jul 14 17:18:28 2008 +0100
+++ b/src/lxc_driver.c	Mon Jul 14 17:18:49 2008 +0100
@@ -26,7 +26,6 @@
 #ifdef WITH_LXC
 
 #include <fcntl.h>
-#include <sys/epoll.h>
 #include <sched.h>
 #include <sys/utsname.h>
 #include <stdbool.h>
@@ -39,6 +38,7 @@
 #include "lxc_conf.h"
 #include "lxc_container.h"
 #include "lxc_driver.h"
+#include "lxc_controller.h"
 #include "driver.h"
 #include "internal.h"
 #include "memory.h"
@@ -52,77 +52,19 @@
 #define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
 #define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
 
-/*
- * GLibc headers are behind the kernel, so we define these
- * constants if they're not present already.
- */
-
-#ifndef CLONE_NEWPID
-#define CLONE_NEWPID  0x20000000
-#endif
-#ifndef CLONE_NEWUTS
-#define CLONE_NEWUTS  0x04000000
-#endif
-#ifndef CLONE_NEWUSER
-#define CLONE_NEWUSER 0x10000000
-#endif
-#ifndef CLONE_NEWIPC
-#define CLONE_NEWIPC  0x08000000
-#endif
-#ifndef CLONE_NEWNET
-#define CLONE_NEWNET  0x40000000 /* New network namespace */
-#endif
 
 static int lxcStartup(void);
 static int lxcShutdown(void);
 static lxc_driver_t *lxc_driver = NULL;
 
 /* Functions */
-static int lxcDummyChild( void *argv ATTRIBUTE_UNUSED )
-{
-    exit(0);
-}
-
-static int lxcCheckContainerSupport(int extra_flags)
-{
-    int rc = 0;
-    int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|
-        CLONE_NEWIPC|SIGCHLD|extra_flags;
-    int cpid;
-    char *childStack;
-    char *stack;
-    int childStatus;
-
-    if (VIR_ALLOC_N(stack, getpagesize() * 4) < 0) {
-        DEBUG0("Unable to allocate stack");
-        rc = -1;
-        goto check_complete;
-    }
-
-    childStack = stack + (getpagesize() * 4);
-
-    cpid = clone(lxcDummyChild, childStack, flags, NULL);
-    if ((0 > cpid) && (EINVAL == errno)) {
-        DEBUG0("clone call returned EINVAL, container support is not enabled");
-        rc = -1;
-    } else {
-        waitpid(cpid, &childStatus, 0);
-    }
-
-    VIR_FREE(stack);
-
-check_complete:
-    return rc;
-}
 
 static const char *lxcProbe(void)
 {
-#ifdef __linux__
-    if (0 == lxcCheckContainerSupport(0)) {
-        return("lxc:///");
-    }
-#endif
-    return(NULL);
+    if (lxcContainerAvailable(0) < 0)
+        return NULL;
+
+    return("lxc:///");
 }
 
 static virDrvOpenStatus lxcOpen(virConnectPtr conn,
@@ -559,89 +501,6 @@
     return 0;
 }
 
-/**
- * lxcSendContainerContinue:
- * @monitor: FD for communicating with child
- *
- * Sends the continue message via the socket pair stored in the vm
- * structure.
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcSendContainerContinue(virConnectPtr conn,
-                                    int monitor)
-{
-    int rc = -1;
-    lxc_message_t msg = LXC_CONTINUE_MSG;
-    int writeCount = 0;
-
-    writeCount = safewrite(monitor, &msg, sizeof(msg));
-    if (writeCount != sizeof(msg)) {
-        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("unable to send container continue message: %s"),
-                 strerror(errno));
-        goto error_out;
-    }
-
-    rc = 0;
-
-error_out:
-    return rc;
-}
-
-/**
- * lxcStartContainer:
- * @conn: pointer to connection
- * @driver: pointer to driver structure
- * @vm: pointer to virtual machine structure
- *
- * Starts a container process by calling clone() with the namespace flags
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcStartContainer(virConnectPtr conn,
-                             lxc_driver_t* driver,
-                             lxc_vm_t *vm,
-                             int monitor,
-                             char *ttyPath)
-{
-    int rc = -1;
-    int flags;
-    int stacksize = getpagesize() * 4;
-    char *stack, *stacktop;
-    lxc_child_argv_t args = { vm->def, monitor, ttyPath };
-
-    /* allocate a stack for the container */
-    if (VIR_ALLOC_N(stack, stacksize) < 0) {
-        lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
-                 _("unable to allocate container stack"));
-        goto error_exit;
-    }
-    stacktop = stack + stacksize;
-
-    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
-
-    if (vm->def->nets != NULL)
-        flags |= CLONE_NEWNET;
-
-    vm->def->id = clone(lxcChild, stacktop, flags, &args);
-
-    DEBUG("clone() returned, %d", vm->def->id);
-
-    if (vm->def->id < 0) {
-        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("clone() failed, %s"), strerror(errno));
-        goto error_exit;
-    }
-
-    lxcSaveConfig(NULL, driver, vm, vm->def);
-
-    rc = 0;
-
-error_exit:
-    return rc;
-}
-
 
 /**
  * lxcOpenTty:
@@ -716,170 +575,6 @@
     return rc;
 }
 
-/**
- * lxcFdForward:
- * @readFd: file descriptor to read
- * @writeFd: file desriptor to write
- *
- * Reads 1 byte of data from readFd and writes to writeFd.
- *
- * Returns 0 on success, EAGAIN if returned on read, or -1 in case of error
- */
-static int lxcFdForward(int readFd, int writeFd)
-{
-    int rc = -1;
-    char buf[2];
-
-    if (1 != (saferead(readFd, buf, 1))) {
-        if (EAGAIN == errno) {
-            rc = EAGAIN;
-            goto cleanup;
-        }
-
-        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("read of fd %d failed: %s"), readFd, strerror(errno));
-        goto cleanup;
-    }
-
-    if (1 != (safewrite(writeFd, buf, 1))) {
-        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("write to fd %d failed: %s"), writeFd, strerror(errno));
-        goto cleanup;
-    }
-
-    rc = 0;
-
-cleanup:
-    return rc;
-}
-
-typedef struct _lxcTtyForwardFd_t {
-    int fd;
-    bool active;
-} lxcTtyForwardFd_t;
-
-/**
- * lxcTtyForward:
- * @fd1: Open fd
- * @fd1: Open fd
- *
- * Forwards traffic between fds.  Data read from fd1 will be written to fd2
- * This process loops forever.
- * This uses epoll in edge triggered mode to avoid a hard loop on POLLHUP
- * events when the user disconnects the virsh console via ctrl-]
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcTtyForward(int fd1, int fd2)
-{
-    int rc = -1;
-    int epollFd;
-    struct epoll_event epollEvent;
-    int numEvents;
-    int numActive = 0;
-    lxcTtyForwardFd_t fdArray[2];
-    int timeout = -1;
-    int curFdOff = 0;
-    int writeFdOff = 0;
-
-    fdArray[0].fd = fd1;
-    fdArray[0].active = false;
-    fdArray[1].fd = fd2;
-    fdArray[1].active = false;
-
-    /* create the epoll fild descriptor */
-    epollFd = epoll_create(2);
-    if (0 > epollFd) {
-        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("epoll_create(2) failed: %s"), strerror(errno));
-        goto cleanup;
-    }
-
-    /* add the file descriptors the epoll fd */
-    memset(&epollEvent, 0x00, sizeof(epollEvent));
-    epollEvent.events = EPOLLIN|EPOLLET;    /* edge triggered */
-    epollEvent.data.fd = fd1;
-    epollEvent.data.u32 = 0;                /* fdArray position */
-    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd1, &epollEvent)) {
-        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("epoll_ctl(fd1) failed: %s"), strerror(errno));
-        goto cleanup;
-    }
-    epollEvent.data.fd = fd2;
-    epollEvent.data.u32 = 1;                /* fdArray position */
-    if (0 > epoll_ctl(epollFd, EPOLL_CTL_ADD, fd2, &epollEvent)) {
-        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("epoll_ctl(fd2) failed: %s"), strerror(errno));
-        goto cleanup;
-    }
-
-    while (1) {
-        /* if active fd's, return if no events, else wait forever */
-        timeout = (numActive > 0) ? 0 : -1;
-        numEvents = epoll_wait(epollFd, &epollEvent, 1, timeout);
-        if (0 < numEvents) {
-            if (epollEvent.events & EPOLLIN) {
-                curFdOff = epollEvent.data.u32;
-                if (!fdArray[curFdOff].active) {
-                    fdArray[curFdOff].active = true;
-                    ++numActive;
-                }
-
-            } else if (epollEvent.events & EPOLLHUP) {
-                DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
-                continue;
-            } else {
-                lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                         _("error event %d"), epollEvent.events);
-                goto cleanup;
-            }
-
-        } else if (0 == numEvents) {
-            if (2 == numActive) {
-                /* both fds active, toggle between the two */
-                curFdOff ^= 1;
-            } else {
-                /* only one active, if current is active, use it, else it */
-                /* must be the other one (ie. curFd just went inactive) */
-                curFdOff = fdArray[curFdOff].active ? curFdOff : curFdOff ^ 1;
-            }
-
-        } else  {
-            if (EINTR == errno) {
-                continue;
-            }
-
-            /* error */
-            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                     _("epoll_wait() failed: %s"), strerror(errno));
-            goto cleanup;
-
-        }
-
-        if (0 < numActive) {
-            writeFdOff = curFdOff ^ 1;
-            rc = lxcFdForward(fdArray[curFdOff].fd, fdArray[writeFdOff].fd);
-
-            if (EAGAIN == rc) {
-                /* this fd no longer has data, set it as inactive */
-                --numActive;
-                fdArray[curFdOff].active = false;
-            } else if (-1 == rc) {
-                goto cleanup;
-            }
-
-        }
-
-    }
-
-    rc = 0;
-
-cleanup:
-    close(fd1);
-    close(fd2);
-    close(epollFd);
-    exit(rc);
-}
 
 /**
  * lxcVmStart:
@@ -921,7 +616,7 @@
 
     if (vm->pid  == 0) {
         /* child process calls forward routine */
-        lxcTtyForward(parentTty, containerTty);
+        lxcControllerMain(parentTty, containerTty);
     }
 
     if (lxcStoreTtyPid(driver, vm)) {
@@ -945,17 +640,19 @@
 
     /* check this rc */
 
-    rc = lxcStartContainer(conn, driver, vm,
-                           sockpair[1],
-                           containerTtyPath);
-    if (rc != 0)
+    vm->def->id = lxcContainerStart(conn,
+                                    vm->def,
+                                    sockpair[1],
+                                    containerTtyPath);
+    if (vm->def->id == -1)
         goto cleanup;
+    lxcSaveConfig(conn, driver, vm, vm->def);
 
     rc = lxcMoveInterfacesToNetNs(conn, vm);
     if (rc != 0)
         goto cleanup;
 
-    rc = lxcSendContainerContinue(conn, sockpair[0]);
+    rc = lxcContainerSendContinue(conn, sockpair[0]);
     if (rc != 0)
         goto cleanup;
 
@@ -1196,16 +893,15 @@
 {
     const char *argv[] = {"ip", "link", "set", "lo", "netns", "-1", NULL};
     int ip_rc;
-    int user_netns = 0;
-    int kern_netns = 0;
 
-    if (virRun(NULL, (char **)argv, &ip_rc) == 0)
-        user_netns = WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255);
+    if (virRun(NULL, (char **)argv, &ip_rc) < 0 ||
+        !(WIFEXITED(ip_rc) && (WEXITSTATUS(ip_rc) != 255)))
+        return 0;
 
-    if (lxcCheckContainerSupport(CLONE_NEWNET) == 0)
-        kern_netns = 1;
+    if (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0)
+        return 0;
 
-    return kern_netns && user_netns;
+    return 1;
 }
 
 static int lxcStartup(void)
@@ -1222,9 +918,8 @@
     }
 
     /* Check that this is a container enabled kernel */
-    if(0 != lxcCheckContainerSupport(0)) {
+    if(lxcContainerAvailable(0) < 0)
         return -1;
-    }
 
     lxc_driver->have_netns = lxcCheckNetNsSupport();
 


-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




More information about the libvir-list mailing list