[libvirt] [PATCHv2] lxc: give RW access to /proc/sys/net/ipv[46] to containers

Cédric Bosdonnat cbosdonnat at suse.com
Tue Dec 9 09:47:24 UTC 2014


Some programs need to change network interface configuration values
under /proc/sys/net/ipv[46]. Giving containers RW access to these
directories allows wicked to work on openSUSE 13.2+.

In order to mount those folders RW but keep the rest of /proc/sys RO,
we bind-mount them to temporary directories before bind-mounting
/proc/sys read-only, then bind them back on top of the read-only tree.
Those mounts are skipped if the container doesn't have its own network
namespace.

The source of one of the temporary mounts in the /proc filesystem may
not exist because of a missing kernel feature (for example an unloaded
kernel module). We must not fail in that case; the mount is skipped.
---

Diffs to v1:

 * Only mount /proc/sys/net/ipv[46] RW if the container has its own netns.
 * Don't test for the existence of files in /proc before mounting them: they
   may not be ready at the time of the check. Instead, try to mount them and
   skip them if the source doesn't exist.
 * Use the existing lxcNeedNetworkNamespace() to tell
   lxcContainerMountBasicFS() whether we have our own netns, so that the
   proper value is now passed.

 src/lxc/lxc_container.c | 153 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 121 insertions(+), 32 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 3b08b86..140d54f 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -800,15 +800,21 @@ typedef struct {
     int mflags;
     bool skipUserNS;
     bool skipUnmounted;
+    bool skipNoPrivNet;
+    bool temporary;
 } virLXCBasicMountInfo;
 
 static const virLXCBasicMountInfo lxcBasicMounts[] = {
-    { "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false },
-    { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_RDONLY, false, false },
-    { "sysfs", "/sys", "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false },
-    { "securityfs", "/sys/kernel/security", "securityfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true },
+    { "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false, false, false },
+    { "/proc/sys/net/ipv4", "TMP1", NULL, MS_BIND, false, false, true, true },
+    { "/proc/sys/net/ipv6", "TMP2", NULL, MS_BIND, false, false, true, true },
+    { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_RDONLY, false, false, false, false },
+    { "TMP1", "/proc/sys/net/ipv4", NULL, MS_BIND, false, false, true, false },
+    { "TMP2", "/proc/sys/net/ipv6", NULL, MS_BIND, false, false, true, false },
+    { "sysfs", "/sys", "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false, false, false },
+    { "securityfs", "/sys/kernel/security", "securityfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true, false, false },
 #if WITH_SELINUX
-    { SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true },
+    { SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true, false, false },
 #endif
 };
 
@@ -885,14 +891,23 @@ static int lxcContainerSetReadOnly(void)
 static int lxcContainerMountBasicFS(bool userns_enabled,
                                     bool netns_disabled)
 {
-    size_t i;
+    size_t i, j;
     int rc = -1;
     char* mnt_src = NULL;
+    char* mnt_dst = NULL;
     int mnt_mflags;
+    char **tmpkeys = NULL;
+    char **tmppaths = NULL;
+    size_t nmounts = ARRAY_CARDINALITY(lxcBasicMounts);
 
     VIR_DEBUG("Mounting basic filesystems");
 
-    for (i = 0; i < ARRAY_CARDINALITY(lxcBasicMounts); i++) {
+    if (VIR_ALLOC_N(tmpkeys, nmounts) < 0 ||
+        VIR_ALLOC_N(tmppaths, nmounts) < 0) {
+        goto cleanup;
+    }
+
+    for (i = 0; i < nmounts; i++) {
         bool bindOverReadonly;
         virLXCBasicMountInfo const *mnt = &lxcBasicMounts[i];
 
@@ -906,11 +921,41 @@ static int lxcContainerMountBasicFS(bool userns_enabled,
                 goto cleanup;
             mnt_mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY|MS_BIND;
         } else {
-            if (VIR_STRDUP(mnt_src, mnt->src) < 0)
+            bool foundKey = false;
+            /* Look for potential temporary folder match */
+            for (j = 0; j < i; j++) {
+                if (STREQ_NULLABLE(mnt->src, tmpkeys[j])) {
+                    /* We found the key without path: skip */
+                    foundKey = true;
+                    if (tmppaths[j] && VIR_STRDUP(mnt_src, tmppaths[j]) < 0)
+                        goto cleanup;
+                    break;
+                }
+            }
+            if (foundKey && !mnt_src)
+                continue;
+            if (!mnt_src && VIR_STRDUP(mnt_src, mnt->src) < 0)
                 goto cleanup;
             mnt_mflags = mnt->mflags;
         }
 
+        if (mnt->temporary) {
+            char tmppath[] = "/tmp/mount-XXXXXX";
+            if (mkdtemp(tmppath) == NULL) {
+                virReportSystemError(errno,
+                                     _("Failed to create temporary folder %s"),
+                                     tmppath);
+            }
+            if (VIR_STRDUP(tmppaths[i], tmppath) < 0 ||
+                VIR_STRDUP(tmpkeys[i], mnt->dst) < 0 ||
+                VIR_STRDUP(mnt_dst, tmppath) < 0) {
+                goto cleanup;
+            }
+        } else {
+            if (VIR_STRDUP(mnt_dst, mnt->dst) < 0)
+                goto cleanup;
+        }
+
         VIR_DEBUG("Processing %s -> %s",
                   mnt_src, mnt->dst);
 
@@ -930,6 +975,7 @@ static int lxcContainerMountBasicFS(bool userns_enabled,
                 VIR_DEBUG("Skipping '%s' which isn't mounted in host",
                           mnt->dst);
                 VIR_FREE(mnt_src);
+                VIR_FREE(mnt_dst);
                 continue;
             }
         }
@@ -937,13 +983,21 @@ static int lxcContainerMountBasicFS(bool userns_enabled,
         if (mnt->skipUserNS && userns_enabled) {
             VIR_DEBUG("Skipping due to user ns enablement");
             VIR_FREE(mnt_src);
+            VIR_FREE(mnt_dst);
+            continue;
+        }
+
+        if (mnt->skipNoPrivNet && netns_disabled) {
+            VIR_DEBUG("Skipping due to absence of network namespace");
+            VIR_FREE(mnt_src);
+            VIR_FREE(mnt_dst);
             continue;
         }
 
-        if (virFileMakePath(mnt->dst) < 0) {
+        if (virFileMakePath(mnt_dst) < 0) {
             virReportSystemError(errno,
                                  _("Failed to mkdir %s"),
-                                 mnt_src);
+                                 mnt_dst);
             goto cleanup;
         }
 
@@ -957,32 +1011,67 @@ static int lxcContainerMountBasicFS(bool userns_enabled,
         bindOverReadonly = !!(mnt_mflags & MS_RDONLY);
 
         VIR_DEBUG("Mount %s on %s type=%s flags=%x",
-                  mnt_src, mnt->dst, mnt->type, mnt_mflags & ~MS_RDONLY);
-        if (mount(mnt_src, mnt->dst, mnt->type, mnt_mflags & ~MS_RDONLY, NULL) < 0) {
+                  mnt_src, mnt_dst, mnt->type, mnt_mflags & ~MS_RDONLY);
+        if (mount(mnt_src, mnt_dst, mnt->type, mnt_mflags & ~MS_RDONLY, NULL) < 0) {
+            /* Don't shout if some folder doesn't exist in /proc since they
+             * can be depending on an unloaded kernel module. */
+            if (errno == ENOENT && STRPREFIX(mnt_src, "/proc/")) {
+                VIR_DEBUG("Skipped: %s doesn't exist", mnt_src);
+                /* Cleanup the temporary path */
+                if (virFileDeleteTree(tmppaths[i]) < 0)
+                    virReportError(VIR_ERR_INTERNAL_ERROR,
+                                   _("Failed to remove temporary folder %s"),
+                                   tmppaths[i]);
+                VIR_FREE(tmppaths[i]);
+
+                VIR_FREE(mnt_src);
+                VIR_FREE(mnt_dst);
+                continue;
+            }
             virReportSystemError(errno,
                                  _("Failed to mount %s on %s type %s flags=%x"),
-                                 mnt_src, mnt->dst, NULLSTR(mnt->type),
+                                 mnt_src, mnt_dst, NULLSTR(mnt->type),
                                  mnt_mflags & ~MS_RDONLY);
             goto cleanup;
         }
 
         if (bindOverReadonly &&
-            mount(mnt_src, mnt->dst, NULL,
+            mount(mnt_src, mnt_dst, NULL,
                   MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
             virReportSystemError(errno,
                                  _("Failed to re-mount %s on %s flags=%x"),
-                                 mnt_src, mnt->dst,
+                                 mnt_src, mnt_dst,
                                  MS_BIND|MS_REMOUNT|MS_RDONLY);
             goto cleanup;
         }
 
         VIR_FREE(mnt_src);
+        VIR_FREE(mnt_dst);
     }
 
     rc = 0;
 
  cleanup:
+    /* Cleanup temporary mounts */
+    for (i = 0; i < nmounts; i++) {
+        virLXCBasicMountInfo const *mnt = &lxcBasicMounts[i];
+        if (mnt->temporary && tmppaths[i]) {
+            if (umount(tmppaths[i]) < 0) {
+                virReportSystemError(errno,
+                                     _("Failed to un-mount temporary %s"),
+                                     tmppaths[i]);
+            }
+            if (virFileDeleteTree(tmppaths[i]) < 0)
+               virReportError(VIR_ERR_INTERNAL_ERROR,
+                              _("Failed to remove temporary folder %s"),
+                              tmppaths[i]);
+        }
+    }
+
+    virStringFreeList(tmpkeys);
+    virStringFreeList(tmppaths);
     VIR_FREE(mnt_src);
+    VIR_FREE(mnt_dst);
     VIR_DEBUG("rc=%d", rc);
     return rc;
 }
@@ -1696,6 +1785,22 @@ static int lxcContainerUnmountForSharedRoot(const char *stateDir,
     return ret;
 }
 
+static bool
+lxcNeedNetworkNamespace(virDomainDefPtr def)
+{
+    size_t i;
+    if (def->nets != NULL)
+        return true;
+    if (def->features[VIR_DOMAIN_FEATURE_PRIVNET] == VIR_TRISTATE_SWITCH_ON)
+        return true;
+    for (i = 0; i < def->nhostdevs; i++) {
+        if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES &&
+            def->hostdevs[i]->source.caps.type == VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET)
+            return true;
+    }
+    return false;
+}
+
 
 /* Got a FS mapped to /, we're going the pivot_root
  * approach to do a better-chroot-than-chroot
@@ -1741,7 +1846,7 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
 
     /* Mounts the core /proc, /sys, etc filesystems */
     if (lxcContainerMountBasicFS(vmDef->idmap.nuidmap,
-                                 !vmDef->nnets) < 0)
+                                 !lxcNeedNetworkNamespace(vmDef)) < 0)
         goto cleanup;
 
     /* Ensure entire root filesystem (except /.oldroot) is readonly */
@@ -2240,22 +2345,6 @@ virArch lxcContainerGetAlt32bitArch(virArch arch)
 }
 
 
-static bool
-lxcNeedNetworkNamespace(virDomainDefPtr def)
-{
-    size_t i;
-    if (def->nets != NULL)
-        return true;
-    if (def->features[VIR_DOMAIN_FEATURE_PRIVNET] == VIR_TRISTATE_SWITCH_ON)
-        return true;
-    for (i = 0; i < def->nhostdevs; i++) {
-        if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES &&
-            def->hostdevs[i]->source.caps.type == VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET)
-            return true;
-    }
-    return false;
-}
-
 /**
  * lxcContainerStart:
  * @def: pointer to virtual machine structure
-- 
2.1.2




More information about the libvir-list mailing list