[libvirt] [PATCH] Avoid hidden cgroup mount points

Martin Kletzander mkletzan at redhat.com
Tue Jul 11 07:25:53 UTC 2017


On Thu, Jul 06, 2017 at 05:03:31PM +0200, Juan Hernandez wrote:
>Currently the scan of the /proc/mounts file used to find cgroup mount
>points doesn't take into account that mount points may hidden by other
>mount points. For, example in certain Kubernetes environments the
>/proc/mounts contains the following lines:
>
>  cgroup /sys/fs/cgroup/net_prio,net_cls cgroup ...
>  tmpfs /sys/fs/cgroup tmpfs ...
>  cgroup /sys/fs/cgroup/net_cls,net_prio cgroup ...
>
>In this particular environment the first mount point is hidden by the
>second one. The correct mount point is the third one, but libvirt will
>never process it because it only checks the first mount point for each
>controller (net_cls in this case). So libvirt will try to use the first
>mount point, which doesn't actually exist, and the complete detection
>process will fail.
>
>To avoid that issue this patch changes the virCgroupDetectMountsFromFile
>function so that when there are duplicates it takes the information from
>the last line in /proc/mounts. This requires removing the previous
>explicit condition to skip duplicates, and adding code to free the
>memory used by the processing of duplicated lines.
>
>Related-To: https://bugzilla.redhat.com/1468214
>Related-To: https://github.com/kubevirt/libvirt/issues/4
>Signed-off-by: Juan Hernandez <jhernand at redhat.com>
>---
> src/util/vircgroup.c                | 23 ++++++++++++++---------
> tests/vircgroupdata/kubevirt.mounts | 36 ++++++++++++++++++++++++++++++++++++
> tests/vircgroupdata/kubevirt.parsed | 10 ++++++++++
> tests/vircgrouptest.c               |  1 +
> 4 files changed, 61 insertions(+), 9 deletions(-)
> create mode 100644 tests/vircgroupdata/kubevirt.mounts
> create mode 100644 tests/vircgroupdata/kubevirt.parsed
>
>diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
>index 5aa1db5..41d90e7 100644
>--- a/src/util/vircgroup.c
>+++ b/src/util/vircgroup.c
>@@ -397,6 +397,7 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>             const char *typestr = virCgroupControllerTypeToString(i);
>             int typelen = strlen(typestr);
>             char *tmp = entry.mnt_opts;
>+            struct virCgroupController *controller = &group->controllers[i];
>             while (tmp) {
>                 char *next = strchr(tmp, ',');
>                 int len;
>@@ -406,18 +407,21 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>                 } else {
>                     len = strlen(tmp);
>                 }
>-                /* NB, the same controller can appear >1 time in mount list
>-                 * due to bind mounts from one location to another. Pick the
>-                 * first entry only
>-                 */
>-                if (typelen == len && STREQLEN(typestr, tmp, len) &&
>-                    !group->controllers[i].mountPoint) {
>+
>+                if (typelen == len && STREQLEN(typestr, tmp, len)) {
>                     char *linksrc;
>                     struct stat sb;
>                     char *tmp2;
>
>-                    if (VIR_STRDUP(group->controllers[i].mountPoint,
>-                                   entry.mnt_dir) < 0)
>+                    /* Note that the lines in /proc/mounts have the same
>+                     * order than the mount operations, and that there may
>+                     * be duplicates due to bind mounts. This means
>+                     * that the same mount point may be processed more than
>+                     * once. We need to save the results of the last one,
>+                     * and we need to be careful to release the memory used
>+                     * by previous processing. */
>+                    VIR_FREE(controller->mountPoint);

You need to free the linkPoint here as well as it is no longer valid if
we are throwing out the previous entry.

There's also pre-existing leak with linksrc in this function.

>+                    if (VIR_STRDUP(controller->mountPoint, entry.mnt_dir) < 0)
>                         goto error;
>
>                     tmp2 = strrchr(entry.mnt_dir, '/');
>@@ -453,7 +457,8 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>                                 VIR_WARN("Expecting a symlink at %s for controller %s",
>                                          linksrc, typestr);
>                             } else {
>-                                group->controllers[i].linkPoint = linksrc;
>+                                VIR_FREE(controller->linkPoint);
>+                                controller->linkPoint = linksrc;
>                             }
>                         }
>                     }
>diff --git a/tests/vircgroupdata/kubevirt.mounts b/tests/vircgroupdata/kubevirt.mounts
>new file mode 100644
>index 0000000..b0d31bb
>--- /dev/null
>+++ b/tests/vircgroupdata/kubevirt.mounts
>@@ -0,0 +1,36 @@
>+tmpfs /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,mode=755 0 0
>+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
>+cgroup /sys/fs/cgroup/cpuacct,cpu cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
>+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
>+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
>+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
>+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
>+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
>+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
>+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>+cgroup /sys/fs/cgroup/net_prio,net_cls cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>+tmpfs /host-sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
>+cgroup /host-sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
>+cgroup /host-sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>+cgroup /host-sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
>+cgroup /host-sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
>+cgroup /host-sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
>+cgroup /host-sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
>+cgroup /host-sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
>+cgroup /host-sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
>+cgroup /host-sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
>+cgroup /host-sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>+cgroup /host-sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>+tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
>+cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0
>+cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>+cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
>+cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
>+cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
>+cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
>+cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
>+cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
>+cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
>+cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>+cgroup /sys/fs/cgroup/net_cls,net_prio cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0

It's not clear what we're testing here, I would either add just two
controllers mounted in three places (just keep the net_{cls,prio}) or
copy some existing test and add two lines so that the diff later in the
history shows exactly what is being tested if used with '-C', e.g.:

diff --git a/tests/vircgroupdata/cgroups2.mounts b/tests/vircgroupdata/cgroups4.mounts
similarity index 86%
copy from tests/vircgroupdata/cgroups2.mounts
copy to tests/vircgroupdata/cgroups4.mounts
index 21ab867d71ed..2b06c2ea1e16 100644
--- a/tests/vircgroupdata/cgroups2.mounts
+++ b/tests/vircgroupdata/cgroups4.mounts
@@ -10,8 +10,10 @@ shm /dev/shm tmpfs rw,nosuid,nodev,noexec,relatime 0 0
 debugfs /sys/kernel/debug debugfs rw,nosuid,nodev,noexec,relatime 0 0
 cgroup_root /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,size=10240k,mode=755 0 0
 openrc /sys/fs/cgroup/openrc cgroup rw,nosuid,nodev,noexec,relatime,release_agent=/lib64/rc/sh/cgroup-release-agent.sh,name=openrc 0 0
+cpuset /some/random/location/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
 cpuset /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
 cpu /sys/fs/cgroup/cpu cgroup rw,nosuid,nodev,noexec,relatime,cpu 0 0
+cpuacct /some/random/location cgroup rw,nosuid,nodev,noexec,relatime,cpuacct 0 0
 cpuacct /sys/fs/cgroup/cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct 0 0
 memory /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
 devices /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
@@ -20,3 +22,4 @@ blkio /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
 perf_event /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
 hugetlb /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
 binfmt_misc /proc/sys/fs/binfmt_misc binfmt_misc rw,nosuid,nodev,noexec,relatime 0 0
+freezer /some/random/location/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
diff --git a/tests/vircgroupdata/cgroups2.parsed b/tests/vircgroupdata/cgroups4.parsed
similarity index 100%
copy from tests/vircgroupdata/cgroups2.parsed
copy to tests/vircgroupdata/cgroups4.parsed
--

You see how clear that is from the above diff?

Other than that I think this works nicely, so ACK if you are OK with the
proposed changes, I'll squash them in and push it.

Have a nice day,
Martin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20170711/d413ffef/attachment-0001.sig>


More information about the libvir-list mailing list