[libvirt] [PATCH] Avoid hidden cgroup mount points

Juan Antonio Hernandez Fernandez jhernand at redhat.com
Wed Jul 12 12:44:30 UTC 2017


On Tue, Jul 11, 2017 at 9:25 AM, Martin Kletzander <mkletzan at redhat.com>
wrote:

> On Thu, Jul 06, 2017 at 05:03:31PM +0200, Juan Hernandez wrote:
>
>> Currently the scan of the /proc/mounts file used to find cgroup mount
>> points doesn't take into account that mount points may hidden by other
>> mount points. For, example in certain Kubernetes environments the
>> /proc/mounts contains the following lines:
>>
>>  cgroup /sys/fs/cgroup/net_prio,net_cls cgroup ...
>>  tmpfs /sys/fs/cgroup tmpfs ...
>>  cgroup /sys/fs/cgroup/net_cls,net_prio cgroup ...
>>
>> In this particular environment the first mount point is hidden by the
>> second one. The correct mount point is the third one, but libvirt will
>> never process it because it only checks the first mount point for each
>> controller (net_cls in this case). So libvirt will try to use the first
>> mount point, which doesn't actually exist, and the complete detection
>> process will fail.
>>
>> To avoid that issue this patch changes the virCgroupDetectMountsFromFile
>> function so that when there are duplicates it takes the information from
>> the last line in /proc/mounts. This requires removing the previous
>> explicit condition to skip duplicates, and adding code to free the
>> memory used by the processing of duplicated lines.
>>
>> Related-To: https://bugzilla.redhat.com/1468214
>> Related-To: https://github.com/kubevirt/libvirt/issues/4
>> Signed-off-by: Juan Hernandez <jhernand at redhat.com>
>> ---
>> src/util/vircgroup.c                | 23 ++++++++++++++---------
>> tests/vircgroupdata/kubevirt.mounts | 36 ++++++++++++++++++++++++++++++
>> ++++++
>> tests/vircgroupdata/kubevirt.parsed | 10 ++++++++++
>> tests/vircgrouptest.c               |  1 +
>> 4 files changed, 61 insertions(+), 9 deletions(-)
>> create mode 100644 tests/vircgroupdata/kubevirt.mounts
>> create mode 100644 tests/vircgroupdata/kubevirt.parsed
>>
>> diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
>> index 5aa1db5..41d90e7 100644
>> --- a/src/util/vircgroup.c
>> +++ b/src/util/vircgroup.c
>> @@ -397,6 +397,7 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>>             const char *typestr = virCgroupControllerTypeToString(i);
>>             int typelen = strlen(typestr);
>>             char *tmp = entry.mnt_opts;
>> +            struct virCgroupController *controller =
>> &group->controllers[i];
>>             while (tmp) {
>>                 char *next = strchr(tmp, ',');
>>                 int len;
>> @@ -406,18 +407,21 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>>                 } else {
>>                     len = strlen(tmp);
>>                 }
>> -                /* NB, the same controller can appear >1 time in mount
>> list
>> -                 * due to bind mounts from one location to another. Pick
>> the
>> -                 * first entry only
>> -                 */
>> -                if (typelen == len && STREQLEN(typestr, tmp, len) &&
>> -                    !group->controllers[i].mountPoint) {
>> +
>> +                if (typelen == len && STREQLEN(typestr, tmp, len)) {
>>                     char *linksrc;
>>                     struct stat sb;
>>                     char *tmp2;
>>
>> -                    if (VIR_STRDUP(group->controllers[i].mountPoint,
>> -                                   entry.mnt_dir) < 0)
>> +                    /* Note that the lines in /proc/mounts have the same
>> +                     * order than the mount operations, and that there
>> may
>> +                     * be duplicates due to bind mounts. This means
>> +                     * that the same mount point may be processed more
>> than
>> +                     * once. We need to save the results of the last one,
>> +                     * and we need to be careful to release the memory
>> used
>> +                     * by previous processing. */
>> +                    VIR_FREE(controller->mountPoint);
>>
>
> You need to free the linkPoint here as well as it is no longer valid if
> we are throwing out the previous entry.
>
> There's also pre-existing leak with linksrc in this function.
>
>
> +                    if (VIR_STRDUP(controller->mountPoint,
>> entry.mnt_dir) < 0)
>>                         goto error;
>>
>>                     tmp2 = strrchr(entry.mnt_dir, '/');
>> @@ -453,7 +457,8 @@ virCgroupDetectMountsFromFile(virCgroupPtr group,
>>                                 VIR_WARN("Expecting a symlink at %s for
>> controller %s",
>>                                          linksrc, typestr);
>>                             } else {
>> -                                group->controllers[i].linkPoint =
>> linksrc;
>> +                                VIR_FREE(controller->linkPoint);
>> +                                controller->linkPoint = linksrc;
>>                             }
>>                         }
>>                     }
>> diff --git a/tests/vircgroupdata/kubevirt.mounts
>> b/tests/vircgroupdata/kubevirt.mounts
>> new file mode 100644
>> index 0000000..b0d31bb
>> --- /dev/null
>> +++ b/tests/vircgroupdata/kubevirt.mounts
>> @@ -0,0 +1,36 @@
>> +tmpfs /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,mode=755 0 0
>> +cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatim
>> e,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
>> 0 0
>> +cgroup /sys/fs/cgroup/cpuacct,cpu cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
>> 0 0
>> +cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids
>> 0 0
>> +cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio
>> 0 0
>> +cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
>> 0 0
>> +cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
>> 0 0
>> +cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
>> 0 0
>> +cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
>> 0 0
>> +cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer
>> 0 0
>> +cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event
>> 0 0
>> +cgroup /sys/fs/cgroup/net_prio,net_cls cgroup
>> rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>> +tmpfs /host-sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755
>> 0 0
>> +cgroup /host-sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatim
>> e,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
>> 0 0
>> +cgroup /host-sys/fs/cgroup/cpu,cpuacct cgroup
>> rw,nosuid,nodev,noexec,relatime,cpuacct,cpu 0 0
>> +cgroup /host-sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids
>> 0 0
>> +cgroup /host-sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio
>> 0 0
>> +cgroup /host-sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
>> 0 0
>> +cgroup /host-sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
>> 0 0
>> +cgroup /host-sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
>> 0 0
>> +cgroup /host-sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
>> 0 0
>> +cgroup /host-sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer
>> 0 0
>> +cgroup /host-sys/fs/cgroup/perf_event cgroup
>> rw,nosuid,nodev,noexec,relatime,perf_event 0 0
>> +cgroup /host-sys/fs/cgroup/net_cls,net_prio cgroup
>> rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>> +tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,relatime,mode=755 0 0
>> +cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatim
>> e,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
>> 0 0
>> +cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
>> 0 0
>> +cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids
>> 0 0
>> +cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio
>> 0 0
>> +cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
>> 0 0
>> +cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
>> 0 0
>> +cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
>> 0 0
>> +cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
>> 0 0
>> +cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer
>> 0 0
>> +cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event
>> 0 0
>> +cgroup /sys/fs/cgroup/net_cls,net_prio cgroup
>> rw,nosuid,nodev,noexec,relatime,net_prio,net_cls 0 0
>>
>
> It's not clear what we're testing here, I would either add just two
> controllers mounted in three places (just keep the net_{cls,prio}) or
> copy some existing test and add two lines so that the diff later in the
> history shows exactly what is being tested if used with '-C', e.g.:
>
> diff --git a/tests/vircgroupdata/cgroups2.mounts
> b/tests/vircgroupdata/cgroups4.mounts
> similarity index 86%
> copy from tests/vircgroupdata/cgroups2.mounts
> copy to tests/vircgroupdata/cgroups4.mounts
> index 21ab867d71ed..2b06c2ea1e16 100644
> --- a/tests/vircgroupdata/cgroups2.mounts
> +++ b/tests/vircgroupdata/cgroups4.mounts
> @@ -10,8 +10,10 @@ shm /dev/shm tmpfs rw,nosuid,nodev,noexec,relatime 0 0
> debugfs /sys/kernel/debug debugfs rw,nosuid,nodev,noexec,relatime 0 0
> cgroup_root /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,relatime,size=10240k,mode=755
> 0 0
> openrc /sys/fs/cgroup/openrc cgroup rw,nosuid,nodev,noexec,relatim
> e,release_agent=/lib64/rc/sh/cgroup-release-agent.sh,name=openrc 0 0
> +cpuset /some/random/location/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
> 0 0
> cpuset /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
> 0 0
> cpu /sys/fs/cgroup/cpu cgroup rw,nosuid,nodev,noexec,relatime,cpu 0 0
> +cpuacct /some/random/location cgroup rw,nosuid,nodev,noexec,relatime,cpuacct
> 0 0
> cpuacct /sys/fs/cgroup/cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpuacct
> 0 0
> memory /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
> 0 0
> devices /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices
> 0 0
> @@ -20,3 +22,4 @@ blkio /sys/fs/cgroup/blkio cgroup
> rw,nosuid,nodev,noexec,relatime,blkio 0 0
> perf_event /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event
> 0 0
> hugetlb /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
> 0 0
> binfmt_misc /proc/sys/fs/binfmt_misc binfmt_misc
> rw,nosuid,nodev,noexec,relatime 0 0
> +freezer /some/random/location/freezer cgroup
> rw,nosuid,nodev,noexec,relatime,freezer 0 0
> diff --git a/tests/vircgroupdata/cgroups2.parsed
> b/tests/vircgroupdata/cgroups4.parsed
> similarity index 100%
> copy from tests/vircgroupdata/cgroups2.parsed
> copy to tests/vircgroupdata/cgroups4.parsed
> --
>
> You see how clear that is from the above diff?
>
> Other than that I think this works nicely, so ACK if you are OK with the
> proposed changes, I'll squash them in and push it.
>
>
Your suggestions look good to me, so ACK, feel free to squash and push
Martin.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20170712/3410985e/attachment-0001.htm>


More information about the libvir-list mailing list