[libvirt] [PATCH] nodeinfo: skip offline CPUs
Daniel Veillard
veillard at redhat.com
Tue Aug 10 22:01:00 UTC 2010
On Tue, Aug 10, 2010 at 03:44:37PM -0600, Eric Blake wrote:
> https://bugzilla.redhat.com/622515 - When hot-unplugging CPUs,
> libvirt failed to start a guest that had been pinned to CPUs that
> were still online, because it was failing to read status from
> unrelated offline CPUs.
Argh, yes that's a nasty problem !
> Tested on a dual-core laptop, where I also discovered that, per
> http://www.cyberciti.biz/files/linux-kernel/Documentation/cpu-hotplug.txt,
> /sys/devices/system/cpu/cpu0/online does not exist on systems where it
> cannot be hot-unplugged.
>
> * src/nodeinfo.c (linuxNodeInfoCPUPopulate): Ignore CPUs that are
> currently offline. Detect readdir failure.
> (parse_socket): Move guts...
> (get_cpu_value): ...to new function, shared with...
> (cpu_online): New function.
> ---
> src/nodeinfo.c | 109 +++++++++++++++++++++++++++++++++++++------------------
> 1 files changed, 73 insertions(+), 36 deletions(-)
>
> diff --git a/src/nodeinfo.c b/src/nodeinfo.c
> index 5ec1bcf..6286aa2 100644
> --- a/src/nodeinfo.c
> +++ b/src/nodeinfo.c
> @@ -23,6 +23,7 @@
>
> #include <config.h>
>
> +#include <stdbool.h>
> #include <stdio.h>
> #include <string.h>
> #include <stdlib.h>
> @@ -60,6 +61,58 @@
> int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> virNodeInfoPtr nodeinfo);
>
> +/* Return the positive decimal contents of the given
> + * CPU_SYS_PATH/cpu%u/FILE, or -1 on error. If MISSING_OK and the
> + * file could not be found, return 1 instead of an error; this is
> + * because some machines cannot hot-unplug cpu0. */
> +static int get_cpu_value(unsigned int cpu, const char *file, bool missing_ok)
> +{
> + char *path;
> + FILE *pathfp;
> + char value_str[1024];
> + char *tmp;
> + int value = -1;
> +
> + if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/%s", cpu, file) < 0) {
> + virReportOOMError();
> + return -1;
> + }
> +
> + pathfp = fopen(path, "r");
> + if (pathfp == NULL) {
> + if (missing_ok && errno == ENOENT)
> + value = 1;
> + else
> + virReportSystemError(errno, _("cannot open %s"), path);
> + goto cleanup;
> + }
> +
> + if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
> + virReportSystemError(errno, _("cannot read from %s"), path);
> + goto cleanup;
> + }
> + if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
> + nodeReportError(VIR_ERR_INTERNAL_ERROR,
> + _("could not convert '%s' to an integer"),
> + value_str);
> + goto cleanup;
> + }
> +
> +cleanup:
> + if (pathfp)
> + fclose(pathfp);
> + VIR_FREE(path);
> +
> + return value;
> +}
> +
> +/* Check if CPU is online via CPU_SYS_PATH/cpu%u/online. Return 1 if online,
> + 0 if offline, and -1 on error. */
> +static int cpu_online(unsigned int cpu)
> +{
> + return get_cpu_value(cpu, "online", cpu == 0);
> +}
> +
> static unsigned long count_thread_siblings(unsigned int cpu)
> {
> unsigned long ret = 0;
> @@ -106,41 +159,7 @@ cleanup:
>
> static int parse_socket(unsigned int cpu)
> {
> - char *path;
> - FILE *pathfp;
> - char socket_str[1024];
> - char *tmp;
> - int socket = -1;
> -
> - if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/topology/physical_package_id",
> - cpu) < 0) {
> - virReportOOMError();
> - return -1;
> - }
> -
> - pathfp = fopen(path, "r");
> - if (pathfp == NULL) {
> - virReportSystemError(errno, _("cannot open %s"), path);
> - VIR_FREE(path);
> - return -1;
> - }
> -
> - if (fgets(socket_str, sizeof(socket_str), pathfp) == NULL) {
> - virReportSystemError(errno, _("cannot read from %s"), path);
> - goto cleanup;
> - }
> - if (virStrToLong_i(socket_str, &tmp, 10, &socket) < 0) {
> - nodeReportError(VIR_ERR_INTERNAL_ERROR,
> - _("could not convert '%s' to an integer"),
> - socket_str);
> - goto cleanup;
> - }
> -
> -cleanup:
> - fclose(pathfp);
> - VIR_FREE(path);
> -
> - return socket;
> + return get_cpu_value(cpu, "topology/physical_package_id", false);
> }
>
> int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> @@ -153,6 +172,8 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> unsigned long cur_threads;
> int socket;
> unsigned long long socket_mask = 0;
> + unsigned int remaining;
> + int online;
>
> nodeinfo->cpus = 0;
> nodeinfo->mhz = 0;
> @@ -222,15 +243,25 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> /* OK, we've parsed what we can out of /proc/cpuinfo. Get the socket
> * and thread information from /sys
> */
> + remaining = nodeinfo->cpus;
> cpudir = opendir(CPU_SYS_PATH);
> if (cpudir == NULL) {
> virReportSystemError(errno, _("cannot opendir %s"), CPU_SYS_PATH);
> return -1;
> }
> - while ((cpudirent = readdir(cpudir))) {
> + while (errno = 0, remaining && (cpudirent = readdir(cpudir))) {
> if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
> continue;
>
> + online = cpu_online(cpu);
> + if (online < 0) {
> + closedir(cpudir);
> + return -1;
> + }
> + if (!online)
> + continue;
> + remaining--;
> +
> socket = parse_socket(cpu);
> if (socket < 0) {
> closedir(cpudir);
> @@ -249,6 +280,12 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
> if (cur_threads > nodeinfo->threads)
> nodeinfo->threads = cur_threads;
> }
> + if (errno) {
> + virReportSystemError(errno,
> + _("problem reading %s"), CPU_SYS_PATH);
> + closedir(cpudir);
> + return -1;
> + }
>
> closedir(cpudir);
>
ACK, that looks fine !
thanks !
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/
daniel at veillard.com | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library http://libvirt.org/
More information about the libvir-list
mailing list