[libvirt] PATCH: Fix redetection of transient QEMU VMs on daemon restarts
Daniel Veillard
veillard at redhat.com
Tue Jun 2 13:55:27 UTC 2009
On Wed, May 27, 2009 at 04:58:44PM +0100, Daniel P. Berrange wrote:
> When the libvirtd daemon starts up, it reads all config files from
> /etc/libvirt/qemu. For each config file loaded, it then probes for
> a pidfile / live status XML config in /var/run/libvirt/qemu.
>
> In retrospect there is an obvious problem with this approach: it misses
> all transient VMs which don't ever have config files in /etc/libvirt/qemu.
> The core goal of this patch, is thus to change the way we deal with startup
> to apply the following logic.
>
> - Scan & load all status files in /var/run/libvirt/qemu for running VMs
> - Reconnect to the monitor for all VMs, and re-detect VCPU threads.
> - Scan & load all inactive configs in /etc/libvirt/qemu
>
> This ensures we always re-detect all inactive and running VMs, whether
> transient or persistent.
>
> It also fixes two other bugs, first we forgot to detect VCPU threads,
> so vCPU affinity functions were broken, and it also re-reserves the MCS
> category with selinux so it is not re-used later.
>
> Finally, if the attempt to reconnect to the QEMU monitor for a running
> VM fails, then we explicitly kill off that VM, rather than just ignoring
> it. This avoids leaving orphaned QEMU processes.
>
> The patch is larger than anticipated, because I moved all the status
Well that's a lot of changes too !
But this sounds good. Maybe we shoudl try some testing like repeating
/etc/init.d/libvirtd start and stop a few hundred times while keeping
some activity and see what's left at the end ...
> file code out of QEMU driver into the generic domain_conf.c file. This
> enables both scanning loops to be done via the single method
> virDomainLoadAllConfigs, just by pasing different directories. I also
> want to re-use this code in the LXC and UML drivers to improve bugs in
> their logic
okay, so there is some general refactoring too.
[...]
> + xmlXPathContextPtr ctxt)
> +{
> + char *tmp = NULL;
> + long val;
> + xmlNodePtr config;
> + xmlNodePtr oldctxt;
I would s/oldctxt/oldnode/ as what is saved is really only the old
XPath current node not the context itself.
[...]
> +char *virDomainObjFormat(virConnectPtr conn,
> + virDomainObjPtr obj,
> + int flags)
> +{
> + char *config_xml = NULL, *xml = NULL;
> + virBuffer buf = VIR_BUFFER_INITIALIZER;
> +
> + virBufferVSprintf(&buf, "<domstatus state='%s' pid='%d'>\n",
> + virDomainStateTypeToString(obj->state),
> + obj->pid);
> + virBufferEscapeString(&buf, " <monitor path='%s'/>\n", obj->monitorpath);
> +
> + if (!(config_xml = virDomainDefFormat(conn,
> + obj->def,
> + flags)))
Hum we are leaking the buffer content here.
> + goto cleanup;
> +
> + virBufferAdd(&buf, config_xml, strlen(config_xml));
> + virBufferAddLit(&buf, "</domstatus>\n");
> +
> + xml = virBufferContentAndReset(&buf);
> +cleanup:
> + VIR_FREE(config_xml);
> + return xml;
> +
> +}
[...]
> +static virDomainObjPtr virDomainLoadStatus(virConnectPtr conn,
> + virCapsPtr caps,
> + virDomainObjListPtr doms,
> + const char *statusDir,
> + const char *name,
> + virDomainLoadConfigNotify notify,
> + void *opaque)
> +{
> + char *statusFile = NULL;
> + virDomainObjPtr obj = NULL;
> + virDomainObjPtr tmp = NULL;
> +
> + if ((statusFile = virDomainConfigFile(conn, statusDir, name)) == NULL)
> + goto error;
> +
> + if (!(obj = virDomainObjParseFile(conn, caps, statusFile)))
> + goto error;
> +
> + tmp = virDomainFindByName(doms, obj->def->name);
> + if (tmp) {
> + virDomainObjUnlock(obj);
> + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR,
> + _("unexpected domain %s already exists"), obj->def->name);
let's wrap to 80 columns
[...]
> +/*
> + * Open an existing VM's monitor, and re-detect VCPUs
> + * threads
maybe update the comment about the security labels too, especially as
this is a bit arcane.
> + */
> +static int
> +qemuReconnectDomain(struct qemud_driver *driver,
> + virDomainObjPtr obj)
> +{
> + int rc;
> +
> + if ((rc = qemudOpenMonitor(NULL, driver, obj, obj->monitorpath, 1)) != 0) {
> + VIR_ERROR(_("Failed to reconnect monitor for %s: %d\n"),
> + obj->def->name, rc);
> + goto error;
> + }
> +
> + if (qemudDetectVcpuPIDs(NULL, obj) < 0) {
> + goto error;
> + }
> +
> + if (obj->def->seclabel.type == VIR_DOMAIN_SECLABEL_DYNAMIC &&
> + driver->securityDriver &&
> + driver->securityDriver->domainReserveSecurityLabel &&
> + driver->securityDriver->domainReserveSecurityLabel(NULL, obj) < 0)
> + return -1;
> +
> + if (obj->def->id >= driver->nextvmid)
> + driver->nextvmid = obj->def->id + 1;
> +
> + return 0;
> +
> +error:
> + return -1;
> +}
> +
[...]
> @@ -1331,6 +1330,7 @@ static int qemudStartVMDaemon(virConnect
> int pos = -1;
> char ebuf[1024];
> char *pidfile = NULL;
> + int logfile;
>
> struct gemudHookData hookData;
> hookData.conn = conn;
> @@ -1372,7 +1372,7 @@ static int qemudStartVMDaemon(virConnect
> goto cleanup;
> }
>
> - if((vm->logfile = qemudLogFD(conn, driver->logDir, vm->def->name)) < 0)
> + if ((logfile = qemudLogFD(conn, driver->logDir, vm->def->name)) < 0)
> goto cleanup;
>
> emulator = vm->def->emulator;
> @@ -1421,29 +1421,29 @@ static int qemudStartVMDaemon(virConnect
>
> tmp = progenv;
> while (*tmp) {
> - if (safewrite(vm->logfile, *tmp, strlen(*tmp)) < 0)
> + if (safewrite(logfile, *tmp, strlen(*tmp)) < 0)
> VIR_WARN(_("Unable to write envv to logfile: %s\n"),
> virStrerror(errno, ebuf, sizeof ebuf));
> - if (safewrite(vm->logfile, " ", 1) < 0)
> + if (safewrite(logfile, " ", 1) < 0)
> VIR_WARN(_("Unable to write envv to logfile: %s\n"),
> virStrerror(errno, ebuf, sizeof ebuf));
> tmp++;
> }
> tmp = argv;
> while (*tmp) {
> - if (safewrite(vm->logfile, *tmp, strlen(*tmp)) < 0)
> + if (safewrite(logfile, *tmp, strlen(*tmp)) < 0)
> VIR_WARN(_("Unable to write argv to logfile: %s\n"),
> virStrerror(errno, ebuf, sizeof ebuf));
> - if (safewrite(vm->logfile, " ", 1) < 0)
> + if (safewrite(logfile, " ", 1) < 0)
> VIR_WARN(_("Unable to write argv to logfile: %s\n"),
> virStrerror(errno, ebuf, sizeof ebuf));
> tmp++;
> }
> - if (safewrite(vm->logfile, "\n", 1) < 0)
> + if (safewrite(logfile, "\n", 1) < 0)
> VIR_WARN(_("Unable to write argv to logfile: %s\n"),
> virStrerror(errno, ebuf, sizeof ebuf));
>
> - if ((pos = lseek(vm->logfile, 0, SEEK_END)) < 0)
> + if ((pos = lseek(logfile, 0, SEEK_END)) < 0)
> VIR_WARN(_("Unable to seek to end of logfile: %s\n"),
> virStrerror(errno, ebuf, sizeof ebuf));
>
> @@ -1451,7 +1451,7 @@ static int qemudStartVMDaemon(virConnect
> FD_SET(tapfds[i], &keepfd);
>
> ret = virExecDaemonize(conn, argv, progenv, &keepfd, &child,
> - stdin_fd, &vm->logfile, &vm->logfile,
> + stdin_fd, &logfile, &logfile,
> VIR_EXEC_NONBLOCK,
> qemudSecurityHook, &hookData,
> pidfile);
> @@ -1501,7 +1501,7 @@ static int qemudStartVMDaemon(virConnect
> (qemudInitCpus(conn, vm, migrateFrom) < 0) ||
> (qemudInitPasswords(conn, driver, vm) < 0) ||
> (qemudDomainSetMemoryBalloon(conn, vm, vm->def->memory) < 0) ||
> - (qemudSaveDomainStatus(conn, qemu_driver, vm) < 0)) {
> + (virDomainSaveStatus(conn, driver->stateDir, vm) < 0)) {
> qemudShutdownVMDaemon(conn, driver, vm);
> ret = -1;
> /* No need for 'goto cleanup' now since qemudShutdownVMDaemon does enough */
> @@ -1519,10 +1519,8 @@ cleanup:
> vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
> vm->def->graphics[0]->data.vnc.autoport)
> vm->def->graphics[0]->data.vnc.port = -1;
> - if (vm->logfile != -1) {
> - close(vm->logfile);
> - vm->logfile = -1;
> - }
> + if (logfile != -1)
> + close(logfile);
> vm->def->id = -1;
> return -1;
> }
Hum, do we still use vm->logfile field then ? Maybe I didn't see the
place where it was removed from the structure.
Looks good except for the couple of things raised earlier, thanks !
ACK
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/
daniel at veillard.com | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library http://libvirt.org/
More information about the libvir-list
mailing list