[libvirt] [PATCH] qemu: allocate network connections sooner during domain startup
Michal Privoznik
mprivozn at redhat.com
Tue May 7 09:52:52 UTC 2013
On 06.05.2013 22:16, Laine Stump wrote:
> VFIO device assignment requires a cgroup ACL to be setup for access to
> the /dev/vfio/nn "group" device for any devices that will be assigned
> to a guest. In the case of a host device that is allocated from a
> pool, it was being allocated during qemuBuildCommandLine(), which is
> called by qemuProcessStart() *after* the all-encompassing
> qemuSetupCgroup() is called, meaning that the standard Cgroup ACL
> setup wasn't catching these devices allocated from pools.
>
> One possible solution was to manually add a single ACL down inside
> qemuBuildCommandLine() when networkAllocateActualDevice() is called,
> but that has two problems: 1) the function that adds the cgroup ACL
> requires a virDomainObjPtr, which isn't available in
> qemuBuildCommandLine(), and 2) we really shouldn't be doing network
> device setup inside qemuBuildCommandLine() anyway.
>
> Instead, I've created a new function called
> qemuNetworkPrepareDevices() which is called just before
> qemuPrepareHostDevices() during qemuProcessStart() (explanation of
> ordering in the comments), i.e. well before the call to
> qemuSetupCgroup(). To minimize code churn in a patch that will be
> backported to 1.0.5-maint, qemuNetworkPrepareDevices only does
> networkAllocateActualDevice() and the bare amount of setup required
> for type='hostdev network devices.
>
> Note that some of the code that was previously needed in
> qemuBuildCommandLine() is no longer required when
> networkAllocateActualDevice() is called earlier:
>
> * qemuAssignDeviceHostdevAlias() is already done further down in
> qemuProcessStart().
>
> * qemuPrepareHostdevPCIDevices() is called by
> qemuPrepareHostDevices() which is called after
> qemuNetworkPrepareDevices() in qemuProcessStart().
>
> This new function should be moved into a separate qemu_network.c (or
> similarly named) file along with qemuPhysIfaceConnect(),
> qemuNetworkIfaceConnect(), and qemuOpenVhostNet(), and expanded to call
> those functions as well, then the nnets loop in qemuBuildCommandLine() should
> be reduced to only build the commandline string. However, this will
> require storing away an array of tapfd and vhostfd that are needed
> for the commandline, so I would rather do that in a separate patch and
> leave this patch at the minimum to fix the bug.
> ---
> src/qemu/qemu_command.c | 104 ++++++++++++++++++++++++++----------------------
> src/qemu/qemu_command.h | 4 +-
> src/qemu/qemu_process.c | 8 ++++
> 3 files changed, 67 insertions(+), 49 deletions(-)
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 144620c..5ce97e8 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -457,6 +457,58 @@ qemuOpenVhostNet(virDomainDefPtr def,
> return 0;
> }
>
> +int
> +qemuNetworkPrepareDevices(virDomainDefPtr def)
> +{
> + int ret = -1;
> + int ii;
> +
> + for (ii = 0; ii < def->nnets; ii++) {
> + virDomainNetDefPtr net = def->nets[ii];
> + int actualType;
> +
> + /* If appropriate, grab a physical device from the configured
> + * network's pool of devices, or resolve bridge device name
> + * to the one defined in the network definition.
> + */
> + if (networkAllocateActualDevice(net) < 0)
> + goto cleanup;
> +
> + actualType = virDomainNetGetActualType(net);
> + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV &&
> + net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
> + /* Each type='hostdev' network device must also have a
> + * corresponding entry in the hostdevs array. For netdevs
> + * that are hardcoded as type='hostdev', this is already
> + * done by the parser, but for those allocated from a
> + * network / determined at runtime, we need to do it
> + * separately.
> + */
> + virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
> +
> + if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("PCI device %04x:%02x:%02x.%x "
> + "allocated from network %s is already "
> + "in use by domain %s"),
> + hostdev->source.subsys.u.pci.addr.domain,
> + hostdev->source.subsys.u.pci.addr.bus,
> + hostdev->source.subsys.u.pci.addr.slot,
> + hostdev->source.subsys.u.pci.addr.function,
> + net->data.network.name,
> + def->name);
> + goto cleanup;
> + }
> + if (virDomainHostdevInsert(def, hostdev) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> + }
> + }
> + ret = 0;
> +cleanup:
> + return ret;
> +}
>
> static int qemuDomainDeviceAliasIndex(virDomainDeviceInfoPtr info,
> const char *prefix)
> @@ -7106,12 +7158,15 @@ qemuBuildCommandLine(virConnectPtr conn,
> char vhostfd_name[50] = "";
> int vlan;
> int bootindex = bootNet;
> - int actualType;
> + int actualType = virDomainNetGetActualType(net);
>
> bootNet = 0;
> if (!bootindex)
> bootindex = net->info.bootIndex;
>
I'd expect a one line comment here at least to give a reason why we are
skipping VIR_DOMAIN_NET_TYPE_HOSTDEV. Something like:
/* hostdev interfaces already handled by qemuNetworkPrepareDevices */
It's obvious now, but if we get later to this code it won't be IMO.
> + if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV)
> + continue;
> +
> /* VLANs are not used with -netdev, so don't record them */
> if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_NETDEV) &&
> virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE))
I actually posted a patch which moves the whole block into a separate
function, which is what we should do:
https://www.redhat.com/archives/libvir-list/2013-April/msg01550.html
> @@ -7119,53 +7174,6 @@ qemuBuildCommandLine(virConnectPtr conn,
> else
> vlan = i;
>
> - /* If appropriate, grab a physical device from the configured
> - * network's pool of devices, or resolve bridge device name
> - * to the one defined in the network definition.
> - */
> - if (networkAllocateActualDevice(net) < 0)
> - goto error;
> -
> - actualType = virDomainNetGetActualType(net);
> - if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> - if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
> - virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
> - virDomainHostdevDefPtr found;
> - /* For a network with <forward mode='hostdev'>, there is a need to
> - * add the newly minted hostdev to the hostdevs array.
> - */
> - if (qemuAssignDeviceHostdevAlias(def, hostdev,
> - (def->nhostdevs-1)) < 0) {
> - goto error;
> - }
> -
> - if (virDomainHostdevFind(def, hostdev, &found) < 0) {
> - if (virDomainHostdevInsert(def, hostdev) < 0) {
> - virReportOOMError();
> - goto error;
> - }
> - if (qemuPrepareHostdevPCIDevices(driver, def->name, def->uuid,
> - &hostdev, 1) < 0) {
> - goto error;
> - }
> - }
> - else {
> - virReportError(VIR_ERR_INTERNAL_ERROR,
> - _("PCI device %04x:%02x:%02x.%x "
> - "allocated from network %s is already "
> - "in use by domain %s"),
> - hostdev->source.subsys.u.pci.addr.domain,
> - hostdev->source.subsys.u.pci.addr.bus,
> - hostdev->source.subsys.u.pci.addr.slot,
> - hostdev->source.subsys.u.pci.addr.function,
> - net->data.network.name,
> - def->name);
> - goto error;
> - }
> - }
> - continue;
> - }
> -
> if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK ||
> actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) {
> int tapfd = qemuNetworkIfaceConnect(def, conn, driver, net,
> diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
> index a706942..e690dee 100644
> --- a/src/qemu/qemu_command.h
> +++ b/src/qemu/qemu_command.h
> @@ -1,7 +1,7 @@
> /*
> * qemu_command.h: QEMU command generation
> *
> - * Copyright (C) 2006-2012 Red Hat, Inc.
> + * Copyright (C) 2006-2013 Red Hat, Inc.
> * Copyright (C) 2006 Daniel P. Berrange
> *
> * This library is free software; you can redistribute it and/or
> @@ -161,6 +161,8 @@ int qemuOpenVhostNet(virDomainDefPtr def,
> virQEMUCapsPtr qemuCaps,
> int *vhostfd);
>
> +int qemuNetworkPrepareDevices(virDomainDefPtr def);
> +
> /*
> * NB: def->name can be NULL upon return and the caller
> * *must* decide how to fill in a name in this case
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 3dd178c..d7b0aee 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3422,6 +3422,14 @@ int qemuProcessStart(virConnectPtr conn,
> goto cleanup;
> }
>
> + /* network devices must be "prepared" before hostdevs, because
> + * setting up a network device might create a new hostdev that
> + * will need to be setup.
> + */
> + VIR_DEBUG("Preparing network devices");
Is it worth mentioning s/network/network host/?
> + if (qemuNetworkPrepareDevices(vm->def) < 0)
> + goto cleanup;
> +
> /* Must be run before security labelling */
> VIR_DEBUG("Preparing host devices");
> if (qemuPrepareHostDevices(driver, vm->def, !migrateFrom) < 0)
>
ACK if you add the comment. The debug message fix is optional.
Michal
More information about the libvir-list
mailing list