[libvirt] [PATCH] qemu: allocate network connections sooner during domain startup

Michal Privoznik mprivozn at redhat.com
Tue May 7 09:52:52 UTC 2013


On 06.05.2013 22:16, Laine Stump wrote:
> VFIO device assignment requires a cgroup ACL to be setup for access to
> the /dev/vfio/nn "group" device for any devices that will be assigned
> to a guest. In the case of a host device that is allocated from a
> pool, it was being allocated during qemuBuildCommandLine(), which is
> called by qemuProcessStart() *after* the all-encompassing
> qemuSetupCgroup() is called, meaning that the standard Cgroup ACL
> setup wasn't catching these devices allocated from pools.
> 
> One possible solution was to manually add a single ACL down inside
> qemuBuildCommandLine() when networkAllocateActualDevice() is called,
> but that has two problems: 1) the function that adds the cgroup ACL
> requires a virDomainObjPtr, which isn't available in
> qemuBuildCommandLine(), and 2) we really shouldn't be doing network
> device setup inside qemuBuildCommandLine() anyway.
> 
> Instead, I've created a new function called
> qemuNetworkPrepareDevices() which is called just before
> qemuPrepareHostDevices() during qemuProcessStart() (explanation of
> ordering in the comments), i.e. well before the call to
> qemuSetupCgroup(). To minimize code churn in a patch that will be
> backported to 1.0.5-maint, qemuNetworkPrepareDevices only does
> networkAllocateActualDevice() and the bare amount of setup required
> for type='hostdev network devices.
> 
> Note that some of the code that was previously needed in
> qemuBuildCommandLine() is no longer required when
> networkAllocateActualDevice() is called earlier:
> 
>  * qemuAssignDeviceHostdevAlias() is already done further down in
>    qemuProcessStart().
> 
>  * qemuPrepareHostdevPCIDevices() is called by
>    qemuPrepareHostDevices() which is called after
>    qemuNetworkPrepareDevices() in qemuProcessStart().
> 
> This new function should be moved into a separate qemu_network.c (or
> similarly named) file along with qemuPhysIfaceConnect(),
> qemuNetworkIfaceConnect(), and qemuOpenVhostNet(), and expanded to call
> those functions as well, then the nnets loop in qemuBuildCommandLine() should
> be reduced to only build the commandline string. However, this will
> require storing away an array of tapfd and vhostfd that are needed
> for the commandline, so I would rather do that in a separate patch and
> leave this patch at the minimum to fix the bug.
> ---
>  src/qemu/qemu_command.c | 104 ++++++++++++++++++++++++++----------------------
>  src/qemu/qemu_command.h |   4 +-
>  src/qemu/qemu_process.c |   8 ++++
>  3 files changed, 67 insertions(+), 49 deletions(-)
> 
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 144620c..5ce97e8 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -457,6 +457,58 @@ qemuOpenVhostNet(virDomainDefPtr def,
>      return 0;
>  }
>  
> +int
> +qemuNetworkPrepareDevices(virDomainDefPtr def)
> +{
> +    int ret = -1;
> +    int ii;
> +
> +    for (ii = 0; ii < def->nnets; ii++) {
> +        virDomainNetDefPtr net = def->nets[ii];
> +        int actualType;
> +
> +        /* If appropriate, grab a physical device from the configured
> +         * network's pool of devices, or resolve bridge device name
> +         * to the one defined in the network definition.
> +         */
> +        if (networkAllocateActualDevice(net) < 0)
> +            goto cleanup;
> +
> +        actualType = virDomainNetGetActualType(net);
> +        if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV &&
> +            net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
> +            /* Each type='hostdev' network device must also have a
> +             * corresponding entry in the hostdevs array. For netdevs
> +             * that are hardcoded as type='hostdev', this is already
> +             * done by the parser, but for those allocated from a
> +             * network / determined at runtime, we need to do it
> +             * separately.
> +             */
> +            virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
> +
> +            if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               _("PCI device %04x:%02x:%02x.%x "
> +                                 "allocated from network %s is already "
> +                                 "in use by domain %s"),
> +                               hostdev->source.subsys.u.pci.addr.domain,
> +                               hostdev->source.subsys.u.pci.addr.bus,
> +                               hostdev->source.subsys.u.pci.addr.slot,
> +                               hostdev->source.subsys.u.pci.addr.function,
> +                               net->data.network.name,
> +                               def->name);
> +                goto cleanup;
> +            }
> +            if (virDomainHostdevInsert(def, hostdev) < 0) {
> +                virReportOOMError();
> +                goto cleanup;
> +            }
> +        }
> +    }
> +    ret = 0;
> +cleanup:
> +    return ret;
> +}
>  
>  static int qemuDomainDeviceAliasIndex(virDomainDeviceInfoPtr info,
>                                        const char *prefix)
> @@ -7106,12 +7158,15 @@ qemuBuildCommandLine(virConnectPtr conn,
>              char vhostfd_name[50] = "";
>              int vlan;
>              int bootindex = bootNet;
> -            int actualType;
> +            int actualType = virDomainNetGetActualType(net);
>  
>              bootNet = 0;
>              if (!bootindex)
>                  bootindex = net->info.bootIndex;
>  

I'd expect a one line comment here at least to give a reason why we are
skipping VIR_DOMAIN_NET_TYPE_HOSTDEV. Something like:
/* hostdev interfaces already handled by qemuNetworkPrepareDevices */

It's obvious now, but if we get later to this code it won't be IMO.

> +            if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV)
> +                continue;
> +
>              /* VLANs are not used with -netdev, so don't record them */
>              if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_NETDEV) &&
>                  virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE))

I actually posted a patch which moves the whole block into a separate
function, which is what we should do:

https://www.redhat.com/archives/libvir-list/2013-April/msg01550.html


> @@ -7119,53 +7174,6 @@ qemuBuildCommandLine(virConnectPtr conn,
>              else
>                  vlan = i;
>  
> -            /* If appropriate, grab a physical device from the configured
> -             * network's pool of devices, or resolve bridge device name
> -             * to the one defined in the network definition.
> -             */
> -            if (networkAllocateActualDevice(net) < 0)
> -               goto error;
> -
> -            actualType = virDomainNetGetActualType(net);
> -            if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> -                if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
> -                    virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
> -                    virDomainHostdevDefPtr found;
> -                    /* For a network with <forward mode='hostdev'>, there is a need to
> -                     * add the newly minted hostdev to the hostdevs array.
> -                     */
> -                    if (qemuAssignDeviceHostdevAlias(def, hostdev,
> -                                                     (def->nhostdevs-1)) < 0) {
> -                        goto error;
> -                    }
> -
> -                    if (virDomainHostdevFind(def, hostdev, &found) < 0) {
> -                        if (virDomainHostdevInsert(def, hostdev) < 0) {
> -                            virReportOOMError();
> -                            goto error;
> -                        }
> -                        if (qemuPrepareHostdevPCIDevices(driver, def->name, def->uuid,
> -                                                         &hostdev, 1) < 0) {
> -                            goto error;
> -                        }
> -                    }
> -                    else {
> -                        virReportError(VIR_ERR_INTERNAL_ERROR,
> -                                       _("PCI device %04x:%02x:%02x.%x "
> -                                         "allocated from network %s is already "
> -                                         "in use by domain %s"),
> -                                       hostdev->source.subsys.u.pci.addr.domain,
> -                                       hostdev->source.subsys.u.pci.addr.bus,
> -                                       hostdev->source.subsys.u.pci.addr.slot,
> -                                       hostdev->source.subsys.u.pci.addr.function,
> -                                       net->data.network.name,
> -                                       def->name);
> -                        goto error;
> -                    }
> -                }
> -                continue;
> -            }
> -
>              if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK ||
>                  actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) {
>                  int tapfd = qemuNetworkIfaceConnect(def, conn, driver, net,
> diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
> index a706942..e690dee 100644
> --- a/src/qemu/qemu_command.h
> +++ b/src/qemu/qemu_command.h
> @@ -1,7 +1,7 @@
>  /*
>   * qemu_command.h: QEMU command generation
>   *
> - * Copyright (C) 2006-2012 Red Hat, Inc.
> + * Copyright (C) 2006-2013 Red Hat, Inc.
>   * Copyright (C) 2006 Daniel P. Berrange
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -161,6 +161,8 @@ int qemuOpenVhostNet(virDomainDefPtr def,
>                       virQEMUCapsPtr qemuCaps,
>                       int *vhostfd);
>  
> +int qemuNetworkPrepareDevices(virDomainDefPtr def);
> +
>  /*
>   * NB: def->name can be NULL upon return and the caller
>   * *must* decide how to fill in a name in this case
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 3dd178c..d7b0aee 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3422,6 +3422,14 @@ int qemuProcessStart(virConnectPtr conn,
>              goto cleanup;
>      }
>  
> +    /* network devices must be "prepared" before hostdevs, because
> +     * setting up a network device might create a new hostdev that
> +     * will need to be setup.
> +     */
> +    VIR_DEBUG("Preparing network devices");

Is it worth mentioning s/network/network host/?

> +    if (qemuNetworkPrepareDevices(vm->def) < 0)
> +       goto cleanup;
> +
>      /* Must be run before security labelling */
>      VIR_DEBUG("Preparing host devices");
>      if (qemuPrepareHostDevices(driver, vm->def, !migrateFrom) < 0)
> 

ACK if you add the comment. The debug message fix is optional.

Michal




More information about the libvir-list mailing list