[libvirt] [PATCHv4 3/4] qemu: Adapt qemuBuildInterfaceCommandLine to multiqueue net
Laine Stump
laine at laine.org
Tue May 21 16:37:51 UTC 2013
On 05/21/2013 10:18 AM, Michal Privoznik wrote:
> In order to teach libvirt multiqueue, several things must be done:
>
> 1) The '/dev/net/tun' device needs to be opened multiple times with
> IFF_MULTI_QUEUE flag passed to ioctl(fd, TUNSETIFF, &ifr);
>
> 2) Similar, the '/dev/vhost-net' must be opened as many times as in 1)
s/Similar, the/Similarly,/
> in order to keep 1:1 ratio recommended by qemu and kernel folks.
>
> 3) The command line construction code needs to switch from 'fd=X' to
> 'fds=X:Y:...:Z' and from 'vhostfd=X' to 'vhostfds=X:Y:...:Z'.
>
> 4) The monitor handling code needs to learn to pass multiple FDs.
> ---
> src/network/bridge_driver.c | 2 +-
> src/qemu/qemu_command.c | 260 ++++++++++++++++++++++++++++++--------------
> src/qemu/qemu_command.h | 13 ++-
> src/qemu/qemu_hotplug.c | 98 ++++++++++++-----
> src/qemu/qemu_monitor.c | 78 +++++++------
> src/qemu/qemu_monitor.h | 8 +-
> src/uml/uml_conf.c | 5 +-
> src/util/virnetdevtap.c | 113 ++++++++++---------
> src/util/virnetdevtap.h | 2 +
> 9 files changed, 378 insertions(+), 201 deletions(-)
>
> diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
> index 6c6ce6d..ad4ab00 100644
> --- a/src/network/bridge_driver.c
> +++ b/src/network/bridge_driver.c
> @@ -2495,7 +2495,7 @@ networkStartNetworkVirtual(struct network_driver *driver,
> /* Keep tun fd open and interface up to allow for IPv6 DAD to happen */
> if (virNetDevTapCreateInBridgePort(network->def->bridge,
> &macTapIfName, &network->def->mac,
> - NULL, &tapfd, NULL, NULL,
> + NULL, &tapfd, 1, NULL, NULL,
> VIR_NETDEV_TAP_CREATE_USE_MAC_FOR_BRIDGE |
> VIR_NETDEV_TAP_CREATE_IFUP |
> VIR_NETDEV_TAP_CREATE_PERSIST) < 0) {
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 6f4028e..7059b08 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -281,11 +281,12 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
> virConnectPtr conn,
> virQEMUDriverPtr driver,
> virDomainNetDefPtr net,
> - virQEMUCapsPtr qemuCaps)
> + virQEMUCapsPtr qemuCaps,
> + int *tapfd,
> + int *tapfdSize)
> {
> char *brname = NULL;
> - int err;
> - int tapfd = -1;
> + int ret = -1;
> unsigned int tap_create_flags = VIR_NETDEV_TAP_CREATE_IFUP;
> bool template_ifname = false;
> int actualType = virDomainNetGetActualType(net);
> @@ -297,7 +298,7 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
> virNetworkPtr network = virNetworkLookupByName(conn,
> net->data.network.name);
> if (!network)
> - return -1;
> + return ret;
>
> active = virNetworkIsActive(network);
> if (active != 1) {
> @@ -322,18 +323,18 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
> virFreeError(errobj);
>
> if (fail)
> - return -1;
> + return ret;
>
> } else if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) {
> if (!(brname = strdup(virDomainNetGetActualBridgeName(net)))) {
> virReportOOMError();
> - return -1;
> + return ret;
> }
> } else {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("Network type %d is not supported"),
> virDomainNetGetActualType(net));
> - return -1;
> + return ret;
> }
>
> if (!net->ifname ||
> @@ -353,69 +354,95 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
> tap_create_flags |= VIR_NETDEV_TAP_CREATE_VNET_HDR;
> }
>
> - if (cfg->privileged)
> - err = virNetDevTapCreateInBridgePort(brname, &net->ifname, &net->mac,
> - def->uuid, &tapfd,
> - virDomainNetGetActualVirtPortProfile(net),
> - virDomainNetGetActualVlan(net),
> - tap_create_flags);
> - else
> - err = qemuCreateInBridgePortWithHelper(cfg, brname,
> - &net->ifname,
> - &tapfd, tap_create_flags);
> -
> - virDomainAuditNetDevice(def, net, "/dev/net/tun", tapfd >= 0);
> - if (err < 0) {
> - if (template_ifname)
> - VIR_FREE(net->ifname);
> - tapfd = -1;
> + if (cfg->privileged) {
> + if (virNetDevTapCreateInBridgePort(brname, &net->ifname, &net->mac,
> + def->uuid, tapfd, *tapfdSize,
> + virDomainNetGetActualVirtPortProfile(net),
> + virDomainNetGetActualVlan(net),
> + tap_create_flags) < 0) {
> + virDomainAuditNetDevice(def, net, "/dev/net/tun", false);
> + goto cleanup;
> + }
> + } else {
> + if (qemuCreateInBridgePortWithHelper(cfg, brname,
> + &net->ifname,
> + tapfd, tap_create_flags) < 0) {
> + virDomainAuditNetDevice(def, net, "/dev/net/tun", false);
> + goto cleanup;
> + }
> + /* qemuCreateInBridgePortWithHelper can only create a single FD */
> + *tapfdSize = 1;
Should we let this slide by silently? Or should we log an error and fail?
> }
>
> - if (cfg->macFilter) {
> - if ((err = networkAllowMacOnPort(driver, net->ifname, &net->mac))) {
> - virReportSystemError(err,
> - _("failed to add ebtables rule to allow MAC address on '%s'"),
> - net->ifname);
> - }
> + virDomainAuditNetDevice(def, net, "/dev/net/tun", true);
> +
> + if (cfg->macFilter &&
> + (ret = networkAllowMacOnPort(driver, net->ifname, &net->mac)) < 0) {
> + virReportSystemError(ret,
> + _("failed to add ebtables rule "
> + "to allow MAC address on '%s'"),
> + net->ifname);
I know this is existing code, but it's strange that this reports an
error then just continues.
> }
>
> - if (tapfd >= 0 &&
> - virNetDevBandwidthSet(net->ifname,
> + if (virNetDevBandwidthSet(net->ifname,
> virDomainNetGetActualBandwidth(net),
> false) < 0) {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("cannot set bandwidth limits on %s"),
> net->ifname);
> - VIR_FORCE_CLOSE(tapfd);
> goto cleanup;
> }
>
> - if (tapfd >= 0) {
> - if ((net->filter) && (net->ifname)) {
> - if (virDomainConfNWFilterInstantiate(conn, def->uuid, net) < 0)
> - VIR_FORCE_CLOSE(tapfd);
> - }
> - }
> + if (net->filter && net->ifname &&
> + virDomainConfNWFilterInstantiate(conn, def->uuid, net) < 0)
> + goto cleanup;
I may be in the minority here, but I like putting braces around the body
when the conditional takes multiple lines too.
> +
> +
> + ret = 0;
>
> cleanup:
> + if (ret < 0) {
> + int i;
> + for (i = 0; i < *tapfdSize; i++)
> + VIR_FORCE_CLOSE(tapfd[i]);
> + if (template_ifname)
> + VIR_FREE(net->ifname);
> + }
> VIR_FREE(brname);
> virObjectUnref(cfg);
>
> - return tapfd;
> + return ret;
> }
>
>
> +/**
> + * qemuOpenVhostNet:
> + * @def: domain definition
> + * @net: network definition
> + * @qemuCaps: qemu binary capabilities
> + * @vhostfd: array of opened vhost-net device
> + * @vhostfdSize: number of file descriptors in @vhostfd array
> + *
> + * Open vhost-net, multiple times - if requested.
> + * In case, no vhost-net is needed, @vhostfdSize is set to 0
> + * and 0 is returned.
> + *
> + * Returns: 0 on success
> + * -1 on failure
> + */
> int
> qemuOpenVhostNet(virDomainDefPtr def,
> virDomainNetDefPtr net,
> virQEMUCapsPtr qemuCaps,
> - int *vhostfd)
> + int *vhostfd,
> + int *vhostfdSize)
> {
> - *vhostfd = -1; /* assume we won't use vhost */
> + int i;
>
> /* If the config says explicitly to not use vhost, return now */
> if (net->driver.virtio.name == VIR_DOMAIN_NET_BACKEND_TYPE_QEMU) {
> - return 0;
> + *vhostfdSize = 0;
> + return 0;
> }
>
> /* If qemu doesn't support vhost-net mode (including the -netdev command
> @@ -430,6 +457,7 @@ qemuOpenVhostNet(virDomainDefPtr def,
> "this QEMU binary"));
> return -1;
> }
> + *vhostfdSize = 0;
> return 0;
> }
>
> @@ -441,23 +469,34 @@ qemuOpenVhostNet(virDomainDefPtr def,
> "virtio network interfaces"));
> return -1;
> }
> + *vhostfdSize = 0;
> return 0;
> }
>
> - *vhostfd = open("/dev/vhost-net", O_RDWR);
> - virDomainAuditNetDevice(def, net, "/dev/vhost-net", *vhostfd >= 0);
> + for (i = 0; i < *vhostfdSize; i++) {
> + vhostfd[i] = open("/dev/vhost-net", O_RDWR);
>
> - /* If the config says explicitly to use vhost and we couldn't open it,
> - * report an error.
> - */
> - if ((*vhostfd < 0) &&
> - (net->driver.virtio.name == VIR_DOMAIN_NET_BACKEND_TYPE_VHOST)) {
> - virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> - "%s", _("vhost-net was requested for an interface, "
> - "but is unavailable"));
> - return -1;
> + /* If the config says explicitly to use vhost and we couldn't open it,
> + * report an error.
> + */
> + if (vhostfd[i] < 0) {
> + virDomainAuditNetDevice(def, net, "/dev/vhost-net", false);
> + if (net->driver.virtio.name == VIR_DOMAIN_NET_BACKEND_TYPE_VHOST) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + "%s", _("vhost-net was requested for an interface, "
> + "but is unavailable"));
> + goto error;
> + }
> + }
> }
> + virDomainAuditNetDevice(def, net, "/dev/vhost-net", *vhostfdSize);
> return 0;
> +
> +error:
> + while (i--)
> + VIR_FORCE_CLOSE(vhostfd[i]);
> +
> + return -1;
> }
>
> int
> @@ -4109,13 +4148,16 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
> virQEMUDriverPtr driver,
> char type_sep,
> int vlan,
> - const char *tapfd,
> - const char *vhostfd)
> + char **tapfd,
> + int tapfdSize,
> + char **vhostfd,
> + int vhostfdSize)
> {
> bool is_tap = false;
> virBuffer buf = VIR_BUFFER_INITIALIZER;
> enum virDomainNetType netType = virDomainNetGetActualType(net);
> virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> + int i;
>
> if (net->script && netType != VIR_DOMAIN_NET_TYPE_ETHERNET) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> @@ -4134,7 +4176,19 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
> case VIR_DOMAIN_NET_TYPE_BRIDGE:
> case VIR_DOMAIN_NET_TYPE_NETWORK:
> case VIR_DOMAIN_NET_TYPE_DIRECT:
> - virBufferAsprintf(&buf, "tap%cfd=%s", type_sep, tapfd);
> + virBufferAsprintf(&buf, "tap%c", type_sep);
> + /* for one tapfd 'fd=' shall be used,
> + * for more than one 'fds=' is the right choice */
> + if (tapfdSize == 1) {
> + virBufferAsprintf(&buf, "fd=%s", tapfd[0]);
> + } else {
> + virBufferAddLit(&buf, "fds=");
> + for (i = 0; i < tapfdSize; i++) {
> + if (i)
> + virBufferAddChar(&buf, ':');
> + virBufferAdd(&buf, tapfd[i], -1);
> + }
> + }
> type_sep = ',';
> is_tap = true;
> break;
> @@ -4194,8 +4248,19 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
> }
>
> if (is_tap) {
> - if (vhostfd && *vhostfd)
> - virBufferAsprintf(&buf, ",vhost=on,vhostfd=%s", vhostfd);
> + if (vhostfdSize) {
> + virBufferAddLit(&buf, ",vhost=on,");
> + if (vhostfdSize == 1) {
> + virBufferAsprintf(&buf, "vhostfd=%s", vhostfd[0]);
> + } else {
> + virBufferAddLit(&buf, "vhostfds=");
> + for (i = 0; i < vhostfdSize; i++) {
> + if (i)
> + virBufferAddChar(&buf, ':');
> + virBufferAdd(&buf, vhostfd[i], -1);
> + }
> + }
> + }
> if (net->tune.sndbuf_specified)
> virBufferAsprintf(&buf, ",sndbuf=%lu", net->tune.sndbuf);
> }
> @@ -6418,12 +6483,15 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd,
> enum virNetDevVPortProfileOp vmop)
> {
> int ret = -1;
> - int tapfd = -1;
> - int vhostfd = -1;
> char *nic = NULL, *host = NULL;
> - char *tapfdName = NULL;
> - char *vhostfdName = NULL;
> + int *tapfd = NULL;
> + int tapfdSize = 0;
> + int *vhostfd = NULL;
> + int vhostfdSize = 0;
> + char **tapfdName = NULL;
> + char **vhostfdName = NULL;
> int actualType = virDomainNetGetActualType(net);
> + int i;
>
> if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
> /* NET_TYPE_HOSTDEV devices are really hostdev devices, so
> @@ -6437,12 +6505,24 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd,
>
> if (actualType == VIR_DOMAIN_NET_TYPE_NETWORK ||
> actualType == VIR_DOMAIN_NET_TYPE_BRIDGE) {
> - tapfd = qemuNetworkIfaceConnect(def, conn, driver, net, qemuCaps);
> - if (tapfd < 0)
> + if (VIR_ALLOC(tapfd) < 0 || VIR_ALLOC(tapfdName) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> +
> + tapfdSize = 1;
> + if (qemuNetworkIfaceConnect(def, conn, driver, net,
> + qemuCaps, tapfd, &tapfdSize) < 0)
> goto cleanup;
> } else if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
> - tapfd = qemuPhysIfaceConnect(def, driver, net, qemuCaps, vmop);
> - if (tapfd < 0)
> + if (VIR_ALLOC(tapfd) < 0 || VIR_ALLOC(tapfdName) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> + tapfdSize = 1;
> + tapfd[0] = qemuPhysIfaceConnect(def, driver, net,
> + qemuCaps, vmop);
> + if (tapfd[0] < 0)
> goto cleanup;
> }
>
> @@ -6452,23 +6532,31 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd,
> actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
> /* Attempt to use vhost-net mode for these types of
> network device */
> - if (qemuOpenVhostNet(def, net, qemuCaps, &vhostfd) < 0)
> + if (VIR_ALLOC(vhostfd) < 0 || VIR_ALLOC(vhostfdName)) {
> + virReportOOMError();
> + goto cleanup;
> + }
> + vhostfdSize = 1;
> +
> + if (qemuOpenVhostNet(def, net, qemuCaps, vhostfd, &vhostfdSize) < 0)
> goto cleanup;
> }
>
> - if (tapfd >= 0) {
> - virCommandTransferFD(cmd, tapfd);
> - if (virAsprintf(&tapfdName, "%d", tapfd) < 0) {
> + for (i = 0; i < tapfdSize; i++) {
> + virCommandTransferFD(cmd, tapfd[i]);
> + if (virAsprintf(&tapfdName[i], "%d", tapfd[i]) < 0) {
> virReportOOMError();
> goto cleanup;
> }
> }
>
> - if (vhostfd >= 0) {
> - virCommandTransferFD(cmd, vhostfd);
> - if (virAsprintf(&vhostfdName, "%d", vhostfd) < 0) {
> - virReportOOMError();
> - goto cleanup;
> + for (i = 0; i < vhostfdSize; i++) {
> + if (vhostfd[i] >= 0) {
> + virCommandTransferFD(cmd, vhostfd[i]);
> + if (virAsprintf(&vhostfdName[i], "%d", vhostfd[i]) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> }
> }
>
> @@ -6483,8 +6571,9 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd,
> if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_NETDEV) &&
> virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE)) {
> if (!(host = qemuBuildHostNetStr(net, driver,
> - ',', vlan, tapfdName,
> - vhostfdName)))
> + ',', vlan,
> + tapfdName, tapfdSize,
> + vhostfdName, vhostfdSize)))
> goto cleanup;
> virCommandAddArgList(cmd, "-netdev", host, NULL);
> }
> @@ -6500,8 +6589,9 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd,
> if (!(virQEMUCapsGet(qemuCaps, QEMU_CAPS_NETDEV) &&
> virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE))) {
> if (!(host = qemuBuildHostNetStr(net, driver,
> - ',', vlan, tapfdName,
> - vhostfdName)))
> + ',', vlan,
> + tapfdName, tapfdSize,
> + vhostfdName, vhostfdSize)))
> goto cleanup;
> virCommandAddArgList(cmd, "-net", host, NULL);
> }
> @@ -6510,6 +6600,18 @@ qemuBuildInterfaceCommandLine(virCommandPtr cmd,
> cleanup:
> if (ret < 0)
> virDomainConfNWFilterTeardown(net);
> + for (i = 0; i < tapfdSize; i++) {
> + if (ret < 0)
> + VIR_FORCE_CLOSE(tapfd[i]);
> + VIR_FREE(tapfdName[i]);
> + }
> + for (i = 0; i < vhostfdSize; i++) {
> + if (ret < 0)
> + VIR_FORCE_CLOSE(vhostfd[i]);
> + VIR_FREE(vhostfdName[i]);
> + }
> + VIR_FREE(tapfd);
> + VIR_FREE(vhostfd);
> VIR_FREE(nic);
> VIR_FREE(host);
> VIR_FREE(tapfdName);
> diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
> index 133e0b2..2993448 100644
> --- a/src/qemu/qemu_command.h
> +++ b/src/qemu/qemu_command.h
> @@ -87,8 +87,10 @@ char * qemuBuildHostNetStr(virDomainNetDefPtr net,
> virQEMUDriverPtr driver,
> char type_sep,
> int vlan,
> - const char *tapfd,
> - const char *vhostfd);
> + char **tapfd,
> + int tapfdSize,
> + char **vhostfd,
> + int vhostfdSize);
>
> /* Legacy, pre device support */
> char * qemuBuildNicStr(virDomainNetDefPtr net,
> @@ -169,7 +171,9 @@ int qemuNetworkIfaceConnect(virDomainDefPtr def,
> virConnectPtr conn,
> virQEMUDriverPtr driver,
> virDomainNetDefPtr net,
> - virQEMUCapsPtr qemuCaps)
> + virQEMUCapsPtr qemuCaps,
> + int *tapfd,
> + int *tapfdSize)
> ATTRIBUTE_NONNULL(2);
>
> int qemuPhysIfaceConnect(virDomainDefPtr def,
> @@ -181,7 +185,8 @@ int qemuPhysIfaceConnect(virDomainDefPtr def,
> int qemuOpenVhostNet(virDomainDefPtr def,
> virDomainNetDefPtr net,
> virQEMUCapsPtr qemuCaps,
> - int *vhostfd);
> + int *vhostfd,
> + int *vhostfdSize);
>
> int qemuNetworkPrepareDevices(virDomainDefPtr def);
>
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 88c3a6c..7e50592 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -690,10 +690,12 @@ int qemuDomainAttachNetDevice(virConnectPtr conn,
> virDomainNetDefPtr net)
> {
> qemuDomainObjPrivatePtr priv = vm->privateData;
> - char *tapfd_name = NULL;
> - int tapfd = -1;
> - char *vhostfd_name = NULL;
> - int vhostfd = -1;
> + char **tapfdName = NULL;
> + int *tapfd = NULL;
> + int tapfdSize = 0;
> + char **vhostfdName = NULL;
> + int *vhostfd = NULL;
> + int vhostfdSize = 0;
> char *nicstr = NULL;
> char *netstr = NULL;
> virNetDevVPortProfilePtr vport = NULL;
> @@ -704,6 +706,7 @@ int qemuDomainAttachNetDevice(virConnectPtr conn,
> bool iface_connected = false;
> int actualType;
> virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> + int i;
>
> /* preallocate new slot for device */
> if (VIR_REALLOC_N(vm->def->nets, vm->def->nnets+1) < 0) {
> @@ -739,22 +742,37 @@ int qemuDomainAttachNetDevice(virConnectPtr conn,
>
> if (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE ||
> actualType == VIR_DOMAIN_NET_TYPE_NETWORK) {
> - if ((tapfd = qemuNetworkIfaceConnect(vm->def, conn, driver, net,
> - priv->qemuCaps)) < 0)
> + if (VIR_ALLOC(tapfd) < 0 || VIR_ALLOC(vhostfd) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> + tapfdSize = vhostfdSize = 1;
> + if (qemuNetworkIfaceConnect(vm->def, conn, driver, net,
> + priv->qemuCaps, tapfd, &tapfdSize) < 0)
> goto cleanup;
> iface_connected = true;
> - if (qemuOpenVhostNet(vm->def, net, priv->qemuCaps, &vhostfd) < 0)
> + if (qemuOpenVhostNet(vm->def, net, priv->qemuCaps, vhostfd, &vhostfdSize) < 0)
> goto cleanup;
> } else if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
> - if ((tapfd = qemuPhysIfaceConnect(vm->def, driver, net,
> - priv->qemuCaps,
> - VIR_NETDEV_VPORT_PROFILE_OP_CREATE)) < 0)
> + if (VIR_ALLOC(tapfd) < 0 || VIR_ALLOC(vhostfd) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> + tapfdSize = vhostfdSize = 1;
> + if ((tapfd[0] = qemuPhysIfaceConnect(vm->def, driver, net,
> + priv->qemuCaps,
> + VIR_NETDEV_VPORT_PROFILE_OP_CREATE)) < 0)
> goto cleanup;
> iface_connected = true;
> - if (qemuOpenVhostNet(vm->def, net, priv->qemuCaps, &vhostfd) < 0)
> + if (qemuOpenVhostNet(vm->def, net, priv->qemuCaps, vhostfd, &vhostfdSize) < 0)
> goto cleanup;
> } else if (actualType == VIR_DOMAIN_NET_TYPE_ETHERNET) {
> - if (qemuOpenVhostNet(vm->def, net, priv->qemuCaps, &vhostfd) < 0)
> + if (VIR_ALLOC(vhostfd) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> + vhostfdSize = 1;
> + if (qemuOpenVhostNet(vm->def, net, priv->qemuCaps, vhostfd, &vhostfdSize) < 0)
> goto cleanup;
> }
>
> @@ -792,41 +810,51 @@ int qemuDomainAttachNetDevice(virConnectPtr conn,
> }
> }
>
> - if (tapfd != -1) {
> - if (virAsprintf(&tapfd_name, "fd-%s", net->info.alias) < 0)
> + if (VIR_ALLOC_N(tapfdName, tapfdSize) < 0 ||
> + VIR_ALLOC_N(vhostfdName, vhostfdSize) < 0) {
> + virReportOOMError();
> + goto cleanup;
> + }
> +
> + for (i = 0; i < tapfdSize; i++) {
> + if (virAsprintf(&tapfdName[i], "fd-%s%d", net->info.alias, i) < 0)
> goto no_memory;
> }
>
> - if (vhostfd != -1) {
> - if (virAsprintf(&vhostfd_name, "vhostfd-%s", net->info.alias) < 0)
> + for (i = 0; i < vhostfdSize; i++) {
> + if (virAsprintf(&vhostfdName[i], "vhostfd-%s%d", net->info.alias, i) < 0)
> goto no_memory;
> }
>
> if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NETDEV) &&
> virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
> if (!(netstr = qemuBuildHostNetStr(net, driver,
> - ',', -1, tapfd_name,
> - vhostfd_name)))
> + ',', -1,
> + tapfdName, tapfdSize,
> + vhostfdName, vhostfdSize)))
> goto cleanup;
> } else {
> if (!(netstr = qemuBuildHostNetStr(net, driver,
> - ' ', vlan, tapfd_name,
> - vhostfd_name)))
> + ' ', vlan,
> + tapfdName, tapfdSize,
> + vhostfdName, vhostfdSize)))
> goto cleanup;
> }
>
> qemuDomainObjEnterMonitor(driver, vm);
> if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NETDEV) &&
> virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
> - if (qemuMonitorAddNetdev(priv->mon, netstr, tapfd, tapfd_name,
> - vhostfd, vhostfd_name) < 0) {
> + if (qemuMonitorAddNetdev(priv->mon, netstr,
> + tapfd, tapfdName, tapfdSize,
> + vhostfd, vhostfdName, vhostfdSize) < 0) {
> qemuDomainObjExitMonitor(driver, vm);
> virDomainAuditNet(vm, NULL, net, "attach", false);
> goto cleanup;
> }
> } else {
> - if (qemuMonitorAddHostNetwork(priv->mon, netstr, tapfd, tapfd_name,
> - vhostfd, vhostfd_name) < 0) {
> + if (qemuMonitorAddHostNetwork(priv->mon, netstr,
> + tapfd, tapfdName, tapfdSize,
> + vhostfd, vhostfdName, vhostfdSize) < 0) {
> qemuDomainObjExitMonitor(driver, vm);
> virDomainAuditNet(vm, NULL, net, "attach", false);
> goto cleanup;
> @@ -834,8 +862,10 @@ int qemuDomainAttachNetDevice(virConnectPtr conn,
> }
> qemuDomainObjExitMonitor(driver, vm);
>
> - VIR_FORCE_CLOSE(tapfd);
> - VIR_FORCE_CLOSE(vhostfd);
> + for (i = 0; i < tapfdSize; i++)
> + VIR_FORCE_CLOSE(tapfd[i]);
> + for (i = 0; i < vhostfdSize; i++)
> + VIR_FORCE_CLOSE(vhostfd[i]);
>
> if (!virDomainObjIsActive(vm)) {
> virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> @@ -931,10 +961,18 @@ cleanup:
>
> VIR_FREE(nicstr);
> VIR_FREE(netstr);
> - VIR_FREE(tapfd_name);
> - VIR_FORCE_CLOSE(tapfd);
> - VIR_FREE(vhostfd_name);
> - VIR_FORCE_CLOSE(vhostfd);
> + for (i = 0; i < tapfdSize; i++) {
> + VIR_FORCE_CLOSE(tapfd[i]);
> + VIR_FREE(tapfdName[i]);
> + }
> + VIR_FREE(tapfd);
> + VIR_FREE(tapfdName);
> + for (i = 0; i < vhostfdSize; i++) {
> + VIR_FORCE_CLOSE(vhostfd[i]);
> + VIR_FREE(vhostfdName[i]);
> + }
> + VIR_FREE(vhostfd);
> + VIR_FREE(vhostfdName);
> virObjectUnref(cfg);
>
> return ret;
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index f6d8ef4..524eb9d 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -2477,14 +2477,16 @@ cleanup:
>
> int qemuMonitorAddHostNetwork(qemuMonitorPtr mon,
> const char *netstr,
> - int tapfd, const char *tapfd_name,
> - int vhostfd, const char *vhostfd_name)
> + int *tapfd, char **tapfdName, int tapfdSize,
> + int *vhostfd, char **vhostfdName, int vhostfdSize)
> {
> int ret = -1;
> - VIR_DEBUG("mon=%p netstr=%s tapfd=%d tapfd_name=%s "
> - "vhostfd=%d vhostfd_name=%s",
> - mon, netstr, tapfd, NULLSTR(tapfd_name),
> - vhostfd, NULLSTR(vhostfd_name));
> + int i = 0, j = 0;
> +
> + VIR_DEBUG("mon=%p netstr=%s tapfd=%p tapfdName=%p tapfdSize=%d "
> + "vhostfd=%p vhostfdName=%p vhostfdSize=%d",
> + mon, netstr, tapfd, tapfdName, tapfdSize,
> + vhostfd, vhostfdName, vhostfdSize);
>
> if (!mon) {
> virReportError(VIR_ERR_INVALID_ARG, "%s",
> @@ -2492,12 +2494,13 @@ int qemuMonitorAddHostNetwork(qemuMonitorPtr mon,
> return -1;
> }
>
> - if (tapfd >= 0 && qemuMonitorSendFileHandle(mon, tapfd_name, tapfd) < 0)
> - return -1;
> - if (vhostfd >= 0 &&
> - qemuMonitorSendFileHandle(mon, vhostfd_name, vhostfd) < 0) {
> - vhostfd = -1;
> - goto cleanup;
> + for (i = 0; i < tapfdSize; i++) {
> + if (qemuMonitorSendFileHandle(mon, tapfdName[i], tapfd[i]) < 0)
> + goto cleanup;
> + }
> + for (j = 0; j < vhostfdSize; j++) {
> + if (qemuMonitorSendFileHandle(mon, vhostfdName[j], vhostfd[j]) < 0)
> + goto cleanup;
> }
>
> if (mon->json)
> @@ -2508,10 +2511,14 @@ int qemuMonitorAddHostNetwork(qemuMonitorPtr mon,
>
> cleanup:
> if (ret < 0) {
> - if (tapfd >= 0 && qemuMonitorCloseFileHandle(mon, tapfd_name) < 0)
> - VIR_WARN("failed to close device handle '%s'", tapfd_name);
> - if (vhostfd >= 0 && qemuMonitorCloseFileHandle(mon, vhostfd_name) < 0)
> - VIR_WARN("failed to close device handle '%s'", vhostfd_name);
> + while (i--) {
> + if (qemuMonitorCloseFileHandle(mon, tapfdName[i]) < 0)
> + VIR_WARN("failed to close device handle '%s'", tapfdName[i]);
> + }
> + while (j--) {
> + if (qemuMonitorCloseFileHandle(mon, vhostfdName[j]) < 0)
> + VIR_WARN("failed to close device handle '%s'", vhostfdName[j]);
> + }
> }
>
> return ret;
> @@ -2543,14 +2550,16 @@ int qemuMonitorRemoveHostNetwork(qemuMonitorPtr mon,
>
> int qemuMonitorAddNetdev(qemuMonitorPtr mon,
> const char *netdevstr,
> - int tapfd, const char *tapfd_name,
> - int vhostfd, const char *vhostfd_name)
> + int *tapfd, char **tapfdName, int tapfdSize,
> + int *vhostfd, char **vhostfdName, int vhostfdSize)
> {
> int ret = -1;
> - VIR_DEBUG("mon=%p netdevstr=%s tapfd=%d tapfd_name=%s "
> - "vhostfd=%d vhostfd_name=%s",
> - mon, netdevstr, tapfd, NULLSTR(tapfd_name),
> - vhostfd, NULLSTR(vhostfd_name));
> + int i = 0, j = 0;
> +
> + VIR_DEBUG("mon=%p netdevstr=%s tapfd=%p tapfdName=%p tapfdSize=%d"
> + "vhostfd=%p vhostfdName=%p vhostfdSize=%d",
> + mon, netdevstr, tapfd, tapfdName, tapfdSize,
> + vhostfd, vhostfdName, tapfdSize);
>
> if (!mon) {
> virReportError(VIR_ERR_INVALID_ARG, "%s",
> @@ -2558,12 +2567,13 @@ int qemuMonitorAddNetdev(qemuMonitorPtr mon,
> return -1;
> }
>
> - if (tapfd >= 0 && qemuMonitorSendFileHandle(mon, tapfd_name, tapfd) < 0)
> - return -1;
> - if (vhostfd >= 0 &&
> - qemuMonitorSendFileHandle(mon, vhostfd_name, vhostfd) < 0) {
> - vhostfd = -1;
> - goto cleanup;
> + for (i = 0; i < tapfdSize; i++) {
> + if (qemuMonitorSendFileHandle(mon, tapfdName[i], tapfd[i]) < 0)
> + goto cleanup;
> + }
> + for (j = 0; j < vhostfdSize; j++) {
> + if (qemuMonitorSendFileHandle(mon, vhostfdName[j], vhostfd[j]) < 0)
> + goto cleanup;
> }
>
> if (mon->json)
> @@ -2573,10 +2583,14 @@ int qemuMonitorAddNetdev(qemuMonitorPtr mon,
>
> cleanup:
> if (ret < 0) {
> - if (tapfd >= 0 && qemuMonitorCloseFileHandle(mon, tapfd_name) < 0)
> - VIR_WARN("failed to close device handle '%s'", tapfd_name);
> - if (vhostfd >= 0 && qemuMonitorCloseFileHandle(mon, vhostfd_name) < 0)
> - VIR_WARN("failed to close device handle '%s'", vhostfd_name);
> + while (i--) {
> + if (qemuMonitorCloseFileHandle(mon, tapfdName[i]) < 0)
> + VIR_WARN("failed to close device handle '%s'", tapfdName[i]);
> + }
> + while (j--) {
> + if (qemuMonitorCloseFileHandle(mon, vhostfdName[j]) < 0)
> + VIR_WARN("failed to close device handle '%s'", vhostfdName[j]);
> + }
> }
>
> return ret;
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 8f9c182..a607712 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -501,8 +501,8 @@ int qemuMonitorRemoveFd(qemuMonitorPtr mon, int fdset, int fd);
> */
> int qemuMonitorAddHostNetwork(qemuMonitorPtr mon,
> const char *netstr,
> - int tapfd, const char *tapfd_name,
> - int vhostfd, const char *vhostfd_name);
> + int *tapfd, char **tapfdName, int tapfdSize,
> + int *vhostfd, char **vhostfdName, int vhostfdSize);
>
> int qemuMonitorRemoveHostNetwork(qemuMonitorPtr mon,
> int vlan,
> @@ -510,8 +510,8 @@ int qemuMonitorRemoveHostNetwork(qemuMonitorPtr mon,
>
> int qemuMonitorAddNetdev(qemuMonitorPtr mon,
> const char *netdevstr,
> - int tapfd, const char *tapfd_name,
> - int vhostfd, const char *vhostfd_name);
> + int *tapfd, char **tapfdName, int tapfdSize,
> + int *vhostfd, char **vhostfdName, int vhostfdSize);
>
> int qemuMonitorRemoveNetdev(qemuMonitorPtr mon,
> const char *alias);
> diff --git a/src/uml/uml_conf.c b/src/uml/uml_conf.c
> index 52b705c..3fda7e4 100644
> --- a/src/uml/uml_conf.c
> +++ b/src/uml/uml_conf.c
> @@ -109,6 +109,7 @@ umlConnectTapDevice(virConnectPtr conn,
> const char *bridge)
> {
> bool template_ifname = false;
> + int tapfd;
>
> if (!net->ifname ||
> STRPREFIX(net->ifname, VIR_NET_GENERATED_PREFIX) ||
> @@ -121,7 +122,7 @@ umlConnectTapDevice(virConnectPtr conn,
> }
>
> if (virNetDevTapCreateInBridgePort(bridge, &net->ifname, &net->mac,
> - vm->uuid, NULL,
> + vm->uuid, &tapfd, 1,
> virDomainNetGetActualVirtPortProfile(net),
> virDomainNetGetActualVlan(net),
> VIR_NETDEV_TAP_CREATE_IFUP |
> @@ -139,9 +140,11 @@ umlConnectTapDevice(virConnectPtr conn,
> }
> }
>
> + VIR_FORCE_CLOSE(tapfd);
> return 0;
>
> error:
> + VIR_FORCE_CLOSE(tapfd);
> return -1;
> }
>
> diff --git a/src/util/virnetdevtap.c b/src/util/virnetdevtap.c
> index 75599db..aa41b9c 100644
> --- a/src/util/virnetdevtap.c
> +++ b/src/util/virnetdevtap.c
> @@ -140,7 +140,8 @@ virNetDevProbeVnetHdr(int tapfd)
> /**
> * virNetDevTapCreate:
> * @ifname: the interface name
> - * @tapfd: file descriptor return value for the new tap device
> + * @tapfds: array of file descriptors return value for the new tap device
> + * @tapfdSize: number of file descriptors in @tapfd
> * @flags: OR of virNetDevTapCreateFlags. Only one flag is recognized:
> *
> * VIR_NETDEV_TAP_CREATE_VNET_HDR
> @@ -148,76 +149,85 @@ virNetDevProbeVnetHdr(int tapfd)
> * VIR_NETDEV_TAP_CREATE_PERSIST
> * - The device will persist after the file descriptor is closed
> *
> - * Creates a tap interface.
> - * If the @tapfd parameter is supplied, the open tap device file descriptor
> - * will be returned, otherwise the TAP device will be closed. The caller must
> - * use virNetDevTapDelete to remove a persistent TAP device when it is no
> - * longer needed.
> + * Creates a tap interface. The caller must use virNetDevTapDelete to
> + * remove a persistent TAP device when it is no longer needed. In case
> + * @tapfdSize is greater than one, multiqueue extension is requested
> + * from kernel.
> *
> * Returns 0 in case of success or -1 on failure.
> */
> int virNetDevTapCreate(char **ifname,
> int *tapfd,
> + int tapfdSize,
> unsigned int flags)
> {
> - int fd;
> + int i;
> struct ifreq ifr;
> int ret = -1;
> -
> - if ((fd = open("/dev/net/tun", O_RDWR)) < 0) {
> - virReportSystemError(errno, "%s",
> - _("Unable to open /dev/net/tun, is tun module loaded?"));
> - return -1;
> - }
> + int fd;
>
> memset(&ifr, 0, sizeof(ifr));
> + for (i = 0; i < tapfdSize; i++) {
> + if ((fd = open("/dev/net/tun", O_RDWR)) < 0) {
> + virReportSystemError(errno, "%s",
> + _("Unable to open /dev/net/tun, is tun module loaded?"));
> + goto cleanup;
> + }
>
> - ifr.ifr_flags = IFF_TAP|IFF_NO_PI;
> + memset(&ifr, 0, sizeof(ifr));
> +
> + ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
> + /* If tapfdSize is greater than one, request multiqueue */
> + if (tapfdSize > 1)
> + ifr.ifr_flags |= IFF_MULTI_QUEUE;
>
> # ifdef IFF_VNET_HDR
> - if ((flags & VIR_NETDEV_TAP_CREATE_VNET_HDR) &&
> - virNetDevProbeVnetHdr(fd))
> - ifr.ifr_flags |= IFF_VNET_HDR;
> + if ((flags & VIR_NETDEV_TAP_CREATE_VNET_HDR) &&
> + virNetDevProbeVnetHdr(fd))
> + ifr.ifr_flags |= IFF_VNET_HDR;
> # endif
>
> - if (virStrcpyStatic(ifr.ifr_name, *ifname) == NULL) {
> - virReportSystemError(ERANGE,
> - _("Network interface name '%s' is too long"),
> - *ifname);
> - goto cleanup;
> + if (virStrcpyStatic(ifr.ifr_name, *ifname) == NULL) {
> + virReportSystemError(ERANGE,
> + _("Network interface name '%s' is too long"),
> + *ifname);
> + goto cleanup;
>
> - }
> + }
>
> - if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
> - virReportSystemError(errno,
> - _("Unable to create tap device %s"),
> - NULLSTR(*ifname));
> - goto cleanup;
> - }
> + if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
> + virReportSystemError(errno,
> + _("Unable to create tap device %s"),
> + NULLSTR(*ifname));
> + goto cleanup;
> + }
>
> - if ((flags & VIR_NETDEV_TAP_CREATE_PERSIST) &&
> - (errno = ioctl(fd, TUNSETPERSIST, 1))) {
> - virReportSystemError(errno,
> - _("Unable to set tap device %s to persistent"),
> - NULLSTR(*ifname));
> - goto cleanup;
> - }
> + if (i == 0) {
> + /* In case we are looping more than once, set other
> + * TAPs to have the same name */
> + VIR_FREE(*ifname);
> + if (ifr.ifr_name && VIR_STRDUP(*ifname, ifr.ifr_name) < 0)
> + goto cleanup;
> + }
>
> - VIR_FREE(*ifname);
> - if (!(*ifname = strdup(ifr.ifr_name))) {
> - virReportOOMError();
> - goto cleanup;
> + if ((flags & VIR_NETDEV_TAP_CREATE_PERSIST) &&
> + (errno = ioctl(fd, TUNSETPERSIST, 1))) {
> + virReportSystemError(errno,
> + _("Unable to set tap device %s to persistent"),
> + NULLSTR(*ifname));
> + goto cleanup;
> + }
> + tapfd[i] = fd;
> }
> - if (tapfd)
> - *tapfd = fd;
> - else
> - VIR_FORCE_CLOSE(fd);
>
> ret = 0;
>
> cleanup:
> - if (ret < 0)
> + if (ret < 0) {
> VIR_FORCE_CLOSE(fd);
> + while (i--)
> + VIR_FORCE_CLOSE(tapfd[i]);
> + }
>
> return ret;
> }
> @@ -266,6 +276,7 @@ cleanup:
> #else /* ! TUNSETIFF */
> int virNetDevTapCreate(char **ifname ATTRIBUTE_UNUSED,
> int *tapfd ATTRIBUTE_UNUSED,
> + int tapfdSize ATTRIBUTE_UNUSED,
> unsigned int flags ATTRIBUTE_UNUSED)
> {
> virReportSystemError(ENOSYS, "%s",
> @@ -286,7 +297,8 @@ int virNetDevTapDelete(const char *ifname ATTRIBUTE_UNUSED)
> * @brname: the bridge name
> * @ifname: the interface name (or name template)
> * @macaddr: desired MAC address
> - * @tapfd: file descriptor return value for the new tap device
> + * @tapfd: array of file descriptor return value for the new tap device
> + * @tapfdSize: number of file descriptors in @tapfd
> * @virtPortProfile: bridge/port specific configuration
> * @flags: OR of virNetDevTapCreateFlags:
>
> @@ -314,6 +326,7 @@ int virNetDevTapCreateInBridgePort(const char *brname,
> const virMacAddrPtr macaddr,
> const unsigned char *vmuuid,
> int *tapfd,
> + int tapfdSize,
> virNetDevVPortProfilePtr virtPortProfile,
> virNetDevVlanPtr virtVlan,
> unsigned int flags)
> @@ -321,7 +334,7 @@ int virNetDevTapCreateInBridgePort(const char *brname,
> virMacAddr tapmac;
> char macaddrstr[VIR_MAC_STRING_BUFLEN];
>
> - if (virNetDevTapCreate(ifname, tapfd, flags) < 0)
> + if (virNetDevTapCreate(ifname, tapfd, tapfdSize, flags) < 0)
> return -1;
>
> /* We need to set the interface MAC before adding it
> @@ -372,9 +385,9 @@ int virNetDevTapCreateInBridgePort(const char *brname,
>
> return 0;
>
> - error:
> - if (tapfd)
> - VIR_FORCE_CLOSE(*tapfd);
> +error:
> + while (tapfdSize)
> + VIR_FORCE_CLOSE(tapfd[--tapfdSize]);
>
> return -1;
> }
> diff --git a/src/util/virnetdevtap.h b/src/util/virnetdevtap.h
> index 6bfc80c..cb6c284 100644
> --- a/src/util/virnetdevtap.h
> +++ b/src/util/virnetdevtap.h
> @@ -29,6 +29,7 @@
>
> int virNetDevTapCreate(char **ifname,
> int *tapfd,
> + int tapfdSize,
> unsigned int flags)
> ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK;
>
> @@ -55,6 +56,7 @@ int virNetDevTapCreateInBridgePort(const char *brname,
> const virMacAddrPtr macaddr,
> const unsigned char *vmuuid,
> int *tapfd,
> + int tapfdSize,
> virNetDevVPortProfilePtr virtPortProfile,
> virNetDevVlanPtr virtVlan,
> unsigned int flags)
ACK. (I'm undecided if we should error out if multi-queue is requested
when running non-privileged, and I don't care enough about the extra
braces to require you to add them (and it's not in the official coding
style))
More information about the libvir-list
mailing list