[libvirt] [PATCH 6/7] Forward Mode Hostdev network driver Implementation

Shradha Shah sshah at solarflare.com
Thu Aug 16 15:42:31 UTC 2012


This patch updates the network driver to properly utilize the new
attributes/elements that are now in virNetworkDef

Signed-off-by: Shradha Shah <sshah at solarflare.com>
---
 docs/formatnetwork.html.in  |   62 ++++++++++
 src/network/bridge_driver.c |  282 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 284 insertions(+), 60 deletions(-)

diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in
index ed9f7a9..5ab5f3c 100644
--- a/docs/formatnetwork.html.in
+++ b/docs/formatnetwork.html.in
@@ -223,6 +223,37 @@
             (usually either a domain start, or a hotplug interface
             attach to a domain).<span class="since">Since 0.9.4</span>
           </dd>
+          <dt><code>hostdev</code></dt>
+          <dd>
+            This network facilitates PCI Passthrough of a network device.
+            A network device is chosen from the interface pool and 
+            directly assigned to the guest using generic device
+            passthrough, after first optionally setting the device's MAC
+            address to the configured value, and optionally associating 
+            the device with an 802.1Qbh capable switch using an optionally 
+            specified <code><virtualport></code> element. 
+            Note that - due to limitations in standard single-port PCI 
+            ethernet card driver design - only SR-IOV (Single Root I/O 
+            Virtualization) virtual function (VF) devices can be assigned 
+            in this manner; to assign a standard single-port PCI or PCIe 
+            ethernet card to a guest, use the traditional <code><
+            hostdev></code> device definition. <span class="since">
+            Since 0.10.0</span>
+
+            <p>Note that this "intelligent passthrough" of network devices is
+            very similar to the functionality of a standard <code><
+            hostdev></code> device, the difference being that this 
+            method allows specifying a MAC address and <code><virtualport
+            ></code> for the passed-through device. If these capabilities 
+            are not required, if you have a standard single-port PCI, PCIe, 
+            or USB network card that doesn't support SR-IOV (and hence would 
+            anyway lose the configured MAC address during reset after being 
+            assigned to the guest domain), or if you are using a version of 
+            libvirt older than 0.10.0, you should use a standard 
+            <code><hostdev></code> definition to assign the device to 
+            the guest instead of <code><forward mode='hostdev'/></code>.
+            </p>
+          </dd>
         </dl>
         As mentioned above, a <code><forward></code> element can
         have multiple <code><interface></code> subelements, each
@@ -272,6 +303,37 @@
         particular, 'passthrough' mode, and 'private' mode when using
         802.1Qbh), libvirt will choose an unused physical interface
         or, if it can't find an unused interface, fail the operation.</p>
+
+        <span class="since">since 0.9.12</span> and when using forward mode 
+        'hostdev' we specify the interface pool by using the 
+        <code><address></code> element and <code><
+        type></code> <code><domain></code> <code><bus></code>
+        <code><slot></code> and <code><function></code>
+        sub-elements.
+
+        <pre>
+...
+  <forward mode='hostdev' managed='yes'>
+    <address type='pci' domain='0' bus='4' slot='0' function='1'/>
+    <address type='pci' domain='0' bus='4' slot='0' function='2'/>
+    <address type='pci' domain='0' bus='4' slot='0' function='3'/>
+  </forward> 
+...
+        </pre>
+        
+        Alternatively the interface pool can also be defined using a
+        single physical function  <code><pf></code> subelement to 
+        call out the  corresponding physical interface associated with 
+        multiple virtual interfaces (similar to passthrough mode):
+
+        <pre>
+...
+  <forward mode='hostdev' managed='yes'>
+    <pf dev='eth0'/>
+  </forward>
+...
+        </pre>
+
       </dd>
     </dl>
     <h5><a name="elementQoS">Quality of service</a></h5>
diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
index 38f6d12..065af3e 100644
--- a/src/network/bridge_driver.c
+++ b/src/network/bridge_driver.c
@@ -47,6 +47,7 @@
 #include "datatypes.h"
 #include "bridge_driver.h"
 #include "network_conf.h"
+#include "device_conf.h"
 #include "driver.h"
 #include "buf.h"
 #include "virpidfile.h"
@@ -1935,7 +1936,7 @@ networkStartNetworkExternal(struct network_driver *driver ATTRIBUTE_UNUSED,
                             virNetworkObjPtr network ATTRIBUTE_UNUSED)
 {
     /* put anything here that needs to be done each time a network of
-     * type BRIDGE, PRIVATE, VEPA, or PASSTHROUGH is started. On
+     * type BRIDGE, PRIVATE, VEPA, HOSTDEV or PASSTHROUGH is started. On
      * failure, undo anything you've done, and return -1. On success
      * return 0.
      */
@@ -1946,7 +1947,7 @@ static int networkShutdownNetworkExternal(struct network_driver *driver ATTRIBUT
                                         virNetworkObjPtr network ATTRIBUTE_UNUSED)
 {
     /* put anything here that needs to be done each time a network of
-     * type BRIDGE, PRIVATE, VEPA, or PASSTHROUGH is shutdown. On
+     * type BRIDGE, PRIVATE, VEPA, HOSTDEV or PASSTHROUGH is shutdown. On
      * failure, undo anything you've done, and return -1. On success
      * return 0.
      */
@@ -1977,6 +1978,7 @@ networkStartNetwork(struct network_driver *driver,
     case VIR_NETWORK_FORWARD_PRIVATE:
     case VIR_NETWORK_FORWARD_VEPA:
     case VIR_NETWORK_FORWARD_PASSTHROUGH:
+    case VIR_NETWORK_FORWARD_HOSTDEV:
         ret = networkStartNetworkExternal(driver, network);
         break;
     }
@@ -2036,6 +2038,7 @@ static int networkShutdownNetwork(struct network_driver *driver,
     case VIR_NETWORK_FORWARD_PRIVATE:
     case VIR_NETWORK_FORWARD_VEPA:
     case VIR_NETWORK_FORWARD_PASSTHROUGH:
+    case VIR_NETWORK_FORWARD_HOSTDEV:
         ret = networkShutdownNetworkExternal(driver, network);
         break;
     }
@@ -2825,6 +2828,13 @@ networkCreateInterfacePool(virNetworkDefPtr netdef) {
                 goto finish;
             }
         }
+        else if (netdef->forwardType == VIR_NETWORK_FORWARD_HOSTDEV) {
+            netdef->forwardIfs[ii].type = VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_PCI; /*Assuming PCI as VF's are PCI devices */
+            netdef->forwardIfs[ii].device.pci.domain = virt_fns[ii]->domain;
+            netdef->forwardIfs[ii].device.pci.bus = virt_fns[ii]->bus;
+            netdef->forwardIfs[ii].device.pci.slot = virt_fns[ii]->slot;
+            netdef->forwardIfs[ii].device.pci.function = virt_fns[ii]->function;
+        }
     }
     
     ret = 0;
@@ -2958,6 +2968,65 @@ networkAllocateActualDevice(virDomainNetDefPtr iface)
             }
         }
 
+    } else if (netdef->forwardType == VIR_NETWORK_FORWARD_HOSTDEV) {
+        if (!iface->data.network.actual
+            && (VIR_ALLOC(iface->data.network.actual) < 0)) {
+            virReportOOMError();
+            goto cleanup;
+        }
+        
+        iface->data.network.actual->type = actualType = VIR_DOMAIN_NET_TYPE_HOSTDEV;
+        if ((netdef->nForwardPfs > 0) && (netdef->nForwardIfs <= 0)) {
+            if(networkCreateInterfacePool(netdef) < 0) {
+                goto cleanup;
+            }
+        }
+        /* pick first dev with 0 connections */
+        
+        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
+            if (netdef->forwardIfs[ii].connections == 0) {
+                dev = &netdef->forwardIfs[ii];
+                break;
+            }
+        }
+        if (!dev) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("network '%s' requires exclusive access to interfaces, but none are available"),
+                           netdef->name);
+            goto cleanup;
+        }
+        iface->data.network.actual->data.hostdev.def.parent.type = VIR_DOMAIN_DEVICE_NET;
+        iface->data.network.actual->data.hostdev.def.parent.data.net = iface;
+        iface->data.network.actual->data.hostdev.def.info = &iface->info;
+        iface->data.network.actual->data.hostdev.def.mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS;
+        iface->data.network.actual->data.hostdev.def.managed = netdef->managed;
+        iface->data.network.actual->data.hostdev.def.source.subsys.type = dev->type;
+        iface->data.network.actual->data.hostdev.def.source.subsys.u.pci = dev->device.pci;
+
+        /* merge virtualports from interface, network, and portgroup to
+         * arrive at actual virtualport to use
+         */
+        if (virNetDevVPortProfileMerge3(&iface->data.network.actual->virtPortProfile,
+                                        iface->virtPortProfile,
+                                        netdef->virtPortProfile,
+                                        portgroup
+                                        ? portgroup->virtPortProfile : NULL) < 0) {
+            goto error;
+        }
+        virtport = iface->data.network.actual->virtPortProfile;
+        if (virtport) {
+            /* make sure type is supported for macvtap connections */
+            if (virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_8021QBG &&
+                virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_8021QBH) {
+                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                               _("<virtualport type='%s'> not supported for network "
+                                 "'%s' which uses a macvtap device"),
+                               virNetDevVPortTypeToString(virtport->virtPortType),
+                               netdef->name);
+                goto error;
+            }
+        }
+        
     } else if ((netdef->forwardType == VIR_NETWORK_FORWARD_BRIDGE) ||
                (netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) ||
                (netdef->forwardType == VIR_NETWORK_FORWARD_VEPA) ||
@@ -3124,8 +3193,14 @@ validate:
     if (dev) {
         /* we are now assured of success, so mark the allocation */
         dev->connections++;
-        VIR_DEBUG("Using physical device %s, %d connections",
-                  dev->device.dev, dev->connections);
+        if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV)
+            VIR_DEBUG("Using physical device %s, %d connections",
+                      dev->device.dev, dev->connections);
+        else
+            VIR_DEBUG("Using physical device with domain=%d bus=%d slot=%d function=%d, connections %d",
+                      dev->device.pci.domain, dev->device.pci.bus,
+                      dev->device.pci.slot, dev->device.pci.function,
+                      dev->connections);
     }
 
     if (netdef) {
@@ -3164,9 +3239,12 @@ networkNotifyActualDevice(virDomainNetDefPtr iface)
     struct network_driver *driver = driverState;
     virNetworkObjPtr network;
     virNetworkDefPtr netdef;
-    const char *actualDev;
+    const char *actualDev = NULL;
+    virDomainHostdevDefPtr def = NULL;
     int ret = -1;
 
+    enum virDomainNetType actualType = virDomainNetGetActualType(iface);
+
     if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK)
        return 0;
 
@@ -3181,52 +3259,86 @@ networkNotifyActualDevice(virDomainNetDefPtr iface)
     }
     netdef = network->def;
 
-    if (!iface->data.network.actual ||
-        (virDomainNetGetActualType(iface) != VIR_DOMAIN_NET_TYPE_DIRECT)) {
+    if ((!iface->data.network.actual) ||
+        ((actualType != VIR_DOMAIN_NET_TYPE_DIRECT) &&
+         (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV))) {
         VIR_DEBUG("Nothing to claim from network %s", iface->data.network.name);
         goto success;
     }
 
-    actualDev = virDomainNetGetActualDirectDev(iface);
-    if (!actualDev) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       "%s", _("the interface uses a direct "
-                               "mode, but has no source dev"));
-        goto error;
+    if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
+        actualDev = virDomainNetGetActualDirectDev(iface);
+        if (!actualDev) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           "%s", _("the interface uses a direct mode, but has no source dev"));
+            goto error;
+        }
+    }
+    else if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+        def = virDomainNetGetActualHostdev(iface);
+        if (!def) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           "%s", _("the interface uses a hostdev mode, but has no hostdev"));
+            goto error;
+        }
     }
 
-    if (netdef->nForwardIfs == 0) {
+    if ((netdef->nForwardIfs == 0) && (netdef->nForwardPfs == 0)) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
-                       _("network '%s' uses a direct mode, but "
-                         "has no forward dev and no interface pool"),
+                       _("network '%s' uses a direct/hostdev mode, but has no forward dev and no interface pool"),
                        netdef->name);
         goto error;
     } else {
         int ii;
         virNetworkForwardIfDefPtr dev = NULL;
 
-        /* find the matching interface and increment its connections */
-
-        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
-            if (STREQ(actualDev, netdef->forwardIfs[ii].device.dev)) {
-                dev = &netdef->forwardIfs[ii];
-                break;
+        /* find the matching interface in the pool and increment its connections */
+        if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
+            for (ii = 0; ii < netdef->nForwardIfs; ii++) {
+                if (STREQ(actualDev, netdef->forwardIfs[ii].device.dev)) {
+                    dev = &netdef->forwardIfs[ii];
+                    break;
+                }
             }
+            if (!dev) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("network '%s' doesn't have dev='%s' in use by domain"),
+                               netdef->name, actualDev);
+                goto error;
+            } 
         }
-        /* dev points at the physical device we want to use */
-        if (!dev) {
-            virReportError(VIR_ERR_INTERNAL_ERROR,
-                           _("network '%s' doesn't have dev='%s' in use by domain"),
-                           netdef->name, actualDev);
-            goto error;
+        else if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+            if ((netdef->nForwardPfs > 0) && (netdef->nForwardIfs <= 0)) {
+                if(networkCreateInterfacePool(netdef) < 0) {
+                    goto error;
+                }
+            }
+            for (ii = 0; ii < netdef->nForwardIfs; ii++) {
+                if (virDevicePCIAddressEqual(def->source.subsys.u.pci,
+                                             netdef->forwardIfs[ii].device.pci) == 0) {
+                    dev = &netdef->forwardIfs[ii];
+                    break;
+                }
+            }
+            if (!dev) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("network '%s' doesn't have dev='%04x:%02x:%02x.%x' in use by domain"),
+                               netdef->name, 
+                               def->source.subsys.u.pci.domain,
+                               def->source.subsys.u.pci.bus,
+                               def->source.subsys.u.pci.slot,
+                               def->source.subsys.u.pci.function);
+                goto error;
+            }
         }
 
-        /* PASSTHROUGH mode, and PRIVATE Mode + 802.1Qbh both require
-         * exclusive access to a device, so current connections count
-         * must be 0 in those cases.
-         */
+       /* PASSTHROUGH mode, and PRIVATE Mode + 802.1Qbh both require
+        * exclusive access to a device, so current connections count
+        * must be 0 in those cases.
+        */
         if ((dev->connections > 0) &&
             ((netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH) ||
+             (netdef->forwardType == VIR_NETWORK_FORWARD_HOSTDEV) || 
              ((netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) &&
               iface->data.network.actual->virtPortProfile &&
               (iface->data.network.actual->virtPortProfile->virtPortType
@@ -3238,8 +3350,16 @@ networkNotifyActualDevice(virDomainNetDefPtr iface)
         }
         /* we are now assured of success, so mark the allocation */
         dev->connections++;
-        VIR_DEBUG("Using physical device %s, %d connections",
-                  dev->device.dev, dev->connections);
+        if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
+            VIR_DEBUG("Using physical device %s, connections %d",
+                      dev->device.dev, dev->connections);
+        }
+        else if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+            VIR_DEBUG("Using physical device with domain=%d bus=%d slot=%d function=%d, connections %d",
+                      dev->device.pci.domain, dev->device.pci.bus,
+                      dev->device.pci.slot, dev->device.pci.function,
+                      dev->connections);
+        }
     }
 
 success:
@@ -3273,9 +3393,12 @@ networkReleaseActualDevice(virDomainNetDefPtr iface)
     struct network_driver *driver = driverState;
     virNetworkObjPtr network;
     virNetworkDefPtr netdef;
-    const char *actualDev;
+    const char *actualDev = NULL;
+    virDomainHostdevDefPtr def = NULL;
     int ret = -1;
 
+    enum virDomainNetType actualType = virDomainNetGetActualType(iface);
+
     if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK)
        return 0;
 
@@ -3290,47 +3413,86 @@ networkReleaseActualDevice(virDomainNetDefPtr iface)
     }
     netdef = network->def;
 
-    if (!iface->data.network.actual ||
-        (virDomainNetGetActualType(iface) != VIR_DOMAIN_NET_TYPE_DIRECT)) {
+    if ((!iface->data.network.actual) ||
+        ((actualType != VIR_DOMAIN_NET_TYPE_DIRECT) &&
+         (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV))) {
         VIR_DEBUG("Nothing to release to network %s", iface->data.network.name);
         goto success;
     }
 
-    actualDev = virDomainNetGetActualDirectDev(iface);
-    if (!actualDev) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       "%s", _("the interface uses a direct "
-                               "mode, but has no source dev"));
-        goto error;
+    if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
+        actualDev = virDomainNetGetActualDirectDev(iface);
+        if (!actualDev) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           "%s", _("the interface uses a direct mode, but has no source dev"));
+            goto error;
+        }
+    }
+    else if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+        def = virDomainNetGetActualHostdev(iface);
+        if (!def) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           "%s", _("the interface uses a hostdev mode, but has no hostdev"));
+            goto error;
+        }
     }
 
-    if (netdef->nForwardIfs == 0) {
+    if ((netdef->nForwardIfs == 0) && (netdef->nForwardPfs == 0)) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
-                       _("network '%s' uses a direct mode, but "
+                       _("network '%s' uses a direct/hostdev mode, but "
                          "has no forward dev and no interface pool"),
                        netdef->name);
         goto error;
     } else {
         int ii;
         virNetworkForwardIfDefPtr dev = NULL;
-
-        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
-            if (STREQ(actualDev, netdef->forwardIfs[ii].device.dev)) {
-                dev = &netdef->forwardIfs[ii];
-                break;
+        
+        if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
+            for (ii = 0; ii < netdef->nForwardIfs; ii++) {
+                if (STREQ(actualDev, netdef->forwardIfs[ii].device.dev)) {
+                    dev = &netdef->forwardIfs[ii];
+                    break;
+                }
+            }
+            if (!dev) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("network '%s' doesn't have dev='%s' in use by domain"),
+                               netdef->name, actualDev);
+                goto error;
+            } 
+        }        
+        else if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+            for (ii = 0; ii < netdef->nForwardIfs; ii++) {
+                if (virDevicePCIAddressEqual(def->source.subsys.u.pci,
+                                             netdef->forwardIfs[ii].device.pci) == 0) {
+                    dev = &netdef->forwardIfs[ii];
+                    break;
+                }
+            }
+            if (!dev) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("network '%s' doesn't have dev='%04x:%02x:%02x.%x' in use by domain"),
+                               netdef->name, 
+                               def->source.subsys.u.pci.domain,
+                               def->source.subsys.u.pci.bus,
+                               def->source.subsys.u.pci.slot,
+                               def->source.subsys.u.pci.function);
+                goto error;
             }
         }
-        /* dev points at the physical device we've been using */
-        if (!dev) {
-            virReportError(VIR_ERR_INTERNAL_ERROR,
-                           _("network '%s' doesn't have dev='%s' in use by domain"),
-                           netdef->name, actualDev);
-            goto error;
-        }
-
         dev->connections--;
-        VIR_DEBUG("Releasing physical device %s, %d connections",
-                  dev->device.dev, dev->connections);
+        if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
+            VIR_DEBUG("Releasing physical device %s, connections %d",
+                      dev->device.dev, dev->connections);
+        }
+        else if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+            VIR_DEBUG("Releasing physical device with domain=%d bus=%d slot=%d function=%d, connections %d",
+                      dev->device.pci.domain,
+                      dev->device.pci.bus,
+                      dev->device.pci.slot,
+                      dev->device.pci.function,
+                      dev->connections);
+        } 
     }
 
 success:
-- 
1.7.4.4





More information about the libvir-list mailing list