[libvirt] [PATCHv2] Enable tuning of qemu network tap device "sndbuf" size

Laine Stump laine at laine.org
Fri Jan 14 17:35:30 UTC 2011


This is in response to a request in:

   https://bugzilla.redhat.com/show_bug.cgi?id=665293

In short, under heavy load, it's possible for qemu's networking to
lock up due to the tap device's default 1MB sndbuf being
inadequate. Adding "sndbuf=0" to the qemu command line -netdev
option will alleviate this problem (sndbuf=0 actually sets it to
0xffffffff).

Because we must be able to explicitly specify "0" as a value, the
standard practice of "0 means not specified" won't work here. Instead,
virDomainNetDef also has a boolean sndbuf_specified flag, which
defaults to false, but is set to true if some value was given.

The sndbuf value is put inside a <tune> element of each <interface> in
the domain. The intent is that further tunable settings will also be
placed inside this element.

     <interface type='network'>
       ...
       <tune>
         <sndbuf>0</sndbuf>
       ...
       </tune>
     </interface>
---

Changes from V1:

sndbuf_specified is now a bool rather than an int bitfield.

sndbuf is now unsigned long. Made possible by eblake's patch adding new
virXPath* and virStrToLong_* functions.

 docs/schemas/domain.rng |   10 ++++++++++
 src/conf/domain_conf.c  |   29 +++++++++++++++++++++++++++--
 src/conf/domain_conf.h  |    4 ++++
 src/qemu/qemu_command.c |   19 +++++++++++++++++--
 4 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index 5e140fb..a79ca6a 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -1025,6 +1025,16 @@
           <ref name="filterref-node-attributes"/>
         </element>
       </optional>
+      <optional>
+        <element name="tune">
+          <optional>
+            <!-- size of send buffer for network tap devices -->
+            <element name="sndbuf">
+              <ref name="unsignedInt"/>
+            </element>
+          </optional>
+        </element>
+      </optional>
     </interleave>
   </define>
   <define name="virtualPortProfile">
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 1cef112..396574e 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2282,6 +2282,7 @@ err_exit:
 static virDomainNetDefPtr
 virDomainNetDefParseXML(virCapsPtr caps,
                         xmlNodePtr node,
+                        xmlXPathContextPtr ctxt,
                         int flags ATTRIBUTE_UNUSED) {
     virDomainNetDefPtr def;
     xmlNodePtr cur;
@@ -2303,12 +2304,15 @@ virDomainNetDefParseXML(virCapsPtr caps,
     virNWFilterHashTablePtr filterparams = NULL;
     virVirtualPortProfileParams virtPort;
     bool virtPortParsed = false;
+    xmlNodePtr oldnode = ctxt->node;
 
     if (VIR_ALLOC(def) < 0) {
         virReportOOMError();
         return NULL;
     }
 
+    ctxt->node = node;
+
     type = virXMLPropString(node, "type");
     if (type != NULL) {
         if ((int)(def->type = virDomainNetTypeFromString(type)) < 0) {
@@ -2596,7 +2600,12 @@ virDomainNetDefParseXML(virCapsPtr caps,
         }
     }
 
+    if (virXPathULong("string(./tune/sndbuf)", ctxt, &def->tune.sndbuf) >= 0) {
+        def->tune.sndbuf_specified = true;
+    }
+
 cleanup:
+    ctxt->node = oldnode;
     VIR_FREE(macaddr);
     VIR_FREE(network);
     VIR_FREE(address);
@@ -4306,6 +4315,7 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps,
 {
     xmlDocPtr xml;
     xmlNodePtr node;
+    xmlXPathContextPtr ctxt = NULL;
     virDomainDeviceDefPtr dev = NULL;
 
     if (!(xml = xmlReadDoc(BAD_CAST xmlStr, "device.xml", NULL,
@@ -4322,6 +4332,13 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps,
         goto error;
     }
 
+    ctxt = xmlXPathNewContext(xml);
+    if (ctxt == NULL) {
+        virReportOOMError();
+        goto error;
+    }
+    ctxt->node = node;
+
     if (VIR_ALLOC(dev) < 0) {
         virReportOOMError();
         goto error;
@@ -4337,7 +4354,7 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps,
             goto error;
     } else if (xmlStrEqual(node->name, BAD_CAST "interface")) {
         dev->type = VIR_DOMAIN_DEVICE_NET;
-        if (!(dev->data.net = virDomainNetDefParseXML(caps, node, flags)))
+        if (!(dev->data.net = virDomainNetDefParseXML(caps, node, ctxt, flags)))
             goto error;
     } else if (xmlStrEqual(node->name, BAD_CAST "input")) {
         dev->type = VIR_DOMAIN_DEVICE_INPUT;
@@ -4375,11 +4392,12 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps,
     }
 
     xmlFreeDoc(xml);
-
+    xmlXPathFreeContext(ctxt);
     return dev;
 
   error:
     xmlFreeDoc(xml);
+    xmlXPathFreeContext(ctxt);
     VIR_FREE(dev);
     return NULL;
 }
@@ -5050,6 +5068,7 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
     for (i = 0 ; i < n ; i++) {
         virDomainNetDefPtr net = virDomainNetDefParseXML(caps,
                                                          nodes[i],
+                                                         ctxt,
                                                          flags);
         if (!net)
             goto error;
@@ -6318,6 +6337,12 @@ virDomainNetDefFormat(virBufferPtr buf,
         VIR_FREE(attrs);
     }
 
+    if (def->tune.sndbuf_specified) {
+        virBufferAddLit(buf,   "      <tune>\n");
+        virBufferVSprintf(buf, "        <sndbuf>%lu</sndbuf>\n", def->tune.sndbuf);
+        virBufferAddLit(buf,   "      </tune>\n");
+    }
+
     if (virDomainDeviceInfoFormat(buf, &def->info, flags) < 0)
         return -1;
 
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index a73fd14..1269ed6 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -346,6 +346,10 @@ struct _virDomainNetDef {
             virVirtualPortProfileParams virtPortProfile;
         } direct;
     } data;
+    struct {
+        bool sndbuf_specified;
+        unsigned long sndbuf;
+    } tune;
     char *ifname;
     virDomainDeviceInfo info;
     char *filter;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index a3b5ff3..c37fdb2 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1584,6 +1584,7 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
                     const char *tapfd,
                     const char *vhostfd)
 {
+    bool is_tap = false;
     virBuffer buf = VIR_BUFFER_INITIALIZER;
 
     switch (net->type) {
@@ -1593,6 +1594,7 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
         virBufferAddLit(&buf, "tap");
         virBufferVSprintf(&buf, "%cfd=%s", type_sep, tapfd);
         type_sep = ',';
+        is_tap = true;
         break;
 
     case VIR_DOMAIN_NET_TYPE_ETHERNET:
@@ -1606,6 +1608,7 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
                               net->data.ethernet.script);
             type_sep = ',';
         }
+        is_tap = true;
         break;
 
     case VIR_DOMAIN_NET_TYPE_CLIENT:
@@ -1659,8 +1662,11 @@ qemuBuildHostNetStr(virDomainNetDefPtr net,
                           type_sep, net->info.alias);
     }
 
-    if (vhostfd && *vhostfd) {
-        virBufferVSprintf(&buf, ",vhost=on,vhostfd=%s", vhostfd);
+    if (is_tap) {
+        if (vhostfd && *vhostfd)
+            virBufferVSprintf(&buf, ",vhost=on,vhostfd=%s", vhostfd);
+        if (net->tune.sndbuf_specified)
+            virBufferVSprintf(&buf, ",sndbuf=%lu", net->tune.sndbuf);
     }
 
     if (virBufferError(&buf)) {
@@ -4661,6 +4667,15 @@ qemuParseCommandLineNet(virCapsPtr caps,
             } else if (STREQ(keywords[i], "off")) {
                 def->backend = VIR_DOMAIN_NET_BACKEND_TYPE_QEMU;
             }
+        } else if (STREQ(keywords[i], "sndbuf") && values[i]) {
+            if (virStrToLong_ul(values[i], NULL, 10, &def->tune.sndbuf) < 0) {
+                qemuReportError(VIR_ERR_INTERNAL_ERROR,
+                                _("cannot parse sndbuf size in '%s'"), val);
+                virDomainNetDefFree(def);
+                def = NULL;
+                goto cleanup;
+            }
+            def->tune.sndbuf_specified = true;
         }
     }
 
-- 
1.7.3.4




More information about the libvir-list mailing list