[libvirt] [PATCHv6 3/7] Support block I/O throttle in XML

Eric Blake eblake at redhat.com
Wed Nov 23 21:44:45 UTC 2011


From: Lei Li <lilei at linux.vnet.ibm.com>

Enable per-disk block I/O throttling in the domain XML, as the
first per-disk I/O tuning parameter.

Signed-off-by: Lei Li <lilei at linux.vnet.ibm.com>
Signed-off-by: Zhi Yong Wu <wuzhy at linux.vnet.ibm.com>
Signed-off-by: Eric Blake <eblake at redhat.com>
---
 docs/formatdomain.html.in     |   39 +++++++++++++
 docs/schemas/domaincommon.rng |  122 +++++++++++++++++++++++++++++------------
 src/conf/domain_conf.c        |   90 ++++++++++++++++++++++++++++++-
 src/conf/domain_conf.h        |   14 +++++
 4 files changed, 228 insertions(+), 37 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 61123ac..f45d0ce 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -922,6 +922,11 @@
       <driver name="tap" type="aio" cache="default"/>
       <source file='/var/lib/xen/images/fv0'/ startupPolicy='optional'>
       <target dev='hda' bus='ide'/>
+      <iotune>
+        <total_bytes_sec>10000000</total_bytes_sec>
+        <read_iops_sec>400000</read_iops_sec>
+        <write_iops_sec>100000</write_iops_sec>
+      </iotune>
       <boot order='2'/>
       <encryption type='...'>
         ...
@@ -1039,6 +1044,40 @@
         <span class="since">Since 0.0.3; <code>bus</code> attribute since 0.4.3;
         "usb" attribute value since after 0.4.4; "sata" attribute value since
         0.9.7</span></dd>
+      <dt><code>iotune</code></dt>
+      <dd>The optional <code>iotune</code> element allows
+        specifying additional per-device I/O tuning, with
+        values that can vary for each device (contrast this to
+        the <a href="#elementsBlockTuning"><code><blkiotune></code></a>
+        element, which applies globally to the domain).  Currently,
+        the only tuning available is Block I/O throttling for qemu.
+        This element has optional sub-elements; any sub-element not
+        specified or given with a value of 0 implies no
+        limit.  <span class="since">Since 0.9.8</span>
+        <dl>
+          <dt><code>total_bytes_sec</code></dt>
+          <dd>The optional <code>total_bytes_sec</code> element is the
+            total throughput limit in bytes per second.  This cannot
+            appear with <code>read_bytes_sec</code>
+            or <code>write_bytes_sec</code>.</dd>
+          <dt><code>read_bytes_sec</code></dt>
+          <dd>The optional <code>read_bytes_sec</code> element is the
+            read throughput limit in bytes per second.</dd>
+          <dt><code>write_bytes_sec</code></dt>
+          <dd>The optional <code>write_bytes_sec</code> element is the
+            write throughput limit in bytes per second.</dd>
+          <dt><code>total_iops_sec</code></dt>
+          <dd>The optional <code>total_iops_sec</code> element is the
+            total I/O operations per second.  This cannot
+            appear with <code>read_iops_sec</code>
+            or <code>write_iops_sec</code>.</dd>
+          <dt><code>read_iops_sec</code></dt>
+          <dd>The optional <code>read_iops_sec</code> element is the
+            read I/O operations per second.</dd>
+          <dt><code>write_iops_sec</code></dt>
+          <dd>The optional <code>write_iops_sec</code> element is the
+            write I/O operations per second.</dd>
+        </dl></dd>
       <dt><code>driver</code></dt>
       <dd>
         The optional driver element allows specifying further details
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 8968ee6..bb6d94d 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -599,42 +599,47 @@
     </element>
   </define>
   <define name="diskspec">
-    <optional>
-      <ref name="driver"/>
-    </optional>
-    <optional>
-      <ref name="diskAuth"/>
-    </optional>
-    <ref name="target"/>
-    <optional>
-      <ref name="deviceBoot"/>
-    </optional>
-    <optional>
-      <element name="readonly">
-        <empty/>
-      </element>
-    </optional>
-    <optional>
-      <element name="shareable">
-        <empty/>
-      </element>
-    </optional>
-    <optional>
-      <element name="transient">
-        <empty/>
-      </element>
-    </optional>
-    <optional>
-      <element name="serial">
-        <ref name="diskSerial"/>
-      </element>
-    </optional>
-    <optional>
-      <ref name="encryption"/>
-    </optional>
-    <optional>
-      <ref name="address"/>
-    </optional>
+    <interleave>
+      <optional>
+        <ref name="driver"/>
+      </optional>
+      <optional>
+        <ref name="diskAuth"/>
+      </optional>
+      <ref name="target"/>
+      <optional>
+        <ref name="deviceBoot"/>
+      </optional>
+      <optional>
+        <element name="readonly">
+          <empty/>
+        </element>
+      </optional>
+      <optional>
+        <element name="shareable">
+          <empty/>
+        </element>
+      </optional>
+      <optional>
+        <element name="transient">
+          <empty/>
+        </element>
+      </optional>
+      <optional>
+        <element name="serial">
+          <ref name="diskSerial"/>
+        </element>
+      </optional>
+      <optional>
+        <ref name="encryption"/>
+      </optional>
+      <optional>
+        <ref name="diskIoTune"/>
+      </optional>
+      <optional>
+        <ref name="address"/>
+      </optional>
+    </interleave>
   </define>
   <define name="snapshot">
     <attribute name="snapshot">
@@ -2596,6 +2601,51 @@
     </element>
   </define>

+  <define name='diskIoTune'>  <!-- content model of a disk's iotune element -->
+    <element name="iotune">
+      <interleave>  <!-- the bytes_sec and iops_sec settings may come in any order -->
+        <choice>  <!-- either total_bytes_sec alone, or any of read/write_bytes_sec -->
+          <element name="total_bytes_sec">
+            <data type="unsignedLong"/>
+          </element>
+          <group>
+            <interleave>
+              <optional>
+                <element name="read_bytes_sec">
+                  <data type="unsignedLong"/>
+                </element>
+              </optional>
+              <optional>
+                <element name="write_bytes_sec">
+                  <data type="unsignedLong"/>
+                </element>
+              </optional>
+            </interleave>
+          </group>
+        </choice>
+        <choice>  <!-- either total_iops_sec alone, or any of read/write_iops_sec -->
+          <element name="total_iops_sec">
+            <data type="unsignedLong"/>
+          </element>
+          <group>
+            <interleave>
+              <optional>
+                <element name="read_iops_sec">
+                  <data type="unsignedLong"/>
+                </element>
+              </optional>
+              <optional>
+                <element name="write_iops_sec">
+                  <data type="unsignedLong"/>
+                </element>
+              </optional>
+            </interleave>
+          </group>
+        </choice>
+      </interleave>
+    </element>
+  </define>
+
   <!--
        Optional hypervisor extensions in their own namespace:
          QEmu
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index d365cee..a2702c5 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2394,6 +2394,7 @@ cleanup:
 static virDomainDiskDefPtr
 virDomainDiskDefParseXML(virCapsPtr caps,
                          xmlNodePtr node,
+                         xmlXPathContextPtr ctxt,
                          virBitmapPtr bootMap,
                          unsigned int flags)
 {
@@ -2594,6 +2595,50 @@ virDomainDiskDefParseXML(virCapsPtr caps,
                     }
                     child = child->next;
                 }
+            } else if (xmlStrEqual(cur->name, BAD_CAST "iotune")) {
+                /* Evaluate the queries relative to this disk's own <iotune>
+                 * node (cur) so that each disk reads its own limits; the
+                 * context node is restored before any error exit below. */
+                xmlNodePtr save_ctxt = ctxt->node;
+                ctxt->node = cur;
+                if (virXPathULongLong("string(./total_bytes_sec)",
+                                      ctxt, &def->blkdeviotune.total_bytes_sec) < 0) {
+                    def->blkdeviotune.total_bytes_sec = 0;
+                }
+                if (virXPathULongLong("string(./read_bytes_sec)",
+                                      ctxt, &def->blkdeviotune.read_bytes_sec) < 0) {
+                    def->blkdeviotune.read_bytes_sec = 0;
+                }
+                if (virXPathULongLong("string(./write_bytes_sec)",
+                                      ctxt, &def->blkdeviotune.write_bytes_sec) < 0) {
+                    def->blkdeviotune.write_bytes_sec = 0;
+                }
+                if (virXPathULongLong("string(./total_iops_sec)",
+                                      ctxt, &def->blkdeviotune.total_iops_sec) < 0) {
+                    def->blkdeviotune.total_iops_sec = 0;
+                }
+                if (virXPathULongLong("string(./read_iops_sec)",
+                                      ctxt, &def->blkdeviotune.read_iops_sec) < 0) {
+                    def->blkdeviotune.read_iops_sec = 0;
+                }
+                if (virXPathULongLong("string(./write_iops_sec)",
+                                      ctxt, &def->blkdeviotune.write_iops_sec) < 0) {
+                    def->blkdeviotune.write_iops_sec = 0;
+                }
+                ctxt->node = save_ctxt;
+
+                if (def->blkdeviotune.total_bytes_sec &&
+                    (def->blkdeviotune.read_bytes_sec || def->blkdeviotune.write_bytes_sec)) {
+                    virDomainReportError(VIR_ERR_XML_ERROR,
+                                         _("total and read/write bytes_sec cannot be set at the same time"));
+                    goto error;
+                }
+                if (def->blkdeviotune.total_iops_sec &&
+                    (def->blkdeviotune.read_iops_sec || def->blkdeviotune.write_iops_sec)) {
+                    virDomainReportError(VIR_ERR_XML_ERROR,
+                                         _("total and read/write iops_sec cannot be set at the same time"));
+                    goto error;
+                }
             } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) {
                 def->readonly = 1;
             } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) {
@@ -6078,7 +6123,7 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps,

     if (xmlStrEqual(node->name, BAD_CAST "disk")) {
         dev->type = VIR_DOMAIN_DEVICE_DISK;
-        if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node,
+        if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node, ctxt,
                                                         NULL, flags)))
             goto error;
     } else if (xmlStrEqual(node->name, BAD_CAST "lease")) {
@@ -7148,6 +7193,7 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
     for (i = 0 ; i < n ; i++) {
         virDomainDiskDefPtr disk = virDomainDiskDefParseXML(caps,
                                                             nodes[i],
+                                                            ctxt,
                                                             bootMap,
                                                             flags);
         if (!disk)
@@ -9589,6 +9635,48 @@ virDomainDiskDefFormat(virBufferPtr buf,
     virBufferAsprintf(buf, "      <target dev='%s' bus='%s'/>\n",
                       def->dst, bus);

+    /* Disk I/O throttling: emit <iotune> only if at least one limit is
+     * set; limits left at 0 are not written out. */
+    if (def->blkdeviotune.total_bytes_sec ||
+        def->blkdeviotune.read_bytes_sec ||
+        def->blkdeviotune.write_bytes_sec ||
+        def->blkdeviotune.total_iops_sec ||
+        def->blkdeviotune.read_iops_sec ||
+        def->blkdeviotune.write_iops_sec) {
+        virBufferAddLit(buf, "      <iotune>\n");
+        if (def->blkdeviotune.total_bytes_sec) {
+            virBufferAsprintf(buf, "        <total_bytes_sec>%llu</total_bytes_sec>\n",
+                              def->blkdeviotune.total_bytes_sec);
+        }
+
+        if (def->blkdeviotune.read_bytes_sec) {
+            virBufferAsprintf(buf, "        <read_bytes_sec>%llu</read_bytes_sec>\n",
+                              def->blkdeviotune.read_bytes_sec);
+        }
+
+        if (def->blkdeviotune.write_bytes_sec) {
+            virBufferAsprintf(buf, "        <write_bytes_sec>%llu</write_bytes_sec>\n",
+                              def->blkdeviotune.write_bytes_sec);
+        }
+
+        if (def->blkdeviotune.total_iops_sec) {
+            virBufferAsprintf(buf, "        <total_iops_sec>%llu</total_iops_sec>\n",
+                              def->blkdeviotune.total_iops_sec);
+        }
+
+        if (def->blkdeviotune.read_iops_sec) {
+            virBufferAsprintf(buf, "        <read_iops_sec>%llu</read_iops_sec>\n",
+                              def->blkdeviotune.read_iops_sec);
+        }
+
+        if (def->blkdeviotune.write_iops_sec) {
+            virBufferAsprintf(buf, "        <write_iops_sec>%llu</write_iops_sec>\n",
+                              def->blkdeviotune.write_iops_sec);
+        }
+
+        virBufferAddLit(buf, "      </iotune>\n");
+    }
+
     if (def->bootIndex)
         virBufferAsprintf(buf, "      <boot order='%d'/>\n", def->bootIndex);
     if (def->readonly)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 7511178..ff6921a 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -313,6 +313,17 @@ enum virDomainDiskSecretType {
     VIR_DOMAIN_DISK_SECRET_TYPE_LAST
 };

+typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo;
+struct _virDomainBlockIoTuneInfo {  /* per-disk <iotune> limits; 0 means no limit */
+    unsigned long long total_bytes_sec;  /* total throughput limit, bytes per second */
+    unsigned long long read_bytes_sec;   /* read throughput limit, bytes per second */
+    unsigned long long write_bytes_sec;  /* write throughput limit, bytes per second */
+    unsigned long long total_iops_sec;   /* total I/O operations per second */
+    unsigned long long read_iops_sec;    /* read I/O operations per second */
+    unsigned long long write_iops_sec;   /* write I/O operations per second */
+};
+typedef virDomainBlockIoTuneInfo *virDomainBlockIoTuneInfoPtr;
+
 /* Stores the virtual disk configuration */
 typedef struct _virDomainDiskDef virDomainDiskDef;
 typedef virDomainDiskDef *virDomainDiskDefPtr;
@@ -335,6 +346,9 @@ struct _virDomainDiskDef {
     } auth;
     char *driverName;
     char *driverType;
+
+    virDomainBlockIoTuneInfo blkdeviotune;
+
     char *serial;
     int cachemode;
     int error_policy;  /* enum virDomainDiskErrorPolicy */
-- 
1.7.7.3




More information about the libvir-list mailing list