[libvirt] [PATCH 1/6] unpriv_sgio: Add docs and rng schema for new XML unpriv_sgio

Osier Yang jyang at redhat.com
Mon Nov 26 10:21:37 UTC 2012


Since "rawio" and "unpriv_sgio" are only valid for "lun", this
groups them together. And since both of them intend to allow
the unprivileged user to use the SCSI commands, they must be
mutually exclusive. Actually "unpriv_sgio" supersedes "rawio", as
it confines the capability per-device, unlike "rawio", which gives
the domain process broad capability.
---
 docs/formatdomain.html.in     |   10 +++-
 docs/schemas/domaincommon.rng |   52 ++++++++++----
 src/conf/domain_conf.c        |   56 ++++++++++++----
 src/conf/domain_conf.h        |   11 +++
 src/libvirt_private.syms      |    4 +
 src/qemu/qemu_process.c       |   30 ++++++++
 src/util/util.c               |  153 +++++++++++++++++++++++++++++++++++++++++
 src/util/util.h               |    7 ++
 8 files changed, 293 insertions(+), 30 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 6a3b976..f3f6a9e 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -1395,7 +1395,15 @@
         rawio='yes', rawio capability will be enabled for all disks in
         the domain (because, in the case of QEMU, this capability can
         only be set on a per-process basis). This attribute is only
-        valid when device is "lun".
+        valid when device is "lun". NB, <code>rawio</code> gives
+        the domain process broad capability; to confine the capability
+        as much as possible, one should use <code>unpriv_sgio</code>
+        instead, which controls the capability per-device.
+        The optional <code>unpriv_sgio</code> attribute
+        (<span class="since">since 1.0.1</span>) indicates whether the
+        disk will allow unprivileged SG_IO; valid settings are "yes"
+        or "no" (defaults to "no"). Note that it is mutually exclusive
+        with attribute <code>rawio</code>.
         The optional <code>snapshot</code> attribute indicates the default
         behavior of the disk during disk snapshots: "internal"
         requires a file format such as qcow2 that can store both the
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 02ad477..7da571c 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -957,24 +957,44 @@
     -->
   <define name="disk">
     <element name="disk">
-      <optional>
-        <attribute name="device">
-          <choice>
-            <value>floppy</value>
-            <value>disk</value>
-            <value>cdrom</value>
-            <value>lun</value>
-          </choice>
-        </attribute>
-      </optional>
-      <optional>
-        <attribute name="rawio">
+      <choice>
+        <group>
+          <optional>
+            <attribute name="device">
+              <choice>
+                <value>floppy</value>
+                <value>disk</value>
+                <value>cdrom</value>
+              </choice>
+            </attribute>
+          </optional>
+        </group>
+        <group>
+          <optional>
+            <attribute name="device">
+              <value>lun</value>
+            </attribute>
+          </optional>
           <choice>
-            <value>yes</value>
-            <value>no</value>
+            <optional>
+              <attribute name="rawio">
+                <choice>
+                  <value>yes</value>
+                  <value>no</value>
+                </choice>
+              </attribute>
+            </optional>
+            <optional>
+              <attribute name="unpriv_sgio">
+                <choice>
+                  <value>yes</value>
+                  <value>no</value>
+                </choice>
+              </attribute>
+            </optional>
           </choice>
-        </attribute>
-      </optional>
+        </group>
+      </choice>
       <optional>
         <ref name="snapshot"/>
       </optional>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index ed8b53f..e57dbd0 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -236,6 +236,10 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST,
               "default",
               "native",
               "threads")
+VIR_ENUM_IMPL(virDomainDiskSGIO, VIR_DOMAIN_DISK_SGIO_LAST,
+              "default",
+              "yes",
+              "no")
 VIR_ENUM_IMPL(virDomainIoEventFd, VIR_DOMAIN_IO_EVENT_FD_LAST,
               "default",
               "on",
@@ -3515,6 +3519,7 @@ virDomainDiskDefParseXML(virCapsPtr caps,
     char *device = NULL;
     char *snapshot = NULL;
     char *rawio = NULL;
+    char *sgio = NULL;
     char *driverName = NULL;
     char *driverType = NULL;
     char *source = NULL;
@@ -3576,6 +3581,7 @@ virDomainDiskDefParseXML(virCapsPtr caps,
     snapshot = virXMLPropString(node, "snapshot");
 
     rawio = virXMLPropString(node, "rawio");
+    sgio = virXMLPropString(node, "sgio");
 
     cur = node->children;
     while (cur != NULL) {
@@ -3966,24 +3972,44 @@ virDomainDiskDefParseXML(virCapsPtr caps,
         def->snapshot = VIR_DOMAIN_SNAPSHOT_LOCATION_NONE;
     }
 
+    if (rawio && sgio) {
+        virReportError(VIR_ERR_XML_ERROR, "%s",
+                       _("rawio and sgio are exclusive"));
+        goto error;
+    }
+
+    /* Both attributes are only meaningful for device='lun' */
+    if ((rawio || sgio) && def->device != VIR_DOMAIN_DISK_DEVICE_LUN) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("rawio or sgio can be used only with device='lun'"));
+        goto error;
+    }
+
     if (rawio) {
         def->rawio_specified = true;
-        if (def->device == VIR_DOMAIN_DISK_DEVICE_LUN) {
-            if (STREQ(rawio, "yes")) {
-                def->rawio = 1;
-            } else if (STREQ(rawio, "no")) {
-                def->rawio = 0;
-            } else {
-                virReportError(VIR_ERR_XML_ERROR,
-                               _("unknown disk rawio setting '%s'"),
-                               rawio);
-                goto error;
-            }
+        if (STREQ(rawio, "yes")) {
+            def->rawio = 1;
+        } else if (STREQ(rawio, "no")) {
+            def->rawio = 0;
         } else {
-            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                           _("rawio can be used only with device='lun'"));
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("unknown disk rawio setting '%s'"),
+                           rawio);
+            goto error;
+        }
+    }
+
+    if (sgio) {
+        int sgioVal = 0;
+
+        if ((sgioVal = virDomainDiskSGIOTypeFromString(sgio)) < 0) {
+            virReportError(VIR_ERR_XML_ERROR,
+                           _("unknown disk sgio setting '%s'"),
+                           sgio);
             goto error;
         }
+
+        def->sgio = sgioVal;
     }
 
     if (bus) {
@@ -4220,6 +4246,7 @@ cleanup:
     VIR_FREE(type);
     VIR_FREE(snapshot);
     VIR_FREE(rawio);
+    VIR_FREE(sgio);
     VIR_FREE(target);
     VIR_FREE(source);
     VIR_FREE(tray);
@@ -11897,6 +11924,9 @@ virDomainDiskDefFormat(virBufferPtr buf,
             virBufferAddLit(buf, " rawio='no'");
         }
     }
+    if (def->sgio) /* omit the attribute for the "default" (0) setting */
+        virBufferAsprintf(buf, " sgio='%s'",
+                          virDomainDiskSGIOTypeToString(def->sgio));
     if (def->snapshot &&
         !(def->snapshot == VIR_DOMAIN_SNAPSHOT_LOCATION_NONE && def->readonly))
         virBufferAsprintf(buf, " snapshot='%s'",
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index c3e8c16..9e1a9bb 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -496,6 +496,14 @@ enum  virDomainDiskIo {
     VIR_DOMAIN_DISK_IO_LAST
 };
 
+enum virDomainDiskSGIO {
+    VIR_DOMAIN_DISK_SGIO_DEFAULT = 0, /* XML string "default" */
+    VIR_DOMAIN_DISK_SGIO_YES,         /* XML string "yes" */
+    VIR_DOMAIN_DISK_SGIO_NO,          /* XML string "no" */
+
+    VIR_DOMAIN_DISK_SGIO_LAST /* number of values; sizes the string table */
+};
+
 enum virDomainIoEventFd {
     VIR_DOMAIN_IO_EVENT_FD_DEFAULT = 0,
     VIR_DOMAIN_IO_EVENT_FD_ON,
@@ -607,6 +615,8 @@ struct _virDomainDiskDef {
     virStorageEncryptionPtr encryption;
     bool rawio_specified;
     int rawio; /* no = 0, yes = 1 */
+    int sgio;  /* enum virDomainDiskSGIO: default = 0, yes = 1, no = 2 */
+    int old_sgio; /* To record the old unpriv_sgio value, internally */
 
     size_t nseclabels;
     virSecurityDeviceLabelDefPtr *seclabels;
@@ -2197,6 +2207,7 @@ VIR_ENUM_DECL(virDomainDiskCache)
 VIR_ENUM_DECL(virDomainDiskErrorPolicy)
 VIR_ENUM_DECL(virDomainDiskProtocol)
 VIR_ENUM_DECL(virDomainDiskIo)
+VIR_ENUM_DECL(virDomainDiskSGIO)
 VIR_ENUM_DECL(virDomainDiskSecretType)
 VIR_ENUM_DECL(virDomainDiskTray)
 VIR_ENUM_DECL(virDomainIoEventFd)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 0115db1..7abaf27 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1257,6 +1257,9 @@ virFileWaitForDevices;
 virFileWriteStr;
 virFindFileInPath;
 virFormatIntDecimal;
+virGetDeviceMajor;
+virGetDeviceMinor;
+virGetDeviceSGIO;
 virGetGroupID;
 virGetGroupName;
 virGetHostname;
@@ -1275,6 +1278,7 @@ virPipeReadUntilEOF;
 virScaleInteger;
 virSetBlocking;
 virSetCloseExec;
+virSetDeviceSGIO;
 virSetInherit;
 virSetNonBlock;
 virSetUIDGID;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 3d7a5a0..98096c5 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3703,6 +3703,25 @@ int qemuProcessStart(virConnectPtr conn,
             virCommandAllowCap(cmd, CAP_SYS_RAWIO);
     }
 
+    /* Set unpriv_sgio for disks */
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virDomainDiskDefPtr disk = vm->def->disks[i];
+        int old_sgio;
+
+        if (!disk->sgio)
+            continue;
+
+        if (virGetDeviceSGIO(disk->src, &old_sgio) < 0)
+            goto cleanup;
+
+        disk->old_sgio = old_sgio;
+
+        if (virSetDeviceSGIO(disk->src,
+                             (disk->sgio == VIR_DOMAIN_DISK_SGIO_YES)
+                             ? 1 : 0) < 0)
+            goto cleanup;
+    }
+
     virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
 
     virCommandSetOutputFD(cmd, &logfile);
@@ -4093,6 +4112,17 @@ void qemuProcessStop(struct qemud_driver *driver,
                                           flags & VIR_QEMU_PROCESS_STOP_MIGRATED);
     virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
 
+    /* Restore disk's unpriv_sgio */
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virDomainDiskDefPtr disk = vm->def->disks[i];
+
+        if (!disk->sgio)
+            continue;
+
+        if (virSetDeviceSGIO(disk->src, disk->old_sgio) < 0)
+            VIR_WARN("Unable to restore unpriv_sgio for disk '%s'", disk->src);
+    }
+
     /* Clear out dynamically assigned labels */
     for (i = 0; i < vm->def->nseclabels; i++) {
         if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) {
diff --git a/src/util/util.c b/src/util/util.c
index 2fd0f2c..746f3e9 100644
--- a/src/util/util.c
+++ b/src/util/util.c
@@ -3113,3 +3113,156 @@ virValidateWWN(const char *wwn) {
 
     return true;
 }
+
+#if defined(major) && defined(minor)
+/* Return the major number of block device @path, or a negative errno
+ * value: -errno when stat() fails, -EINVAL when @path is not a block
+ * device. */
+int
+virGetDeviceMajor(const char *path)
+{
+    struct stat st;
+
+    if (stat(path, &st) < 0)
+        return -errno;
+
+    return S_ISBLK(st.st_mode) ? (int) major(st.st_rdev) : -EINVAL;
+}
+
+/* Return the minor number of block device @path, or a negative errno
+ * value: -errno when stat() fails, -EINVAL when @path is not a block
+ * device. */
+int
+virGetDeviceMinor(const char *path)
+{
+    struct stat st;
+
+    if (stat(path, &st) < 0)
+        return -errno;
+
+    return S_ISBLK(st.st_mode) ? (int) minor(st.st_rdev) : -EINVAL;
+}
+#else
+int
+virGetDeviceMajor(const char *path)
+{
+    return -ENOSYS; /* fallback: major()/minor() macros are not defined */
+}
+
+int
+virGetDeviceMinor(const char *path)
+{
+    return -ENOSYS; /* fallback: major()/minor() macros are not defined */
+}
+#endif
+
+/* Store @sgio in the queue/unpriv_sgio sysfs attribute of block device
+ * @path.  Returns 0 on success, -1 on failure. */
+int
+virSetDeviceSGIO(const char *path, int sgio)
+{
+    char *attr = NULL;
+    char *text = NULL;
+    int devmajor, devminor, rc;
+    int ret = -1;
+
+    if ((devmajor = virGetDeviceMajor(path)) < 0) {
+        virReportSystemError(-devmajor,
+                             _("Unable to get major number of device '%s'"),
+                             path);
+        return -1;
+    }
+
+    if ((devminor = virGetDeviceMinor(path)) < 0) {
+        virReportSystemError(-devminor,
+                             _("Unable to get minor number of device '%s'"),
+                             path);
+        return -1;
+    }
+
+    if (virAsprintf(&attr, "/sys/dev/block/%d:%d/queue/unpriv_sgio",
+                    devmajor, devminor) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+
+    /* A missing attribute file is reported as lack of kernel support */
+    if (!virFileExists(attr)) {
+        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                       _("unpriv_sgio is not supported by this kernel"));
+        goto cleanup;
+    }
+
+    if (virAsprintf(&text, "%d", sgio) < 0) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    if ((rc = virFileWriteStr(attr, text, 0)) < 0) {
+        virReportSystemError(-rc, _("failed to set %s"), attr);
+        goto cleanup;
+    }
+
+    ret = 0;
+cleanup:
+    VIR_FREE(text);
+    VIR_FREE(attr);
+    return ret;
+}
+
+/* Read the queue/unpriv_sgio sysfs attribute of block device @path and
+ * store its integer value in @sgio.  Returns 0 on success, -1 on failure. */
+int
+virGetDeviceSGIO(const char *path, int *sgio)
+{
+    char *sysfs_path = NULL;
+    char *buf = NULL;
+    char *tmp = NULL;
+    int major;
+    int minor;
+    int ret = -1;
+
+    if ((major = virGetDeviceMajor(path)) < 0) {
+        virReportSystemError(-major,
+                             _("Unable to get major number of device '%s'"),
+                             path);
+        return -1;
+    }
+
+    if ((minor = virGetDeviceMinor(path)) < 0) {
+        virReportSystemError(-minor,
+                             _("Unable to get minor number of device '%s'"),
+                             path);
+        return -1;
+    }
+
+    if (virAsprintf(&sysfs_path, "/sys/dev/block/%d:%d/queue/unpriv_sgio",
+                    major, minor) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+
+    if (!virFileExists(sysfs_path)) {
+        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                       _("unpriv_sgio is not supported by this kernel"));
+        goto cleanup;
+    }
+
+    if (virFileReadAll(sysfs_path, 1024, &buf) < 0)
+        goto cleanup;
+    /* Cut the text at the first newline before parsing it as a number */
+    if ((tmp = strchr(buf, '\n')))
+        *tmp = '\0';
+
+    if (virStrToLong_i(buf, NULL, 10, sgio) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("failed to parse value of %s"), sysfs_path);
+        goto cleanup;
+    }
+
+    ret = 0;
+cleanup:
+    VIR_FREE(sysfs_path);
+    VIR_FREE(buf);
+    return ret;
+}
diff --git a/src/util/util.h b/src/util/util.h
index 4316ab1..0bd9f79 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -280,4 +280,11 @@ bool virIsDevMapperDevice(const char *dev_name) ATTRIBUTE_NONNULL(1);
 
 bool virValidateWWN(const char *wwn);
 
+int virGetDeviceMajor(const char *path);
+int virGetDeviceMinor(const char *path);
+int virSetDeviceSGIO(const char *path,
+                     int sgio);
+int virGetDeviceSGIO(const char *path,
+                     int *sgio);
+
 #endif /* __VIR_UTIL_H__ */
-- 
1.7.7.6




More information about the libvir-list mailing list