[libvirt] [RFC PATCH] threshold: new API virDomainBlockSetWriteThreshold

Eric Blake eblake at redhat.com
Sun May 24 03:45:56 UTC 2015


qemu 2.3 added a new QMP command block-set-write-threshold,
which allows callers to get an interrupt when a file hits a
write threshold, rather than the current approach of repeatedly
polling for file allocation.  This patch prepares the API for
callers to register to receive the event, as well as a way
to query the threshold.

The event is one-shot in qemu - a guest must re-register a new
threshold after each time it triggers.  However, the
virConnectDomainEventRegisterAny() call does not allow
parameterization, so callers must use a pair of APIs - one
to register the callback (one-time call), and another to
repeatedly set the desired threshold (repeated each time a
threshold changes).

Note that the threshold is registered as a double, but reported
as an unsigned long long.  This is intentional, as it allows
the use of a flag for registering a threshold via a percentage,
but once registered, the threshold is normalized according to
the current size of the disk.

To make the patch series more digestible, this patch
intentionally omits remote support, by using a couple of
placeholders at a point where the compiler forces the addition
of a case within a switch statement.

Signed-off-by: Eric Blake <eblake at redhat.com>
---

Posting now to get feedback on the proposed API, before the actual
implementation is complete.

 daemon/remote.c                  |   2 +
 include/libvirt/libvirt-domain.h |  48 +++++++++++++++++++
 src/conf/domain_event.c          |   4 +-
 src/driver-hypervisor.h          |   7 +++
 src/libvirt-domain.c             | 101 +++++++++++++++++++++++++++++++++++++++
 src/libvirt_public.syms          |   1 +
 tools/virsh-domain.c             |  23 +++++++++
 tools/virsh.pod                  |   1 +
 8 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/daemon/remote.c b/daemon/remote.c
index e259a76..51d7de5 100644
--- a/daemon/remote.c
+++ b/daemon/remote.c
@@ -1102,6 +1102,8 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventTunable),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventAgentLifecycle),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceAdded),
+    /* TODO: Implement RPC support for this */
+    VIR_DOMAIN_EVENT_CALLBACK(NULL),
 };

 verify(ARRAY_CARDINALITY(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST);
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index d851225..0c4fd62 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -515,6 +515,15 @@ typedef virDomainBlockStatsStruct *virDomainBlockStatsPtr;
 # define VIR_DOMAIN_BLOCK_STATS_ERRS "errs"

 /**
+ * VIR_DOMAIN_BLOCK_STATS_WRITE_THRESHOLD:
+ *
+ * Macro represents the typed parameter, as an llong, that reports
+ * the threshold in bytes at which the block device will trigger a
+ * VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD, or 0 if no threshold is registered.
+ */
+# define VIR_DOMAIN_BLOCK_STATS_WRITE_THRESHOLD "write-threshold"
+
+/**
  * virDomainInterfaceStats:
  *
  * Network interface stats for virDomainInterfaceStats.
@@ -1297,6 +1306,16 @@ int                     virDomainBlockStatsFlags (virDomainPtr dom,
                                                   virTypedParameterPtr params,
                                                   int *nparams,
                                                   unsigned int flags);
+
+typedef enum {
+    /* threshold is a percentage rather than byte limit */
+    VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PERCENTAGE = (1 << 0),
+} virDomainBlockSetWriteThresholdFlags;
+int virDomainBlockSetWriteThreshold(virDomainPtr dom,
+                                    const char *disk,
+                                    double threshold,
+                                    unsigned int flags);
+
 int                     virDomainInterfaceStats (virDomainPtr dom,
                                                  const char *path,
                                                  virDomainInterfaceStatsPtr stats,
@@ -3246,6 +3265,34 @@ typedef void (*virConnectDomainEventDeviceAddedCallback)(virConnectPtr conn,
                                                          void *opaque);

 /**
+ * virConnectDomainEventWriteThresholdCallback:
+ * @conn: connection object
+ * @dom: domain on which the event occurred
+ * @devAlias: device alias
+ * @threshold: threshold that was exceeded, in bytes
+ * @length: length beyond @threshold that was involved in the triggering write
+ * @opaque: application specified data
+ *
+ * The callback signature to use when registering for an event of type
+ * VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD with virConnectDomainEventRegisterAny()
+ *
+ * This callback occurs when a block device detects a write event that
+ * exceeds a non-zero threshold set by
+ * virDomainBlockSetWriteThreshold().  When this event occurs, the
+ * threshold is reset to 0, and a new limit must be installed to see
+ * the event again on the same device.  The intent of this event is to
+ * allow time for the underlying storage to be resized dynamically
+ * prior to the point where the guest would be paused due to running
+ * out of space, without having to poll for allocation values.
+ */
+typedef void (*virConnectDomainEventWriteThresholdCallback)(virConnectPtr conn,
+                                                            virDomainPtr dom,
+                                                            const char *devAlias,
+                                                            unsigned long long threshold,
+                                                            unsigned long long length,
+                                                            void *opaque);
+
+/**
  * VIR_DOMAIN_TUNABLE_CPU_VCPUPIN:
  *
  * Macro represents formatted pinning for one vcpu specified by id which is
@@ -3528,6 +3575,7 @@ typedef enum {
     VIR_DOMAIN_EVENT_ID_TUNABLE = 17,        /* virConnectDomainEventTunableCallback */
     VIR_DOMAIN_EVENT_ID_AGENT_LIFECYCLE = 18,/* virConnectDomainEventAgentLifecycleCallback */
     VIR_DOMAIN_EVENT_ID_DEVICE_ADDED = 19,   /* virConnectDomainEventDeviceAddedCallback */
+    VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD = 20, /* virConnectDomainEventWriteThreshold */

 # ifdef VIR_ENUM_SENTINELS
     VIR_DOMAIN_EVENT_ID_LAST
diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c
index 20d66e1..c43799f 100644
--- a/src/conf/domain_event.c
+++ b/src/conf/domain_event.c
@@ -1,7 +1,7 @@
 /*
  * domain_event.c: domain event queue processing helpers
  *
- * Copyright (C) 2010-2014 Red Hat, Inc.
+ * Copyright (C) 2010-2015 Red Hat, Inc.
  * Copyright (C) 2008 VirtualIron
  * Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany.
  *
@@ -1614,6 +1614,8 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
             goto cleanup;
         }

+    case VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD:
+        /* TODO: Implement RPC support for this */
     case VIR_DOMAIN_EVENT_ID_LAST:
         break;
     }
diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h
index 3275343..81b80d3 100644
--- a/src/driver-hypervisor.h
+++ b/src/driver-hypervisor.h
@@ -484,6 +484,12 @@ typedef int
                                unsigned int flags);

 typedef int
+(*virDrvDomainBlockSetWriteThreshold)(virDomainPtr domain,
+                                      const char *disk,
+                                      double threshold,
+                                      unsigned int flags);
+
+typedef int
 (*virDrvDomainInterfaceStats)(virDomainPtr domain,
                               const char *path,
                               virDomainInterfaceStatsPtr stats);
@@ -1324,6 +1330,7 @@ struct _virHypervisorDriver {
     virDrvDomainBlockResize domainBlockResize;
     virDrvDomainBlockStats domainBlockStats;
     virDrvDomainBlockStatsFlags domainBlockStatsFlags;
+    virDrvDomainBlockSetWriteThreshold domainBlockSetWriteThreshold;
     virDrvDomainInterfaceStats domainInterfaceStats;
     virDrvDomainSetInterfaceParameters domainSetInterfaceParameters;
     virDrvDomainGetInterfaceParameters domainGetInterfaceParameters;
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 2edba1a..a211de0 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -5743,6 +5743,105 @@ virDomainBlockStatsFlags(virDomainPtr dom,


 /**
+ * virDomainBlockSetWriteThreshold:
+ * @dom: pointer to domain object
+ * @disk: path to the block device, or device shorthand
+ * @threshold: limit at which a write threshold event can trigger
+ * @flags: bitwise-OR of virDomainBlockSetWriteThresholdFlags
+ *
+ * This function is used to set a one-shot write threshold.  It should
+ * be used in tandem with virConnectDomainEventRegisterAny()
+ * installing a handler for VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD.  If
+ * the hypervisor detects that a write request (whether guest data, or
+ * host metadata) would exceed the host byte offset specified in
+ * @threshold, then an event is raised, and the threshold is reset to
+ * 0 at that time.  The event registration is only needed once, but
+ * this function must be called each time a new threshold is desired;
+ * the event will only fire if a non-zero threshold is
+ * exceeded.
+ *
+ * By default, @threshold is specified in bytes, and must not exceed
+ * the size of the block device.  However, when @flags includes
+ * VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PERCENTAGE, @threshold is
+ * instead a value between 0.0 and 100.0, and the driver will compute
+ * a byte value based on the current size of the disk.  A driver may
+ * round the requested threshold to a granularity that can actually
+ * be supported.
+ *
+ * Setting a threshold allows event-based resizing of host resources
+ * that back a guest disk without having to poll the current disk
+ * allocation, while still having enough time to complete the resize
+ * before the guest would end up halting due to insufficient space.
+ * Calling this function to set the threshold back to zero will stop
+ * further firing of the event.  virDomainBlockStatsFlags() and
+ * virConnectGetAllDomainStats() can be used to track the current
+ * threshold value, always in the form normalized to bytes.
+ *
+ * The @disk parameter is either the device target shorthand (the
+ * <target dev='...'/> sub-element, such as "vda"), or (since 0.9.8)
+ * an unambiguous source name of the block device (the <source
+ * file='...'/> sub-element, such as "/path/to/image").  Valid names
+ * can be found by calling virDomainGetXMLDesc() and inspecting
+ * elements within //domain/devices/disk. Some drivers might also
+ * accept strings such as "vda[1]" for setting the threshold of a
+ * backing image, useful when doing a block commit into the backing
+ * image.
+ *
+ * Domains may have more than one block device.  To set thresholds for
+ * each you should make multiple calls to this function.  If write
+ * thresholds are not supported, an application will have to instead
+ * poll virDomainGetBlockInfo() or similar to track allocation.
+ *
+ * Returns -1 in case of error, 0 in case of success.
+ */
+int
+virDomainBlockSetWriteThreshold(virDomainPtr dom,
+                                const char *disk,
+                                double threshold,
+                                unsigned int flags)
+{
+    virConnectPtr conn;
+
+    VIR_DOMAIN_DEBUG(dom, "disk=%s, threshold=%g, flags=%x",
+                     disk, threshold, flags);
+
+    virResetLastError();
+
+    virCheckDomainReturn(dom, -1);
+    virCheckNonNullArgGoto(disk, error);
+    virCheckNonNegativeArgGoto(threshold, error);
+    if (flags & VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PERCENTAGE) {
+        if (threshold > 100.0)
+            virReportError(VIR_ERR_INVALID_ARG,
+                           _("threshold in %s must be a percentage"),
+                           __FUNCTION__);
+        goto error;
+    } else {
+        if (threshold > LLONG_MAX)
+            virReportError(VIR_ERR_INVALID_ARG,
+                           _("threshold in %s must fit in unsigned long long"),
+                           __FUNCTION__);
+        goto error;
+    }
+    conn = dom->conn;
+
+    if (conn->driver->domainBlockSetWriteThreshold) {
+        int ret;
+        ret = conn->driver->domainBlockSetWriteThreshold(dom, disk, threshold,
+                                                         flags);
+        if (ret < 0)
+            goto error;
+        return ret;
+    }
+    virReportUnsupportedError();
+
+ error:
+    virDispatchError(dom->conn);
+    return -1;
+}
+
+
+/**
  * virDomainInterfaceStats:
  * @dom: pointer to the domain object
  * @path: path to the interface
@@ -11201,6 +11300,8 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
  *                          unsigned long long.
  * "block.<num>.errors" - Xen only: the 'oo_req' value as
  *                        unsigned long long.
+ * "block.<num>.write-threshold" - byte at which a write threshold event
+ *                                 will fire, as unsigned long long.
  * "block.<num>.allocation" - offset of the highest written sector
  *                            as unsigned long long.
  * "block.<num>.capacity" - logical size in bytes of the block device backing
diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms
index 716dd2f..5131fcf 100644
--- a/src/libvirt_public.syms
+++ b/src/libvirt_public.syms
@@ -712,6 +712,7 @@ LIBVIRT_1.2.15 {

 LIBVIRT_1.2.16 {
     global:
+        virDomainBlockSetWriteThreshold;
         virDomainSetUserPassword;
 } LIBVIRT_1.2.15;

diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 91a1ca2..98db6a3 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -12150,6 +12150,27 @@ vshEventDeviceAddedPrint(virConnectPtr conn ATTRIBUTE_UNUSED,
 }

 static void
+vshEventWriteThresholdPrint(virConnectPtr conn ATTRIBUTE_UNUSED,
+                            virDomainPtr dom,
+                            const char *alias,
+                            unsigned long long threshold,
+                            unsigned long long length,
+                            void *opaque)
+{
+    vshDomEventData *data = opaque;
+
+    if (!data->loop && *data->count)
+        return;
+    vshPrint(data->ctl,
+             _("event 'write-threshold' for domain %s disk %s: threshold %lld "
+               "exceeded by %lld bytes\n"),
+             virDomainGetName(dom), alias, threshold, length);
+    (*data->count)++;
+    if (!data->loop)
+        vshEventDone(data->ctl);
+}
+
+static void
 vshEventTunablePrint(virConnectPtr conn ATTRIBUTE_UNUSED,
                      virDomainPtr dom,
                      virTypedParameterPtr params,
@@ -12256,6 +12277,8 @@ static vshEventCallback vshEventCallbacks[] = {
       VIR_DOMAIN_EVENT_CALLBACK(vshEventAgentLifecyclePrint), },
     { "device-added",
       VIR_DOMAIN_EVENT_CALLBACK(vshEventDeviceAddedPrint), },
+    { "write-threshold",
+      VIR_DOMAIN_EVENT_CALLBACK(vshEventWriteThresholdPrint), },
 };
 verify(VIR_DOMAIN_EVENT_ID_LAST == ARRAY_CARDINALITY(vshEventCallbacks));

diff --git a/tools/virsh.pod b/tools/virsh.pod
index d588e5a..14dd2c3 100644
--- a/tools/virsh.pod
+++ b/tools/virsh.pod
@@ -919,6 +919,7 @@ local file or block device,
 "block.<num>.fl.reqs" - total flush requests,
 "block.<num>.fl.times" - total time (ns) spent on cache flushing,
 "block.<num>.errors" - Xen only: the 'oo_req' value,
+"block.<num>.write-threshold" - write threshold event trigger, in bytes,
 "block.<num>.allocation" - offset of highest written sector in bytes,
 "block.<num>.capacity" - logical size of source file in bytes,
 "block.<num>.physical" - physical size of source file in bytes
-- 
2.1.0




More information about the libvir-list mailing list