[libvirt] [PATCH] migration: Usable time statistics without requiring NTP

Jiri Denemark jdenemar at redhat.com
Thu Apr 23 09:18:08 UTC 2015


virDomainGetJobStats is able to report statistics of a completed
migration; however, to get usable downtime and total time statistics,
both hosts have to keep their clocks synchronized. To provide at least
some estimate of these times even when NTP daemons are not running on
both hosts, we can simply ignore the time needed to transfer the
migration cookie to the destination host. The result will still be
inaccurate, but a bit more predictable: the real total time and downtime
will always be at least what we report.

https://bugzilla.redhat.com/show_bug.cgi?id=1213434
Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
---
 include/libvirt/libvirt-domain.h | 23 ++++++++++++++++++++++-
 src/qemu/qemu_domain.c           | 15 +++++++++++++++
 src/qemu/qemu_domain.h           |  9 +++++++++
 src/qemu/qemu_migration.c        | 26 +++++++++++++-------------
 tools/virsh-domain.c             | 16 ++++++++++++++++
 5 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 8a4fe53..5c0a382 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -2548,6 +2548,16 @@ int virDomainAbortJob(virDomainPtr dom);
 # define VIR_DOMAIN_JOB_TIME_ELAPSED             "time_elapsed"
 
 /**
+ * VIR_DOMAIN_JOB_TIME_ELAPSED_NET:
+ *
+ * virDomainGetJobStats field: time (ms) since the beginning of the
+ * migration job NOT including the time required to transfer control
+ * flow from the source host to the destination host,
+ * as VIR_TYPED_PARAM_ULLONG.
+ */
+# define VIR_DOMAIN_JOB_TIME_ELAPSED_NET         "time_elapsed_net"
+
+/**
  * VIR_DOMAIN_JOB_TIME_REMAINING:
  *
  * virDomainGetJobStats field: remaining time (ms) for VIR_DOMAIN_JOB_BOUNDED
@@ -2561,11 +2571,22 @@ int virDomainAbortJob(virDomainPtr dom);
  * VIR_DOMAIN_JOB_DOWNTIME:
  *
  * virDomainGetJobStats field: downtime (ms) that is expected to happen
- * during migration, as VIR_TYPED_PARAM_ULLONG.
+ * during migration, as VIR_TYPED_PARAM_ULLONG. For a completed migration,
+ * the real computed downtime between the time guest CPUs were paused and
+ * the time they were resumed is reported instead.
  */
 # define VIR_DOMAIN_JOB_DOWNTIME                 "downtime"
 
 /**
+ * VIR_DOMAIN_JOB_DOWNTIME_NET:
+ *
+ * virDomainGetJobStats field: real measured downtime (ms) NOT including
+ * the time required to transfer control flow from the source host to the
+ * destination host, as VIR_TYPED_PARAM_ULLONG.
+ */
+# define VIR_DOMAIN_JOB_DOWNTIME_NET             "downtime_net"
+
+/**
  * VIR_DOMAIN_JOB_SETUP_TIME:
  *
  * virDomainGetJobStats field: total time in milliseconds spent preparing
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 1368386..ff223a0 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -288,6 +288,13 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
                                 jobInfo->timeElapsed) < 0)
         goto error;
 
+    if (jobInfo->timeDelta &&
+        jobInfo->timeElapsed > jobInfo->timeDelta &&
+        virTypedParamsAddULLong(&par, &npar, &maxpar,
+                                VIR_DOMAIN_JOB_TIME_ELAPSED_NET,
+                                jobInfo->timeElapsed - jobInfo->timeDelta) < 0)
+        goto error;
+
     if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED &&
         virTypedParamsAddULLong(&par, &npar, &maxpar,
                                 VIR_DOMAIN_JOB_TIME_REMAINING,
@@ -300,6 +307,14 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
                                 status->downtime) < 0)
         goto error;
 
+    if (status->downtime_set &&
+        jobInfo->timeDelta &&
+        status->downtime > jobInfo->timeDelta &&
+        virTypedParamsAddULLong(&par, &npar, &maxpar,
+                                VIR_DOMAIN_JOB_DOWNTIME_NET,
+                                status->downtime - jobInfo->timeDelta) < 0)
+        goto error;
+
     if (status->setup_time_set &&
         virTypedParamsAddULLong(&par, &npar, &maxpar,
                                 VIR_DOMAIN_JOB_SETUP_TIME,
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 6bea7c7..1b93f7d 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -100,9 +100,18 @@ struct _qemuDomainJobInfo {
     virDomainJobType type;
     unsigned long long started; /* When the async job started */
     unsigned long long stopped; /* When the domain's CPUs were stopped */
+    unsigned long long sent; /* When the source sent status info to the
+                                destination (only for migrations). */
+    unsigned long long received; /* When the destination host received status
+                                    info from the source (migrations only). */
     /* Computed values */
     unsigned long long timeElapsed;
     unsigned long long timeRemaining;
+    long long timeDelta; /* delta = received - sent, i.e., the difference
+                            between the source and the destination time plus
+                            the time between the end of Perform phase on the
+                            source and the beginning of Finish phase on the
+                            destination. */
     /* Raw values from QEMU */
     qemuMonitorMigrationStatus status;
 };
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 1da687c..4b3143f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -688,6 +688,8 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf,
 
     virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started);
     virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped);
+    virBufferAsprintf(buf, "<sent>%llu</sent>\n", jobInfo->sent);
+    virBufferAsprintf(buf, "<delta>%lld</delta>\n", jobInfo->timeDelta);
 
     virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n",
                       VIR_DOMAIN_JOB_TIME_ELAPSED,
@@ -1046,11 +1048,14 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt)
 
     virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started);
     virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped);
+    virXPathULongLong("string(./sent[1])", ctxt, &jobInfo->sent);
+    virXPathLongLong("string(./delta[1])", ctxt, &jobInfo->timeDelta);
 
     virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])",
                       ctxt, &jobInfo->timeElapsed);
     virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])",
                       ctxt, &jobInfo->timeRemaining);
+
     if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])",
                           ctxt, &status->downtime) == 0)
         status->downtime_set = true;
@@ -3438,18 +3443,9 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
     /* Update total times with the values sent by the destination daemon */
     if (mig->jobInfo) {
         qemuDomainObjPrivatePtr priv = vm->privateData;
-        if (priv->job.completed) {
-            qemuDomainJobInfoPtr jobInfo = priv->job.completed;
-            if (mig->jobInfo->status.downtime_set) {
-                jobInfo->status.downtime = mig->jobInfo->status.downtime;
-                jobInfo->status.downtime_set = true;
-            }
-            if (mig->jobInfo->timeElapsed)
-                jobInfo->timeElapsed = mig->jobInfo->timeElapsed;
-        } else {
-            priv->job.completed = mig->jobInfo;
-            mig->jobInfo = NULL;
-        }
+        VIR_FREE(priv->job.completed);
+        priv->job.completed = mig->jobInfo;
+        mig->jobInfo = NULL;
     }
 
     if (flags & VIR_MIGRATE_OFFLINE)
@@ -4041,6 +4037,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
     if (priv->job.completed) {
         qemuDomainJobInfoUpdateTime(priv->job.completed);
         qemuDomainJobInfoUpdateDowntime(priv->job.completed);
+        ignore_value(virTimeMillisNow(&priv->job.completed->sent));
     }
 
     if (priv->job.current->type == VIR_DOMAIN_JOB_UNBOUNDED)
@@ -5164,8 +5161,11 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
         }
 
         if (mig->jobInfo) {
-            priv->job.completed = mig->jobInfo;
+            qemuDomainJobInfoPtr jobInfo = mig->jobInfo;
+            priv->job.completed = jobInfo;
             mig->jobInfo = NULL;
+            if (jobInfo->sent && virTimeMillisNow(&jobInfo->received) == 0)
+                jobInfo->timeDelta = jobInfo->received - jobInfo->sent;
         }
 
         if (!(flags & VIR_MIGRATE_OFFLINE)) {
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 4f58f07..6e79c4d 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -5764,6 +5764,15 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd)
     }
 
     vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed:"), info.timeElapsed);
+    if ((rc = virTypedParamsGetULLong(params, nparams,
+                                      VIR_DOMAIN_JOB_TIME_ELAPSED_NET,
+                                      &value)) < 0) {
+        goto save_error;
+    } else if (rc && value) {
+        vshPrint(ctl, "%-17s %-12llu ms\n", _("Time elapsed w/o network:"),
+                 value);
+    }
+
     if (info.type == VIR_DOMAIN_JOB_BOUNDED)
         vshPrint(ctl, "%-17s %-12llu ms\n", _("Time remaining:"),
                  info.timeRemaining);
@@ -5853,6 +5862,13 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd)
     }
 
     if ((rc = virTypedParamsGetULLong(params, nparams,
+                                      VIR_DOMAIN_JOB_DOWNTIME_NET,
+                                      &value)) < 0)
+        goto save_error;
+    else if (rc)
+        vshPrint(ctl, "%-17s %-12llu ms\n", _("Downtime w/o network:"), value);
+
+    if ((rc = virTypedParamsGetULLong(params, nparams,
                                       VIR_DOMAIN_JOB_SETUP_TIME,
                                       &value)) < 0)
         goto save_error;
-- 
2.3.5




More information about the libvir-list mailing list