[libvirt] [PATCH 5/6] qemu: Recompute downtime and total time when migration completes
John Ferlan
jferlan at redhat.com
Fri Sep 5 18:47:35 UTC 2014
On 09/01/2014 11:05 AM, Jiri Denemark wrote:
> Total time of a migration and total downtime transferred from a source to
> a destination host do not account for the transfer time to the
> destination host or for the time elapsed before guest CPUs are
> resumed. Thus, source libvirtd remembers when migration started and when
> guest CPUs were paused. Both timestamps are transferred to destination
> libvirtd which uses them to compute total migration time and total
> downtime. This, obviously, requires clock to be synchronized between the
s/This, obviously,/Obviously this/
"requires clock" reads funny... "requires the time" seems closer
> two hosts. The reported times are useless otherwise but they would be
> equally useless if we didn't do this recomputation so we don't lose
> anything by doing it.
>
To say nothing of inter-timezone migrations, right?
> Signed-off-by: Jiri Denemark <jdenemar at redhat.com>
> ---
> src/libvirt.c | 5 ++++-
> src/qemu/qemu_domain.c | 28 ++++++++++++++++++++++++++++
> src/qemu/qemu_domain.h | 2 ++
> src/qemu/qemu_migration.c | 15 ++++++++++++++-
> src/qemu/qemu_process.c | 9 ++++++++-
> tools/virsh.pod | 5 ++++-
> 6 files changed, 60 insertions(+), 4 deletions(-)
>
> diff --git a/src/libvirt.c b/src/libvirt.c
> index 6fa0a6b..61d0543 100644
> --- a/src/libvirt.c
> +++ b/src/libvirt.c
> @@ -17581,7 +17581,10 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info)
> * return statistics about a recently completed job. Specifically, this
> * flag may be used to query statistics of a completed incoming migration.
> * Statistics of a completed job are automatically destroyed once read or
> - * when libvirtd is restarted.
> + * when libvirtd is restarted. Note that time information returned for
> + * completed migrations may be completely irrelevant unless both source and
> + * destination hosts have synchronized time (i.e., NTP daemon is running on
> + * both of them).
> *
> * Returns 0 in case of success and -1 in case of failure.
> */
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 18a3761..cec7828 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -222,11 +222,39 @@ qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo)
> if (virTimeMillisNow(&now) < 0)
> return -1;
>
> + if (now < jobInfo->started) {
> + VIR_WARN("Async job starts in the future");
> + jobInfo->started = 0;
> + return 0;
> + }
> +
> jobInfo->timeElapsed = now - jobInfo->started;
> return 0;
> }
>
> int
> +qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo)
> +{
> + unsigned long long now;
> +
Can jobInfo be NULL here? It's the same qemuMigrationWaitForCompletion()
path timing concern as in patch 1.
> + if (!jobInfo->stopped)
> + return 0;
> +
> + if (virTimeMillisNow(&now) < 0)
> + return -1;
> +
> + if (now < jobInfo->stopped) {
> + VIR_WARN("Guest's CPUs stopped in the future");
> + jobInfo->stopped = 0;
> + return 0;
> + }
> +
> + jobInfo->status.downtime = now - jobInfo->stopped;
> + jobInfo->status.downtime_set = true;
> + return 0;
> +}
> +
> +int
> qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo,
> virDomainJobInfoPtr info)
> {
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 365238b..435a22b 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -105,6 +105,7 @@ typedef qemuDomainJobInfo *qemuDomainJobInfoPtr;
> struct _qemuDomainJobInfo {
> virDomainJobType type;
> unsigned long long started; /* When the async job started */
> + unsigned long long stopped; /* When the domain's CPUs were stopped */
> /* Computed values */
> unsigned long long timeElapsed;
> unsigned long long timeRemaining;
> @@ -390,6 +391,7 @@ bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv,
> bool reportError);
>
> int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo);
> +int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo);
Does this also need some sort of ATTRIBUTE_NONNULL(1)?
ACK in general
John
> int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo,
> virDomainJobInfoPtr info);
> int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index f1b3d50..43b42ac 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -623,6 +623,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf,
> virBufferAddLit(buf, "<statistics>\n");
> virBufferAdjustIndent(buf, 2);
>
> + virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started);
> + virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped);
> +
> virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n",
> VIR_DOMAIN_JOB_TIME_ELAPSED,
> jobInfo->timeElapsed);
> @@ -891,6 +894,9 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt)
> status = &jobInfo->status;
> jobInfo->type = VIR_DOMAIN_JOB_COMPLETED;
>
> + virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started);
> + virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped);
> +
> virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])",
> ctxt, &jobInfo->timeElapsed);
> virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])",
> @@ -2015,6 +2021,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver,
> }
>
> if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) {
> + qemuDomainJobInfoUpdateDowntime(jobInfo);
> VIR_FREE(priv->job.completed);
> if (VIR_ALLOC(priv->job.completed) == 0)
> *priv->job.completed = *jobInfo;
> @@ -3597,8 +3604,10 @@ qemuMigrationRun(virQEMUDriverPtr driver,
> VIR_FORCE_CLOSE(fd);
> }
>
> - if (priv->job.completed)
> + if (priv->job.completed) {
> qemuDomainJobInfoUpdateTime(priv->job.completed);
> + qemuDomainJobInfoUpdateDowntime(priv->job.completed);
> + }
>
> cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK |
> QEMU_MIGRATION_COOKIE_STATS;
> @@ -4811,6 +4820,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
> }
> goto endjob;
> }
> + if (priv->job.completed) {
> + qemuDomainJobInfoUpdateTime(priv->job.completed);
> + qemuDomainJobInfoUpdateDowntime(priv->job.completed);
> + }
> }
>
> dom = virGetDomain(dconn, vm->def->name, vm->def->uuid);
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index f68dfbe..c4c1ce5 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -754,6 +754,9 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
> VIR_DEBUG("Transitioned guest %s to paused state",
> vm->def->name);
>
> + if (priv->job.current)
> + ignore_value(virTimeMillisNow(&priv->job.current->stopped));
> +
> virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_UNKNOWN);
> event = virDomainEventLifecycleNewFromObj(vm,
> VIR_DOMAIN_EVENT_SUSPENDED,
> @@ -2888,7 +2891,8 @@ qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
> }
>
>
> -int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
> +int qemuProcessStopCPUs(virQEMUDriverPtr driver,
> + virDomainObjPtr vm,
> virDomainPausedReason reason,
> qemuDomainAsyncJob asyncJob)
> {
> @@ -2906,6 +2910,9 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
> if (ret < 0)
> goto cleanup;
>
> + if (priv->job.current)
> + ignore_value(virTimeMillisNow(&priv->job.current->stopped));
> +
> virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
> if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
> VIR_WARN("Unable to release lease on %s", vm->def->name);
> diff --git a/tools/virsh.pod b/tools/virsh.pod
> index 3c71db9..13187ce 100644
> --- a/tools/virsh.pod
> +++ b/tools/virsh.pod
> @@ -1115,7 +1115,10 @@ Abort the currently running domain job.
> =item B<domjobinfo> I<domain> [I<--completed>]
>
> Returns information about jobs running on a domain. I<--completed> tells
> -virsh to return information about a recently finished job.
> +virsh to return information about a recently finished job. Note that time
> +information returned for completed migrations may be completely irrelevant
> +unless both source and destination hosts have synchronized time (i.e., NTP
> +daemon is running on both of them).
>
> =item B<domname> I<domain-id-or-uuid>
>
>
More information about the libvir-list
mailing list