[libvirt] [PATCH REBASE 5/5] qemu: fix races in beingDestroyed usage

John Ferlan jferlan at redhat.com
Mon Apr 30 22:03:47 UTC 2018



On 04/18/2018 10:44 AM, Nikolay Shirokovskiy wrote:
> Clearing beingDestroyed right after acquiring job condition is racy.
> It is not known when EOF will be delivired. Let's keep this flag

delivered

> set. This makes possible to make a clear distinction between monitor
> errors/eofs and domain being destroyed in qemuDomainObjWait.
> 
> Also let's move setting destroyed flag out of qemuProcessBeginStopJob
> as the function is called from eof handler too.
> 
> Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy at virtuozzo.com>
> ---
>  src/qemu/qemu_domain.c  |  4 ++--
>  src/qemu/qemu_domain.h  |  2 +-
>  src/qemu/qemu_driver.c  |  8 +++++++-
>  src/qemu/qemu_process.c | 24 ++++++++++++------------
>  4 files changed, 22 insertions(+), 16 deletions(-)
> 

This one didn't git am -3 apply as well, but I see you're changing
DomainObjWait so that makes sense as to why.

I do recall looking at this code at one time, but I cannot recall my
exact conclusion given how qemuDomainObjBeginJobInternal allows the
QEMU_JOB_DESTROY to be run, but then waits for whatever is taking place
now to finish.

> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 1f40ff1..431901c 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -11913,9 +11913,9 @@ qemuDomainObjWait(virDomainObjPtr vm, unsigned long long until)
>          return -1;
>      }
>  
> -    if (!virDomainObjIsActive(vm)) {
> +    if (priv->destroyed) {
>          virReportError(VIR_ERR_OPERATION_FAILED, "%s",
> -                       _("domain is not running"));
> +                       _("domain is destroyed"));
>          return -1;
>      }
>  
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 494ed35..69112ea 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -269,7 +269,7 @@ struct _qemuDomainObjPrivate {
>      bool agentError;
>  
>      bool gotShutdown;
> -    bool beingDestroyed;
> +    bool destroyed;
>      char *pidfile;
>  
>      virDomainPCIAddressSetPtr pciaddrs;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 03969d8..4356c0d 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -2227,7 +2227,13 @@ qemuDomainDestroyFlags(virDomainPtr dom,
>      state = virDomainObjGetState(vm, &reason);
>      starting = (state == VIR_DOMAIN_PAUSED &&
>                  reason == VIR_DOMAIN_PAUSED_STARTING_UP &&
> -                !priv->beingDestroyed);
> +                !priv->destroyed);
> +
> +    /* We need to prevent monitor EOF callback from doing our work (and
> +     * sending misleading events) while the vm is unlocked inside
> +     * BeginJob/ProcessKill API
> +     */
> +    priv->destroyed = true;

would this be the right place anyway?  especially since you don't clear
it in error paths after setting it.  Once the job starts and we either
goto cleanup or endjob on failure - how does a future call distinguish?

Not sure this works conceptually.  At least with the current model if
DestroyFlags finally gets the job, it checks the domain active state,
and goes to endjob after printing a message.  So if while waiting, EOF
occurred, there's no qemuProcessStop

Perhaps the only thing I recall wondering is whether we clear
beingDestroyed too soon... If we're successful, we're still destroying
but not until destroy is successful should the flag then be cleared
while still holding the job.


John

>  
>      if (qemuProcessBeginStopJob(driver, vm, QEMU_JOB_DESTROY,
>                                  !(flags & VIR_DOMAIN_DESTROY_GRACEFUL)) < 0)
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index d76809e..689fc8b 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -143,8 +143,8 @@ qemuProcessHandleAgentEOF(qemuAgentPtr agent,
>          goto unlock;
>      }
>  
> -    if (priv->beingDestroyed) {
> -        VIR_DEBUG("Domain is being destroyed, agent EOF is expected");
> +    if (priv->destroyed) {
> +        VIR_DEBUG("Domain is destroyed, agent EOF is expected");
>          goto unlock;
>      }
>  
> @@ -286,6 +286,7 @@ qemuProcessNotifyMonitorError(virDomainObjPtr vm,
>      virFreeError(err);
>  }
>  
> +
>  /*
>   * This is a callback registered with a qemuMonitorPtr instance,
>   * and to be invoked when the monitor console hits an end of file
> @@ -308,8 +309,8 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon,
>      VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
>  
>      priv = vm->privateData;
> -    if (priv->beingDestroyed) {
> -        VIR_DEBUG("Domain is being destroyed, EOF is expected");
> +    if (priv->destroyed) {
> +        VIR_DEBUG("Domain is destroyed, EOF is expected");
>          goto cleanup;
>      }
>  
> @@ -5750,6 +5751,7 @@ qemuProcessPrepareDomain(virQEMUDriverPtr driver,
>      virResetError(&priv->monError);
>      priv->monStart = 0;
>      priv->gotShutdown = false;
> +    priv->destroyed = false;
>  
>      VIR_DEBUG("Updating guest CPU definition");
>      if (qemuProcessUpdateGuestCPU(vm->def, priv->qemuCaps, caps, flags) < 0)
> @@ -6490,16 +6492,9 @@ qemuProcessBeginStopJob(virQEMUDriverPtr driver,
>                          qemuDomainJob job,
>                          bool forceKill)
>  {
> -    qemuDomainObjPrivatePtr priv = vm->privateData;
>      unsigned int killFlags = forceKill ? VIR_QEMU_PROCESS_KILL_FORCE : 0;
>      int ret = -1;
>  
> -    /* We need to prevent monitor EOF callback from doing our work (and
> -     * sending misleading events) while the vm is unlocked inside
> -     * BeginJob/ProcessKill API
> -     */
> -    priv->beingDestroyed = true;
> -
>      if (qemuProcessKill(vm, killFlags) < 0)
>          goto cleanup;
>  
> @@ -6509,7 +6504,6 @@ qemuProcessBeginStopJob(virQEMUDriverPtr driver,
>      ret = 0;
>  
>   cleanup:
> -    priv->beingDestroyed = false;
>      return ret;
>  }
>  
> @@ -7088,6 +7082,12 @@ qemuProcessAutoDestroy(virDomainObjPtr dom,
>  
>      VIR_DEBUG("Killing domain");
>  
> +    /* We need to prevent monitor EOF callback from doing our work (and
> +     * sending misleading events) while the vm is unlocked inside
> +     * BeginJob/ProcessKill API
> +     */
> +    priv->destroyed = true;
> +
>      if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
>          return;
>  
> 




More information about the libvir-list mailing list