[libvirt] [PATCH v2 2/2] qemu: completely rework reference counting

Peter Krempa pkrempa at redhat.com
Tue Dec 9 13:40:16 UTC 2014


On 12/05/14 15:03, Martin Kletzander wrote:
> There is one problem that causes various errors in the daemon.  When
> domain is waiting for a job, it is unlocked while waiting on the
> condition.  However, if that domain is for example transient and being
> removed in another API (e.g. cancelling incoming migration), it get's
> unref'd.  If the first call, that was waiting, fails to get the job, it
> unref's the domain object, and because it was the last reference, it
> causes clearing of the whole domain object.  However, when finishing the
> call, the domain must be unlocked, but there is no way for the API to
> know whether it was cleaned or not (unless there is some ugly temporary
> variable, but let's scratch that).
> 
> The root cause is that our APIs don't ref the objects they are using and
> all use the implicit reference that the object has when it is in the
> domain list.  That reference can be removed when the API is waiting for
> a job.  And because each domain doesn't do its ref'ing, it results in
> the ugly checking of the return value of virObjectUnref() that we have
> everywhere.
> 
> This patch changes qemuDomObjFromDomain() to ref the domain (using
> virDomainObjListFindByUUIDRef()) and adds qemuDomObjEndAPI() which
> should be the only function in which the return value of
> virObjectUnref() is checked.  This makes all reference counting
> deterministic and makes the code a bit clearer.
> 
> Signed-off-by: Martin Kletzander <mkletzan at redhat.com>
> ---
>  src/qemu/THREADS.txt      |  40 ++-
>  src/qemu/qemu_domain.c    |  29 +-
>  src/qemu/qemu_domain.h    |  12 +-
>  src/qemu/qemu_driver.c    | 708 ++++++++++++++++------------------------------
>  src/qemu/qemu_migration.c | 108 +++----
>  src/qemu/qemu_migration.h |  10 +-
>  src/qemu/qemu_process.c   |  87 +++---
>  7 files changed, 359 insertions(+), 635 deletions(-)
> 

Continuing where I finished yesterday:

> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 9152cf5..2e0b7fc 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c

[...]

> @@ -15128,21 +14970,18 @@ static virDomainPtr qemuDomainQemuAttach(virConnectPtr conn,
>                                     VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
>                                     NULL)))
>          goto cleanup;
> -

Keep the empty line for visual separation.

> +    virObjectRef(vm);
>      def = NULL;
> 
>      if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) {
>          qemuDomainRemoveInactive(driver, vm);
> -        vm = NULL;
>          goto cleanup;
>      }
> 
>      if (qemuProcessAttach(conn, driver, vm, pid,
>                            pidfile, monConfig, monJSON) < 0) {
> -        if (qemuDomainObjEndJob(driver, vm))
> -            qemuDomainRemoveInactive(driver, vm);
> -        vm = NULL;
> -        monConfig = NULL;

Ownership of monConfig is transferred to qemuProcessAttach(), thus you
need to clean the pointer to avoid double free.

> +        qemuDomainObjEndJob(driver, vm);
> +        qemuDomainRemoveInactive(driver, vm);
>          goto cleanup;
>      }
> 

[...]

> @@ -15513,8 +15348,7 @@ qemuDomainBlockPivot(virConnectPtr conn,
>  }
> 
> 
> -/* bandwidth in MiB/s per public API. Caller must lock vm beforehand,
> - * and not access it afterwards.  */
> +/* bandwidth in MiB/s per public API. Caller must lock vm beforehand. */

The semantics didn't change. qemuDomainBlockJobImpl() consumes vm and
calls qemuDomObjEndAPI() on it without adding any reference thus callers
still shouldn't touch VM after passing it to this function.

>  static int
>  qemuDomainBlockJobImpl(virDomainObjPtr vm,
>                         virConnectPtr conn,

[...]


> @@ -15776,7 +15606,7 @@ qemuDomainBlockJobAbort(virDomainPtr dom, const char *path, unsigned int flags)
> 
>  static int
>  qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path,
> -                           virDomainBlockJobInfoPtr info, unsigned int flags)
> +                          virDomainBlockJobInfoPtr info, unsigned int flags)

Unrelated whitespace fix?

>  {
>      virQEMUDriverPtr driver = dom->conn->privateData;
>      qemuDomainObjPrivatePtr priv;

[...]

> @@ -18733,13 +18523,15 @@ qemuConnectGetAllDomainStats(virConnectPtr conn,
>              continue;
> 
>          if (doms != domlist &&
> -            !virConnectGetAllDomainStatsCheckACL(conn, dom->def))
> +            !virConnectGetAllDomainStatsCheckACL(conn, dom->def)) {
> +            virObjectUnlock(dom);

Hmm, this should be a separate fix as I've done with the migration APIs.
Or perhaps is this a rebase artifact from a un-published patch?

>              continue;
> +        }
> 
> -        if (HAVE_JOB(domflags) &&
> -            qemuDomainObjBeginJob(driver, dom, QEMU_JOB_QUERY) < 0)
> +        if (HAVE_JOB(privflags) &&
> +            qemuDomainObjBeginJob(driver, dom, QEMU_JOB_QUERY) >= 0)
>              /* As it was never requested. Gather as much as possible anyway. */
> -            domflags &= ~QEMU_DOMAIN_STATS_HAVE_JOB;
> +            domflags |= QEMU_DOMAIN_STATS_HAVE_JOB;
> 
>          if (qemuDomainGetStats(conn, dom, stats, &tmp, domflags) < 0)
>              goto endjob;

[...]

> @@ -18763,8 +18553,7 @@ qemuConnectGetAllDomainStats(virConnectPtr conn,
> 
>   endjob:
>      if (HAVE_JOB(domflags) && dom)
> -        if (!qemuDomainObjEndJob(driver, dom))
> -            dom = NULL;
> +        qemuDomainObjEndJob(driver, dom);
> 
>   cleanup:
>      if (dom)
> @@ -18835,8 +18624,7 @@ qemuDomainGetFSInfo(virDomainPtr dom,
>      qemuDomainObjExitAgent(vm);
> 
>   endjob:
> -    if (!qemuDomainObjEndJob(driver, vm))
> -        vm = NULL;
> +    qemuDomainObjEndJob(driver, vm);
> 
>   cleanup:
>      if (vm)

Here's a virObjectUnlock() that you've forgot to change to
qemuDomObjEndAPI().


> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 0acbb57..e4c8cf8 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c

[...]

> +    qemuMigrationJobContinue(vm);
> 
>      if (autoPort)
>          priv->migrationPort = port;
> @@ -3115,16 +3099,12 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
>      VIR_FREE(xmlout);
>      VIR_FORCE_CLOSE(dataFD[0]);
>      VIR_FORCE_CLOSE(dataFD[1]);

(not in context)

This function creates the @vm object with virDomainObjListAdd() but you
don't ref it afterwards as you've used to do in other places that call
that func ...

> -    if (vm) {
> -        if (ret < 0) {
> -            virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
> -            priv->nbdPort = 0;
> -        }
> -        if (ret >= 0 || vm->persistent)
> -            virObjectUnlock(vm);
> -        else
> -            qemuDomainRemoveInactive(driver, vm);
> +    if (ret < 0) {
> +        virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
> +        priv->nbdPort = 0;
> +        qemuDomainRemoveInactive(driver, vm);
>      }
> +    qemuDomObjEndAPI(&vm);

... and now you'd kill your last reference.

>      if (event)
>          qemuDomainEventQueue(driver, event);
>      qemuMigrationCookieFree(mig);

[...]

> @@ -5301,21 +5268,16 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
>          VIR_WARN("Unable to encode migration cookie");
> 
>   endjob:
> -    if (qemuMigrationJobFinish(driver, vm) == 0) {
> -        vm = NULL;
> -    } else if (!vm->persistent && !virDomainObjIsActive(vm)) {
> +    qemuMigrationJobFinish(driver, vm);
> +    if (!vm->persistent && !virDomainObjIsActive(vm))
>          qemuDomainRemoveInactive(driver, vm);
> -        vm = NULL;
> -    }
> 
>   cleanup:
>      virPortAllocatorRelease(driver->migrationPorts, port);
> -    if (vm) {
> -        if (priv->mon)
> -            qemuMonitorSetDomainLog(priv->mon, -1);
> -        VIR_FREE(priv->origname);
> -        virObjectUnlock(vm);
> -    }
> +    if (priv->mon)
> +        qemuMonitorSetDomainLog(priv->mon, -1);
> +    VIR_FREE(priv->origname);
> +    qemuDomObjEndAPI(&vm);

The finish phase of the migration gathers the domain object via
virDomainObjListFindByName() and thus isn't ref'd.

You should tweak the callers to ref the object, otherwise it might
vanish with us thinking we have the object.


>      if (event)
>          qemuDomainEventQueue(driver, event);
>      qemuMigrationCookieFree(mig);

[...]

> @@ -5594,7 +5554,7 @@ qemuMigrationJobIsActive(virDomainObjPtr vm,
>      return true;
>  }
> 
> -bool
> +void
>  qemuMigrationJobFinish(virQEMUDriverPtr driver, virDomainObjPtr vm)
>  {
>      return qemuDomainObjEndAsyncJob(driver, vm);

returning result of a void function in a void function? :)

[...]

> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index a14b6f7..4776ce8 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -571,6 +571,7 @@ qemuProcessFakeReboot(void *opaque)
>      virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
>      virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
>      int ret = -1;
> +

Spurious whitespace change.

>      VIR_DEBUG("vm=%p", vm);
>      virObjectLock(vm);
>      if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)

[...]

> @@ -935,15 +932,13 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
>               */
>              virObjectRef(vm);
>              if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
> -                if (!virObjectUnref(vm))
> -                    vm = NULL;
> +                virObjectUnref(vm);
>                  VIR_FREE(processEvent);
>              }
>          }
>      }
> 
> -    if (vm)
> -        virObjectUnlock(vm);
> +    virObjectUnlock(vm);

This can be considered as a separate cleanup as it doesn't change any
semantics due to the fact that we acquired a reference to pass to the
worker thread and are guaranteed that we are getting rid of the same one.

>      if (watchdogEvent)
>          qemuDomainEventQueue(driver, watchdogEvent);
>      if (lifecycleEvent)
> @@ -1439,14 +1434,12 @@ qemuProcessHandleGuestPanic(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
>       */
>      virObjectRef(vm);
>      if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
> -        if (!virObjectUnref(vm))
> -            vm = NULL;
> +        virObjectUnref(vm);
>          VIR_FREE(processEvent);
>      }
> 
>   cleanup:
> -    if (vm)
> -       virObjectUnlock(vm);
> +    virObjectUnlock(vm);

ditto for this hunk.

> 
>      return 0;
>  }

[...]

Overall looks good, but many of the comments warrant for another version.

Peter

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20141209/bd3aa124/attachment-0001.sig>


More information about the libvir-list mailing list