[libvirt] [PATCH 3/6] use virObject to manage reference-count of virDomainObj
Hu Tao
hutao at cn.fujitsu.com
Fri Apr 8 03:23:47 UTC 2011
On Thu, Apr 07, 2011 at 10:33:03AM +0100, Daniel P. Berrange wrote:
> On Wed, Apr 06, 2011 at 03:19:51PM +0800, Hu Tao wrote:
> > This patch also eliminates a dead-lock bug in
> > qemuDomainObjBeginJobWithDriver: if virCondWaitUntil() timeouts, the
> > thread tries to acquire qemu driver lock while holding virDomainObj
> > lock.
> > ---
> > src/conf/domain_conf.c | 56 ++++----
> > src/conf/domain_conf.h | 6 +-
> > src/openvz/openvz_conf.c | 8 +-
> > src/qemu/qemu_domain.c | 32 ++---
> > src/qemu/qemu_domain.h | 2 +-
> > src/qemu/qemu_driver.c | 304 ++++++++++++++++++++-------------------------
> > src/qemu/qemu_migration.c | 45 +++----
> > src/qemu/qemu_process.c | 33 ++---
> > src/vmware/vmware_conf.c | 2 +-
> > 9 files changed, 215 insertions(+), 273 deletions(-)
> >
> > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> > index 90a1317..fc76a00 100644
> > --- a/src/conf/domain_conf.c
> > +++ b/src/conf/domain_conf.c
> > @@ -47,6 +47,7 @@
> > #include "storage_file.h"
> > #include "files.h"
> > #include "bitmap.h"
> > +#include "virobject.h"
> >
> > #define VIR_FROM_THIS VIR_FROM_DOMAIN
> >
> > @@ -395,9 +396,7 @@ static void
> > virDomainObjListDataFree(void *payload, const void *name ATTRIBUTE_UNUSED)
> > {
> > virDomainObjPtr obj = payload;
> > - virDomainObjLock(obj);
> > - if (virDomainObjUnref(obj) > 0)
> > - virDomainObjUnlock(obj);
> > + virDomainObjUnref(obj);
> > }
> >
> > int virDomainObjListInit(virDomainObjListPtr doms)
> > @@ -437,7 +436,7 @@ virDomainObjPtr virDomainFindByID(const virDomainObjListPtr doms,
> > virDomainObjPtr obj;
> > obj = virHashSearch(doms->objs, virDomainObjListSearchID, &id);
> > if (obj)
> > - virDomainObjLock(obj);
> > + virDomainObjRef(obj);
> > return obj;
> > }
> >
> > @@ -452,7 +451,7 @@ virDomainObjPtr virDomainFindByUUID(const virDomainObjListPtr doms,
> >
> > obj = virHashLookup(doms->objs, uuidstr);
> > if (obj)
> > - virDomainObjLock(obj);
> > + virDomainObjRef(obj);
> > return obj;
> > }
> >
> > @@ -476,7 +475,7 @@ virDomainObjPtr virDomainFindByName(const virDomainObjListPtr doms,
> > virDomainObjPtr obj;
> > obj = virHashSearch(doms->objs, virDomainObjListSearchName, name);
> > if (obj)
> > - virDomainObjLock(obj);
> > + virDomainObjRef(obj);
> > return obj;
> > }
>
> This is a major change in semantics, which makes pretty much
> every single caller non-thread safe, unless the callers are
> all changed todo virDomainObjLock immediately after calling
> this. So I don't really see the point in this - it just means
> more code duplication.
If we do this:
obj = virHashSearch(doms->objs, virDomainObjListSearchName, name);
if (obj)
virDomainObjLock(obj);
return obj;
And at the meantime another thread removes the same obj from doms->objs
and frees it, than we are accessing a freed obj.
lock doesn't help prevent object from being freed.
>
> >
> > @@ -967,6 +966,12 @@ static void virDomainObjFree(virDomainObjPtr dom)
> > {
> > if (!dom)
> > return;
> > + virDomainObjUnref(dom);
> > +}
> > +
> > +static void doDomainObjFree(virObjectPtr obj)
> > +{
> > + virDomainObjPtr dom = (virDomainObjPtr)obj;
> >
> > VIR_DEBUG("obj=%p", dom);
> > virDomainDefFree(dom->def);
> > @@ -984,21 +989,13 @@ static void virDomainObjFree(virDomainObjPtr dom)
> >
> > void virDomainObjRef(virDomainObjPtr dom)
> > {
> > - dom->refs++;
> > - VIR_DEBUG("obj=%p refs=%d", dom, dom->refs);
> > + virObjectRef(&dom->obj);
> > }
> >
> >
> > -int virDomainObjUnref(virDomainObjPtr dom)
> > +void virDomainObjUnref(virDomainObjPtr dom)
> > {
> > - dom->refs--;
> > - VIR_DEBUG("obj=%p refs=%d", dom, dom->refs);
> > - if (dom->refs == 0) {
> > - virDomainObjUnlock(dom);
> > - virDomainObjFree(dom);
> > - return 0;
> > - }
> > - return dom->refs;
> > + virObjectUnref(&dom->obj);
> > }
> >
> > static virDomainObjPtr virDomainObjNew(virCapsPtr caps)
> > @@ -1010,6 +1007,11 @@ static virDomainObjPtr virDomainObjNew(virCapsPtr caps)
> > return NULL;
> > }
> >
> > + if (virObjectInit(&domain->obj, doDomainObjFree)) {
> > + VIR_FREE(domain);
> > + return NULL;
> > + }
> > +
> > if (caps->privateDataAllocFunc &&
> > !(domain->privateData = (caps->privateDataAllocFunc)())) {
> > virReportOOMError();
> > @@ -1027,9 +1029,7 @@ static virDomainObjPtr virDomainObjNew(virCapsPtr caps)
> > return NULL;
> > }
> >
> > - virDomainObjLock(domain);
> > domain->state = VIR_DOMAIN_SHUTOFF;
> > - domain->refs = 1;
> >
> > virDomainSnapshotObjListInit(&domain->snapshots);
> >
> > @@ -1075,8 +1075,10 @@ virDomainObjPtr virDomainAssignDef(virCapsPtr caps,
> > domain->def = def;
> >
> > virUUIDFormat(def->uuid, uuidstr);
> > + virDomainObjRef(domain);
> > if (virHashAddEntry(doms->objs, uuidstr, domain) < 0) {
> > - VIR_FREE(domain);
> > + virDomainObjUnref(domain);
> > + virDomainObjFree(domain);
> > return NULL;
> > }
>
> Simiarly here, you're now requiring all callers to manually obtain
> a lock.
That is the desired result, lock right before do read/write and unlock
it as soon as possible.
>
> > @@ -1239,39 +1243,57 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml,
> >
> > qemuDriverLock(driver);
> > if (!(def = virDomainDefParseString(driver->caps, xml,
> > - VIR_DOMAIN_XML_INACTIVE)))
> > + VIR_DOMAIN_XML_INACTIVE))) {
> > + qemuDriverUnlock(driver);
> > goto cleanup;
> > + }
> >
> > - if (virSecurityManagerVerify(driver->securityManager, def) < 0)
> > + if (virSecurityManagerVerify(driver->securityManager, def) < 0) {
> > + qemuDriverUnlock(driver);
> > goto cleanup;
> > + }
> >
> > - if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0)
> > + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) {
> > + qemuDriverUnlock(driver);
> > goto cleanup;
> > + }
> >
> > - if (qemudCanonicalizeMachine(driver, def) < 0)
> > + if (qemudCanonicalizeMachine(driver, def) < 0) {
> > + qemuDriverUnlock(driver);
> > goto cleanup;
> > + }
> >
> > - if (qemuDomainAssignPCIAddresses(def) < 0)
> > + if (qemuDomainAssignPCIAddresses(def) < 0) {
> > + qemuDriverUnlock(driver);
> > goto cleanup;
> > + }
> >
> > if (!(vm = virDomainAssignDef(driver->caps,
> > &driver->domains,
> > - def, false)))
> > + def, false))) {
> > + qemuDriverUnlock(driver);
> > goto cleanup;
> > + }
> > +
> > + qemuDriverUnlock(driver);
>
> driver is now unlocked....
>
> >
> > def = NULL;
> >
> > - if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0)
> > + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) {
> > + virDomainObjUnref(vm);
>
> ...but this method *requires* driver to be locked.
>
> > goto cleanup; /* XXXX free the 'vm' we created ? */
> > + }
> >
> > if (qemuProcessStart(conn, driver, vm, NULL,
> > (flags & VIR_DOMAIN_START_PAUSED) != 0,
> > -1, NULL, VIR_VM_OP_CREATE) < 0) {
> > qemuAuditDomainStart(vm, "booted", false);
>
> ...and this method writes to 'driver', so it is now unsafe.
>
> > - if (qemuDomainObjEndJob(vm) > 0)
> > - virDomainRemoveInactive(&driver->domains,
> > - vm);
> > - vm = NULL;
> > + qemuDomainObjEndJob(vm);
> > + qemuDriverLock(driver);
> > + virDomainRemoveInactive(&driver->domains,
> > + vm);
> > + qemuDriverUnlock(driver);
> > + virDomainObjUnref(vm);
> > goto cleanup;
> > }
> >
> > @@ -1283,17 +1305,13 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml,
> > dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
> > if (dom) dom->id = vm->def->id;
> >
> > - if (vm &&
> > - qemuDomainObjEndJob(vm) == 0)
> > - vm = NULL;
> > + qemuDomainObjEndJob(vm);
> > + virDomainObjUnref(vm);
> >
> > cleanup:
> > virDomainDefFree(def);
> > - if (vm)
> > - virDomainObjUnlock(vm);
> > if (event)
> > qemuDomainEventQueue(driver, event);
> > - qemuDriverUnlock(driver);
> > return dom;
> > }
>
>
> And all the usage of 'vm' in this method is now completely
> unlocked and unsafe.
>
>
> >
> > @@ -1307,13 +1325,14 @@ static int qemudDomainSuspend(virDomainPtr dom) {
> >
> > qemuDriverLock(driver);
> > vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> > + qemuDriverUnlock(driver);
> >
> > if (!vm) {
> > char uuidstr[VIR_UUID_STRING_BUFLEN];
> > virUUIDFormat(dom->uuid, uuidstr);
> > qemuReportError(VIR_ERR_NO_DOMAIN,
> > _("no domain with matching uuid '%s'"), uuidstr);
> > - goto cleanup;
> > + return -1;
> > }
> > if (!virDomainObjIsActive(vm)) {
> > qemuReportError(VIR_ERR_OPERATION_INVALID,
> > @@ -1354,16 +1373,12 @@ static int qemudDomainSuspend(virDomainPtr dom) {
> > }
> >
> > endjob:
> > - if (qemuDomainObjEndJob(vm) == 0)
> > - vm = NULL;
> > + qemuDomainObjEndJob(vm);
> >
> > cleanup:
> > - if (vm)
> > - virDomainObjUnlock(vm);
> > -
> > + virDomainObjUnref(vm);
> > if (event)
> > qemuDomainEventQueue(driver, event);
> > - qemuDriverUnlock(driver);
> > return ret;
> > }
>
> Also now completely unsafe since 'vm' is never locked while
> making changes to it.
Yes. Will improve qemudDomainSuspend.
>
> [cut rest of patch]
>
> I don't see how any of this patch is threadsafe now that virtually
> no methods are acquiring the 'vm' lock.
qemuDomainObjBeginJob does acquire vm lock.
>
>
> Daniel
> --
> |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
> |: http://libvirt.org -o- http://virt-manager.org :|
> |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
> |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
More information about the libvir-list
mailing list