[libvirt] [PATCH 4/4] vz: fix race condition when adding domain to domains list

Nikolay Shirokovskiy nshirokovskiy at virtuozzo.com
Fri Jan 29 08:06:41 UTC 2016



On 28.01.2016 18:38, Mikhail Feoktistov wrote:
> Race condition:
> User calls defineXML to create new instance.
> The main thread from vzDomainDefineXMLFlags() creates new instance by prlsdkCreateVm.
> Then this thread calls prlsdkAddDomain to add new domain to domains list.
> The second thread receives notification from hypervisor that new VM was created.
> It calls prlsdkHandleVmAddedEvent() and also tries to add new domain to domains list.
> These two threads call virDomainObjListFindByUUID() from prlsdkAddDomain() and don't find new domain.
> So they add two domains with the same uuid to domains list.
> 
> This fix splits logic of prlsdkAddDomain() into two functions.
> 1. prlsdkNewDomain() creates new empty domain in domains list with the specific uuid.
> 2. prlsdkLoadDomain() add data from VM to domain object.
> 
> New algorithm for creating an instance:
> In vzDomainDefineXMLFlags() we add new domain to domain list by calling prlsdkNewDomain()
> and only after that we call CreateVm() to create VM.
> It means that we "reserve" domain object with the specific uuid.
> After creation of new VM we add info from this VM
> to reserved domain object by calling prlsdkLoadDomain().
> 
> Before this patch prlsdkLoadDomain() worked in 2 different cases:
> 1. It creates and initializes new domain. Then updates it from sdk handle.
> 2. It updates existed domain from sdk handle.
> In this patch we remove code which creates new domain from LoadDomain()
> and move it to prlsdkNewDomain().
> Now prlsdkLoadDomain() only updates domain from skd handle.
> 
> In notification handler prlsdkHandleVmAddedEvent() we check
> the existence of a domain and if it doesn't exist we add new domain by calling
> prlsdkNewDomain() and load info from sdk handle via prlsdkLoadDomain().
> ---
>  src/vz/vz_driver.c |  13 ++-
>  src/vz/vz_sdk.c    | 253 +++++++++++++++++++++++++++--------------------------
>  src/vz/vz_sdk.h    |   9 +-
>  3 files changed, 146 insertions(+), 129 deletions(-)
> 
> diff --git a/src/vz/vz_driver.c b/src/vz/vz_driver.c
> index 91a48b6..521efd4 100644
> --- a/src/vz/vz_driver.c
> +++ b/src/vz/vz_driver.c
> @@ -685,6 +685,7 @@ vzDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
>      virDomainPtr retdom = NULL;
>      virDomainDefPtr def;
>      virDomainObjPtr olddom = NULL;
> +    virDomainObjPtr newdom = NULL;
>      unsigned int parse_flags = VIR_DOMAIN_DEF_PARSE_INACTIVE;
>  
>      virCheckFlags(VIR_DOMAIN_DEFINE_VALIDATE, NULL);
> @@ -700,6 +701,9 @@ vzDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
>      olddom = virDomainObjListFindByUUID(privconn->domains, def->uuid);
>      if (olddom == NULL) {
>          virResetLastError();
> +        newdom = prlsdkNewDomain(privconn, def->name, def->uuid);
> +        if (!newdom)
> +            goto cleanup;
>          if (def->os.type == VIR_DOMAIN_OSTYPE_HVM) {
>              if (prlsdkCreateVm(conn, def))
>                  goto cleanup;
> @@ -713,8 +717,7 @@ vzDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
>              goto cleanup;
>          }
>  
> -        olddom = prlsdkAddDomain(privconn, def->uuid);
> -        if (!olddom)
> +        if (prlsdkLoadDomain(privconn, newdom))
>              goto cleanup;
>      } else {
>          int state, reason;
> @@ -755,6 +758,12 @@ vzDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
>   cleanup:
>      if (olddom)
>          virObjectUnlock(olddom);
> +    if (newdom) {
> +        if (!retdom)
> +             virDomainObjListRemove(privconn->domains, newdom);
> +        else
> +             virObjectUnlock(newdom);
> +    }
>      virDomainDefFree(def);
>      vzDriverUnlock(privconn);
>      return retdom;
> diff --git a/src/vz/vz_sdk.c b/src/vz/vz_sdk.c
> index 6fb2a97..9d2bdab 100644
> --- a/src/vz/vz_sdk.c
> +++ b/src/vz/vz_sdk.c
> @@ -1236,58 +1236,35 @@ prlsdkConvertCpuMode(PRL_HANDLE sdkdom, virDomainDefPtr def)
>      return -1;
>  }
>  
> -/*
> - * This function retrieves information about domain.
> - * If the domains is already in the domains list
> - * privconn->domains, then locked 'olddom' must be
> - * provided. If the domains must be added to the list,
> - * olddom must be NULL.
> - *
> - * The function return a pointer to a locked virDomainObj.
> - */
> -static virDomainObjPtr
> -prlsdkLoadDomain(vzConnPtr privconn,
> -                 PRL_HANDLE sdkdom,
> -                 virDomainObjPtr olddom)
> +int
> +prlsdkLoadDomain(vzConnPtr privconn, virDomainObjPtr dom)
>  {
> -    virDomainObjPtr dom = NULL;
>      virDomainDefPtr def = NULL;
> -    vzDomObjPtr pdom = NULL;
>      VIRTUAL_MACHINE_STATE domainState;
> +    char *home = NULL;
>  
>      PRL_UINT32 buflen = 0;
>      PRL_RESULT pret;
>      PRL_UINT32 ram;
>      PRL_UINT32 envId;
>      PRL_VM_AUTOSTART_OPTION autostart;
> +    PRL_HANDLE sdkdom = PRL_INVALID_HANDLE;
> +    vzDomObjPtr pdom = dom->privateData;
>  
>      virCheckNonNullArgGoto(privconn, error);
> -    virCheckNonNullArgGoto(sdkdom, error);
> +    virCheckNonNullArgGoto(dom, error);
> +
> +    sdkdom = prlsdkSdkDomainLookupByUUID(privconn, dom->def->uuid);
> +    if (sdkdom == PRL_INVALID_HANDLE)
> +        return -1;
>  
>      if (!(def = virDomainDefNew()))
>          goto error;
>  
> -    if (!olddom) {
> -        if (VIR_ALLOC(pdom) < 0)
> -            goto error;
> -        pdom->cache.stats = PRL_INVALID_HANDLE;
> -        pdom->cache.count = -1;
> -        if (virCondInit(&pdom->cache.cond) < 0) {
> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot initialize condition"));
> -            goto error;
> -        }
> -    } else {
> -        pdom = olddom->privateData;
> -    }
> +    def->virtType = dom->def->virtType;
> +    def->id = dom->def->id;
>  
> -    if (STREQ(privconn->drivername, "vz"))
> -        def->virtType = VIR_DOMAIN_VIRT_VZ;
> -    else
> -        def->virtType = VIR_DOMAIN_VIRT_PARALLELS;
> -
> -    def->id = -1;
> -
> -    if (prlsdkGetDomainIds(sdkdom, &def->name, def->uuid) < 0)
> +    if (prlsdkGetDomainIds(sdkdom, &def->name, def->uuid))
better stay with < 0
>          goto error;
>  
>      def->onReboot = VIR_DOMAIN_LIFECYCLE_RESTART;
> @@ -1318,29 +1295,33 @@ prlsdkLoadDomain(vzConnPtr privconn,
>  
>      pret = PrlVmCfg_GetEnvId(sdkdom, &envId);
>      prlsdkCheckRetGoto(pret, error);
> -    pdom->id = envId;
>  
>      buflen = 0;
>      pret = PrlVmCfg_GetHomePath(sdkdom, NULL, &buflen);
>      prlsdkCheckRetGoto(pret, error);
>  
> -    VIR_FREE(pdom->home);
> -    if (VIR_ALLOC_N(pdom->home, buflen) < 0)
> +    if (VIR_ALLOC_N(home, buflen) < 0)
>          goto error;
>  
> -    pret = PrlVmCfg_GetHomePath(sdkdom, pdom->home, &buflen);
> +    pret = PrlVmCfg_GetHomePath(sdkdom, home, &buflen);
>      prlsdkCheckRetGoto(pret, error);
>  
> -    /* For VMs pdom->home is actually /directory/config.pvs */
> +    /* For VMs home is actually /directory/config.pvs */
>      if (!IS_CT(def)) {
>          /* Get rid of /config.pvs in path string */
> -        char *s = strrchr(pdom->home, '/');
> +        char *s = strrchr(home, '/');
>          if (s)
>              *s = '\0';
>      }
>  
>      pret = PrlVmCfg_GetAutoStart(sdkdom, &autostart);
>      prlsdkCheckRetGoto(pret, error);
> +    if (autostart != PAO_VM_START_ON_LOAD &&
> +        autostart != PAO_VM_START_MANUAL) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("Unknown autostart mode: %X"), autostart);
> +        goto error;
> +    }
>  
>      if (prlsdkGetDomainState(sdkdom, &domainState) < 0)
>          goto error;
> @@ -1363,38 +1344,6 @@ prlsdkLoadDomain(vzConnPtr privconn,
>              goto error;
>      }
>  
> -    if (olddom) {
> -        /* assign new virDomainDef without any checks */
> -        /* we can't use virDomainObjAssignDef, because it checks
> -         * for state and domain name */
> -        dom = olddom;
> -        virDomainDefFree(dom->def);
> -        dom->def = def;
> -    } else {
> -        if (!(dom = virDomainObjListAdd(privconn->domains, def,
> -                                        privconn->xmlopt,
> -                                        0, NULL)))
> -            goto error;
> -    }
> -    /* dom is locked here */
> -
> -    dom->privateData = pdom;
> -    dom->privateDataFreeFunc = prlsdkDomObjFreePrivate;
> -    dom->persistent = 1;
> -
> -    switch (autostart) {
> -    case PAO_VM_START_ON_LOAD:
> -        dom->autostart = 1;
> -        break;
> -    case PAO_VM_START_MANUAL:
> -        dom->autostart = 0;
> -        break;
> -    default:
> -        virReportError(VIR_ERR_INTERNAL_ERROR,
> -                       _("Unknown autostart mode: %X"), autostart);
> -        goto error;
> -    }
> -
>      if (prlsdkConvertDomainState(domainState, envId, dom) < 0)
>          goto error;
>  
> @@ -1404,24 +1353,27 @@ prlsdkLoadDomain(vzConnPtr privconn,
>          pdom->sdkdom = sdkdom;
>      }
>  
> -    return dom;
> - error:
> -    if (dom && !olddom) {
> -        /* Domain isn't persistent means that we haven't yet set
> -         * prlsdkDomObjFreePrivate and should call it manually
> -         */
> -        if (!dom->persistent)
> -            prlsdkDomObjFreePrivate(pdom);
> +    /* assign new virDomainDef without any checks
> +     * we can't use virDomainObjAssignDef, because it checks
> +     * for state and domain name */
> +    virDomainDefFree(dom->def);
> +    dom->def = def;
> +    pdom->id = envId;
> +    VIR_FREE(pdom->home);
> +    pdom->home = home;
So you are trying to be atomic here and move all dom and pdom
changes to place when nothing could go wrong. It's great
but this mixes aims of this patch. It is better move it to
another preparation step(s) so final split of load would
be (mostly) deleting lines from load and move it to another place.

Also look closely on above call of prlsdkConvertDomainState.
It mutates domain too. Looks like it can not fail for runtime reasons 
only for code inconsitencies. Can we make it return void?

>  
> -        virDomainObjListRemove(privconn->domains, dom);
> -    }
> -    /* Delete newly allocated def only if we haven't assigned it to domain
> -     * Otherwise we will end up with domain having invalid def within it
> -     */
> -    if (!dom)
> -        virDomainDefFree(def);
> +    if (autostart == PAO_VM_START_ON_LOAD)
> +        dom->autostart = 1;
> +    else
> +        dom->autostart = 0;
>  
> -    return NULL;
> +    PrlHandle_Free(sdkdom);
> +    return 0;
> + error:
> +    PrlHandle_Free(sdkdom);
> +    VIR_FREE(home);
> +    virDomainDefFree(def);
> +    return -1;
>  }
>  
>  int
> @@ -1429,11 +1381,13 @@ prlsdkLoadDomains(vzConnPtr privconn)
>  {
>      PRL_HANDLE job = PRL_INVALID_HANDLE;
>      PRL_HANDLE result;
> -    PRL_HANDLE sdkdom;
> +    PRL_HANDLE sdkdom = PRL_INVALID_HANDLE;
>      PRL_UINT32 paramsCount;
>      PRL_RESULT pret;
>      size_t i = 0;
>      virDomainObjPtr dom;
> +    unsigned char uuid[VIR_UUID_BUFLEN];
> +    char *name = NULL;
>  
>      job = PrlSrv_GetVmListEx(privconn->server, PVTF_VM | PVTF_CT);
>  
> @@ -1447,61 +1401,89 @@ prlsdkLoadDomains(vzConnPtr privconn)
>          pret = PrlResult_GetParamByIndex(result, i, &sdkdom);
>          if (PRL_FAILED(pret)) {
>              logPrlError(pret);
> -            PrlHandle_Free(sdkdom);
>              goto error;
>          }
>  
> -        dom = prlsdkLoadDomain(privconn, sdkdom, NULL);
> -        PrlHandle_Free(sdkdom);
> +        if (prlsdkGetDomainIds(sdkdom, &name, uuid))
> +            goto error;
better consitently use < 0
>  
> -        if (!dom)
> +        if (!(dom = prlsdkNewDomain(privconn, name, uuid)))
>              goto error;
> -        else
> -            virObjectUnlock(dom);
> +
> +        if (prlsdkLoadDomain(privconn, dom)) {
here too
> +            virDomainObjListRemove(privconn->domains, dom);
> +            goto error;
> +        }
> +
> +        VIR_FREE(name);
> +        PrlHandle_Free(sdkdom);
> +        sdkdom = PRL_INVALID_HANDLE;
> +        virObjectUnlock(dom);
>      }
>  
>      PrlHandle_Free(result);
>      return 0;
>  
>   error:
Similar cleanup here and in the loop. May be better to move loop body
into a function.
> +    VIR_FREE(name);
> +    PrlHandle_Free(sdkdom);
>      PrlHandle_Free(result);
>      return -1;
>  }
>  
> -virDomainObjPtr
> -prlsdkAddDomain(vzConnPtr privconn, const unsigned char *uuid)
> -{
> -    PRL_HANDLE sdkdom = PRL_INVALID_HANDLE;
> -    virDomainObjPtr dom;
> -
> -    dom = virDomainObjListFindByUUID(privconn->domains, uuid);
> -    if (dom) {
> -        /* domain is already in the list */
> -        return dom;
> -    }
> -
> -    sdkdom = prlsdkSdkDomainLookupByUUID(privconn, uuid);
> -    if (sdkdom == PRL_INVALID_HANDLE)
> -        return NULL;
> -
> -    dom = prlsdkLoadDomain(privconn, sdkdom, NULL);
> -    PrlHandle_Free(sdkdom);
> -    return dom;
> -}
> -
>  int
>  prlsdkUpdateDomain(vzConnPtr privconn, virDomainObjPtr dom)
>  {
>      PRL_HANDLE job;
> -    virDomainObjPtr retdom = NULL;
>      vzDomObjPtr pdom = dom->privateData;
>  
>      job = PrlVm_RefreshConfig(pdom->sdkdom);
>      if (waitJob(job))
>          return -1;
>  
> -    retdom = prlsdkLoadDomain(privconn, pdom->sdkdom, dom);
> -    return retdom ? 0 : -1;
> +    return prlsdkLoadDomain(privconn, dom);
> +}
> +
> +
> +virDomainObjPtr
> +prlsdkNewDomain(vzConnPtr privconn, char *name, const unsigned char *uuid)
> +{
> +    virDomainDefPtr def = NULL;
> +    virDomainObjPtr dom = NULL;
> +    vzDomObjPtr pdom = NULL;
> +
> +    if (!(def = virDomainDefNewFull(name, uuid, -1)))
> +        goto error;
> +
> +    if (VIR_ALLOC(pdom) < 0)
> +        goto error;
> +
> +    pdom->cache.stats = PRL_INVALID_HANDLE;
> +    pdom->cache.count = -1;
> +    if (virCondInit(&pdom->cache.cond) < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot initialize condition"));
> +        goto error;
> +    }
> +
> +    if (STREQ(privconn->drivername, "vz"))
> +        def->virtType = VIR_DOMAIN_VIRT_VZ;
> +    else
> +        def->virtType = VIR_DOMAIN_VIRT_PARALLELS;
are you lose def->id = -1 from original code?
> +
> +    if (!(dom = virDomainObjListAdd(privconn->domains, def,
> +                                        privconn->xmlopt,
> +                                        0, NULL)))
> +        goto error;
> +
> +    dom->privateData = pdom;
> +    dom->privateDataFreeFunc = prlsdkDomObjFreePrivate;
> +    dom->persistent = 1;
> +    return dom;
> +
> + error:
we need to destroy cond.here too. I know it was not in original code))
> +    virDomainDefFree(def);
> +    VIR_FREE(pdom);
> +    return NULL;
>  }
>  
>  static int prlsdkSendEvent(vzConnPtr privconn,
> @@ -1618,15 +1600,36 @@ prlsdkHandleVmAddedEvent(vzConnPtr privconn,
>                           unsigned char *uuid)
>  {
>      virDomainObjPtr dom = NULL;
> +    PRL_HANDLE sdkdom = PRL_INVALID_HANDLE;
> +    char *name = NULL;
>  
> -    dom = prlsdkAddDomain(privconn, uuid);
> -    if (dom == NULL)
> -        return;
> +    dom = virDomainObjListFindByUUID(privconn->domains, uuid);
> +    if (!dom) {
> +        sdkdom = prlsdkSdkDomainLookupByUUID(privconn, uuid);
> +        if (sdkdom == PRL_INVALID_HANDLE)
> +            goto cleanup;
> +
> +        if (prlsdkGetDomainIds(sdkdom, &name, NULL))
> +            goto cleanup;
> +
> +        if (!(dom = prlsdkNewDomain(privconn, name, uuid)))
> +            goto cleanup;
> +
> +        if (prlsdkLoadDomain(privconn, dom)) {
> +            virDomainObjListRemove(privconn->domains, dom);
> +            dom = NULL;
> +            goto cleanup;
> +        }
this is the exact succession of functions as in load domains, so another
reason to move it to a distinct function. And then we don't
need the patch that enhance  prlsdkGetDomainIds, just ask
for uuid again.

> +    }
>  
>      prlsdkSendEvent(privconn, dom, VIR_DOMAIN_EVENT_DEFINED,
>                      VIR_DOMAIN_EVENT_DEFINED_ADDED);
>  
> -    virObjectUnlock(dom);
> + cleanup:
> +    if (dom)
> +        virObjectUnlock(dom);
> +    VIR_FREE(name);
> +    PrlHandle_Free(sdkdom);
>      return;
>  }
>  
> diff --git a/src/vz/vz_sdk.h b/src/vz/vz_sdk.h
> index ff6be07..f2d3e35 100644
> --- a/src/vz/vz_sdk.h
> +++ b/src/vz/vz_sdk.h
> @@ -30,9 +30,14 @@ int prlsdkConnect(vzConnPtr privconn);
>  void prlsdkDisconnect(vzConnPtr privconn);
>  int
>  prlsdkLoadDomains(vzConnPtr privconn);
> -virDomainObjPtr
> -prlsdkAddDomain(vzConnPtr privconn, const unsigned char *uuid);
>  int prlsdkUpdateDomain(vzConnPtr privconn, virDomainObjPtr dom);
> +virDomainObjPtr
> +prlsdkNewDomain(vzConnPtr privconn,
> +                   char *name,
> +                   const unsigned char *uuid);
> +int
> +prlsdkLoadDomain(vzConnPtr privconn,
> +                 virDomainObjPtr dom);
>  int prlsdkSubscribeToPCSEvents(vzConnPtr privconn);
>  void prlsdkUnsubscribeFromPCSEvents(vzConnPtr privconn);
>  PRL_RESULT prlsdkStart(PRL_HANDLE sdkdom);
> 

Overall this patch is too big. 





More information about the libvir-list mailing list