[libvirt] [PATCH v5 3/3] gluster: cache glfs connection object per volume

Peter Krempa pkrempa at redhat.com
Tue Jan 17 07:31:27 UTC 2017


On Thu, Dec 15, 2016 at 13:37:05 +0530, Prasanna Kumar Kalever wrote:
> Currently, if we have 4 extra attached disks, then for each disk we need
> to call 'glfs_init' (over the network) and friends, which could be costly.
> 
> Additionally, the (external) snapshot scenario will further complicate the situation.
> 
> This patch maintains a cache of glfs objects per volume, so that all
> disks/snapshots belonging to a volume whose glfs object is cached can
> avoid calls to 'glfs_init' and friends by simply taking a ref on the
> existing object.
> 
> The glfs object is shared only if volume name and all the volfile servers match
> (includes hostname, transport and port number).
> 
> Thanks to 'Peter Krempa' for all the inputs.
> 
> Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
> ---
>  src/storage/storage_backend_gluster.c | 302 +++++++++++++++++++++++++++++++---
>  1 file changed, 283 insertions(+), 19 deletions(-)
> 
> diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
> index 779a3bd..71426c8 100644
> --- a/src/storage/storage_backend_gluster.c
> +++ b/src/storage/storage_backend_gluster.c
> @@ -36,6 +36,20 @@
>  
>  VIR_LOG_INIT("storage.storage_backend_gluster");
>  
> +
> +typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
> +typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
> +
> +typedef struct _virStorageFileBackendGlusterPriv virStorageFileBackendGlusterPriv;
> +typedef virStorageFileBackendGlusterPriv *virStorageFileBackendGlusterPrivPtr;
> +
> +typedef struct _virStorageBackendGlusterCache virStorageBackendGlusterCache;
> +typedef virStorageBackendGlusterCache *virStorageBackendGlusterCachePtr;
> +
> +typedef struct _virStorageBackendGlusterCacheConn virStorageBackendGlusterCacheConn;
> +typedef virStorageBackendGlusterCacheConn *virStorageBackendGlusterCacheConnPtr;
> +
> +
>  struct _virStorageBackendGlusterState {
>      glfs_t *vol;
>  
> @@ -47,8 +61,206 @@ struct _virStorageBackendGlusterState {
>      char *dir; /* dir from URI, or "/"; always starts and ends in '/' */
>  };
>  
> -typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
> -typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
> +struct _virStorageFileBackendGlusterPriv {
> +    glfs_t *vol;
> +    char *canonpath;
> +};
> +
> +struct _virStorageBackendGlusterCacheConn {
> +    virObjectLockable parent;
> +    virCond cond;
> +    bool quit;

The name 'quit' is wrong. The flag is used to mark that the connection is valid.
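
Something like 'ready' would describe the intent better, e.g. (naming
sketch only):

    bool ready; /* set once glfs_init() finished and the connection may be shared */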

> +
> +    glfs_t *fs;
> +    size_t nhosts;
> +    virStorageNetHostDefPtr hosts;
> +    char *volname;
> +};
> +
> +struct _virStorageBackendGlusterCache {
> +    virMutex lock;
> +    size_t nConn;
> +    virStorageBackendGlusterCacheConnPtr *conn;
> +};
> +
> +static bool
> +virStorageBackendGlusterCompareHosts(size_t nSrcHosts,
> +                                     virStorageNetHostDefPtr srcHosts,
> +                                     size_t nDstHosts,
> +                                     virStorageNetHostDefPtr dstHosts)
> +{
> +    bool match = false;
> +    size_t i;
> +    size_t j;
> +
> +    if (nSrcHosts != nDstHosts)
> +        return false;
> +    for (i = 0; i < nSrcHosts; i++) {
> +        for (j = 0; j < nDstHosts; j++) {
> +            if (srcHosts[i].type != dstHosts[j].type)
> +                continue;
> +            switch (srcHosts[i].type) {
> +                case VIR_STORAGE_NET_HOST_TRANS_UNIX:
> +                    if (STREQ(srcHosts[i].u.uds.path, dstHosts[j].u.uds.path))
> +                        match = true;
> +                    break;
> +                case VIR_STORAGE_NET_HOST_TRANS_TCP:
> +                case VIR_STORAGE_NET_HOST_TRANS_RDMA:
> +                    if (STREQ(srcHosts[i].u.inet.addr, dstHosts[j].u.inet.addr)
> +                                               &&
> +                        STREQ(srcHosts[i].u.inet.port, dstHosts[j].u.inet.port))
> +                        match = true;
> +                    break;
> +                case VIR_STORAGE_NET_HOST_TRANS_LAST:
> +                    break;
> +            }
> +            if (match)
> +                break; /* out of inner for loop */
> +        }
> +        if (!match)
> +            return false;
> +        match = false; /* reset */
> +    }
> +    return true;
> +}
> +
> +static int
> +virStorageBackendGlusterCacheAdd(virStorageSourcePtr src, glfs_t *fs)

Since you need access to the cache entry after calling this function and
currently have to look it up again repeatedly, this function should return
the cache entry so that the code does not have to look it up afterwards.
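
Something along these lines would avoid the repeated lookups (just a
sketch; the 'created' flag and the virStorageBackendGlusterCacheConnNew()
helper are made up to keep it short):

static virStorageBackendGlusterCacheConnPtr
virStorageBackendGlusterCacheAdd(virStorageSourcePtr src,
                                 bool *created)
{
    virStorageBackendGlusterCacheConnPtr conn;
    size_t i;

    *created = false;

    for (i = 0; i < virStorageBackendGlusterGlobalCacheObj->nConn; i++) {
        conn = virStorageBackendGlusterGlobalCacheObj->conn[i];

        if (STREQ(conn->volname, src->volume) &&
            virStorageBackendGlusterCompareHosts(conn->nhosts, conn->hosts,
                                                 src->nhosts, src->hosts))
            return conn; /* existing entry, possibly already initialized */
    }

    /* otherwise allocate and append a new entry exactly as the patch does
     * now, but hand it back so the caller can initialize it; the helper
     * name below is hypothetical */
    *created = true;
    return virStorageBackendGlusterCacheConnNew(src);
}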

> +{
> +    virStorageBackendGlusterCacheConnPtr newConn = NULL;
> +    virStorageBackendGlusterCacheConnPtr conn = NULL;
> +    size_t i;
> +
> +    for (i = 0; i < virStorageBackendGlusterGlobalCacheObj->nConn; i++) {
> +        conn = virStorageBackendGlusterGlobalCacheObj->conn[i];
> +        if (STRNEQ(conn->volname, src->volume))
> +            continue;
> +        if (virStorageBackendGlusterCompareHosts(conn->nhosts, conn->hosts,
> +                                                 src->nhosts, src->hosts)) {
> +            return 0;

This looks wrong. The function would return success both if the entry was
found and if it was created. Since this function does not initialize the
connection, you need a way to tell the two states apart.

> +        }
> +    }
> +
> +    if (!(newConn = virObjectLockableNew(virStorageBackendGlusterCacheConnClass)))
> +        return -1;
> +
> +    if (virCondInit(&newConn->cond) < 0) {
> +        virReportSystemError(errno, "%s",
> +                             _("failed to initialize domain condition"));

You did not modify the error message after copying this.

> +        goto error;
> +    }
> +
> +    if (VIR_STRDUP(newConn->volname, src->volume) < 0)
> +        goto error;
> +
> +    newConn->nhosts = src->nhosts;
> +    if (!(newConn->hosts = virStorageNetHostDefCopy(src->nhosts, src->hosts)))
> +        goto error;
> +    newConn->fs = fs;
> +
> +    if (VIR_APPEND_ELEMENT(virStorageBackendGlusterGlobalCacheObj->conn,
> +                           virStorageBackendGlusterGlobalCacheObj->nConn,
> +                           newConn) < 0)
> +        goto error;
> +
> +    return 0;
> +
> + error:
> +    virStorageNetHostDefFree(newConn->nhosts, newConn->hosts);
> +    VIR_FREE(newConn->volname);
> +    virObjectUnref(newConn);
> +    return -1;
> +}
> +
> +static glfs_t *
> +virStorageBackendGlusterCacheLookup(virStorageSourcePtr src)
> +{
> +    virStorageBackendGlusterCacheConnPtr conn;
> +    size_t i;
> +
> +    if (virStorageBackendGlusterGlobalCacheObj == NULL)
> +        return NULL;
> +
> +    for (i = 0; i < virStorageBackendGlusterGlobalCacheObj->nConn; i++) {
> +        conn = virStorageBackendGlusterGlobalCacheObj->conn[i];
> +        if (STRNEQ(conn->volname, src->volume))
> +            continue;
> +        if (virStorageBackendGlusterCompareHosts(conn->nhosts, conn->hosts,
> +                                                 src->nhosts, src->hosts)) {
> +            while (!conn->quit) {
> +                if (virCondWait(&conn->cond, &conn->parent.lock) < 0) {

Using the condition does not solve anything here. This function is called
while still holding the lock on the connection cache, so while the
condition waits for the connection to come up the cache lock is held as
well; thus this does not fix the problem I pointed out.

You need to take a reference on the connection cache entry, drop the lock
on the connection cache and only then wait until the entry is ready.

Also, the condition requires that the mutex is locked, which you did not do
prior to waiting on it. This also means that you don't properly protect
access to the cache entries: you have a lockable object but don't use it
at all.

Additionally, after virCondWait finishes the mutex is locked, but you never
unlock it.
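
A minimal sketch of what I mean, assuming the flag is renamed to 'ready'
and the function returns the cache entry rather than the glfs_t pointer:

    /* grab a reference and the entry lock, then drop the cache lock so
     * other threads can use the cache while we wait */
    virObjectRef(conn);
    virObjectLock(conn);
    virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);

    while (!conn->ready) {
        if (virCondWait(&conn->cond, &conn->parent.lock) < 0) {
            virReportSystemError(errno, "%s",
                                 _("failed to wait for gluster connection"));
            virObjectUnlock(conn);
            virObjectUnref(conn);
            return NULL;
        }
    }

    virObjectUnlock(conn);
    return conn;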

> +                    virReportSystemError(errno, "%s",
> +                                         _("failed to wait for domain condition"));

Again, wrongly copied message.

> +                    return NULL;
> +                }
> +            }
> +            virObjectRef(conn);
> +            return conn->fs;

Here this should return the whole entry ...

> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +virStorageBackendGlusterCacheRelease(virStorageSourcePtr src)
> +{
> +    virStorageBackendGlusterCacheConnPtr conn = NULL;
> +    size_t i;
> +
> +    virMutexLock(&virStorageBackendGlusterGlobalCacheObj->lock);

Please note that to avoid deadlocks you may only take the cache lock before
taking the entry lock. Currently this is not a problem because you don't
lock the entries themselves, but you will need to do that.

> +    for (i = 0; i < virStorageBackendGlusterGlobalCacheObj->nConn; i++) {
> +        conn = virStorageBackendGlusterGlobalCacheObj->conn[i];
> +        if (STRNEQ(conn->volname, src->volume))
> +            continue;
> +        if (virStorageBackendGlusterCompareHosts(conn->nhosts, conn->hosts,
> +                                                 src->nhosts, src->hosts)) {
> +            if (virObjectUnref(conn))
> +                goto unlock;

... so that you can store it in the private data, which will then allow
looking it up by pointer comparison.
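
Roughly (a sketch; 'conn' would be a new member of the file backend
private data, replacing the lookup by volume name and hosts):

struct _virStorageFileBackendGlusterPriv {
    virStorageBackendGlusterCacheConnPtr conn; /* referenced cache entry */
    glfs_t *vol;                               /* == conn->fs, kept for convenience */
    char *canonpath;
};

static void
virStorageBackendGlusterCacheRelease(virStorageBackendGlusterCacheConnPtr conn)
{
    size_t i;

    virMutexLock(&virStorageBackendGlusterGlobalCacheObj->lock);

    for (i = 0; i < virStorageBackendGlusterGlobalCacheObj->nConn; i++) {
        if (virStorageBackendGlusterGlobalCacheObj->conn[i] != conn)
            continue;

        /* drop our reference; remove the entry only when the last user goes away */
        if (!virObjectUnref(conn))
            VIR_DELETE_ELEMENT(virStorageBackendGlusterGlobalCacheObj->conn, i,
                               virStorageBackendGlusterGlobalCacheObj->nConn);
        break;
    }

    virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);
}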

> +
> +            VIR_DELETE_ELEMENT(virStorageBackendGlusterGlobalCacheObj->conn, i,
> +                               virStorageBackendGlusterGlobalCacheObj->nConn);
> +        }
> +    }
> +
> + unlock:
> +    virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);
> +}
>  
>  static void
>  virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
> @@ -69,6 +281,26 @@ virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
>      VIR_FREE(state);
>  }
>  
> +static void
> +virStorageBackendGlusterBroadcast(virStorageSourcePtr src)
> +{
> +    virStorageBackendGlusterCacheConnPtr conn;
> +    size_t i;
> +
> +    virMutexLock(&virStorageBackendGlusterGlobalCacheObj->lock);
> +    for (i = 0; i < virStorageBackendGlusterGlobalCacheObj->nConn; i++) {

This function should not have to look up the connection; pass in the cache
entry itself, since you added it shortly before calling this function.

Also, because you hold the cache lock while waiting for the connection,
this would deadlock.
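
With the entry available in the caller this collapses to something like
(sketch; the function name is illustrative and assumes the 'ready' naming
from above):

static void
virStorageBackendGlusterCacheConnMarkReady(virStorageBackendGlusterCacheConnPtr conn)
{
    virObjectLock(conn);
    conn->ready = true;
    virCondBroadcast(&conn->cond);
    virObjectUnlock(conn);
}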

> +        conn = virStorageBackendGlusterGlobalCacheObj->conn[i];
> +        if (STRNEQ(conn->volname, src->volume))
> +            continue;
> +        if (virStorageBackendGlusterCompareHosts(conn->nhosts, conn->hosts,
> +                                                 src->nhosts, src->hosts)) {
> +            conn->quit = true;
> +            virCondBroadcast(&conn->cond);
> +        }
> +    }
> +    virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);
> +}
> +
>  static virStorageBackendGlusterStatePtr
>  virStorageBackendGlusterOpen(virStoragePoolObjPtr pool)
>  {
> @@ -538,15 +770,6 @@ virStorageBackend virStorageBackendGluster = {
>  };
>  
>  
> -typedef struct _virStorageFileBackendGlusterPriv virStorageFileBackendGlusterPriv;
> -typedef virStorageFileBackendGlusterPriv *virStorageFileBackendGlusterPrivPtr;
> -
> -struct _virStorageFileBackendGlusterPriv {
> -    glfs_t *vol;
> -    char *canonpath;
> -};
> -
> -
>  static void
>  virStorageFileBackendGlusterDeinit(virStorageSourcePtr src)
>  {
> @@ -562,10 +785,9 @@ virStorageFileBackendGlusterDeinit(virStorageSourcePtr src)
>  
>      src->drv = NULL;
>  
> -    if (priv->vol)
> -        glfs_fini(priv->vol);
> -    VIR_FREE(priv->canonpath);
> +    virStorageBackendGlusterCacheRelease(src);
>  
> +    VIR_FREE(priv->canonpath);
>      VIR_FREE(priv);
>  }
>  
> @@ -613,6 +835,19 @@ virStorageFileBackendGlusterInitServer(virStorageFileBackendGlusterPrivPtr priv,
>      return 0;
>  }
>  
> +static void
> +virStorageBackendGlusterCacheInit(void)
> +{
> +    if (VIR_ALLOC(virStorageBackendGlusterGlobalCacheObj) < 0)
> +        return;
> +
> +    if (virMutexInit(&virStorageBackendGlusterGlobalCacheObj->lock) < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("Unable to initialize mutex"));
> +        return;
> +    }
> +    errno = VIR_ERR_OK;

Please use VIR_ONCE_GLOBAL_INIT instead of abusing 'errno'.
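
Along the lines of (sketch; if the cache becomes a virObjectLockable as
suggested further down, the OnceInit function would additionally register
the class and create the object via virObjectLockableNew):

static int
virStorageBackendGlusterCacheOnceInit(void)
{
    if (VIR_ALLOC(virStorageBackendGlusterGlobalCacheObj) < 0)
        return -1;

    if (virMutexInit(&virStorageBackendGlusterGlobalCacheObj->lock) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Unable to initialize mutex"));
        VIR_FREE(virStorageBackendGlusterGlobalCacheObj);
        return -1;
    }

    return 0;
}

VIR_ONCE_GLOBAL_INIT(virStorageBackendGlusterCache)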

> +}
>  
>  static int
>  virStorageFileBackendGlusterInit(virStorageSourcePtr src)
> @@ -635,11 +870,38 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>                src, priv, src->volume, src->path,
>                (unsigned int)src->drv->uid, (unsigned int)src->drv->gid);
>  
> +    if (virOnce(&virStorageBackendGlusterCacheOnce,
> +                virStorageBackendGlusterCacheInit) < 0)
> +        return -1;

And replace this with the appropriate call to the generated initializer, so
that we don't have multiple different instances of the same approach.
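
i.e. (the Initialize function being the one generated by
VIR_ONCE_GLOBAL_INIT above):

    if (virStorageBackendGlusterCacheInitialize() < 0)
        return -1;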

> +
> +    if (errno)
> +        return -errno;
> +
> +    if (!virStorageBackendGlusterGlobalCacheObj)
> +        return -1;
> +
> +    if (virStorageBackendGlusterCacheConnInitialize() < 0)
> +        return -1;
> +
> +    virMutexLock(&virStorageBackendGlusterGlobalCacheObj->lock);

I'd strongly suggest you turn the cache into a virObjectLockable, so
that you can use virObjectLock here instead of hand-rolling this.
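
Roughly (sketch only; the class registration and the virObjectLockableNew()
call would then live in the OnceInit function mentioned above):

struct _virStorageBackendGlusterCache {
    virObjectLockable parent;

    size_t nConn;
    virStorageBackendGlusterCacheConnPtr *conn;
};

virObjectLock(virStorageBackendGlusterGlobalCacheObj) and virObjectUnlock()
then replace the open-coded virMutexLock/virMutexUnlock calls throughout.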

> +
> +    priv->vol = virStorageBackendGlusterCacheLookup(src);
> +    if (priv->vol) {
> +        src->drv->priv = priv;
> +        virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);
> +        return 0;
> +    }
> +
>      if (!(priv->vol = glfs_new(src->volume))) {
>          virReportOOMError();
> -        goto error;
> +        goto unlock;
>      }
>  
> +    if (virStorageBackendGlusterCacheAdd(src, priv->vol) < 0)
> +        goto unlock;
> +
> +    virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);
> +
>      for (i = 0; i < src->nhosts; i++) {
>          if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0)
>              goto error;
> @@ -652,15 +914,17 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>          goto error;
>      }
>  
> +    virStorageBackendGlusterBroadcast(src);
> +
>      src->drv->priv = priv;
>  
>      return 0;
>  
> - error:
> -    if (priv->vol)
> -        glfs_fini(priv->vol);
> -    VIR_FREE(priv);
> + unlock:
> +    virMutexUnlock(&virStorageBackendGlusterGlobalCacheObj->lock);
>  
> + error:
> +    virStorageBackendGlusterCacheRelease(src);
>      return -1;
>  }