[libvirt] [PATCH v2 1/1] gluster: cache glfs connection object per volume

Peter Krempa pkrempa at redhat.com
Wed Nov 30 12:11:58 UTC 2016


On Wed, Nov 30, 2016 at 16:06:37 +0530, prasanna.kalever at redhat.com wrote:
> From: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
> 
> This patch optimizes calls to glfs_init() and friends
> 
> Currently, starting a VM calls glfs_new/glfs_init twice in libvirt (once
> to create a glfs object for stat and header reads, and again for chown),
> and then forks the qemu process, which calls it once more (for the actual
> read/write I/O).
> 
> Not only that: if we have 4 extra attached disks, the total number of
> calls to glfs_init() and friends becomes (4+1)*2 in libvirt and (4+1)*1
> in qemu, i.e. 15 calls. Since we have no control over the qemu process,
> which runs in a separate process environment, let's not worry about that
> part here.
> 
> This patch shrinks the 10 libvirt-side calls (i.e. the objects from the
> example above) down to one, by maintaining a cache of glfs objects.
> 
> Additionally, the external snapshot scenario complicates the situation
> even further ...
> 
> The glfs object is shared with other users only if the volume name and
> all the volfile servers match (including hostname, transport and port
> number). On a cache hit the glfs object gains a reference, and it is
> unreferenced again on close.
> 
> Thanks to 'Peter Krempa' for all the inputs.
> 
> Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever at redhat.com>
> ---
> v2: Address review comments from Peter on v1
> * Rebased on latest master
> * Changes to commit msg
> * Introduce storage APIs to register and unregister a volume
> * During qemu process start/stop and snapshot creation
> * Check transport and port type
> * Atomic add/delete of list elements and ref counting
> Pending: Treating an IP and an FQDN that refer to the same host as equal

In addition to Dan's review:

You should split the parts that add the caching from the parts that
optimize the calls to virStorageVolInit and friends.

> 
> v1: Initial patch
> ---
>  src/qemu/qemu_domain.c                |   2 +-
>  src/qemu/qemu_domain.h                |   5 +
>  src/qemu/qemu_driver.c                |  29 ++++
>  src/qemu/qemu_process.c               |  25 +++
>  src/storage/storage_backend_fs.c      |   2 +
>  src/storage/storage_backend_gluster.c | 295 +++++++++++++++++++++++++++++++---
>  src/storage/storage_driver.c          |  23 ++-
>  src/storage/storage_driver.h          |   3 +
>  8 files changed, 357 insertions(+), 27 deletions(-)
> 

> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 7650ff3..a9e38bd 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -591,6 +591,11 @@ bool qemuDomainDiskSourceDiffers(virDomainDiskDefPtr disk,
>  bool qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk,
>                                     virDomainDiskDefPtr orig_disk);
>  
> +void qemuDomainGetImageIds(virQEMUDriverConfigPtr cfg,
> +                           virDomainObjPtr vm,
> +                           virStorageSourcePtr src,
> +                           uid_t *uid, gid_t *gid);
> +
>  int qemuDomainStorageFileInit(virQEMUDriverPtr driver,
>                                virDomainObjPtr vm,
>                                virStorageSourcePtr src);
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 3517aa2..7cae094 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -14211,6 +14211,7 @@ qemuDomainSnapshotCreateSingleDiskActive(virQEMUDriverPtr driver,
>   cleanup:
>      if (need_unlink && virStorageFileUnlink(newDiskSrc))
>          VIR_WARN("unable to unlink just-created %s", source);
> +    virStorageFileDeinit(disk->src);
>      virStorageFileDeinit(newDiskSrc);
>      virStorageSourceFree(newDiskSrc);
>      virStorageSourceFree(persistDiskSrc);
> @@ -14566,6 +14567,7 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>      virDomainSnapshotObjPtr snap = NULL;
>      virDomainSnapshotPtr snapshot = NULL;
>      virDomainSnapshotDefPtr def = NULL;
> +    virDomainSnapshotDefPtr refDef = NULL;
>      bool update_current = true;
>      bool redefine = flags & VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE;
>      unsigned int parse_flags = VIR_DOMAIN_SNAPSHOT_PARSE_DISKS;
> @@ -14574,6 +14576,9 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>      bool align_match = true;
>      virQEMUDriverConfigPtr cfg = NULL;
>      virCapsPtr caps = NULL;
> +    unsigned int dIndex;
> +    uid_t uid;
> +    gid_t gid;
>  
>      virCheckFlags(VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE |
>                    VIR_DOMAIN_SNAPSHOT_CREATE_CURRENT |
> @@ -14690,6 +14695,19 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>  
>      qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);
>  
> +    for (dIndex = 0; dIndex < def->ndisks; dIndex++) {

Please use 'i' for iterator.

> +        virDomainSnapshotDiskDef disk = def->disks[dIndex];
> +
> +        if (virStorageSourceIsEmpty(disk.src))
> +            continue;
> +
> +        qemuDomainGetImageIds(cfg, vm, disk.src, &uid, &gid);
> +
> +        if (virStorageVolumeRegister(disk.src, uid, gid) <0)
> +            goto cleanup;
> +    }
> +
> +
>      if (redefine) {
>          if (virDomainSnapshotRedefinePrep(domain, vm, &def, &snap,
>                                            &update_current, flags) < 0)
> @@ -14800,6 +14818,17 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain,
>      snapshot = virGetDomainSnapshot(domain, snap->def->name);
>  
>   endjob:
> +    refDef = (!snap) ? def : snap->def;
> +
> +    for (dIndex = 0; dIndex < refDef->ndisks; dIndex++) {
> +        virDomainSnapshotDiskDef disk = refDef->disks[dIndex];
> +
> +        if (virStorageSourceIsEmpty(disk.src))
> +            continue;
> +
> +        virStorageVolumeUnRegister(disk.src);
> +    }
> +
>      if (snapshot && !(flags & VIR_DOMAIN_SNAPSHOT_CREATE_NO_METADATA)) {
>          if (qemuDomainSnapshotWriteMetadata(vm, snap, driver->caps,
>                                              cfg->snapshotDir) < 0) {
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index ecd7ded..20793cf 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -4612,6 +4612,9 @@ qemuProcessInit(virQEMUDriverPtr driver,
>      qemuDomainObjPrivatePtr priv = vm->privateData;
>      int stopFlags;
>      int ret = -1;
> +    unsigned int dIndex;
> +    uid_t uid;
> +    gid_t gid;
>  
>      VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
>                vm, vm->def->name, vm->def->id, migration);
> @@ -4664,6 +4667,18 @@ qemuProcessInit(virQEMUDriverPtr driver,
>      if (qemuDomainSetPrivatePaths(driver, vm) < 0)
>          goto cleanup;
>  
> +    for (dIndex = 0; dIndex < vm->def->ndisks; dIndex++) {

We usually use 'i' for iterator variables.

> +        virDomainDiskDefPtr disk = vm->def->disks[dIndex];
> +
> +        if (virStorageSourceIsEmpty(disk->src))
> +            continue;
> +
> +        qemuDomainGetImageIds(cfg, vm, disk->src, &uid, &gid);
> +
> +        if (virStorageVolumeRegister(disk->src, uid, gid) < 0)
> +            goto cleanup;
> +    }
> +
>      ret = 0;
>  
>   cleanup:

[...]

> diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c
> index de0e8d5..0e03e06 100644
> --- a/src/storage/storage_backend_fs.c
> +++ b/src/storage/storage_backend_fs.c
> @@ -1488,6 +1488,8 @@ virStorageFileBackendFileDeinit(virStorageSourcePtr src)

[...]

> +static int
> +virStorageBackendGlusterSetPreopened(virStorageSourcePtr src,
> +                                     virStorageFileBackendGlusterPrivPtr priv)
> +{
> +    unsigned int idx;
> +    virStorageBackendGlusterStatePtrPreopened entry = NULL;
> +
> +    if (connCache == NULL && (virOnce(&glusterConnOnce,
> +                                      virGlusterConnAlloc) < 0))
> +        return -1;
> +
> +    if (virGlusterGlobalError)
> +        return -1;
> +
> +    if (!connCache->nConn) {
> +        if (virMutexInit(&connCache->lock) < 0) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                           _("Unable to initialize mutex"));
> +            return -1;
> +        }
> +    }
> +
> +    for (idx = 0; idx < connCache->nConn; idx++) {
> +        if (STREQ(connCache->conn[idx]->volname, src->volume))
> +            return 0;
> +    }
> +
> +    if (VIR_ALLOC(entry) < 0)
> +        return -1;
> +
> +    if (VIR_STRDUP(entry->volname, src->volume) < 0)
> +        goto error;
> +
> +    if (VIR_ALLOC_N(entry->hosts, entry->nservers) < 0)
> +        goto error;
> +
> +    entry->nservers = src->nhosts;
> +
> +    for (idx = 0; idx < src->nhosts; idx++) {
> +        if (VIR_ALLOC(entry->hosts[idx]) < 0)
> +            goto error;
> +        if (src->hosts[idx].transport == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
> +            if (VIR_STRDUP(entry->hosts[idx]->u.uds.path,
> +                           src->hosts[idx].socket) < 0)
> +               goto error;
> +            entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_UNIX;
> +        } else if (src->hosts[idx].transport ==
> +                   VIR_STORAGE_NET_HOST_TRANS_TCP) {
> +            if (VIR_STRDUP(entry->hosts[idx]->u.tcp.host,
> +                           src->hosts[idx].name) < 0)
> +                goto error;
> +            if (VIR_STRDUP(entry->hosts[idx]->u.tcp.port,
> +                           src->hosts[idx].port) < 0)
> +                goto error;
> +            entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_TCP;
> +        } else {
> +            entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_LAST;

We've got virStorageSourceCopy. The cache could use those objects so
that you don't have to copy everything manually.
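
Roughly (untested sketch; the 'src' member on the cache entry is made up):

    /* deep copy of the disk source description, without the backing chain */
    if (!(entry->src = virStorageSourceCopy(src, false)))
        goto error;

The lookup can then compare against entry->src->volume and
entry->src->hosts directly rather than a hand-rolled host list.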

> +        }
> +    }
> +    entry->priv = priv;
> +
> +    virMutexLock(&connCache->lock);
> +    virAtomicIntSet(&entry->ref, 1);

You should use virObjectLockable for this as I told you in the previous
version. It provides a mutex and reference counting and it's the way we
do stuff in libvirt.
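
Something along these lines (untested sketch, all names made up):

    typedef struct _virStorageBackendGlusterCacheEntry virStorageBackendGlusterCacheEntry;
    typedef virStorageBackendGlusterCacheEntry *virStorageBackendGlusterCacheEntryPtr;

    struct _virStorageBackendGlusterCacheEntry {
        virObjectLockable parent;    /* provides the mutex and refcount */

        virStorageSourcePtr src;     /* copy describing volume + hosts */
        glfs_t *vol;
    };

    static virClassPtr virStorageBackendGlusterCacheEntryClass;

    static void
    virStorageBackendGlusterCacheEntryDispose(void *obj)
    {
        virStorageBackendGlusterCacheEntryPtr entry = obj;

        /* runs automatically when the last reference is dropped */
        if (entry->vol)
            glfs_fini(entry->vol);
        virStorageSourceFree(entry->src);
    }

    static int
    virStorageBackendGlusterCacheEntryOnceInit(void)
    {
        if (!(virStorageBackendGlusterCacheEntryClass =
                  virClassNew(virClassForObjectLockable(),
                              "virStorageBackendGlusterCacheEntry",
                              sizeof(virStorageBackendGlusterCacheEntry),
                              virStorageBackendGlusterCacheEntryDispose)))
            return -1;

        return 0;
    }

    VIR_ONCE_GLOBAL_INIT(virStorageBackendGlusterCacheEntry)

A cache hit then becomes virObjectRef(entry) and close a plain
virObjectUnref(entry) instead of the virAtomicInt* juggling.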

> +    if (VIR_INSERT_ELEMENT(connCache->conn, -1, connCache->nConn, entry) < 0) {

This is equivalent to VIR_APPEND_ELEMENT.
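
I.e. (if I remember right, VIR_APPEND_ELEMENT also clears 'entry' on
success since ownership moves to the array; VIR_APPEND_ELEMENT_COPY keeps
it):

    if (VIR_APPEND_ELEMENT(connCache->conn, connCache->nConn, entry) < 0) {
        virMutexUnlock(&connCache->lock);
        goto error;
    }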

> +        virMutexUnlock(&connCache->lock);
> +        goto error;
> +    }
> +    virMutexUnlock(&connCache->lock);
> +
> +    return 0;
> +
> + error:
> +    for (idx = 0; idx < entry->nservers; idx++) {
> +        if (entry->hosts[idx]->type == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
> +            VIR_FREE(entry->hosts[idx]->u.uds.path);
> +        } else {
> +            VIR_FREE(entry->hosts[idx]->u.tcp.host);
> +            VIR_FREE(entry->hosts[idx]->u.tcp.port);
> +        }
> +        VIR_FREE(entry->hosts[idx]);

You should create a freeing function.
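
E.g. something along these lines, based on the struct as it stands in this
patch (with virStorageSourceCopy/virObjectLockable most of it goes away):

    static void
    virStorageBackendGlusterPreopenedFree(virStorageBackendGlusterStatePtrPreopened entry)
    {
        size_t i;

        if (!entry)
            return;

        for (i = 0; i < entry->nservers; i++) {
            if (!entry->hosts[i])
                continue;

            if (entry->hosts[i]->type == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
                VIR_FREE(entry->hosts[i]->u.uds.path);
            } else {
                VIR_FREE(entry->hosts[i]->u.tcp.host);
                VIR_FREE(entry->hosts[i]->u.tcp.port);
            }
            VIR_FREE(entry->hosts[i]);
        }

        VIR_FREE(entry->hosts);
        VIR_FREE(entry->volname);
        VIR_FREE(entry);
    }

Both this error path and the cleanup in ...ClosePreopened() could then
call it.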

> +    }
> +
> +    VIR_FREE(entry->hosts);
> +    VIR_FREE(entry->volname);
> +    VIR_FREE(entry);
> +
> +    return -1;
> +}
> +
> +static glfs_t *
> +virStorageBackendGlusterFindPreopened(virStorageSourcePtr src)
> +{
> +    unsigned int cIdx, sIdx, nIdx; /* connectionIdx, savedIdx, newIdx */

The comment explaining these variables is pointless. Also, please declare
one variable per line.

> +    bool flag = false;

The name of this variable does not help deciphering what it's used for.

> +
> +    if (connCache == NULL)
> +        return NULL;
> +
> +    virStorageBackendGlusterStatePtrPreopened entry;
> +
> +    for (cIdx = 0; cIdx < connCache->nConn; cIdx++) {
> +        entry = connCache->conn[cIdx];
> +        if (STREQ(entry->volname, src->volume)) {

By inverting a few of the conditions and using continue statements you
can avoid deep nesting.
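
E.g. roughly (untested; virStorageBackendGlusterHostMatches() is a made-up
helper that checks whether one cached host appears anywhere in src->hosts):

    size_t i;
    size_t j;

    for (i = 0; i < connCache->nConn; i++) {
        virStorageBackendGlusterStatePtrPreopened entry = connCache->conn[i];

        if (STRNEQ(entry->volname, src->volume))
            continue;

        if (entry->nservers != src->nhosts)
            return NULL;

        for (j = 0; j < entry->nservers; j++) {
            if (!virStorageBackendGlusterHostMatches(entry->hosts[j], src))
                return NULL;
        }

        virAtomicIntInc(&entry->ref);
        return entry->priv->vol;
    }

    return NULL;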

> +            if (entry->nservers == src->nhosts) {
> +                for (sIdx = 0; sIdx < entry->nservers; sIdx++) {
> +                    for (nIdx = 0; nIdx < src->nhosts; nIdx++) {
> +                        if (entry->hosts[sIdx]->type ==
> +                            src->hosts[nIdx].transport) {
> +                            if (entry->hosts[sIdx]->type
> +                                    == VIR_STORAGE_NET_HOST_TRANS_UNIX) {
> +                                if (STREQ(entry->hosts[sIdx]->u.uds.path,
> +                                          src->hosts[nIdx].socket)) {
> +                                    flag = true;
> +                                    break;
> +                                }
> +                            } else {
> +                                if (STREQ(entry->hosts[sIdx]->u.tcp.host,
> +                                          src->hosts[nIdx].name) &&
> +                                    STREQ(entry->hosts[sIdx]->u.tcp.port,
> +                                          src->hosts[nIdx].port)) {
> +                                    flag = true;
> +                                    break;
> +                                }
> +                            }
> +                        }
> +                    }
> +                    if (!flag)
> +                        return NULL;
> +                    flag = false;
> +                }
> +                virAtomicIntInc(&entry->ref);
> +                return entry->priv->vol;
> +            } else {
> +                return NULL;
> +            }
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static void
> +virStorageBackendGlusterClosePreopened(virStorageSourcePtr src)
> +{
> +    virStorageFileBackendGlusterPrivPtr priv = src->drv->priv;
> +    unsigned int cIdx, hIdx; /* connectionIdx, hostIdx */

The comment is rather pointless. Declare one variable per line.

> +    for (cIdx = 0; cIdx < connCache->nConn; cIdx++) {
> +        if (STREQ(connCache->conn[cIdx]->volname, src->volume)) {
> +            virAtomicIntDecAndTest(&connCache->conn[cIdx]->ref);
> +            if (virAtomicIntGet(&connCache->conn[cIdx]->ref) != 0) {
> +                if (priv && priv->canonpath) {
> +                    VIR_FREE(priv->canonpath);
> +                    VIR_FREE(priv);
> +                    src->drv->priv = NULL;

All this should be replaced by a virObject destructor for your given
cache type.
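
With the cache entry being a virObjectLockable as suggested above, the
glfs_fini() and the manual VIR_FREE()s all move into the dispose callback
and this path shrinks to roughly (untested; the 'entry' member on priv is
made up, and dropping the entry from the list is left out):

    virStorageFileBackendGlusterPrivPtr priv = src->drv->priv;

    /* the dispose callback runs glfs_fini() once the last user lets go */
    virObjectUnref(priv->entry);

    VIR_FREE(priv->canonpath);
    VIR_FREE(priv);
    src->drv->priv = NULL;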

> +                }
> +                return;
> +            }
> +
> +            glfs_fini(connCache->conn[cIdx]->priv->vol);
> +
> +            VIR_FREE(connCache->conn[cIdx]->priv->canonpath);
> +            VIR_FREE(connCache->conn[cIdx]->priv);
> +
> +            for (hIdx = 0; hIdx < connCache->conn[cIdx]->nservers; hIdx++) {
> +                if (connCache->conn[cIdx]->hosts[hIdx]->type ==
> +                    VIR_STORAGE_NET_HOST_TRANS_UNIX) {
> +                    VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.uds.path);
> +                } else {
> +                    VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.host);
> +                    VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.port);
> +                }
> +                VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]);
> +            }
> +
> +            VIR_FREE(connCache->conn[cIdx]->hosts);
> +            VIR_FREE(connCache->conn[cIdx]->volname);
> +            VIR_FREE(connCache->conn[cIdx]);
> +
> +            virMutexLock(&connCache->lock);
> +            VIR_DELETE_ELEMENT(connCache->conn, cIdx, connCache->nConn);
> +            virMutexUnlock(&connCache->lock);

Extracting a freeing function will allow you to get rid of this
duplication too.

> +
> +            VIR_FREE(src->drv);
> +        }
> +    }
> +
> +    if (!connCache->conn)
> +        virMutexDestroy(&connCache->lock);
> +}
>  
>  static void
>  virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)

[...]

> @@ -612,6 +852,7 @@ virStorageFileBackendGlusterInitServer(virStorageFileBackendGlusterPrivPtr priv,
>  static int
>  virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>  {
> +    int ret = 0;
>      virStorageFileBackendGlusterPrivPtr priv = NULL;
>      size_t i;
>  
> @@ -625,6 +866,12 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>      if (VIR_ALLOC(priv) < 0)
>          return -1;
>  
> +    priv->vol = virStorageBackendGlusterFindPreopened(src);
> +    if (priv->vol) {
> +        src->drv->priv = priv;
> +        return ret;
> +    }
> +
>      VIR_DEBUG("initializing gluster storage file %p "
>                "(priv='%p' volume='%s' path='%s') as [%u:%u]",
>                src, priv, src->volume, src->path,
> @@ -635,6 +882,10 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>          goto error;
>      }
>  
> +    ret = virStorageBackendGlusterSetPreopened(src, priv);
> +    if (ret < 0)
> +        goto error;
> +
>      for (i = 0; i < src->nhosts; i++) {
>          if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0)
>              goto error;
> @@ -648,13 +899,13 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
>      }
>  
>      src->drv->priv = priv;
> +    ret = 0;
>  
> -    return 0;
> +    return ret;

Why do you need a ret variable if you overwrite it right before
returning it?  The above changes don't make sense.

>  
>   error:
> -    if (priv->vol)
> -        glfs_fini(priv->vol);
>      VIR_FREE(priv);
> +    virStorageBackendGlusterClosePreopened(src);
>  
>      return -1;
>  }
> diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
> index df65807..9c3bffb 100644
> --- a/src/storage/storage_driver.c
> +++ b/src/storage/storage_driver.c

[...]

> @@ -2967,6 +2964,24 @@ virStorageFileInit(virStorageSourcePtr src)
>      return virStorageFileInitAs(src, -1, -1);
>  }
>  
> +int
> +virStorageVolumeRegister(virStorageSourcePtr src,
> +                         uid_t uid, gid_t gid)
> +{
> +    if (virStorageFileInitAs(src, uid, gid) < 0)
> +        return -1;
> +
> +    src->drv->priv = NULL;

What's the point of this wrapper? It also seems wrong: you initialize the
source and then overwrite the private data afterwards, which will leak it.

Ah, that happens to be correct for the gluster driver. Note that other
backends have private data too, and this breaks them and leaks their
private data.

> +    return 0;
> +}