[libvirt] [PATCHv3 3/4] storage: implement rudimentary glusterfs pool refresh

Daniel P. Berrange berrange at redhat.com
Tue Nov 12 16:58:18 UTC 2013


On Mon, Nov 11, 2013 at 09:19:30PM -0700, Eric Blake wrote:
> Actually put gfapi to use, by allowing the creation of a gluster
> pool.  Right now, all volumes are treated as raw; further patches
> will allow peering into files to allow for qcow2 files and backing
> chains, and reporting proper volume allocation.
> 
> I've reported a couple of glusterfs bugs; if we were to require a
> minimum of (not-yet-released) glusterfs 3.5, we could use the new
> glfs_readdir [1] and not worry about the bogus return value of
> glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs
> 3.4.1.
> 
> [1] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00085.html
> [2] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00086.html
> 
> * src/storage/storage_backend_gluster.c
> (virStorageBackendGlusterRefreshPool): Initial implementation.
> (virStorageBackendGlusterOpen, virStorageBackendGlusterClose): New
> helper functions.
> 
> Signed-off-by: Eric Blake <eblake at redhat.com>
> ---
>  src/storage/storage_backend_gluster.c | 220 +++++++++++++++++++++++++++++++++-
>  1 file changed, 215 insertions(+), 5 deletions(-)
> 
> diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
> index 2863c73..bc90de9 100644
> --- a/src/storage/storage_backend_gluster.c
> +++ b/src/storage/storage_backend_gluster.c
> @@ -23,20 +23,230 @@
> 
>  #include <glusterfs/api/glfs.h>
> 
> -#include "virerror.h"
>  #include "storage_backend_gluster.h"
>  #include "storage_conf.h"
> +#include "viralloc.h"
> +#include "virerror.h"
> +#include "virlog.h"
> +#include "virstoragefile.h"
> +#include "virstring.h"
> +#include "viruri.h"
> 
>  #define VIR_FROM_THIS VIR_FROM_STORAGE
> 
> +struct _virStorageBackendGlusterState {
> +    glfs_t *vol;
> +
> +    /* Accept the same URIs as qemu's block/gluster.c:
> +     * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */
> +    virURI *uri;
> +
> +    char *volname; /* vol from URI */
> +    char *dir; /* dir from URI, or "." */
> +};
> +
> +typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
> +typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
> +
> +static void
> +virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
> +{
> +    if (!state)
> +        return;
> +
> +    /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for
> +     * glfs_fini, with errno containing random data, so there's no way
> +     * to tell if it succeeded. 3.4.2 is supposed to fix this.*/
> +    if (state->vol && glfs_fini(state->vol) < 0)
> +        VIR_DEBUG("shutdown of gluster volume %s failed with errno %d",
> +                  state->volname, errno);
> +
> +    virURIFree(state->uri);
> +    VIR_FREE(state->volname);
> +    VIR_FREE(state->dir);
> +    VIR_FREE(state);
> +}
> +
> +static virStorageBackendGlusterStatePtr
> +virStorageBackendGlusterOpen(virStoragePoolObjPtr pool)
> +{
> +    virStorageBackendGlusterStatePtr ret = NULL;
> +    char *sub;
> +    const char *name = pool->def->source.name;
> +
> +    if (VIR_ALLOC(ret) < 0)
> +        return NULL;
> +
> +    if (*name == '/')
> +        name++;
> +    if ((sub = strchr(name, '/'))) {
> +        if (VIR_STRNDUP(ret->volname, name, sub - name) < 0 ||
> +            VIR_STRDUP(ret->dir, sub) < 0)
> +            goto error;
> +    } else {
> +        if (VIR_STRDUP(ret->volname, pool->def->source.name) < 0 ||
> +            VIR_STRDUP(ret->dir, ".") < 0)
> +            goto error;
> +    }
> +
> +    /* FIXME: Currently hard-coded to tcp transport; XML needs to be
> +     * extended to allow alternate transport */
> +    if (VIR_ALLOC(ret->uri) < 0 ||
> +        VIR_STRDUP(ret->uri->scheme, "gluster") < 0 ||
> +        VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 ||
> +        virAsprintf(&ret->uri->path, "%s%s",
> +                    *pool->def->source.name == '/' ? "" : "/",
> +                    pool->def->source.name) < 0)
> +        goto error;

I'm not a huge fan of chaining together statements like this,
since some gdb versions are pretty awful at telling you which
one of these 4 statements has the problem on segv.

> +    ret->uri->port = pool->def->source.hosts[0].port;
> +
> +    if (!(ret->vol = glfs_new(ret->volname))) {
> +        virReportOOMError();
> +        goto error;
> +    }
> +
> +    if (glfs_set_volfile_server(ret->vol, "tcp",
> +                                ret->uri->server, ret->uri->port) < 0 ||
> +        glfs_init(ret->vol) < 0) {
> +        char *uri = virURIFormat(ret->uri);
> +
> +        virReportSystemError(errno, _("failed to connect to %s"),
> +                             NULLSTR(uri));
> +        VIR_FREE(uri);
> +        goto error;
> +    }
> +
> +    return ret;
> +
> +error:
> +    virStorageBackendGlusterClose(ret);
> +    return NULL;
> +}
> +
> +
> +/* Populate *volptr for the given name and stat information, or leave
> + * it NULL if the entry should be skipped (such as ".").  Return 0 on
> + * success, -1 on failure. */
> +static int
> +virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state,
> +                                   const char *pool,
> +                                   const char *name,
> +                                   struct stat *st,
> +                                   virStorageVolDefPtr *volptr)
> +{
> +    char *tmp;
> +    int ret = -1;
> +    virStorageVolDefPtr vol = NULL;
> +
> +    /* Silently skip directories, including '.' and '..'.  FIXME:
> +     * should non-'.' subdirectories be listed as type dir?  */

What behaviour does the 'fs' pool have in for non-'.' subdirs ?
Seems we perhaps ought to match that.

> +    *volptr = NULL;
> +    if (S_ISDIR(st->st_mode))
> +        return 0;
> +
> +    if (VIR_ALLOC(vol) < 0 ||
> +        VIR_STRDUP(vol->name, name) < 0 ||
> +        virAsprintf(&vol->key, "%s%s/%s",
> +                    *pool == '/' ? "" : "/", pool, vol->name) < 0)
> +        goto cleanup;
> +
> +    /* FIXME - must open files to determine if they are non-raw */
> +    vol->type = VIR_STORAGE_VOL_NETWORK;
> +    vol->target.format = VIR_STORAGE_FILE_RAW;
> +    vol->capacity = vol->allocation = st->st_size;

So gluster has no concept of sparse volumes ?

> +    tmp = state->uri->path;
> +    state->uri->path = vol->key;
> +    if (!(vol->target.path = virURIFormat(state->uri))) {
> +        state->uri->path = tmp;
> +        goto cleanup;
> +    }
> +    state->uri->path = tmp;
> +
> +    *volptr = vol;
> +    vol = NULL;
> +    ret = 0;
> +cleanup:
> +    virStorageVolDefFree(vol);
> +    return ret;
> +}
> 
>  static int
>  virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED,
> -                                    virStoragePoolObjPtr pool ATTRIBUTE_UNUSED)
> +                                    virStoragePoolObjPtr pool)
>  {
> -    virReportError(VIR_ERR_NO_SUPPORT, "%s",
> -                   _("gluster pool type not fully supported yet"));
> -    return -1;
> +    int ret = -1;
> +    virStorageBackendGlusterStatePtr state = NULL;
> +    struct {
> +        struct dirent ent;
> +        /* See comment below about readdir_r needing padding */
> +        char padding[MAX(1, 256 - (int) (sizeof(struct dirent)
> +                                         - offsetof(struct dirent, d_name)))];
> +    } de;
> +    struct dirent *ent;
> +    glfs_fd_t *dir = NULL;
> +    struct stat st;
> +    struct statvfs sb;
> +
> +    if (!(state = virStorageBackendGlusterOpen(pool)))
> +        goto cleanup;

What kind of overhead is there in opening / closing gluster connections ?
Is it something we should just do in the pool start/stop methods, instead
of on every operation like refresh.

> +
> +    /* Why oh why did glfs 3.4 decide to expose only readdir_r rather
> +     * than readdir?  POSIX admits that readdir_r is inherently a
> +     * flawed design, because systems are not required to define
> +     * NAME_MAX: http://austingroupbugs.net/view.php?id=696
> +     * http://womble.decadent.org.uk/readdir_r-advisory.html
> +     *
> +     * Fortunately, gluster appears to limit its underlying bricks to
> +     * only use file systems such as XFS that have a NAME_MAX of 255;
> +     * so we are currently guaranteed that if we provide 256 bytes of
> +     * tail padding, then we should have enough space to avoid buffer
> +     * overflow no matter whether the OS used d_name[], d_name[1], or
> +     * d_name[256] in its 'struct dirent'.
> +     * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html
> +     */
> +
> +    if (!(dir = glfs_opendir(state->vol, state->dir))) {
> +        virReportSystemError(errno, _("cannot open path '%s'"),
> +                             pool->def->name);
> +        goto cleanup;
> +    }
> +    while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) {
> +        virStorageVolDefPtr vol;
> +        int okay = virStorageBackendGlusterRefreshVol(state,
> +                                                      pool->def->source.name,
> +                                                      ent->d_name, &st,
> +                                                      &vol);
> +        if (okay < 0)
> +            goto cleanup;
> +        if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count,
> +                                      vol) < 0)
> +            goto cleanup;
> +    }
> +    if (errno) {
> +        virReportSystemError(errno, _("failed to read directory '%s'"),
> +                             pool->def->source.name);
> +        goto cleanup;
> +    }
> +
> +    if (glfs_statvfs(state->vol, state->dir, &sb) < 0) {
> +        virReportSystemError(errno, _("cannot statvfs path '%s'"),                             pool->def->source.name);

Seems you lost the line break here.

> +        goto cleanup;
> +    }
> +
> +    pool->def->capacity = ((unsigned long long)sb.f_frsize *
> +                           (unsigned long long)sb.f_blocks);
> +    pool->def->available = ((unsigned long long)sb.f_bfree *
> +                            (unsigned long long)sb.f_frsize);
> +    pool->def->allocation = pool->def->capacity - pool->def->available;
> +
> +    ret = 0;
> +cleanup:
> +    if (dir)
> +        glfs_closedir(dir);
> +    virStorageBackendGlusterClose(state);
> +    if (ret < 0)
> +        virStoragePoolObjClearVols(pool);
> +    return ret;
>  }
> 


Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|




More information about the libvir-list mailing list