[libvirt] [PATCH] storage backend: Add RBD (RADOS Block Device) support

Wido den Hollander wido at widodh.nl
Wed Apr 25 14:35:25 UTC 2012


On 04/18/2012 12:36 PM, Daniel P. Berrange wrote:
> On Fri, Mar 30, 2012 at 11:04:43AM +0200, Wido den Hollander wrote:
>> This patch adds support for a new storage backend with RBD support.
>>
>> RBD is the RADOS Block Device and is part of the Ceph distributed storage system.
>>
>> It comes in two flavours: Qemu-RBD and Kernel RBD, this storage backend only supports
>> Qemu-RBD, thus limiting the use of this storage driver to Qemu only.
>>
>> To function this backend relies on librbd and librados being present on the local system.
>>
>> The backend also supports Cephx authentication for safe authentication with the Ceph cluster.
>>
>> For storing credentials it uses the build-in secret mechanism of libvirt.
>>
>> Signed-off-by: Wido den Hollander<wido at widodh.nl>
>> ---
>>   configure.ac                             |   20 ++
>>   include/libvirt/libvirt.h.in             |    1 +
>>   src/Makefile.am                          |    9 +
>>   src/conf/storage_conf.c                  |  197 ++++++++++---
>>   src/conf/storage_conf.h                  |   16 +
>>   src/storage/storage_backend.c            |    6 +
>>   src/storage/storage_backend_rbd.c        |  465 ++++++++++++++++++++++++++++++
>>   src/storage/storage_backend_rbd.h        |   30 ++
>>   tests/storagepoolxml2xmlin/pool-rbd.xml  |   11 +
>>   tests/storagepoolxml2xmlout/pool-rbd.xml |   15 +
>>   tools/virsh.c                            |    7 +
>>   11 files changed, 734 insertions(+), 43 deletions(-)
>>   create mode 100644 src/storage/storage_backend_rbd.c
>>   create mode 100644 src/storage/storage_backend_rbd.h
>>   create mode 100644 tests/storagepoolxml2xmlin/pool-rbd.xml
>>   create mode 100644 tests/storagepoolxml2xmlout/pool-rbd.xml
>>
>> diff --git a/configure.ac b/configure.ac
>> index 32cc8d0..b693e5b 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -1743,6 +1743,8 @@ AC_ARG_WITH([storage-mpath],
>>     AC_HELP_STRING([--with-storage-mpath], [with mpath backend for the storage driver @<:@default=check@:>@]),[],[with_storage_mpath=check])
>>   AC_ARG_WITH([storage-disk],
>>     AC_HELP_STRING([--with-storage-disk], [with GPartd Disk backend for the storage driver @<:@default=check@:>@]),[],[with_storage_disk=check])
>> +AC_ARG_WITH([storage-rbd],
>> +  AC_HELP_STRING([--with-storage-rbd], [with RADOS Block Device backend for the storage driver @<:@default=check@:>@]),[],[with_storage_rbd=check])
>>
>>   if test "$with_libvirtd" = "no"; then
>>     with_storage_dir=no
>> @@ -1752,6 +1754,7 @@ if test "$with_libvirtd" = "no"; then
>>     with_storage_scsi=no
>>     with_storage_mpath=no
>>     with_storage_disk=no
>> +  with_storage_rbd=no
>>   fi
>>   if test "$with_storage_dir" = "yes" ; then
>>     AC_DEFINE_UNQUOTED([WITH_STORAGE_DIR], 1, [whether directory backend for storage driver is enabled])
>> @@ -1910,6 +1913,22 @@ if test "$with_storage_mpath" = "check"; then
>>   fi
>>   AM_CONDITIONAL([WITH_STORAGE_MPATH], [test "$with_storage_mpath" = "yes"])
>>
>> +if test "$with_storage_rbd" = "yes" || test "$with_storage_rbd" = "check"; then
>> +    AC_CHECK_HEADER([rbd/librbd.h], [LIBRBD_FOUND=yes; break;])
>> +
>> +    LIBRBD_LIBS="-lrbd -lrados -lcrypto"
>> +
>> +    if test "$LIBRBD_FOUND" = "yes"; then
>> +        with_storage_rbd=yes
>> +        LIBS="$LIBS $LIBRBD_LIBS"
>> +    else
>> +        with_storage_rbd=no
>> +    fi
>> +
>> +    AC_DEFINE_UNQUOTED([WITH_STORAGE_RBD], 1, [wether RBD backend for storage driver is enabled])
>> +fi
>> +AM_CONDITIONAL([WITH_STORAGE_RBD], [test "$with_storage_rbd" = "yes"])
>> +
>>   LIBPARTED_CFLAGS=
>>   LIBPARTED_LIBS=
>>   if test "$with_storage_disk" = "yes" ||
>> @@ -2673,6 +2692,7 @@ AC_MSG_NOTICE([   iSCSI: $with_storage_iscsi])
>>   AC_MSG_NOTICE([    SCSI: $with_storage_scsi])
>>   AC_MSG_NOTICE([   mpath: $with_storage_mpath])
>>   AC_MSG_NOTICE([    Disk: $with_storage_disk])
>> +AC_MSG_NOTICE([     RBD: $with_storage_rbd])
>>   AC_MSG_NOTICE([])
>>   AC_MSG_NOTICE([Security Drivers])
>>   AC_MSG_NOTICE([])
>
> Can you also add RBD logic to the libvirt.spec.in RPM specfile. We should
> have it turned on for any Fedora>= 16, but not for RHEL.

I must confess, I've never build a single RPM package in my life, I'm a 
deb guy. I'll give it a try!

>
>
>> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
>> index 499dcd4..ee1d5ec 100644
>> --- a/include/libvirt/libvirt.h.in
>> +++ b/include/libvirt/libvirt.h.in
>> @@ -2324,6 +2324,7 @@ typedef enum {
>>     VIR_STORAGE_VOL_FILE = 0,     /* Regular file based volumes */
>>     VIR_STORAGE_VOL_BLOCK = 1,    /* Block based volumes */
>>     VIR_STORAGE_VOL_DIR = 2,      /* Directory-passthrough based volume */
>> +  VIR_STORAGE_VOL_NETWORK = 3,  /* Network volumes like RBD (RADOS Block Device) */
>
>
> So the RBD volumes don't ever appear as normal block devices in /dev ?
>
>
> Could you also add some content to  docs/storage.html to give an example
> of the pool XML, and an example of the volume XML you will see, and any
> other information that you think would be relevant.
>
>> diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
>> index bdf6218..2a0b5eb 100644
>> --- a/src/conf/storage_conf.c
>> +++ b/src/conf/storage_conf.c
>> @@ -395,6 +416,27 @@ virStoragePoolDefParseAuthChap(xmlXPathContextPtr ctxt,
>>   }
>>
>>   static int
>> +virStoragePoolDefParseAuthCephx(xmlXPathContextPtr ctxt,
>> +                               virStoragePoolAuthCephxPtr auth) {
>> +    auth->username = virXPathString("string(./auth/@username)", ctxt);
>> +    if (auth->username == NULL) {
>> +        virStorageReportError(VIR_ERR_XML_ERROR,
>> +                              "%s", _("missing auth username attribute"));
>> +        return -1;
>> +    }
>> +
>> +    auth->secret.uuid = virXPathString("string(./auth/secret/@uuid)", ctxt);
>> +    auth->secret.usage = virXPathString("string(./auth/secret/@usage)", ctxt);
>> +    if (auth->secret.uuid == NULL&&  auth->secret.usage == NULL) {
>> +        virStorageReportError(VIR_ERR_XML_ERROR,
>> +                              "%s", _("missing auth secret uuid or usage attribute"));
>> +        return -1;
>> +    }
>
> If would be desirable to use  virUUIDParse to validate the UUID in the
> XML has correct syntax.
>
>> @@ -414,6 +456,12 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
>>       }
>>
>>       source->name = virXPathString("string(./name)", ctxt);
>> +    if (pool_type == VIR_STORAGE_POOL_RBD&&  source->name == NULL) {
>> +        virStorageReportError(VIR_ERR_XML_ERROR,
>> +                                  _("%s"), "missing mandatory 'name' field for RBD pool name");
>> +            VIR_FREE(source->name);
>> +            goto cleanup;
>> +    }
>
> You have the  _(..) bit around the wrong string here :-)
>
>> @@ -907,25 +1015,28 @@ virStoragePoolDefFormat(virStoragePoolDefPtr def) {
>>       if (virStoragePoolSourceFormat(&buf, options,&def->source)<  0)
>>           goto cleanup;
>>
>> -    virBufferAddLit(&buf,"<target>\n");
>> +    /* RBD devices are no local block devs nor files, so it doesn't have a target */
>
>
> Interesting, that answers my earlier question. I'd like to see what the XML
> looks like for a volume, and how you would then put that info in a guest
> <disk>  section.

Like you asked above, no, RBD images will not show up as a regular block 
device nor file on your system.

You attach them as network disks: 
http://libvirt.org/formatdomain.html#elementsDisks

    <disk type='network'>
       <driver name="qemu" type="raw"/>
       <source protocol="rbd" name="image_name2">
         <host name="hostname" port="7000"/>
       </source>
       <target dev="hdd" bus="ide"/>
       <auth username='myuser'>
         <secret type='ceph' usage='mypassid'/>
       </auth>
     </disk>

This storage backend manages the pool of RBD images for you.



I've taken care of all other points except the RPM spec file. Once 
that's done I'll send an update version of the patch (with docs!)

Wido

>
>> diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
>> index 1ef9295..6b91ca5 100644
>> --- a/src/conf/storage_conf.h
>> +++ b/src/conf/storage_conf.h
>> @@ -120,6 +120,7 @@ enum virStoragePoolType {
>>       VIR_STORAGE_POOL_ISCSI,    /* iSCSI targets */
>>       VIR_STORAGE_POOL_SCSI,     /* SCSI HBA */
>>       VIR_STORAGE_POOL_MPATH,    /* Multipath devices */
>> +	VIR_STORAGE_POOL_RBD,      /* RADOS Block Device */
>
> Slight indentation bug.
>
>>
>>       VIR_STORAGE_POOL_LAST,
>>   };
>> @@ -137,6 +138,7 @@ enum virStoragePoolDeviceType {
>>   enum virStoragePoolAuthType {
>>       VIR_STORAGE_POOL_AUTH_NONE,
>>       VIR_STORAGE_POOL_AUTH_CHAP,
>> +    VIR_STORAGE_POOL_AUTH_CEPHX,
>>   };
>>
>>   typedef struct _virStoragePoolAuthChap virStoragePoolAuthChap;
>> @@ -146,6 +148,15 @@ struct _virStoragePoolAuthChap {
>>       char *passwd;
>>   };
>>
>> +typedef struct _virStoragePoolAuthCephx virStoragePoolAuthCephx;
>> +typedef virStoragePoolAuthCephx *virStoragePoolAuthCephxPtr;
>> +struct _virStoragePoolAuthCephx {
>> +    char *username;
>> +    struct {
>> +            char *uuid;
>
> For UUID's we normally use
>
>     unsigned char uuid[VIR_UUID_BUFLEN];
>
> and the virUUIDFormat/virUUIDParse APIs in the XML conversion steps
>
>> +            char *usage;
>> +    } secret;
>> +};
>>
>>   /*
>>    * For remote pools, info on how to reach the host
>> @@ -215,6 +226,10 @@ struct _virStoragePoolSource {
>>       /* An optional host */
>>       virStoragePoolSourceHost host;
>>
>> +    /* Or multiple hosts */
>> +    int nhost;
>> +    virStoragePoolSourceHostPtr hosts;
>
> Can you use 'size_t' here instead of 'int', and while
> you're at it, you can kjill off the existing 'host'
> field and just make existing drivers use the new
> 'hosts' field.
>
> It would be desirable if you created a completely separate
> patch to just change 'host' to 'hosts' + 'nhosts', and update
> the existing drivers. Then let you RBD patch depend on it.
>
>> diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c
>> new file mode 100644
>> index 0000000..056059a
>> --- /dev/null
>> +++ b/src/storage/storage_backend_rbd.c
>> +
>> +struct _virStorageBackendRBDState {
>> +    rados_t cluster;
>> +    rados_ioctx_t ioctx;
>> +    time_t starttime;
>> +};
>> +
>> +typedef struct _virStorageBackendRBDState virStorageBackendRBDState;
>> +typedef virStorageBackendRBDState virStorageBackendRBDStatePtr;
>> +
>> +static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr *ptr,
>> +                                             virConnectPtr conn,
>> +                                             virStoragePoolObjPtr pool)
>> +{
>> +    int ret = -1;
>> +    unsigned char *secret_value;
>> +    size_t secret_value_size;
>> +    char *rados_key;
>> +    virBuffer mon_host = VIR_BUFFER_INITIALIZER;
>> +    virSecretPtr secret = NULL;
>> +
>> +    VIR_DEBUG("Found Cephx username: %s",
>> +              pool->def->source.auth.cephx.username);
>> +
>> +    if (pool->def->source.auth.cephx.username != NULL) {
>> +        VIR_DEBUG("Using cephx authorization");
>> +        if (rados_create(&ptr->cluster,
>> +            pool->def->source.auth.cephx.username)<  0) {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("failed to initialize RADOS"));
>> +            goto cleanup;
>> +        }
>> +
>> +        if (pool->def->source.auth.cephx.secret.uuid != NULL) {
>> +            VIR_DEBUG("Looking up secret by UUID: %s",
>> +                      pool->def->source.auth.cephx.secret.uuid);
>> +            secret = virSecretLookupByUUIDString(conn,
>> +                                                 pool->def->source.auth.cephx.secret.uuid);
>> +        }
>> +
>> +        if (pool->def->source.auth.cephx.secret.usage != NULL) {
>> +            VIR_DEBUG("Looking up secret by usage: %s",
>> +                      pool->def->source.auth.cephx.secret.usage);
>> +            secret = virSecretLookupByUsage(conn, VIR_SECRET_USAGE_TYPE_CEPH,
>> +                                            pool->def->source.auth.cephx.secret.usage);
>> +        }
>> +
>> +        if (secret == NULL) {
>> +            virStorageReportError(VIR_ERR_NO_SECRET,
>> +                                  _("failed to find the secret"));
>> +            goto cleanup;
>> +        }
>> +
>> +        secret_value = virSecretGetValue(secret,&secret_value_size, 0);
>> +        base64_encode_alloc((char *)secret_value,
>> +                            secret_value_size,&rados_key);
>> +        memset(secret_value, 0, secret_value_size);
>> +
>> +        if (rados_key == NULL) {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("failed to decode the RADOS key"));
>> +            goto cleanup;
>> +        }
>> +
>> +        VIR_DEBUG("Found cephx key: %s", rados_key);
>> +        if (rados_conf_set(ptr->cluster, "key", rados_key)<  0) {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("failed to set RADOS option: %s"),
>> +                                  "rados_key");
>> +            goto cleanup;
>> +        }
>> +
>> +        memset(rados_key, 0, strlen(rados_key));
>> +
>> +        if (rados_conf_set(ptr->cluster, "auth_supported", "cephx")<  0) {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("failed to set RADOS option: %s"),
>> +                                  "auth_supported");
>> +            goto cleanup;
>> +        }
>> +    } else {
>> +        VIR_DEBUG("Not using cephx authorization");
>> +        if (rados_conf_set(ptr->cluster, "auth_supported", "none")<  0) {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("failed to set RADOS option: %s"),
>> +                                  "auth_supported");
>> +            goto cleanup;
>> +        }
>> +        if (rados_create(&ptr->cluster, NULL)<  0) {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("failed to create the RADOS cluster"));
>> +            goto cleanup;
>> +        }
>> +    }
>> +
>> +    VIR_DEBUG("Found %d RADOS cluster monitors in the pool configuration",
>> +              pool->def->source.nhost);
>> +
>> +    int i;
>> +    for (i = 0; i<  pool->def->source.nhost; i++) {
>> +        if (pool->def->source.hosts[i].name != NULL&&
>> +            !pool->def->source.hosts[i].port) {
>> +            virBufferAsprintf(&mon_host, "%s:6789,",
>> +                              pool->def->source.hosts[i].name);
>> +        } else if (pool->def->source.hosts[i].name != NULL&&
>> +            pool->def->source.hosts[i].port) {
>> +            virBufferAsprintf(&mon_host, "%s:%d,",
>> +                              pool->def->source.hosts[i].name,
>> +                              pool->def->source.hosts[i].port);
>> +        } else {
>> +            virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                                  _("received malformed monitor, check the XML definition"));
>> +        }
>> +    }
>> +
>> +    char *mon_buff = virBufferContentAndReset(&mon_host);
>
> Before calling this you are required to do
>
>
>     if (virBufferError(&mon_host)) {
>        virReportOOMError();
>        goto cleanup;
>     }
>
>
> otherwise  'mon_buff' could be NULL.
>
>> +    VIR_DEBUG("RADOS mon_host has been set to: %s", mon_buff);
>> +    if (rados_conf_set(ptr->cluster, "mon_host", mon_buff)<  0) {
>> +       virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                             _("failed to set RADOS option: %s"),
>> +                             "mon_host");
>> +        goto cleanup;
>> +    }
>> +
>> +    ptr->starttime = time(0);
>> +    if (rados_connect(ptr->cluster)<  0) {
>> +        virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                              _("failed to connect to the RADOS monitor on: %s"),
>> +                              virBufferContentAndReset(&mon_host));
>> +        goto cleanup;
>> +    }
>> +
>> +    ret = 0;
>> +
>> +cleanup:
>> +    VIR_FREE(secret_value);
>> +    VIR_FREE(rados_key);
>> +    virSecretFree(secret);
>> +    virBufferFreeAndReset(&mon_host);
>
>
> Does 'mon_buff' need to be free()d here, or does 'ptr->cluster'
> now own the buffer you gave it ?
>
>> +static int virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr)
>> +{
>> +    int ret = 0;
>> +
>> +    if (ptr.ioctx != NULL) {
>> +        VIR_DEBUG("Closing RADOS IoCTX");
>> +        rados_ioctx_destroy(ptr.ioctx);
>> +        ret = -1;
>> +    }
>> +
>> +    if (ptr.cluster != NULL) {
>> +        VIR_DEBUG("Closing RADOS connection");
>> +        rados_shutdown(ptr.cluster);
>> +        ret = -2;
>> +    }
>> +
>> +    time_t runtime = time(0) - ptr.starttime;
>> +    VIR_DEBUG("RADOS connection existed for %ld seconds", runtime);
>
> I think you want to set  ptr.cluster&  ptr.ioctx to NULL here, to protect
> against being called twice.
>
>
>> +static int virStorageBackendRBDRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED,
>> +                                           virStoragePoolObjPtr pool)
>> +{
>> +    size_t max_size = 1024;
>> +    int ret = -1;
>> +    virStorageBackendRBDStatePtr ptr;
>> +    ptr.cluster = NULL;
>> +    ptr.ioctx = NULL;
>> +
>> +    if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool)<  0) {
>> +        goto cleanup;
>> +    }
>> +
>> +    if (rados_ioctx_create(ptr.cluster,
>> +        pool->def->source.name,&ptr.ioctx)<  0) {
>> +        virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                              _("failed to create the RBD IoCTX. Does the pool '%s' exist?"),
>> +                              pool->def->source.name);
>> +        goto cleanup;
>> +    }
>> +
>> +    struct rados_cluster_stat_t stat;
>> +    if (rados_cluster_stat(ptr.cluster,&stat)<  0) {
>> +        virStorageReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> +                              _("failed to stat the RADOS cluster"));
>> +        goto cleanup;
>> +    }
>> +
>> +    struct rados_pool_stat_t poolstat;
>> +    if (rados_ioctx_pool_stat(ptr.ioctx,&poolstat)<  0) {
>> +        virStorageReportError(VIR_ERR_INTERNAL_ERROR,
>> +                              _("failed to stat the RADOS pool '%s'"),
>> +                              pool->def->source.name);
>> +        goto cleanup;
>> +    }
>> +
>> +    pool->def->capacity = stat.kb * 1024;
>> +    pool->def->available = stat.kb_avail * 1024;
>> +    pool->def->allocation = poolstat.num_bytes;
>> +
>> +    int num_images, i;
>> +    char *names, *name = NULL;
>> +
>
> Our preference is for variables to be declared at the start of the
> block, since it gives clear initialization semantics when you consider
> the use of 'goto'.
>
> eg if one of those earlier 'goto cleanup' statements run, you can't
> know whether 'name' was initialized to NULL or not.  IIRC we have
> compiler flags that should have generated a warning on this.
>
>
>> +    if (VIR_ALLOC_N(names, 1024)<  0)
>> +        goto cleanup;
>
> Need a virReportOOMError() here
>
>> +
>> +    int len = rbd_list(ptr.ioctx, names,&max_size);
>> +
>> +    for (i = 0, num_images = 0, name = names; name<  names + len; i++) {
>> +
>> +        if (VIR_REALLOC_N(pool->volumes.objs, pool->volumes.count + 1)<  0) {
>> +            virStoragePoolObjClearVols(pool);
>> +            virReportOOMError();
>> +            goto cleanup;
>> +        }
>> +
>> +        virStorageVolDefPtr vol;
>> +        if (VIR_ALLOC(vol)<  0)
>> +            goto cleanup;
> And  virReportOOMError() here.
>
>> +
>> +        vol->name = strdup(name);
>> +        if (vol->name == NULL)
>> +            goto cleanup;
>
> And here.
>
> You might find it preferable to add a block at the end
>
>    no_memory:
>      virReportOOMError();
>      goto cleanup;
>
>> +        name += strlen(name) + 1;
>> +
>> +        if (volStorageBackendRBDRefreshVolInfo(vol, pool, ptr)<  0)
>> +            goto cleanup;
>> +
>> +        pool->volumes.objs[pool->volumes.count++] = vol;
>> +    }
>> +
>> +    VIR_DEBUG("Refreshed RBD pool %s (kb: %llu kb_avail: %llu num_bytes: %llu num_images: %d)",
>> +              pool->def->source.name, (unsigned long long)stat.kb,
>> +              (unsigned long long)stat.kb_avail,
>> +              (unsigned long long)poolstat.num_bytes, pool->volumes.count);
>> +
>> +    ret = 0;
>> +
>> +cleanup:
>> +    VIR_FREE(names);
>> +    virStorageBackendRBDCloseRADOSConn(ptr);
>> +    return ret;
>> +}
>
> Overall I think your patch looks pretty good&  look forward to updated
> versions
>
> Regards,
> Daniel




More information about the libvir-list mailing list