[libvirt PATCH 3/3] WIP: use nbdkit for remote disk sources

Peter Krempa pkrempa at redhat.com
Fri Jun 24 13:28:53 UTC 2022


On Wed, Jun 22, 2022 at 16:26:26 -0500, Jonathon Jongsma wrote:
> ---
>  include/libvirt/virterror.h                   |   1 +
>  po/POTFILES                                   |   1 +
>  src/qemu/meson.build                          |   1 +
>  src/qemu/qemu_block.c                         |  64 +-
>  src/qemu/qemu_block.h                         |   1 +
>  src/qemu/qemu_command.c                       |  26 +-
>  src/qemu/qemu_conf.c                          |  19 +
>  src/qemu/qemu_conf.h                          |   5 +
>  src/qemu/qemu_domain.c                        | 110 ++-
>  src/qemu/qemu_domain.h                        |   5 +
>  src/qemu/qemu_driver.c                        |   4 +-
>  src/qemu/qemu_extdevice.c                     |  25 +
>  src/qemu/qemu_nbdkit.c                        | 629 ++++++++++++++++++
>  src/qemu/qemu_nbdkit.h                        |  89 +++
>  src/qemu/qemu_validate.c                      |  22 +-
>  src/qemu/qemu_validate.h                      |   4 +-
>  src/util/virerror.c                           |   1 +
>  tests/qemublocktest.c                         |   8 +-
>  tests/qemustatusxml2xmldata/modern-in.xml     |   1 -
>  ...sk-cdrom-network-nbdkit.x86_64-latest.args |  42 ++
>  .../disk-cdrom-network-nbdkit.xml             |   1 +
>  ...isk-network-http-nbdkit.x86_64-latest.args |  45 ++
>  .../disk-network-http-nbdkit.xml              |   1 +
>  ...work-source-curl-nbdkit.x86_64-latest.args |  49 ++
>  .../disk-network-source-curl-nbdkit.xml       |   1 +
>  ...isk-network-source-curl.x86_64-latest.args |  53 ++
>  .../disk-network-source-curl.xml              |  71 ++
>  tests/qemuxml2argvtest.c                      |  12 +
>  tests/testutilsqemu.c                         |  16 +
>  tests/testutilsqemu.h                         |   4 +
>  30 files changed, 1278 insertions(+), 33 deletions(-)
>  create mode 100644 src/qemu/qemu_nbdkit.c
>  create mode 100644 src/qemu/qemu_nbdkit.h
>  create mode 100644 tests/qemuxml2argvdata/disk-cdrom-network-nbdkit.x86_64-latest.args
>  create mode 120000 tests/qemuxml2argvdata/disk-cdrom-network-nbdkit.xml
>  create mode 100644 tests/qemuxml2argvdata/disk-network-http-nbdkit.x86_64-latest.args
>  create mode 120000 tests/qemuxml2argvdata/disk-network-http-nbdkit.xml
>  create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl-nbdkit.x86_64-latest.args
>  create mode 120000 tests/qemuxml2argvdata/disk-network-source-curl-nbdkit.xml
>  create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl.x86_64-latest.args
>  create mode 100644 tests/qemuxml2argvdata/disk-network-source-curl.xml

I presume you plan to split this into commits.

 
> diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h
> index df13e4f11e..dd198dfd7d 100644
> --- a/include/libvirt/virterror.h
> +++ b/include/libvirt/virterror.h
> @@ -141,6 +141,7 @@ typedef enum {
>      VIR_FROM_TPM = 70,          /* Error from TPM (Since: 5.6.0) */
>      VIR_FROM_BPF = 71,          /* Error from BPF code (Since: 5.10.0) */
>      VIR_FROM_CH = 72,           /* Error from Cloud-Hypervisor driver (Since: 7.5.0) */
> +    VIR_FROM_NBDKIT = 73,       /* Error from Nbdkit code (Since: 8.5.0) */
>  
>  # ifdef VIR_ENUM_SENTINELS
>      VIR_ERR_DOMAIN_LAST /* (Since: 0.9.13) */
> diff --git a/po/POTFILES b/po/POTFILES
> index faaba53c8f..99284b8173 100644
> --- a/po/POTFILES
> +++ b/po/POTFILES
> @@ -177,6 +177,7 @@ src/qemu/qemu_monitor.c
>  src/qemu/qemu_monitor_json.c
>  src/qemu/qemu_monitor_text.c
>  src/qemu/qemu_namespace.c
> +src/qemu/qemu_nbdkit.c
>  src/qemu/qemu_process.c
>  src/qemu/qemu_qapi.c
>  src/qemu/qemu_saveimage.c
> diff --git a/src/qemu/meson.build b/src/qemu/meson.build
> index 96952cc52d..101cf3591f 100644
> --- a/src/qemu/meson.build
> +++ b/src/qemu/meson.build
> @@ -28,6 +28,7 @@ qemu_driver_sources = [
>    'qemu_monitor_json.c',
>    'qemu_monitor_text.c',
>    'qemu_namespace.c',
> +  'qemu_nbdkit.c',
>    'qemu_process.c',
>    'qemu_qapi.c',
>    'qemu_saveimage.c',
> diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
> index 9fe22f18f2..91f17f133f 100644
> --- a/src/qemu/qemu_block.c
> +++ b/src/qemu/qemu_block.c
> @@ -773,6 +773,35 @@ qemuBlockStorageSourceGetCURLProps(virStorageSource *src,
>  }
>  
>  
> +static virJSONValue *
> +qemuBlockStorageSourceGetNbdkitProps(virStorageSource *src)
> +{
> +    qemuDomainStorageSourcePrivate *srcPriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(src);
> +    virJSONValue *ret = NULL;
> +    g_autoptr(virJSONValue) serverprops = NULL;
> +    virStorageNetHostDef host = { NULL };
> +
> +    /* srcPriv->nbdkitProcess will already be initialized if we can use nbdkit
> +     * to proxy this storage source */
> +    if (!(srcPriv  && srcPriv->nbdkitProcess))
> +        return NULL;
> +
> +    host.transport = VIR_STORAGE_NET_HOST_TRANS_UNIX;
> +    host.socket = srcPriv->nbdkitProcess->socketfile;
> +    serverprops = qemuBlockStorageSourceBuildJSONSocketAddress(&host,
> +                                                               false);
> +    if (!serverprops)
> +        return NULL;
> +
> +    if (virJSONValueObjectAdd(&ret,
> +                              "a:server", &serverprops,
> +                              NULL) < 0)
> +            return NULL;
> +
> +    return ret;
> +}
> +
> +
>  static virJSONValue *
>  qemuBlockStorageSourceGetISCSIProps(virStorageSource *src,
>                                      bool onlytarget)
> @@ -1207,6 +1236,14 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
>          case VIR_STORAGE_NET_PROTOCOL_FTP:
>          case VIR_STORAGE_NET_PROTOCOL_FTPS:
>          case VIR_STORAGE_NET_PROTOCOL_TFTP:
> +            /* first try to use nbdkit for http/ftp sources */
> +            if ((fileprops = qemuBlockStorageSourceGetNbdkitProps(src))) {
> +                driver = "nbd";
> +                break;
> +            }
> +
> +            /* nbdkit is not supported for this host/source, fall back to old
> +             * qemu storage plugins */
>              driver = virStorageNetProtocolTypeToString(src->protocol);
>              if (!(fileprops = qemuBlockStorageSourceGetCURLProps(src, onlytarget)))
>                  return NULL;
> @@ -1237,6 +1274,14 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
>              break;
>  
>          case VIR_STORAGE_NET_PROTOCOL_SSH:
> +            /* first try to use nbdkit for ssh sources */
> +            if ((fileprops = qemuBlockStorageSourceGetNbdkitProps(src))) {
> +                driver = "nbd";
> +                break;
> +            }
> +
> +            /* nbdkit is not supported for this host/source. fallback to
> +             * the old qemu storage plugins */
>              driver = "ssh";
>              if (!(fileprops = qemuBlockStorageSourceGetSshProps(src)))
>                  return NULL;


I think you can do this as the very first thing. If the nbdkit data is
set we want to use it for any protocol it was instantiated rather than
have some more extra redundant logic here.

[...]

> @@ -1701,6 +1748,10 @@ static int
>  qemuBlockStorageSourceAttachApplyStorageDeps(qemuMonitor *mon,
>                                               qemuBlockStorageSourceAttachData *data)
>  {
> +    /* when using nbdkit, data is not passed via qemu secrets */
> +    if (data->useNbdkit)
> +        return 0;

This feels like a hack to be honest. If the encryption/authentication
objects are not needed they should not be instantiated in the first
place, rather than skipping formatting them here.

> +
>      if (data->prmgrProps &&
>          qemuMonitorAddObject(mon, &data->prmgrProps, &data->prmgrAlias) < 0)
>          return -1;
> @@ -2205,6 +2256,8 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
>      virJSONValue *props = NULL;
>      g_autoptr(virURI) uri = NULL;
>      g_autofree char *backingJSON = NULL;
> +    qemuDomainStorageSourcePrivate *srcPriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(src);
> +    bool useNbdkit = srcPriv && srcPriv->nbdkitProcess;
>  
>      if (!src->sliceStorage) {
>          if (virStorageSourceIsLocalStorage(src)) {
> @@ -2223,7 +2276,8 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
>              src->ncookies == 0 &&
>              src->sslverify == VIR_TRISTATE_BOOL_ABSENT &&
>              src->timeout == 0 &&
> -            src->readahead == 0) {
> +            src->readahead == 0 &&
> +            !useNbdkit) {
>  
>              switch ((virStorageNetProtocol) src->protocol) {
>              case VIR_STORAGE_NET_PROTOCOL_NBD:


[...]

> @@ -2630,6 +2685,13 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
>              break;
>  
>          case VIR_STORAGE_NET_PROTOCOL_SSH:
> +            if (srcPriv->nbdkitProcess) {
> +                /* disk creation not yet supported with nbdkit, and even if it
> +                 * was supported, it would not be done with blockdev-create
> +                 * props */
> +                return 0;
> +            }

So this introduces a regression in functionality ...

> +
>              driver = "ssh";
>              if (!(location = qemuBlockStorageSourceGetSshProps(src)))
>                  return -1;

.. as previously we'd be able to create the image.

> diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h
> index 8641c8a2d2..a1f98f4452 100644
> --- a/src/qemu/qemu_block.h
> +++ b/src/qemu/qemu_block.h
> @@ -113,6 +113,7 @@ struct qemuBlockStorageSourceAttachData {
>      char *tlsAlias;
>      virJSONValue *tlsKeySecretProps;
>      char *tlsKeySecretAlias;
> +    bool useNbdkit;
>  };
>  
>  
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index b307d3139c..dd410362fb 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -1578,6 +1578,7 @@ qemuBuildNetworkDriveStr(virStorageSource *src,
>                           qemuDomainSecretInfo *secinfo)
>  {
>      g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
> +    qemuDomainStorageSourcePrivate *priv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(src);
>      size_t i;
>      char *ret = NULL;
>  
> @@ -1637,6 +1638,13 @@ qemuBuildNetworkDriveStr(virStorageSource *src,
>          case VIR_STORAGE_NET_PROTOCOL_FTP:
>          case VIR_STORAGE_NET_PROTOCOL_FTPS:
>          case VIR_STORAGE_NET_PROTOCOL_TFTP:
> +            if (priv && priv->nbdkitProcess) {
> +                virBufferAsprintf(&buf, "nbd:unix:%s", priv->nbdkitProcess->socketfile);
> +                ret = virBufferContentAndReset(&buf);
> +            } else {
> +                ret = qemuBuildNetworkDriveURI(src);
> +            }
> +            break;
>          case VIR_STORAGE_NET_PROTOCOL_ISCSI:
>          case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
>              ret = qemuBuildNetworkDriveURI(src);

Please don't add the functionality to this pre-blockdev helper. It will
be deleted soon anyways. Support for the nbdkit stuff should be only
allowed in blockdev mode.


> @@ -2497,13 +2505,17 @@ qemuBuildBlockStorageSourceAttachDataCommandline(virCommand *cmd,
>  {
>      char *tmp;
>  
> -    if (qemuBuildObjectCommandline(cmd, data->prmgrProps, qemuCaps) < 0 ||
> -        qemuBuildObjectCommandline(cmd, data->authsecretProps, qemuCaps) < 0 ||
> -        qemuBuildObjectCommandline(cmd, data->encryptsecretProps, qemuCaps) < 0 ||
> -        qemuBuildObjectCommandline(cmd, data->httpcookiesecretProps, qemuCaps) < 0 ||
> -        qemuBuildObjectCommandline(cmd, data->tlsKeySecretProps, qemuCaps) < 0 ||
> -        qemuBuildObjectCommandline(cmd, data->tlsProps, qemuCaps) < 0)
> -        return -1;
> +    /* disks that are backed by nbdkit do not send these secrets to qemu, but
> +     * rather directly to nbdkit */
> +    if (!data->useNbdkit) {

NACK to this hack, modify the code ...

> +        if (qemuBuildObjectCommandline(cmd, data->prmgrProps, qemuCaps) < 0 ||
> +            qemuBuildObjectCommandline(cmd, data->authsecretProps, qemuCaps) < 0 ||
> +            qemuBuildObjectCommandline(cmd, data->encryptsecretProps, qemuCaps) < 0 ||
> +            qemuBuildObjectCommandline(cmd, data->httpcookiesecretProps, qemuCaps) < 0 ||
> +            qemuBuildObjectCommandline(cmd, data->tlsKeySecretProps, qemuCaps) < 0 ||
> +            qemuBuildObjectCommandline(cmd, data->tlsProps, qemuCaps) < 0)

So that these are not instantiated rather than hacking them out.

> +            return -1;

[...]

> diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
> index 3b75cdeb95..22aeab0c49 100644
> --- a/src/qemu/qemu_conf.c
> +++ b/src/qemu/qemu_conf.c
> @@ -1571,3 +1571,22 @@ qemuGetMemoryBackingPath(virQEMUDriver *driver,
>      *memPath = g_strdup_printf("%s/%s", domainPath, alias);
>      return 0;
>  }
> +
> +/*
> + * qemuGetNbdkitCaps:
> + * @driver: the qemu driver
> + *
> + * Gets the capabilities for Nbdkit for the specified driver. These can be used
> + * to determine whether a particular disk source can be served by nbdkit or
> + * not.
> + *
> + * Returns: a reference to qemuNbdkitCaps or NULL
> + */
> +qemuNbdkitCaps*
> +qemuGetNbdkitCaps(virQEMUDriver *driver)
> +{
> +    if (!QEMU_IS_NBDKIT_CAPS(driver->nbdkitCaps))
> +        return NULL;
> +
> +    return g_object_ref(driver->nbdkitCaps);
> +}

Since the _virQEMUDriverConfig struct is not confined to qemu_conf.c you
don't need to have this helper here.

> diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
> index c40c452f58..f14f9fc4c1 100644
> --- a/src/qemu/qemu_conf.h
> +++ b/src/qemu/qemu_conf.h
> @@ -36,6 +36,7 @@
>  #include "virthreadpool.h"
>  #include "locking/lock_manager.h"
>  #include "qemu_capabilities.h"
> +#include "qemu_nbdkit.h"
>  #include "virclosecallbacks.h"
>  #include "virhostdev.h"
>  #include "virfile.h"
> @@ -306,6 +307,8 @@ struct _virQEMUDriver {
>  
>      /* Immutable pointer, self-locking APIs */
>      virHashAtomic *migrationErrors;
> +
> +    qemuNbdkitCaps *nbdkitCaps;
>  };
>  
>  virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
> @@ -359,3 +362,5 @@ int qemuGetMemoryBackingPath(virQEMUDriver *driver,
>                               const virDomainDef *def,
>                               const char *alias,
>                               char **memPath);
> +
> +qemuNbdkitCaps * qemuGetNbdkitCaps(virQEMUDriver *driver);
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 9769e3bb92..faeb779b3f 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c

[...]

>  
> @@ -1293,10 +1295,7 @@ qemuDomainSecretStorageSourcePrepare(qemuDomainObjPrivate *priv,
>      if (!src->auth && !hasEnc && src->ncookies == 0)
>          return 0;
>  
> -    if (!(src->privateData = qemuDomainStorageSourcePrivateNew()))
> -        return -1;
> -
> -    srcPriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(src);
> +    srcPriv = qemuDomainStorageSourcePrivateFetch(src);

Extract this into a separate patch.

>  
>      if (src->auth) {
>          virSecretUsageType usageType = VIR_SECRET_USAGE_TYPE_ISCSI;
> @@ -1321,7 +1320,9 @@ qemuDomainSecretStorageSourcePrepare(qemuDomainObjPrivate *priv,
>                return -1;
>      }
>  
> -    if (src->ncookies &&
> +    /* when using nbdkit for http(s) sources, we don't need to pass cookies as
> +     * qemu secrets */
> +    if (!srcPriv->nbdkitProcess && src->ncookies &&
>          virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV) &&
>          !(srcPriv->httpcookie = qemuDomainSecretStorageSourcePrepareCookies(priv,
>                                                                              src,

Skip setup of all the objects which are not needed when nbdkit is un use
including auth, so that the code formatting the commandline doesn't need
to be special-cased.

[...]

> @@ -4899,7 +4951,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
>              return -1;
>          }
>  
> -        if (!src->detected && !blockdev) {
> +        if (!src->detected && !blockdev &&
> +            !qemuNbdkitCapsGet(nbdkitCaps, QEMU_NBDKIT_CAPS_PLUGIN_CURL)) {
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                             _("http cookies are not supported by this QEMU binary"));
>              return -1;

As noted before, don't add support fro nbdkit for pre-blockdev setups;
it's pointless.

> @@ -4920,7 +4973,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
>              return -1;
>          }
>  
> -        if (!src->detected && !blockdev) {
> +        if (!src->detected && !blockdev &&
> +            !qemuNbdkitCapsGet(nbdkitCaps, QEMU_NBDKIT_CAPS_FILTER_READAHEAD)) {
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                             _("readahead setting is not supported with this QEMU binary"));
>              return -1;

ditto.

> @@ -4938,7 +4992,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
>              return -1;
>          }
>  
> -        if (!src->detected && !blockdev) {
> +        if (!src->detected && !blockdev &&
> +            !qemuNbdkitCapsGet(nbdkitCaps, QEMU_NBDKIT_CAPS_PLUGIN_CURL)) {
>              virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>                             _("timeout setting is not supported with this QEMU binary"));
>              return -1;

This too.


[...]

> @@ -10112,6 +10169,29 @@ qemuDomainPrepareStorageSourceNFS(virStorageSource *src)
>  }
>  
>  
> +/* qemuPrepareStorageSourceNbdkit:
> + * @src: source for a disk
> + *
> + * If src is an network source that is managed by nbdkit, prepare data so that
> + * nbdkit can be launched before the domain is started
> + */
> +static void
> +qemuDomainPrepareStorageSourceNbdkit(virDomainDiskDef *disk,
> +                                     virQEMUDriverConfig *cfg,
> +                                     qemuDomainObjPrivate *priv)
> +{
> +    g_autoptr(qemuNbdkitCaps) nbdkit = qemuGetNbdkitCaps(priv->driver);
> +    if (!nbdkit)
> +        return;
> +
> +    if (virStorageSourceGetActualType(disk->src) != VIR_STORAGE_TYPE_NETWORK)
> +        return;
> +
> +    qemuNbdkitInitStorageSource(nbdkit, disk->src, priv->libDir,
> +                                disk->info.alias, cfg->user, cfg->group);

You pass disk alias here, but a backing chain of a disk can have
multiple network sourced images theoretically so for a single disk you
can have multiple nbdkit instances. You should name them based on the
node name.

So this function should return whether nbdkit is going to be used for a
given storage source so that the upper logic skips all the setup of the
helper objects, so that we don't need to have hacks in the code avoiding
the use of the perpared stuff.

> +}
> +
> +
>  /* qemuProcessPrepareStorageSourceTLS:
>   * @source: source for a disk
>   * @cfg: driver configuration
> @@ -10828,7 +10908,9 @@ qemuDomainPrepareDiskSourceLegacy(virDomainDiskDef *disk,
>                                    qemuDomainObjPrivate *priv,
>                                    virQEMUDriverConfig *cfg)
>  {
> -    if (qemuDomainValidateStorageSource(disk->src, priv->qemuCaps, true) < 0)
> +    g_autoptr(qemuNbdkitCaps) nbdkitcaps = qemuGetNbdkitCaps(priv->driver);
> +    if (qemuDomainValidateStorageSource(disk->src, priv->qemuCaps,
> +                                        nbdkitcaps, true) < 0)
>          return -1;

Don't bother with legacy stuff.

>  
>      qemuDomainPrepareStorageSourceConfig(disk->src, cfg, priv->qemuCaps);
> @@ -10846,6 +10928,8 @@ qemuDomainPrepareDiskSourceLegacy(virDomainDiskDef *disk,
>                                            priv) < 0)
>          return -1;
>  
> +    qemuDomainPrepareStorageSourceNbdkit(disk, cfg, priv);
> +
>      return 0;
>  }
>  
> @@ -10857,6 +10941,7 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
>                                                 qemuDomainObjPrivate *priv,
>                                                 virQEMUDriverConfig *cfg)
>  {
> +    g_autoptr(qemuNbdkitCaps) nbdkitcaps = qemuGetNbdkitCaps(priv->driver);
>      src->nodestorage = g_strdup_printf("%s-storage", nodenameprefix);
>      src->nodeformat = g_strdup_printf("%s-format", nodenameprefix);
>  
> @@ -10866,7 +10951,8 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
>      if (src->encryption && src->encryption->engine == VIR_STORAGE_ENCRYPTION_ENGINE_DEFAULT)
>          src->encryption->engine = VIR_STORAGE_ENCRYPTION_ENGINE_QEMU;
>  
> -    if (qemuDomainValidateStorageSource(src, priv->qemuCaps, false) < 0)
> +    if (qemuDomainValidateStorageSource(src, priv->qemuCaps,
> +                                        nbdkitcaps, false) < 0)
>          return -1;
>  
>      qemuDomainPrepareStorageSourceConfig(src, cfg, priv->qemuCaps);
> @@ -10887,6 +10973,8 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
>      if (qemuDomainPrepareStorageSourceNFS(src) < 0)
>          return -1;
>  
> +    qemuDomainPrepareStorageSourceNbdkit(disk, cfg, priv);

So this should be done right after all image frontend/format setup is
done (encryption etc) and all image protocol setup should be skipped, so
that there's a central point of setup.

You'll need to refactor qemuDomainSecretStorageSourcePrepare to have two
distinct steps for encryption which needs to be done also for
nbdkit-backed disks and then authentication which needs to be skipped.

TLS/cookies/and NFS setup can then be skipped too.

> +
>      return 0;
>  }

[...]


> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 3b5c3db67c..d1a972eb37 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -831,6 +831,9 @@ qemuStateInitialize(bool privileged,
>                                                             defsecmodel)))
>          goto error;
>  
> +    /* find whether nbdkit is available and query its capabilities */
> +    qemu_driver->nbdkitCaps = qemuNbdkitCapsQuery();
> +
>      /* If hugetlbfs is present, then we need to create a sub-directory within
>       * it, since we can't assume the root mount point has permissions that
>       * will let our spawned QEMU instances use it. */
> @@ -14471,7 +14474,6 @@ qemuDomainBlockPivot(virQEMUDriver *driver,
>              if (reuse && shallow &&
>                  virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV_SNAPSHOT_ALLOW_WRITE_ONLY) &&
>                  virStorageSourceHasBacking(disk->mirror)) {
> -

Spurious change;

>                  if (!(chainattachdata = qemuBuildStorageSourceChainAttachPrepareBlockdev(disk->mirror->backingStore)))
>                      return -1;
>  
> diff --git a/src/qemu/qemu_extdevice.c b/src/qemu/qemu_extdevice.c
> index b8e3c1000a..0f9361b294 100644
> --- a/src/qemu/qemu_extdevice.c
> +++ b/src/qemu/qemu_extdevice.c
> @@ -218,6 +218,14 @@ qemuExtDevicesStart(virQEMUDriver *driver,
>              return -1;
>      }
>  
> +    for (i = 0; i < def->ndisks; i++) {
> +        virDomainDiskDef *disk = def->disks[i];
> +        qemuDomainStorageSourcePrivate *priv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(disk->src);
> +        if (priv && priv->nbdkitProcess &&

This is insufficient. If you have multiple images in an explicitly
defined chain, any of them can have an 'nbdkit' process but this code
checks only the topmost image.

You'll need to walk the full bakcing chain and spawn all of the helpers.

> +            qemuNbdkitProcessStart(priv->nbdkitProcess, vm, driver) < 0)
> +            return -1;
> +    }
> +
>      return 0;
>  }
>  
> @@ -262,6 +270,14 @@ qemuExtDevicesStop(virQEMUDriver *driver,
>              fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_VIRTIOFS)
>              qemuVirtioFSStop(driver, vm, fs);
>      }
> +
> +    for (i = 0; i < def->ndisks; i++) {
> +        virDomainDiskDef *disk = def->disks[i];
> +        qemuDomainStorageSourcePrivate *priv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(disk->src);
> +
> +        if (priv && priv->nbdkitProcess)
> +            qemuNbdkitProcessStop(priv->nbdkitProcess);

Same as above.

> +    }
>  }
>  
>  
> @@ -324,6 +340,15 @@ qemuExtDevicesSetupCgroup(virQEMUDriver *driver,
>              return -1;
>      }
>  
> +    for (i = 0; i < def->ndisks; i++) {
> +        virDomainDiskDef *disk = def->disks[i];
> +        qemuDomainStorageSourcePrivate *priv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(disk);
> +
> +        if (priv && priv->nbdkitProcess &&
> +            qemuNbdkitProcessSetupCgroup(priv->nbdkitProcess, cgroup) < 0)
> +            return -1;

And once more

> +    }
> +
>      for (i = 0; i < def->nfss; i++) {
>          virDomainFSDef *fs = def->fss[i];
>  
> diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c
> new file mode 100644
> index 0000000000..dd8759689c
> --- /dev/null
> +++ b/src/qemu/qemu_nbdkit.c
> @@ -0,0 +1,629 @@
> +/*
> + * qemu_nbdkit.c: helpers for using nbdkit
> + *
> + * Copyright (C) 2021 Red Hat, Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library.  If not, see
> + * <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include <config.h>
> +#include <glib.h>
> +
> +#include "vircommand.h"
> +#include "virerror.h"
> +#include "virlog.h"
> +#include "virpidfile.h"
> +#include "virsecureerase.h"
> +#include "qemu_block.h"
> +#include "qemu_conf.h"
> +#include "qemu_domain.h"
> +#include "qemu_driver.h"
> +#include "qemu_extdevice.h"
> +#include "qemu_nbdkit.h"
> +#include "qemu_security.h"
> +
> +#include <fcntl.h>
> +
> +#define VIR_FROM_THIS VIR_FROM_NBDKIT
> +
> +VIR_LOG_INIT("qemu.nbdkit");
> +
> +struct _qemuNbdkitCaps {
> +    GObject parent;
> +
> +    char *path;
> +    char *version;
> +
> +    virBitmap *flags;
> +};
> +G_DEFINE_TYPE(qemuNbdkitCaps, qemu_nbdkit_caps, G_TYPE_OBJECT);
> +
> +
> +static void
> +qemuNbdkitCheckCommandCap(qemuNbdkitCaps *nbdkit,
> +                          virCommand *cmd,
> +                          qemuNbdkitCapsFlags cap)
> +{
> +    if (virCommandRun(cmd, NULL) != 0)
> +        return;
> +
> +    VIR_DEBUG("Setting nbdkit capability %i", cap);
> +    ignore_value(virBitmapSetBit(nbdkit->flags, cap));
> +}
> +
> +
> +static void
> +qemuNbdkitQueryFilter(qemuNbdkitCaps *nbdkit,
> +                      const char *filter,
> +                      qemuNbdkitCapsFlags cap)
> +{
> +    g_autoptr(virCommand) cmd = virCommandNewArgList(nbdkit->path,
> +                                                     "--version",
> +                                                     NULL);
> +
> +    virCommandAddArgPair(cmd, "--filter", filter);
> +
> +    /* use null plugin to check for filter */
> +    virCommandAddArg(cmd, "null");
> +
> +    qemuNbdkitCheckCommandCap(nbdkit, cmd, cap);
> +}
> +
> +
> +static void
> +qemuNbdkitQueryPlugin(qemuNbdkitCaps *nbdkit,
> +                      const char *plugin,
> +                      qemuNbdkitCapsFlags cap)
> +{
> +    g_autoptr(virCommand) cmd = virCommandNewArgList(nbdkit->path,
> +                                                     plugin,
> +                                                     "--version",
> +                                                     NULL);
> +
> +    qemuNbdkitCheckCommandCap(nbdkit, cmd, cap);
> +}
> +
> +
> +static void
> +qemuNbdkitQueryPlugins(qemuNbdkitCaps *nbdkit)
> +{
> +    qemuNbdkitQueryPlugin(nbdkit, "curl", QEMU_NBDKIT_CAPS_PLUGIN_CURL);
> +    qemuNbdkitQueryPlugin(nbdkit, "ssh", QEMU_NBDKIT_CAPS_PLUGIN_SSH);

Do we really have to run nbdkit multiple times to check these?

> +}
> +
> +
> +static void
> +qemuNbdkitQueryFilters(qemuNbdkitCaps *nbdkit)
> +{
> +    qemuNbdkitQueryFilter(nbdkit, "readahead",
> +                          QEMU_NBDKIT_CAPS_FILTER_READAHEAD);
> +}
> +
> +
> +static int
> +qemuNbdkitQueryVersion(qemuNbdkitCaps *nbdkit)
> +{
> +    g_autoptr(virCommand) cmd = virCommandNewArgList(nbdkit->path,
> +                                                     "--version",
> +                                                     NULL);
> +
> +    virCommandSetOutputBuffer(cmd, &nbdkit->version);
> +
> +    if (virCommandRun(cmd, NULL) != 0)
> +        return -1;
> +
> +    VIR_DEBUG("Got nbdkit version %s", nbdkit->version);
> +    return 0;
> +}
> +
> +
> +static void qemuNbdkitCapsFinalize(GObject *object)
> +{
> +    qemuNbdkitCaps *nbdkit = QEMU_NBDKIT_CAPS(object);
> +
> +    g_clear_pointer(&nbdkit->path, g_free);
> +    g_clear_pointer(&nbdkit->version, g_free);
> +    g_clear_pointer(&nbdkit->flags, virBitmapFree);
> +
> +    G_OBJECT_CLASS(qemu_nbdkit_caps_parent_class)->finalize(object);
> +}
> +
> +
> +void qemu_nbdkit_caps_init(qemuNbdkitCaps *caps)
> +{
> +    caps->flags = virBitmapNew(QEMU_NBDKIT_CAPS_LAST);
> +    caps->version = NULL;
> +}
> +
> +
> +static void
> +qemu_nbdkit_caps_class_init(qemuNbdkitCapsClass *klass)
> +{
> +    GObjectClass *obj = G_OBJECT_CLASS(klass);
> +
> +    obj->finalize = qemuNbdkitCapsFinalize;
> +}
> +
> +
> +qemuNbdkitCaps *
> +qemuNbdkitCapsNew(const char *path)
> +{
> +    qemuNbdkitCaps *caps = g_object_new(QEMU_TYPE_NBDKIT_CAPS, NULL);
> +    caps->path = g_strdup(path);
> +
> +    return caps;
> +}
> +
> +
> +qemuNbdkitCaps *
> +qemuNbdkitCapsQuery(void)
> +{
> +    qemuNbdkitCaps *caps = NULL;
> +    g_autofree char *path = virFindFileInPath("nbdkit");
> +
> +    if (!path)
> +        return NULL;
> +
> +    // make sure it's executable
> +    if (!virFileIsExecutable(path)) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, _("nbdkit '%s' is not executable"),
> +                       path);
> +        return NULL;
> +    }
> +
> +    VIR_DEBUG("found nbdkit executable '%s'", path);
> +    caps = qemuNbdkitCapsNew(path);
> +
> +    qemuNbdkitQueryPlugins(caps);
> +    qemuNbdkitQueryFilters(caps);
> +    qemuNbdkitQueryVersion(caps);
> +
> +    return caps;
> +}
> +
> +
> +bool
> +qemuNbdkitCapsGet(qemuNbdkitCaps *nbdkitCaps, qemuNbdkitCapsFlags flag)
> +{
> +    return virBitmapIsBitSet(nbdkitCaps->flags, flag);
> +}
> +
> +
> +void
> +qemuNbdkitCapsSet(qemuNbdkitCaps *nbdkitCaps, qemuNbdkitCapsFlags flag)
> +{
> +    ignore_value(virBitmapSetBit(nbdkitCaps->flags, flag));
> +}
> +
> +
> +static int childProcess(int fd, uint8_t *output, size_t outputlen)
> +{
> +    if (safewrite(fd, output, outputlen) < 0) {
> +        virReportSystemError(errno, "%s",
> +                             _("failed to write to socket for nbdkit"));
> +        return -errno;
> +    }
> +    return 0;
> +}
> +
> +
> +/* Forks a process to write a password to a socket to the nbdkit process.
> + * Returns a file descriptor that can be passed to ndkit */
> +static int qemuNbdkitForkHandler(uint8_t *output, size_t outputlen)
> +{
> +    enum {
> +        PARENT_SOCKET = 0,
> +        CHILD_SOCKET = 1
> +    };
> +    int pair[2] = { -1, -1 };
> +    pid_t pid;
> +
> +    if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) < 0) {
> +        virReportSystemError(errno, "%s",
> +                             _("failed to create socket for nbdkit"));
> +        return -errno;
> +    }
> +
> +    pid = virFork();
> +
> +    if (pid < 0)
> +        return -errno;
> +
> +    /* child process */
> +    if (pid == 0) {
> +        int ret = childProcess(pair[CHILD_SOCKET], output, outputlen);
> +        _exit(ret);
> +    }
> +
> +    /* parent process */
> +    VIR_FORCE_CLOSE(pair[CHILD_SOCKET]);
> +
> +    return pair[PARENT_SOCKET];
> +}
> +
> +
> +/* Forks a process to write cookies to a socket to the nbdkit process.
> + * Returns a file descriptor that can be passed to ndkit */
> +static int qemuNbdkitProcessForkCookieHandler(qemuNbdkitProcess *proc)
> +{
> +    g_autofree char *cookies = qemuBlockStorageSourceGetCookieString(proc->source);
> +
> +    if (!cookies)
> +        return 0;
> +
> +    if ((proc->cookiefd = qemuNbdkitForkHandler((uint8_t*)cookies, strlen(cookies))) < 0)
> +        return -1;
> +
> +    return 0;
> +}
> +
> +
> +/* Forks a process to write a password to a socket to the nbdkit process.
> + * Returns a file descriptor that can be passed to ndkit */
> +static int qemuNbdkitProcessForkPasswordHandler(qemuNbdkitProcess *proc)
> +{
> +    g_autofree uint8_t *password = NULL;
> +    size_t passwordlen = 0;
> +    g_autoptr(virConnect) conn = virGetConnectSecret();
> +
> +    if (!proc->source->auth->username)
> +        return 0;
> +
> +    if (virSecretGetSecretString(conn,
> +                                 &proc->source->auth->seclookupdef,
> +                                 /* FIXME: for some reason auth->authType is always NONE... */
> +                                 VIR_SECRET_USAGE_TYPE_ISCSI,
> +                                 &password,
> +                                 &passwordlen) < 0)
> +    {
> +        /* FIXME: message */
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("failed to get auth secret for storage"));
> +        return -1;
> +    }
> +
> +    if ((proc->authfd = qemuNbdkitForkHandler(password, passwordlen)) < 0)
> +        return -1;
> +
> +    return 0;
> +}
> +
> +
> +qemuNbdkitProcess *
> +qemuNbdkitProcessLoad(virStorageSource *source,
> +                      const char *pidfile,
> +                      const char *socketfile)
> +{
> +    int rc;
> +    qemuNbdkitProcess *nbdkit = g_new0(qemuNbdkitProcess, 1);
> +
> +    nbdkit->pidfile = g_strdup(pidfile);
> +    nbdkit->socketfile = g_strdup(socketfile);
> +    nbdkit->source = virObjectRef(source);
> +    nbdkit->user = -1;
> +    nbdkit->group = -1;
> +
> +    if ((rc = virPidFileReadPath(nbdkit->pidfile, &nbdkit->pid)) < 0)
> +        VIR_WARN("Failed to read pidfile %s", nbdkit->pidfile);
> +
> +    return nbdkit;
> +}
> +
> +

[...]

> +
> +
> +static void
> +qemuNbdkitProcessBuildCommandSSH(qemuNbdkitProcess *proc,
> +                          virCommand *cmd)
> +{
> +    char *user = NULL;
> +    virStorageNetHostDef *host = &proc->source->hosts[0];
> +
> +    /* nbdkit plugin name */
> +    virCommandAddArg(cmd, "ssh");
> +
> +    virCommandAddArgPair(cmd, "host", g_strdup(host->name));

virCommandAddArgPair calls:

 virCommandAddArgFormat(cmd, "%s=%s", name, val);

Which in turn calls g_strdup_vprintf with '%s=%s' as argument and 'name,
val' as vargs.

Thus the copied memory from the second argument is leaked as the
function doesn't take ownership.

> +    virCommandAddArgPair(cmd, "port", g_strdup_printf("%u",
> +                                                      host->port));
> +    virCommandAddArgPair(cmd, "path", g_strdup(proc->source->path));

same here.

> +
> +    if (proc->source->auth) {
> +        user = g_strdup(proc->source->auth->username);
> +    } else if  (proc->source->ssh_user) {
> +        user = g_strdup(proc->source->ssh_user);
> +    }
> +
> +    if (user) {
> +        virCommandAddArgPair(cmd, "user", user);

'user' variable is leaked.

> +    }
> +
> +    if (proc->source->ssh_host_key_check_disabled) {
> +        virCommandAddArgPair(cmd, "verify-remote-host",
> +                             g_strdup("false"));

The copied string will be leaked.

> +    }
> +}
> +
> +

[...]

> +int
> +qemuNbdkitProcessSetupCgroup(qemuNbdkitProcess *proc,
> +                             virCgroup *cgroup)
> +{
> +    return virCgroupAddProcess(cgroup, proc->pid);
> +}
> +
> +
> +/* FIXME: selinux permissions errors */
> +int
> +qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
> +                       virDomainObj *vm,
> +                       virQEMUDriver *driver)
> +{
> +    g_autoptr(virCommand) cmd = NULL;
> +    int rc;
> +    int exitstatus = 0;
> +    int cmdret = 0;
> +    unsigned int loops = 0;
> +    int errfd = -1;
> +
> +    if (qemuNbdkitProcessForkCookieHandler(proc) < 0)
> +        return -1;
> +
> +    if (qemuNbdkitProcessForkPasswordHandler(proc) < 0)
> +        return -1;
> +
> +    if (!(cmd = qemuNbdkitProcessBuildCommand(proc)))
> +        return -1;
> +
> +    virCommandSetErrorFD(cmd, &errfd);
> +
> +    if (qemuExtDeviceLogCommand(driver, vm, cmd, "nbdkit") < 0)
> +        goto error;
> +
> +    if (qemuSecurityCommandRun(driver, vm, cmd, proc->user, proc->group, &exitstatus, &cmdret) < 0)
> +        goto error;
> +
> +    if (cmdret < 0 || exitstatus != 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("Could not start 'nbdkit'. exitstatus: %d"), exitstatus);
> +        goto error;
> +    }
> +
> +    /* Wait for the pid file to appear. The file doesn't appear until nbdkit is
> +     * ready to accept connections to the socket */
> +    while (!virFileExists(proc->pidfile)) {

I don't think we want to go this way. Isn't there a possibility to pass
the socket or an FD so that we are notified?


> +        VIR_DEBUG("waiting for nbdkit pidfile %s: %u", proc->pidfile, loops);
> +        /* wait for 100ms before checking again, but don't do it for ever */
> +        if (errno == ENOENT && loops < 10) {
> +            g_usleep(100 * 1000);

Because here you'll get a 100ms penalty per process of startup delay if
it doesn't come up quickly enough.

> +            loops++;
> +        } else {
> +            char errbuf[1024] = { 0 };
> +            if (errfd && saferead(errfd, errbuf, sizeof(errbuf) - 1) > 0) {
> +                    virReportError(VIR_ERR_OPERATION_FAILED,
> +                                   _("nbdkit failed to start: %s"), errbuf);
> +            } else {
> +                virReportSystemError(errno, "%s",
> +                                     _("Gave up waiting for nbdkit to start"));
> +            }
> +            goto error;
> +        }
> +    }
> +
> +    if ((rc = virPidFileReadPath(proc->pidfile, &proc->pid)) < 0) {
> +        virReportSystemError(-rc,
> +                             _("Failed to read pidfile %s"),
> +                             proc->pidfile);
> +        goto error;
> +    }
> +
> +    return 0;

[...]


> diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c
> index 39210ba65b..b6cc75b651 100644
> --- a/src/qemu/qemu_validate.c
> +++ b/src/qemu/qemu_validate.c
> @@ -602,7 +602,8 @@ qemuValidateDomainDefPM(const virDomainDef *def,
>  
>  static int
>  qemuValidateDomainDefNvram(const virDomainDef *def,
> -                           virQEMUCaps *qemuCaps)
> +                           virQEMUCaps *qemuCaps,
> +                           qemuNbdkitCaps *nbdkitCaps)

If you intend to instantiate nbdkit even for the nvram backing you'll
need to make sure to start the process too.

>  {
>      virStorageSource *src = def->os.loader->nvram;
>

[...]

> diff --git a/tests/qemustatusxml2xmldata/modern-in.xml b/tests/qemustatusxml2xmldata/modern-in.xml
> index 7759034f7a..49b005cfc7 100644
> --- a/tests/qemustatusxml2xmldata/modern-in.xml
> +++ b/tests/qemustatusxml2xmldata/modern-in.xml
> @@ -337,7 +337,6 @@
>                <objects>
>                  <secret type='auth' alias='test-auth-alias'/>
>                  <secret type='encryption' alias='test-encryption-alias'/>
> -                <secret type='httpcookie' alias='http-cookie-alias'/>

Spurious change?

>                  <secret type='tlskey' alias='tls-certificate-key-alias'/>
>                  <TLSx509 alias='transport-alias'/>
>                </objects>


More information about the libvir-list mailing list