[Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration

Anton Kuchin antonkuchin at yandex-team.ru
Fri Mar 17 17:19:46 UTC 2023


On 13/03/2023 19:48, Hanna Czenczek wrote:
> A virtio-fs device's VM state consists of:
> - the virtio device (vring) state (VMSTATE_VIRTIO_DEVICE)
> - the back-end's (virtiofsd's) internal state
>
> We get/set the latter via the new vhost-user operations FS_SET_STATE_FD,
> FS_GET_STATE, and FS_SET_STATE.
>
> Signed-off-by: Hanna Czenczek <hreitz at redhat.com>
> ---
>   hw/virtio/vhost-user-fs.c | 171 +++++++++++++++++++++++++++++++++++++-
>   1 file changed, 170 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> index 83fc20e49e..df1fb02acc 100644
> --- a/hw/virtio/vhost-user-fs.c
> +++ b/hw/virtio/vhost-user-fs.c
> @@ -20,8 +20,10 @@
>   #include "hw/virtio/virtio-bus.h"
>   #include "hw/virtio/virtio-access.h"
>   #include "qemu/error-report.h"
> +#include "qemu/memfd.h"
>   #include "hw/virtio/vhost.h"
>   #include "hw/virtio/vhost-user-fs.h"
> +#include "migration/qemu-file-types.h"
>   #include "monitor/monitor.h"
>   #include "sysemu/sysemu.h"
>   
> @@ -298,9 +300,176 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
>       return &fs->vhost_dev;
>   }
>   
> +/**
> + * Fetch the internal state from the back-end (virtiofsd) and save it
> + * to `f`.
> + */
> +static int vuf_save_state(QEMUFile *f, void *pv, size_t size,
> +                          const VMStateField *field, JSONWriter *vmdesc)
> +{
> +    VirtIODevice *vdev = pv;
> +    VHostUserFS *fs = VHOST_USER_FS(vdev);
> +    int memfd = -1;
> +    /* Size of the shared memory through which to transfer the state */
> +    const size_t chunk_size = 4 * 1024 * 1024;
> +    size_t state_offset;
> +    ssize_t remaining;
> +    void *shm_buf;
> +    Error *local_err = NULL;
> +    int ret, ret2;
> +
> +    /* Set up shared memory through which to receive the state from virtiofsd */
> +    shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,
> +                               F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW,
> +                               &memfd, &local_err);
> +    if (!shm_buf) {
> +        error_report_err(local_err);
> +        ret = -ENOMEM;
> +        goto early_fail;
> +    }
> +
> +    /* Share the SHM area with virtiofsd */
> +    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
> +    if (ret < 0) {
> +        goto early_fail;

Don't we need some log message here too?
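Something along these lines, perhaps (just a sketch, the exact wording of
the message is of course up to you):

    /* Share the SHM area with virtiofsd */
    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
    if (ret < 0) {
        error_report("Failed to set state FD on the vhost-user-fs back "
                     "end: %s", strerror(-ret));
        goto early_fail;
    }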

> +    }
> +
> +    /* Receive the virtiofsd state in chunks, and write them to `f` */
> +    state_offset = 0;
> +    do {
> +        size_t this_chunk_size;
> +
> +        remaining = vhost_fs_get_state(&fs->vhost_dev, state_offset,
> +                                       chunk_size);
> +        if (remaining < 0) {
> +            ret = remaining;
> +            goto fail;
> +        }
> +
> +        /* Prefix the whole state by its total length */
> +        if (state_offset == 0) {
> +            qemu_put_be64(f, remaining);
> +        }
> +
> +        this_chunk_size = MIN(remaining, chunk_size);
> +        qemu_put_buffer(f, shm_buf, this_chunk_size);
> +        state_offset += this_chunk_size;
> +    } while (remaining >= chunk_size);
> +
> +    ret = 0;
> +fail:
> +    /* Have virtiofsd close the shared memory */
> +    ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
> +    if (ret2 < 0) {
> +        error_report("Failed to remove state FD from the vhost-user-fs back "
> +                     "end: %s", strerror(-ret));
> +        if (ret == 0) {
> +            ret = ret2;
> +        }
> +    }
> +
> +early_fail:
> +    if (shm_buf) {
> +        qemu_memfd_free(shm_buf, chunk_size, memfd);
> +    }
> +
> +    return ret;
> +}
> +
> +/**
> + * Load the back-end's (virtiofsd's) internal state from `f` and send
> + * it over to that back-end.
> + */
> +static int vuf_load_state(QEMUFile *f, void *pv, size_t size,
> +                          const VMStateField *field)
> +{
> +    VirtIODevice *vdev = pv;
> +    VHostUserFS *fs = VHOST_USER_FS(vdev);
> +    int memfd = -1;
> +    /* Size of the shared memory through which to transfer the state */
> +    const size_t chunk_size = 4 * 1024 * 1024;
> +    size_t state_offset;
> +    uint64_t remaining;
> +    void *shm_buf;
> +    Error *local_err = NULL;
> +    int ret, ret2;
> +
> +    /* The state is prefixed by its total length, read that first */
> +    remaining = qemu_get_be64(f);
> +
> +    /* Set up shared memory through which to send the state to virtiofsd */
> +    shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,
> +                               F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW,
> +                               &memfd, &local_err);
> +    if (!shm_buf) {
> +        error_report_err(local_err);
> +        ret = -ENOMEM;
> +        goto early_fail;
> +    }
> +
> +    /* Share the SHM area with virtiofsd */
> +    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
> +    if (ret < 0) {
> +        goto early_fail;
> +    }
> +
> +    /*
> +     * Read the virtiofsd state in chunks from `f`, and send them over
> +     * to virtiofsd
> +     */
> +    state_offset = 0;
> +    do {
> +        size_t this_chunk_size = MIN(remaining, chunk_size);
> +
> +        if (qemu_get_buffer(f, shm_buf, this_chunk_size) < this_chunk_size) {
> +            ret = -EINVAL;
> +            goto fail;
> +        }
> +
> +        ret = vhost_fs_set_state(&fs->vhost_dev, state_offset, this_chunk_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        state_offset += this_chunk_size;
> +        remaining -= this_chunk_size;
> +    } while (remaining > 0);
> +
> +    ret = 0;
> +fail:
> +    ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
> +    if (ret2 < 0) {
> +        error_report("Failed to remove state FD from the vhost-user-fs back "
> +                     "end -- perhaps it failed to deserialize/apply the state: "
> +                     "%s", strerror(-ret2));
> +        if (ret == 0) {
> +            ret = ret2;
> +        }
> +    }
> +
> +early_fail:
> +    if (shm_buf) {
> +        qemu_memfd_free(shm_buf, chunk_size, memfd);
> +    }
> +
> +    return ret;
> +}
> +
>   static const VMStateDescription vuf_vmstate = {
>       .name = "vhost-user-fs",
> -    .unmigratable = 1,
> +    .version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_VIRTIO_DEVICE,
> +        {
> +            .name = "back-end",
> +            .info = &(const VMStateInfo) {
> +                .name = "virtio-fs back-end state",
> +                .get = vuf_load_state,
> +                .put = vuf_save_state,
> +            },
> +        },

I've been working on a stateless migration patch [1], and it was discussed
there that we need to keep some kind of migration blocker by default,
because orchestrators may rely on the unmigratable field in the virtio-fs
vmstate to block migration.
For this purpose I've implemented a flag that selects "none" or "external"
and is checked in pre_save, so it could be extended with an "internal"
option. We didn't reach a conclusion on whether we also need to check the
incoming side; the discussion has stalled for a while, but I'm getting
back to it now.

I would appreciate it if you could take a look at that discussion and
consider the idea proposed there: store the internal state as a subsection
of the vmstate, so that it becomes optional rather than mandatory.

[1] 
https://patchew.org/QEMU/20230217170038.1273710-1-antonkuchin@yandex-team.ru/
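Roughly what I have in mind for the "internal" case, as an untested sketch
(the "migration_type" field and the VHOST_USER_MIGRATION_TYPE_INTERNAL
value are placeholders from my proposal, not part of your patch or of
current QEMU):

    /*
     * Hypothetical check whether the user requested internal (stateful)
     * migration; the property/enum names are placeholders.
     */
    static bool vuf_internal_state_needed(void *opaque)
    {
        VHostUserFS *fs = VHOST_USER_FS(opaque);

        return fs->migration_type == VHOST_USER_MIGRATION_TYPE_INTERNAL;
    }

    /* Back-end state goes into an optional subsection */
    static const VMStateDescription vuf_backend_vmstate = {
        .name = "vhost-user-fs/back-end",
        .version_id = 1,
        .needed = vuf_internal_state_needed,
        .fields = (VMStateField[]) {
            {
                .name = "back-end",
                .info = &(const VMStateInfo) {
                    .name = "virtio-fs back-end state",
                    .get = vuf_load_state,
                    .put = vuf_save_state,
                },
            },
            VMSTATE_END_OF_LIST()
        },
    };

    static const VMStateDescription vuf_vmstate = {
        .name = "vhost-user-fs",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_VIRTIO_DEVICE,
            VMSTATE_END_OF_LIST()
        },
        .subsections = (const VMStateDescription * []) {
            &vuf_backend_vmstate,
            NULL
        }
    };

That way the back-end subsection only goes on the wire when internal
migration was explicitly requested, and stateless/external setups keep
working unchanged.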

> +        VMSTATE_END_OF_LIST()
> +    },
>   };
>   
>   static Property vuf_properties[] = {
