[Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration
Anton Kuchin
antonkuchin at yandex-team.ru
Fri Mar 17 17:19:46 UTC 2023
On 13/03/2023 19:48, Hanna Czenczek wrote:
> A virtio-fs device's VM state consists of:
> - the virtio device (vring) state (VMSTATE_VIRTIO_DEVICE)
> - the back-end's (virtiofsd's) internal state
>
> We get/set the latter via the new vhost-user operations FS_SET_STATE_FD,
> FS_GET_STATE, and FS_SET_STATE.
>
> Signed-off-by: Hanna Czenczek <hreitz at redhat.com>
> ---
> hw/virtio/vhost-user-fs.c | 171 +++++++++++++++++++++++++++++++++++++-
> 1 file changed, 170 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> index 83fc20e49e..df1fb02acc 100644
> --- a/hw/virtio/vhost-user-fs.c
> +++ b/hw/virtio/vhost-user-fs.c
> @@ -20,8 +20,10 @@
> #include "hw/virtio/virtio-bus.h"
> #include "hw/virtio/virtio-access.h"
> #include "qemu/error-report.h"
> +#include "qemu/memfd.h"
> #include "hw/virtio/vhost.h"
> #include "hw/virtio/vhost-user-fs.h"
> +#include "migration/qemu-file-types.h"
> #include "monitor/monitor.h"
> #include "sysemu/sysemu.h"
>
> @@ -298,9 +300,176 @@ static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
> return &fs->vhost_dev;
> }
>
> +/**
> + * Fetch the internal state from the back-end (virtiofsd) and save it
> + * to `f`.
> + */
> +static int vuf_save_state(QEMUFile *f, void *pv, size_t size,
> + const VMStateField *field, JSONWriter *vmdesc)
> +{
> + VirtIODevice *vdev = pv;
> + VHostUserFS *fs = VHOST_USER_FS(vdev);
> + int memfd = -1;
> + /* Size of the shared memory through which to transfer the state */
> + const size_t chunk_size = 4 * 1024 * 1024;
> + size_t state_offset;
> + ssize_t remaining;
> + void *shm_buf;
> + Error *local_err = NULL;
> + int ret, ret2;
> +
> + /* Set up shared memory through which to receive the state from virtiofsd */
> + shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,
> + F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW,
> + &memfd, &local_err);
> + if (!shm_buf) {
> + error_report_err(local_err);
> + ret = -ENOMEM;
> + goto early_fail;
> + }
> +
> + /* Share the SHM area with virtiofsd */
> + ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
> + if (ret < 0) {
> + goto early_fail;
Shouldn't we report an error message here too, as is done for the qemu_memfd_alloc() failure above?
> + }
> +
> + /* Receive the virtiofsd state in chunks, and write them to `f` */
> + state_offset = 0;
> + do {
> + size_t this_chunk_size;
> +
> + remaining = vhost_fs_get_state(&fs->vhost_dev, state_offset,
> + chunk_size);
> + if (remaining < 0) {
> + ret = remaining;
> + goto fail;
> + }
> +
> + /* Prefix the whole state by its total length */
> + if (state_offset == 0) {
> + qemu_put_be64(f, remaining);
> + }
> +
> + this_chunk_size = MIN(remaining, chunk_size);
> + qemu_put_buffer(f, shm_buf, this_chunk_size);
> + state_offset += this_chunk_size;
> + } while (remaining >= chunk_size);
> +
> + ret = 0;
> +fail:
> + /* Have virtiofsd close the shared memory */
> + ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
> + if (ret2 < 0) {
> + error_report("Failed to remove state FD from the vhost-user-fs back "
> + "end: %s", strerror(-ret));
> + if (ret == 0) {
> + ret = ret2;
> + }
> + }
> +
> +early_fail:
> + if (shm_buf) {
> + qemu_memfd_free(shm_buf, chunk_size, memfd);
> + }
> +
> + return ret;
> +}
> +
> +/**
> + * Load the back-end's (virtiofsd's) internal state from `f` and send
> + * it over to that back-end.
> + */
> +static int vuf_load_state(QEMUFile *f, void *pv, size_t size,
> + const VMStateField *field)
> +{
> + VirtIODevice *vdev = pv;
> + VHostUserFS *fs = VHOST_USER_FS(vdev);
> + int memfd = -1;
> + /* Size of the shared memory through which to transfer the state */
> + const size_t chunk_size = 4 * 1024 * 1024;
> + size_t state_offset;
> + uint64_t remaining;
> + void *shm_buf;
> + Error *local_err = NULL;
> + int ret, ret2;
> +
> + /* The state is prefixed by its total length, read that first */
> + remaining = qemu_get_be64(f);
> +
> + /* Set up shared memory through which to send the state to virtiofsd */
> + shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,
> + F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW,
> + &memfd, &local_err);
> + if (!shm_buf) {
> + error_report_err(local_err);
> + ret = -ENOMEM;
> + goto early_fail;
> + }
> +
> + /* Share the SHM area with virtiofsd */
> + ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
> + if (ret < 0) {
> + goto early_fail;
> + }
> +
> + /*
> + * Read the virtiofsd state in chunks from `f`, and send them over
> + * to virtiofsd
> + */
> + state_offset = 0;
> + do {
> + size_t this_chunk_size = MIN(remaining, chunk_size);
> +
> + if (qemu_get_buffer(f, shm_buf, this_chunk_size) < this_chunk_size) {
> + ret = -EINVAL;
> + goto fail;
> + }
> +
> + ret = vhost_fs_set_state(&fs->vhost_dev, state_offset, this_chunk_size);
> + if (ret < 0) {
> + goto fail;
> + }
> +
> + state_offset += this_chunk_size;
> + remaining -= this_chunk_size;
> + } while (remaining > 0);
> +
> + ret = 0;
> +fail:
> + ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
> + if (ret2 < 0) {
> + error_report("Failed to remove state FD from the vhost-user-fs back "
> + "end -- perhaps it failed to deserialize/apply the state: "
> + "%s", strerror(-ret2));
> + if (ret == 0) {
> + ret = ret2;
> + }
> + }
> +
> +early_fail:
> + if (shm_buf) {
> + qemu_memfd_free(shm_buf, chunk_size, memfd);
> + }
> +
> + return ret;
> +}
> +
> static const VMStateDescription vuf_vmstate = {
> .name = "vhost-user-fs",
> - .unmigratable = 1,
> + .version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_VIRTIO_DEVICE,
> + {
> + .name = "back-end",
> + .info = &(const VMStateInfo) {
> + .name = "virtio-fs back-end state",
> + .get = vuf_load_state,
> + .put = vuf_save_state,
> + },
> + },
I've been working on a stateless migration patch [1], and it was
discussed there that we need to keep some kind of blocker by default,
in case orchestrators rely on the unmigratable field in the virtio-fs
vmstate to block the migration.
For this purpose I've implemented a flag that selects "none" or
"external" and is checked in pre_save, so it could be extended with an
"internal" option.
We didn't come to a conclusion on whether we also need to check incoming
migration; the discussion stalled for a while, but I'm getting back to
it now.
I would appreciate it if you could find time to take a look at the
discussion and consider the idea proposed there: storing the internal
state as a subsection of the vmstate, so that it is optional rather
than mandatory.
[1]
https://patchew.org/QEMU/20230217170038.1273710-1-antonkuchin@yandex-team.ru/
> + VMSTATE_END_OF_LIST()
> + },
> };
>
> static Property vuf_properties[] = {
More information about the Virtio-fs
mailing list