[Virtio-fs] [PATCH] virtiofsd: Don't assume header layout

Vivek Goyal vgoyal at redhat.com
Fri Mar 5 16:06:09 UTC 2021


On Thu, Mar 04, 2021 at 05:45:36PM +0000, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert at redhat.com>
> 
> virtiofsd incorrectly assumed a fixed set of header layout in the virt
> queue; assuming that the fuse and write headers were conveniently
> separated from the data;  the spec doesn't allow us to take that
> convenience, so fix it up to deal with it the hard way.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert at redhat.com>
> ---
>  tools/virtiofsd/fuse_virtio.c | 84 ++++++++++++++++++++++++++---------
>  1 file changed, 63 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
> index 523ee64fb7..55e2e057ec 100644
> --- a/tools/virtiofsd/fuse_virtio.c
> +++ b/tools/virtiofsd/fuse_virtio.c
> @@ -129,18 +129,55 @@ static void fv_panic(VuDev *dev, const char *err)
>   * Copy from an iovec into a fuse_buf (memory only)
>   * Caller must ensure there is space
>   */
> -static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
> -                          const struct iovec *out_sg)
> +static size_t copy_from_iov(struct fuse_buf *buf, size_t out_num,
> +                            const struct iovec *out_sg,
> +                            size_t max)
>  {
>      void *dest = buf->mem;
> +    size_t copied = 0;
>  
> -    while (out_num) {
> +    while (out_num && max) {
>          size_t onelen = out_sg->iov_len;
> +        onelen = MIN(onelen, max);
>          memcpy(dest, out_sg->iov_base, onelen);
>          dest += onelen;
> +        copied += onelen;
>          out_sg++;
>          out_num--;
> +        max -= onelen;
>      }
> +
> +    return copied;
> +}
> +
> +/*
> + * Skip 'skip' bytes in the iov; 'sg_1stindex' is set as
> + * the index for the 1st iovec to read data from, and
> + * 'sg_1stskip' is the number of bytes to skip in that entry.
> + *
> + * Returns True if there are at least 'skip' bytes in the iovec
> + *
> + */
> +static bool skip_iov(const struct iovec *sg, size_t sg_size,
> +                     size_t skip,
> +                     size_t *sg_1stindex, size_t *sg_1stskip)
> +{
> +    size_t vec;
> +
> +    for (vec = 0; vec < sg_size; vec++) {
> +        if (sg[vec].iov_len > skip) {
> +            *sg_1stskip = skip;
> +            *sg_1stindex = vec;
> +
> +            return true;
> +        }
> +
> +        skip -= sg[vec].iov_len;
> +    }
> +
> +    *sg_1stindex = vec;
> +    *sg_1stskip = 0;
> +    return skip == 0;
>  }
>  
>  /*
> @@ -505,14 +542,14 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
>                   elem->index);
>          assert(0); /* TODO */
>      }
> -    /* Copy just the first element and look at it */
> -    copy_from_iov(&fbuf, 1, out_sg);
> +    /* Copy just the fuse_in_header and look at it */
> +    copy_from_iov(&fbuf, out_num, out_sg,
> +                  sizeof(struct fuse_in_header));
>  
>      pbufv = NULL; /* Compiler thinks an unitialised path */
> -    if (out_num > 2 &&
> -        out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
> -        ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
> -        out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
> +    if (((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
> +        out_len >= (sizeof(struct fuse_in_header) +
> +                    sizeof(struct fuse_write_in))) {
>          /*
>           * For a write we don't actually need to copy the
>           * data, we can just do it straight out of guest memory
> @@ -521,15 +558,13 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
>           */
>          fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
>  
> -        /* copy the fuse_write_in header afte rthe fuse_in_header */

Hi Dave,

I was scratching my head that why FUSE_WRITE path is being handled
differently than rest of the requests. Then I happened to look
at description of fuse_session_process_buf_int() and that expects
a certain layout for FUSE_WRITE requests. Thats why...

> -        fbuf.mem += out_sg->iov_len;
> -        copy_from_iov(&fbuf, 1, out_sg + 1);
> -        fbuf.mem -= out_sg->iov_len;
> -        fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
> +        fbuf.size = copy_from_iov(&fbuf, out_num, out_sg,
> +                                  sizeof(struct fuse_in_header) +
> +                                  sizeof(struct fuse_write_in));

You are not trusting guest that it can change iovecs. So is it a good
idea to copy fuse_in_header again. I mean we copied once, looked at
it and now are copying again. It is possible that guest changed it
and our assumption of this being a WRITE request might be broken now.


>  
>          /* Allocate the bufv, with space for the rest of the iov */
>          pbufv = malloc(sizeof(struct fuse_bufvec) +
> -                       sizeof(struct fuse_buf) * (out_num - 2));
> +                       sizeof(struct fuse_buf) * out_num);
>          if (!pbufv) {
>              fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
>                      __func__);
> @@ -540,24 +575,31 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
>          pbufv->count = 1;
>          pbufv->buf[0] = fbuf;
>  
> -        size_t iovindex, pbufvindex;
> -        iovindex = 2; /* 2 headers, separate iovs */
> +        size_t iovindex, pbufvindex, skip;

How about renaming "skip" to say "iov_byte_skip" or something like that.
Too many variables are in there. It just makes it clear to skip few
bytes in iov. Just a suggestion. Feel free to ignore if you are not
convinced.

>          pbufvindex = 1; /* 2 headers, 1 fusebuf */
>  
> +        skip_iov(out_sg, out_num,
> +                 sizeof(struct fuse_in_header) +
> +                 sizeof(struct fuse_write_in),
> +                 &iovindex, &skip);
> +

How about looking at return code of skip_iov(). Just in case guest
has changed it by now.

May be a safer practice will be to always first copy out_sg in a
temporary buffer and then we should parse it and organize in
pbufv/bufv for fuse consumption. But I guess that adds one extra
copy and it might not be worth it. So may be solve it when it
creates a real problem.

Thanks
Vivek

>          for (; iovindex < out_num; iovindex++, pbufvindex++) {
>              pbufv->count++;
>              pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
>              pbufv->buf[pbufvindex].flags = 0;
>              pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
>              pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
> +
> +            if (skip) {
> +                pbufv->buf[pbufvindex].mem += skip;
> +                pbufv->buf[pbufvindex].size -= skip;
> +                skip = 0;
> +            }
>          }
>      } else {
>          /* Normal (non fast write) path */
>  
> -        /* Copy the rest of the buffer */
> -        fbuf.mem += out_sg->iov_len;
> -        copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
> -        fbuf.mem -= out_sg->iov_len;
> +        copy_from_iov(&fbuf, out_num, out_sg, se->bufsize);
>          fbuf.size = out_len;
>  
>          /* TODO! Endianness of header */
> -- 
> 2.29.2
> 




More information about the Virtio-fs mailing list