[Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
piaojun
piaojun at huawei.com
Sun Aug 4 01:56:55 UTC 2019
Hi Vivek,
Shall we downgrade dax mapping from rw to read-only, if current user
open with O_RDONLY, and last user opened with O_RDWR? This will minimize
the access authority.
I wonder if my concern is necessary. And if it will cause frequent dax
mapping?
Thanks,
Jun
On 2019/7/26 23:49, Vivek Goyal wrote:
> Do not always map a dax mapping read-write. There are use cases like
> executing a file where we want to map file read-only. virtio-fs dir on
> host might be backed by overlayfs. We don't want to open file read-write
> on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> the advantages of sharing page cache between vms for unmodified files.
>
> Hence, create a read-only mapping if user did not ask for writable mapping.
> Later upgrade it to read-write mapping when users requests it.
>
> Signed-off-by: Vivek Goyal <vgoyal at redhat.com>
> ---
> fs/fuse/file.c | 121 +++++++++++++++++++++++++++++++++++++----------
> fs/fuse/fuse_i.h | 3 ++
> 2 files changed, 98 insertions(+), 26 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index a2c19e4a28b5..5277de7028a6 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
>
> /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> - struct fuse_dax_mapping *dmap)
> + struct fuse_dax_mapping *dmap, bool writable,
> + bool upgrade)
> {
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_inode *fi = get_fuse_inode(inode);
> @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> inarg.moffset = dmap->window_offset;
> inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> - inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> + if (writable)
> + inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> args.in.h.opcode = FUSE_SETUPMAPPING;
> args.in.h.nodeid = fi->nodeid;
> args.in.numargs = 1;
> @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> return err;
> }
>
> - pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> + pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> + " err=%zd\n", offset, writable, err);
>
> - /*
> - * We don't take a refernce on inode. inode is valid right now and
> - * when inode is going away, cleanup logic should first cleanup
> - * dmap entries.
> - *
> - * TODO: Do we need to ensure that we are holding inode lock
> - * as well.
> - */
> - dmap->inode = inode;
> - dmap->start = offset;
> - dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> - /* Protected by fi->i_dmap_sem */
> - fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> - fi->nr_dmaps++;
> - spin_lock(&fc->lock);
> - list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> - fc->nr_busy_ranges++;
> - spin_unlock(&fc->lock);
> + dmap->writable = writable;
> + if (!upgrade) {
> + /*
> + * We don't take a refernce on inode. inode is valid right now
> + * and when inode is going away, cleanup logic should first
> + * cleanup dmap entries.
> + *
> + * TODO: Do we need to ensure that we are holding inode lock
> + * as well.
> + */
> + dmap->inode = inode;
> + dmap->start = offset;
> + dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> + /* Protected by fi->i_dmap_sem */
> + fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> + fi->nr_dmaps++;
> + spin_lock(&fc->lock);
> + list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> + fc->nr_busy_ranges++;
> + spin_unlock(&fc->lock);
> + }
> return 0;
> }
>
> @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> int ret;
> + bool writable = flags & IOMAP_WRITE;
>
> /* Can't do reclaim in fault path yet due to lock ordering.
> * Read path takes shared inode lock and that's not sufficient
> @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>
> /* Setup one mapping */
> ret = fuse_setup_one_mapping(inode,
> - ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> + ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> + alloc_dmap, writable, false);
> if (ret < 0) {
> printk("fuse_setup_one_mapping() failed. err=%d"
> - " pos=0x%llx\n", ret, pos);
> + " pos=0x%llx, writable=%d\n", ret, pos, writable);
> dmap_add_to_free_pool(fc, alloc_dmap);
> up_write(&fi->i_dmap_sem);
> return ret;
> @@ -1943,6 +1951,52 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> return 0;
> }
>
> +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> + loff_t length, unsigned flags,
> + struct iomap *iomap)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_dax_mapping *dmap;
> + int ret;
> +
> + /*
> + * Take exclusive lock so that only one caller can try to setup
> + * mapping and others wait.
> + */
> + down_write(&fi->i_dmap_sem);
> + dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> +
> + /* We are holding either inode lock or i_mmap_sem, and that should
> + * ensure that dmap can't reclaimed or truncated and it should still
> + * be there in tree despite the fact we dropped and re-acquired the
> + * lock.
> + */
> + ret = -EIO;
> + if (WARN_ON(!dmap))
> + goto out_err;
> +
> + /* Maybe another thread already upgraded mapping while we were not
> + * holding lock.
> + */
> + if (dmap->writable)
> + goto out_fill_iomap;
> +
> + ret = fuse_setup_one_mapping(inode,
> + ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> + dmap, true, true);
> + if (ret < 0) {
> + printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
> + ret, pos);
> + goto out_err;
> + }
> +
> +out_fill_iomap:
> + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> +out_err:
> + up_write(&fi->i_dmap_sem);
> + return ret;
> +}
> +
> /* This is just for DAX and the mapping is ephemeral, do not use it for other
> * purposes since there is no block device with a permanent mapping.
> */
> @@ -1952,6 +2006,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> struct fuse_inode *fi = get_fuse_inode(inode);
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_dax_mapping *dmap;
> + bool writable = flags & IOMAP_WRITE;
>
> /* We don't support FIEMAP */
> BUG_ON(flags & IOMAP_REPORT);
> @@ -1982,9 +2037,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
>
> if (dmap) {
> - fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> - up_read(&fi->i_dmap_sem);
> - return 0;
> + if (writable && !dmap->writable) {
> + /* Upgrade read-only mapping to read-write. This will
> + * require exclusive i_dmap_sem lock as we don't want
> + * two threads to be trying to this simultaneously
> + * for same dmap. So drop shared lock and acquire
> + * exclusive lock.
> + */
> + up_read(&fi->i_dmap_sem);
> + pr_debug("%s: Upgrading mapping at offset 0x%llx"
> + " length 0x%llx\n", __func__, pos, length);
> + return iomap_begin_upgrade_mapping(inode, pos, length,
> + flags, iomap);
> + } else {
> + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> + up_read(&fi->i_dmap_sem);
> + return 0;
> + }
> } else {
> up_read(&fi->i_dmap_sem);
> pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index e899a06e29d7..34ca8b90a1e1 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -134,6 +134,9 @@ struct fuse_dax_mapping {
>
> /** Length of mapping, in bytes */
> loff_t length;
> +
> + /* Is this mapping read-only or read-write */
> + bool writable;
> };
>
> /** FUSE inode */
>
More information about the Virtio-fs
mailing list