[Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write

Vivek Goyal vgoyal at redhat.com
Fri Jul 26 15:49:48 UTC 2019


Do not always map a dax mapping read-write. There are use cases like
executing a file where we want to map file read-only. virtio-fs dir on
host might be backed by overlayfs. We don't want to open file read-write
on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
the advantages of sharing page cache between vms for unmodified files.

Hence, create a read-only mapping if user did not ask for writable mapping.
Later upgrade it to read-write mapping when users requests it.

Signed-off-by: Vivek Goyal <vgoyal at redhat.com>
---
 fs/fuse/file.c   | 121 +++++++++++++++++++++++++++++++++++++----------
 fs/fuse/fuse_i.h |   3 ++
 2 files changed, 98 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a2c19e4a28b5..5277de7028a6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
 
 /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
 static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
-				  struct fuse_dax_mapping *dmap)
+				  struct fuse_dax_mapping *dmap, bool writable,
+				  bool upgrade)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
 	inarg.moffset = dmap->window_offset;
 	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
 	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
-	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
+	if (writable)
+		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
 	args.in.h.opcode = FUSE_SETUPMAPPING;
 	args.in.h.nodeid = fi->nodeid;
 	args.in.numargs = 1;
@@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
 		return err;
 	}
 
-	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
+	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
+		 " err=%zd\n", offset, writable, err);
 
-	/*
-	 * We don't take a refernce on inode. inode is valid right now and
-	 * when inode is going away, cleanup logic should first cleanup
-	 * dmap entries.
-	 *
-	 * TODO: Do we need to ensure that we are holding inode lock
-	 * as well.
-	 */
-	dmap->inode = inode;
-	dmap->start = offset;
-	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
-	/* Protected by fi->i_dmap_sem */
-	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
-	fi->nr_dmaps++;
-	spin_lock(&fc->lock);
-	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
-	fc->nr_busy_ranges++;
-	spin_unlock(&fc->lock);
+	dmap->writable = writable;
+	if (!upgrade) {
+		/*
+		 * We don't take a refernce on inode. inode is valid right now
+		 * and when inode is going away, cleanup logic should first
+		 * cleanup dmap entries.
+		 *
+		 * TODO: Do we need to ensure that we are holding inode lock
+		 * as well.
+		 */
+		dmap->inode = inode;
+		dmap->start = offset;
+		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
+		/* Protected by fi->i_dmap_sem */
+		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
+		fi->nr_dmaps++;
+		spin_lock(&fc->lock);
+		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
+		fc->nr_busy_ranges++;
+		spin_unlock(&fc->lock);
+	}
 	return 0;
 }
 
@@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
 	int ret;
+	bool writable = flags & IOMAP_WRITE;
 
 	/* Can't do reclaim in fault path yet due to lock ordering.
 	 * Read path takes shared inode lock and that's not sufficient
@@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 
 	/* Setup one mapping */
 	ret = fuse_setup_one_mapping(inode,
-		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
+				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
+				     alloc_dmap, writable, false);
 	if (ret < 0) {
 		printk("fuse_setup_one_mapping() failed. err=%d"
-			" pos=0x%llx\n", ret, pos);
+			" pos=0x%llx, writable=%d\n", ret, pos, writable);
 		dmap_add_to_free_pool(fc, alloc_dmap);
 		up_write(&fi->i_dmap_sem);
 		return ret;
@@ -1943,6 +1951,52 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 	return 0;
 }
 
+static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
+					 loff_t length, unsigned flags,
+					 struct iomap *iomap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+	int ret;
+
+	/*
+	 * Take exclusive lock so that only one caller can try to setup
+	 * mapping and others wait.
+	 */
+	down_write(&fi->i_dmap_sem);
+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
+
+	/* We are holding either inode lock or i_mmap_sem, and that should
+	 * ensure that dmap can't reclaimed or truncated and it should still
+	 * be there in tree despite the fact we dropped and re-acquired the
+	 * lock.
+	 */
+	ret = -EIO;
+	if (WARN_ON(!dmap))
+		goto out_err;
+
+	/* Maybe another thread already upgraded mapping while we were not
+	 * holding lock.
+	 */
+	if (dmap->writable)
+		goto out_fill_iomap;
+
+	ret = fuse_setup_one_mapping(inode,
+				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
+				     dmap, true, true);
+	if (ret < 0) {
+		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
+		       ret, pos);
+		goto out_err;
+	}
+
+out_fill_iomap:
+	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+out_err:
+	up_write(&fi->i_dmap_sem);
+	return ret;
+}
+
 /* This is just for DAX and the mapping is ephemeral, do not use it for other
  * purposes since there is no block device with a permanent mapping.
  */
@@ -1952,6 +2006,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_dax_mapping *dmap;
+	bool writable = flags & IOMAP_WRITE;
 
 	/* We don't support FIEMAP */
 	BUG_ON(flags & IOMAP_REPORT);
@@ -1982,9 +2037,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
 
 	if (dmap) {
-		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
-		up_read(&fi->i_dmap_sem);
-		return 0;
+		if (writable && !dmap->writable) {
+			/* Upgrade read-only mapping to read-write. This will
+			 * require exclusive i_dmap_sem lock as we don't want
+			 * two threads to be trying to this simultaneously
+			 * for same dmap. So drop shared lock and acquire
+			 * exclusive lock.
+			 */
+			up_read(&fi->i_dmap_sem);
+			pr_debug("%s: Upgrading mapping at offset 0x%llx"
+				 " length 0x%llx\n", __func__, pos, length);
+			return iomap_begin_upgrade_mapping(inode, pos, length,
+							   flags, iomap);
+		} else {
+			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+			up_read(&fi->i_dmap_sem);
+			return 0;
+		}
 	} else {
 		up_read(&fi->i_dmap_sem);
 		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e899a06e29d7..34ca8b90a1e1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -134,6 +134,9 @@ struct fuse_dax_mapping {
 
 	/** Length of mapping, in bytes */
 	loff_t length;
+
+	/* Is this mapping read-only or read-write */
+	bool writable;
 };
 
 /** FUSE inode */
-- 
2.17.2




More information about the Virtio-fs mailing list