[Cluster-devel] [PATCH] gfs2: Clean up gfs2_file_write_iter and fix O_SYNC write handling

Andreas Gruenbacher agruenba at redhat.com
Thu Feb 6 15:37:20 UTC 2020


Clean up gfs2_file_write_iter.  When falling back to a buffered write in the
O_DIRECT case, use generic_write_sync + IOCB_DSYNC to sync the buffered write
to disk instead of hand-rolling the sync.  Currently, the error checking in the
buffered write fallback case is incomplete.

Based on a proposed fix by Christoph Hellwig <hch at lst.de>.

Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
---
 fs/gfs2/file.c | 51 +++++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 21d032c4b077..d68530b12706 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -847,7 +847,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct gfs2_inode *ip = GFS2_I(inode);
-	ssize_t written = 0, ret;
+	ssize_t ret;
 
 	ret = gfs2_rsqa_alloc(ip);
 	if (ret)
@@ -879,55 +879,46 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		struct address_space *mapping = file->f_mapping;
-		loff_t pos, endbyte;
-		ssize_t buffered;
+		ssize_t buffered, ret2;
 
-		written = gfs2_file_direct_write(iocb, from);
-		if (written < 0 || !iov_iter_count(from))
+		ret = gfs2_file_direct_write(iocb, from);
+		if (ret < 0 || !iov_iter_count(from))
 			goto out_unlock;
 
 		current->backing_dev_info = inode_to_bdi(inode);
-		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+		buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 		current->backing_dev_info = NULL;
-		if (unlikely(ret < 0))
+		if (unlikely(buffered <= 0))
 			goto out_unlock;
-		buffered = ret;
 
 		/*
 		 * We need to ensure that the page cache pages are written to
 		 * disk and invalidated to preserve the expected O_DIRECT
-		 * semantics.
+		 * semantics.  If the writeback or invalidate fails, only report
+		 * the direct I/O range as we don't know if the buffered pages
+		 * made it to disk.
 		 */
-		pos = iocb->ki_pos;
-		endbyte = pos + buffered - 1;
-		ret = filemap_write_and_wait_range(mapping, pos, endbyte);
-		if (!ret) {
-			iocb->ki_pos += buffered;
-			written += buffered;
-			invalidate_mapping_pages(mapping,
-						 pos >> PAGE_SHIFT,
-						 endbyte >> PAGE_SHIFT);
-		} else {
-			/*
-			 * We don't know how much we wrote, so just return
-			 * the number of bytes which were direct-written
-			 */
-		}
+		iocb->ki_pos += buffered;
+		iocb->ki_flags |= IOCB_DSYNC;
+		ret2 = generic_write_sync(iocb, buffered);
+		invalidate_mapping_pages(mapping,
+					 (iocb->ki_pos - buffered) >> PAGE_SHIFT,
+					 (iocb->ki_pos - 1) >> PAGE_SHIFT);
+		if (!ret || ret2 > 0)
+			ret += ret2;
 	} else {
 		current->backing_dev_info = inode_to_bdi(inode);
 		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 		current->backing_dev_info = NULL;
-		if (likely(ret > 0))
+		if (likely(ret > 0)) {
 			iocb->ki_pos += ret;
+			ret = generic_write_sync(iocb, ret);
+		}
 	}
 
 out_unlock:
 	inode_unlock(inode);
-	if (likely(ret > 0)) {
-		/* Handle various SYNC-type writes */
-		ret = generic_write_sync(iocb, ret);
-	}
-	return written ? written : ret;
+	return ret;
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
-- 
2.24.1




More information about the Cluster-devel mailing list