[Cluster-devel] [PATCH 3/4] gfs2: ordered buffer writes are not sync

Dave Chinner dchinner at redhat.com
Fri Feb 5 05:45:26 UTC 2010


Currently gfs2 ordered buffer writes use WRITE_SYNC_PLUG as the IO
type being dispatched. They aren't sync writes; we issue all the
pending IO, then wait for it all. IOWs, this is async IO with a bulk
wait at the end.

We should use normal WRITE tagging for this, and before we start
waiting make sure that all the IO is issued by unplugging the
device. The use of normal WRITEs for these buffers should
significantly reduce the overhead of processing in the cfq elevator
and enable the disk subsystem to get much closer to disk bandwidth
for large sequential writes.

Signed-off-by: Dave Chinner <dchinner at redhat.com>
---
 fs/gfs2/aops.c |    3 +++
 fs/gfs2/log.c  |   11 +++++++----
 fs/gfs2/lops.c |   18 ++++++++++--------
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 7b8da94..b75784c 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,6 +20,7 @@
 #include <linux/swap.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/backing-dev.h>
+#include <linux/blkdev.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -34,6 +35,7 @@
 #include "super.h"
 #include "util.h"
 #include "glops.h"
+#include "trace_gfs2.h"
 
 
 static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
@@ -52,6 +54,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
 		if (gfs2_is_jdata(ip))
 			set_buffer_uptodate(bh);
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
+		trace_gfs2_submit_bh(bh, WRITE, __func__);
 	}
 }
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index bd26dff..a9797be 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/bio.h>
+#include <linux/blkdev.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -121,8 +122,8 @@ __acquires(&sdp->sd_log_lock)
 			lock_buffer(bh);
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
-				trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
-				submit_bh(WRITE_SYNC_PLUG, bh);
+				trace_gfs2_submit_bh(bh, WRITE, __func__);
+				submit_bh(WRITE, bh);
 			} else {
 				unlock_buffer(bh);
 				brelse(bh);
@@ -675,8 +676,8 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 		lock_buffer(bh);
 		if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
 			bh->b_end_io = end_buffer_write_sync;
-			trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
-			submit_bh(WRITE_SYNC_PLUG, bh);
+			trace_gfs2_submit_bh(bh, WRITE, __func__);
+			submit_bh(WRITE, bh);
 		} else {
 			unlock_buffer(bh);
 			brelse(bh);
@@ -692,6 +693,8 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
 
+	blk_run_backing_dev(blk_get_backing_dev_info(sdp->sd_vfs->s_bdev), NULL);
+
 	gfs2_log_lock(sdp);
 	while (!list_empty(&sdp->sd_log_le_ordered)) {
 		bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 7278cf0..0fe2f3c 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -15,6 +15,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/bio.h>
 #include <linux/fs.h>
+#include <linux/blkdev.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -198,8 +199,8 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 		}
 
 		gfs2_log_unlock(sdp);
-		trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
-		submit_bh(WRITE_SYNC_PLUG, bh);
+		trace_gfs2_submit_bh(bh, WRITE, __func__);
+		submit_bh(WRITE, bh);
 		gfs2_log_lock(sdp);
 
 		n = 0;
@@ -209,8 +210,8 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 			gfs2_log_unlock(sdp);
 			lock_buffer(bd2->bd_bh);
 			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
-			trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
-			submit_bh(WRITE_SYNC_PLUG, bh);
+			trace_gfs2_submit_bh(bh, WRITE, __func__);
+			submit_bh(WRITE, bh);
 			gfs2_log_lock(sdp);
 			if (++n >= num)
 				break;
@@ -220,6 +221,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 		total -= num;
 	}
 	gfs2_log_unlock(sdp);
+	blk_run_backing_dev(blk_get_backing_dev_info(sdp->sd_vfs->s_bdev), NULL);
 }
 
 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -573,8 +575,8 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	ptr = bh_log_ptr(bh);
 	
 	get_bh(bh);
-	trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
-	submit_bh(WRITE_SYNC_PLUG, bh);
+	trace_gfs2_submit_bh(bh, WRITE, __func__);
+	submit_bh(WRITE, bh);
 	gfs2_log_lock(sdp);
 	while(!list_empty(list)) {
 		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -600,8 +602,8 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		} else {
 			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
 		}
-		trace_gfs2_submit_bh(bh1, WRITE_SYNC_PLUG, __func__);
-		submit_bh(WRITE_SYNC_PLUG, bh1);
+		trace_gfs2_submit_bh(bh1, WRITE, __func__);
+		submit_bh(WRITE, bh1);
 		gfs2_log_lock(sdp);
 		ptr += 2;
 	}
-- 
1.6.5




More information about the Cluster-devel mailing list