[Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch
Steven Whitehouse
swhiteho at redhat.com
Wed Dec 13 11:39:10 UTC 2017
On 12/12/17 17:22, Abhi Das wrote:
> Trim the list in gfs2_ordered_write() as we run through it
> to write out inodes.
> Also attempt to remove an inode from the list after it is
> fsync'ed.
> Finally, call gfs2_ordered_write() in case we were not able
> to shrink the list in gfs2_ordered_shrink() in the hopes that
> it will eventually cause the list to shrink.
> ---
> fs/gfs2/file.c | 3 +++
> fs/gfs2/incore.h | 1 +
> fs/gfs2/log.c | 13 +++++++++++--
> fs/gfs2/log.h | 4 +++-
> fs/gfs2/quota.c | 4 ++--
> 5 files changed, 20 insertions(+), 5 deletions(-)
A further thought... eventually we'll probably need to build the bios
ourselves so that we can hook the I/O completion functions. That
would be the best way to get notification that the I/O has completed. In
practice that means something like a copy of the existing filemap/writepage
functions, but one that allows us custom completions.
It would also be worth looking at the existing pagecache/mm code, to
see how the inode dirty flags get cleared after an fsync, for example.
That might be a useful reference for how to check that the inode is
clean in a race-free manner.
Steve.
>
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 757ec66..75f9ac0 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -697,6 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
> if (mapping->nrpages)
> ret = filemap_fdatawait_range(mapping, start, end);
>
> + if (!ret && !ret1)
> + gfs2_ordered_del_inode(ip, ORD_WHENCE_FSYNC);
> +
> return ret ? ret : ret1;
> }
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 6fcad2a..93da360 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -661,6 +661,7 @@ struct ord_stats {
> unsigned long os_rm_trunc;
> unsigned long os_rm_evict;
> unsigned long os_rm_wait;
> + unsigned long os_rm_fsync;
> unsigned long os_rm_syncfs;
> unsigned long os_rm_write;
> unsigned long os_rm_setflags;
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 6d618a1..4cfef47 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -507,9 +507,13 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
> list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
> while (!list_empty(&sdp->sd_log_le_ordered)) {
> ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
> - list_move(&ip->i_ordered, &written);
> - if (ip->i_inode.i_mapping->nrpages == 0)
> + if (ip->i_inode.i_mapping->nrpages == 0) {
> + test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
> + list_del(&ip->i_ordered);
> + ord_stats_adjust(sdp, -1, ORD_WHENCE_ORD_WRITE);
> continue;
> + }
> + list_move(&ip->i_ordered, &written);
> spin_unlock(&sdp->sd_ordered_lock);
> filemap_fdatawrite(ip->i_inode.i_mapping);
> spin_lock(&sdp->sd_ordered_lock);
> @@ -540,17 +544,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
> void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
> {
> struct gfs2_inode *ip, *tmp;
> + bool removed;
>
> spin_lock(&sdp->sd_ordered_lock);
> list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
> if (ip->i_inode.i_mapping->nrpages != 0)
> continue;
> if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
> + removed = true;
> list_del(&ip->i_ordered);
> ord_stats_adjust(sdp, -1, whence);
> }
> }
> spin_unlock(&sdp->sd_ordered_lock);
> +
> + if (!removed)
> + gfs2_ordered_write(sdp);
> }
>
> void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index 80c8861..0bc3620 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -55,7 +55,8 @@ enum {
> ORD_WHENCE_SYNCFS = 3,
> ORD_WHENCE_ORD_WRITE = 4,
> ORD_WHENCE_SETFLAGS = 5,
> - ORD_WHENCE_ADD = 6,
> + ORD_WHENCE_FSYNC = 6,
> + ORD_WHENCE_ADD = 7,
> };
>
> static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
> @@ -70,6 +71,7 @@ static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
> case ORD_WHENCE_SYNCFS: os->os_rm_syncfs += -(count); break;
> case ORD_WHENCE_ORD_WRITE: os->os_rm_write += -(count); break;
> case ORD_WHENCE_SETFLAGS: os->os_rm_setflags += -(count); break;
> + case ORD_WHENCE_FSYNC: os->os_rm_fsync += -(count); break;
>
> case ORD_WHENCE_ADD: os->os_add += count; break;
> default: break;
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 66c5126..63e3afa 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1575,11 +1575,11 @@ int gfs2_quotad(void *data)
>
> printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
> "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
> - " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
> + " setflags:%lu fsync:%lu]\n", sdp->sd_ord_stats.os_ct,
> sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
> sdp->sd_ord_stats.os_rm_evict, sdp->sd_ord_stats.os_rm_wait,
> sdp->sd_ord_stats.os_rm_syncfs, sdp->sd_ord_stats.os_rm_write,
> - sdp->sd_ord_stats.os_rm_setflags);
> + sdp->sd_ord_stats.os_rm_setflags, sdp->sd_ord_stats.os_rm_fsync);
> }
>
> return 0;
More information about the Cluster-devel
mailing list