[Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch

Steven Whitehouse swhiteho at redhat.com
Wed Dec 13 11:39:10 UTC 2017



On 12/12/17 17:22, Abhi Das wrote:
> Trim the list in gfs2_ordered_write() as we run through it
> to write out inodes.
> Also attempt to remove an inode from the list after it is
> fsync'ed.
> Finally, call gfs2_ordered_write() in case we were not able
> to shrink the list in gfs2_ordered_shrink() in the hopes that
> it will eventually cause the list to shrink.
> ---
>   fs/gfs2/file.c   |  3 +++
>   fs/gfs2/incore.h |  1 +
>   fs/gfs2/log.c    | 13 +++++++++++--
>   fs/gfs2/log.h    |  4 +++-
>   fs/gfs2/quota.c  |  4 ++--
>   5 files changed, 20 insertions(+), 5 deletions(-)
A further thought... eventually we'll probably need to build the bios 
ourselves so that we can hook the I/O completion functions. That 
would be the best way to get notification that the I/O has completed. So 
something like a copy of the existing filemap/writepage functions, but 
allowing us to supply custom completions.

It would also be worth looking at the existing pagecache/mm code, to 
see how the inode dirty flags get cleared after an fsync, for example. 
That might be a useful reference for how to check that the inode is 
clean in a race-free manner.

Steve.

>
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 757ec66..75f9ac0 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -697,6 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
>   	if (mapping->nrpages)
>   		ret = filemap_fdatawait_range(mapping, start, end);
>   
> +	if (!ret && !ret1)
> +		gfs2_ordered_del_inode(ip, ORD_WHENCE_FSYNC);
> +
>   	return ret ? ret : ret1;
>   }
>   
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 6fcad2a..93da360 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -661,6 +661,7 @@ struct ord_stats {
>   	unsigned long os_rm_trunc;
>   	unsigned long os_rm_evict;
>   	unsigned long os_rm_wait;
> +	unsigned long os_rm_fsync;
>   	unsigned long os_rm_syncfs;
>   	unsigned long os_rm_write;
>   	unsigned long os_rm_setflags;
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 6d618a1..4cfef47 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -507,9 +507,13 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
>   	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
>   	while (!list_empty(&sdp->sd_log_le_ordered)) {
>   		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
> -		list_move(&ip->i_ordered, &written);
> -		if (ip->i_inode.i_mapping->nrpages == 0)
> +		if (ip->i_inode.i_mapping->nrpages == 0) {
> +			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
> +			list_del(&ip->i_ordered);
> +			ord_stats_adjust(sdp, -1, ORD_WHENCE_ORD_WRITE);
>   			continue;
> +		}
> +		list_move(&ip->i_ordered, &written);
>   		spin_unlock(&sdp->sd_ordered_lock);
>   		filemap_fdatawrite(ip->i_inode.i_mapping);
>   		spin_lock(&sdp->sd_ordered_lock);
> @@ -540,17 +544,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
>   void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
>   {
>   	struct gfs2_inode *ip, *tmp;
> +	bool removed;
>   
>   	spin_lock(&sdp->sd_ordered_lock);
>   	list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
>   		if (ip->i_inode.i_mapping->nrpages != 0)
>   			continue;
>   		if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
> +			removed = true;
>   			list_del(&ip->i_ordered);
>   			ord_stats_adjust(sdp, -1, whence);
>   		}
>   	}
>   	spin_unlock(&sdp->sd_ordered_lock);
> +
> +	if (!removed)
> +		gfs2_ordered_write(sdp);
>   }
>   
>   void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index 80c8861..0bc3620 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -55,7 +55,8 @@ enum {
>   	ORD_WHENCE_SYNCFS       = 3,
>   	ORD_WHENCE_ORD_WRITE    = 4,
>   	ORD_WHENCE_SETFLAGS     = 5,
> -	ORD_WHENCE_ADD          = 6,
> +	ORD_WHENCE_FSYNC        = 6,
> +	ORD_WHENCE_ADD          = 7,
>   };
>   
>   static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
> @@ -70,6 +71,7 @@ static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
>   	case ORD_WHENCE_SYNCFS:     os->os_rm_syncfs += -(count); break;
>   	case ORD_WHENCE_ORD_WRITE:  os->os_rm_write += -(count); break;
>   	case ORD_WHENCE_SETFLAGS:   os->os_rm_setflags += -(count); break;
> +	case ORD_WHENCE_FSYNC:      os->os_rm_fsync += -(count); break;
>   
>   	case ORD_WHENCE_ADD:        os->os_add += count; break;
>   	default: break;
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 66c5126..63e3afa 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1575,11 +1575,11 @@ int gfs2_quotad(void *data)
>   
>   		printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
>   		       "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
> -		       " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
> +		       " setflags:%lu fsync:%lu]\n", sdp->sd_ord_stats.os_ct,
>   		       sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
>   		       sdp->sd_ord_stats.os_rm_evict, sdp->sd_ord_stats.os_rm_wait,
>   		       sdp->sd_ord_stats.os_rm_syncfs, sdp->sd_ord_stats.os_rm_write,
> -		       sdp->sd_ord_stats.os_rm_setflags);
> +		       sdp->sd_ord_stats.os_rm_setflags, sdp->sd_ord_stats.os_rm_fsync);
>   	}
>   
>   	return 0;




More information about the Cluster-devel mailing list