[Cluster-devel] [GFS2 PATCH] gfs2: Implement special writepage for ail start operations

Steven Whitehouse swhiteho at redhat.com
Thu Jan 3 09:09:17 UTC 2019


Hi,

On 02/01/2019 21:12, Bob Peterson wrote:
> Hi,
>
> This patch is a working prototype to fix the hangs that result from
> doing certain jdata I/O, primarily xfstests/269.
>
> My earlier prototypes used a special "fs_specific" wbc flag that I suspect
> the upstream community wouldn't like. So this version is a bit bigger and
> more convoluted but accomplishes the same basic thing without the special
> wbc flag.
>
> It works by implementing a special new version of writepage that's used
> for writing pages from an ail sync operation, as opposed to writes done
> on behalf of an inode write operation. So far I've done more than 125
> iterations of test 269 which consistently failed on the first iteration
> before the patch.
>
> Since jdata and ordered pages can both be put on the ail lists, I had
> to add a special check in function start_writepage to see how to handle
> the page.
>
> It may not be perfect, but I wanted to get reactions from developers
> to see if I'm off base or forgetting something.
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
>   fs/gfs2/aops.c  | 28 +++++++++++++++++++++++++++-
>   fs/gfs2/aops.h  |  4 ++++
>   fs/gfs2/log.c   | 37 +++++++++++++++++++++++++++++++------
>   fs/gfs2/log.h   |  3 ++-
>   fs/gfs2/super.c |  2 +-
>   5 files changed, 65 insertions(+), 9 deletions(-)
>
> diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
> index 05dd78f4b2b3..bd6cb49f3b9a 100644
> --- a/fs/gfs2/aops.c
> +++ b/fs/gfs2/aops.c
> @@ -192,6 +192,32 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
>   	return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
>   }
>   
> +/**
> + * gfs2_ail_writepage - Write complete page from the ail list
> + * @page: Page to write
> + * @wbc: The writeback control
> + *
> + * Returns: errno
> + *
> + */
> +
> +int gfs2_ail_writepage(struct page *page, struct writeback_control *wbc)
> +{
> +	struct inode *inode = page->mapping->host;
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_sbd *sdp = GFS2_SB(inode);
> +
> +	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
> +		goto out;
> +	if (current->journal_info == NULL)
> +		return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
> +
> +	redirty_page_for_writepage(wbc, page);
> +out:
> +	unlock_page(page);
> +	return 0;
> +}
> +

Since this is only called from the ail flushing code, we cannot be in a 
transaction and in any case we do not want to defer the writeback, so 
this whole thing can be replaced by a simple call to gfs2_write_full_page()


>   /**
>    * gfs2_jdata_writepage - Write complete page
>    * @page: Page to write
> @@ -201,7 +227,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
>    *
>    */
>   
> -static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
> +int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
>   {
>   	struct inode *inode = page->mapping->host;
>   	struct gfs2_inode *ip = GFS2_I(inode);
> diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
> index fa8e5d0144dd..cbaaa372edc4 100644
> --- a/fs/gfs2/aops.h
> +++ b/fs/gfs2/aops.h
> @@ -15,5 +15,9 @@ extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
>   extern void adjust_fs_space(struct inode *inode);
>   extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
>   				   unsigned int from, unsigned int len);
> +extern int gfs2_jdata_writepage(struct page *page,
> +				struct writeback_control *wbc);
> +extern int gfs2_ail_writepage(struct page *page,
> +			      struct writeback_control *wbc);
>   
>   #endif /* __AOPS_DOT_H__ */
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 5bfaf381921a..1c70471fb07d 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -23,6 +23,7 @@
>   #include <linux/writeback.h>
>   #include <linux/list_sort.h>
>   
> +#include "aops.h"
>   #include "gfs2.h"
>   #include "incore.h"
>   #include "bmap.h"
> @@ -82,18 +83,40 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
>   	brelse(bd->bd_bh);
>   }
>   
> +/*
> + * Function used by generic_writepages to call the real writepage
> + * function and set the mapping flags on error
> + */
> +static int start_writepage(struct page *page, struct writeback_control *wbc,
> +			   void *data)
> +{
> +	struct address_space *mapping = page->mapping;
> +	int ail_start = *(int *)data;
> +	int ret;
> +	int (*writepage) (struct page *page, struct writeback_control *wbc);
> +
> +	writepage = mapping->a_ops->writepage;
> +
> +	if (ail_start && writepage == gfs2_jdata_writepage)
> +		writepage = gfs2_ail_writepage;
> +	ret = writepage(page, wbc);
> +	mapping_set_error(mapping, ret);
> +	return ret;
> +}
> +
If you are passing this directly to write_cache_pages() when we need it, 
then why do we need the switch to a different writepage here? In both 
cases we are doing the same thing - writing back in-place data after it 
has been through the journal, so we should be able to use the same function.
>   /**
>    * gfs2_ail1_start_one - Start I/O on a part of the AIL
>    * @sdp: the filesystem
>    * @wbc: The writeback control structure
>    * @ai: The ail structure
> + * @start: True if called from gfs2_ail1_start
>    *
>    */
>   
>   static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
>   			       struct writeback_control *wbc,
> -			       struct gfs2_trans *tr,
> -			       bool *withdraw)
> +			       struct gfs2_trans *tr, bool *withdraw,
> +			       int ail_start)
>   __releases(&sdp->sd_ail_lock)
>   __acquires(&sdp->sd_ail_lock)
>   {
> @@ -128,7 +151,8 @@ __acquires(&sdp->sd_ail_lock)
>   		if (!mapping)
>   			continue;
>   		spin_unlock(&sdp->sd_ail_lock);
> -		generic_writepages(mapping, wbc);
> +		write_cache_pages(mapping, wbc, start_writepage,
> +				  &ail_start);
>   		spin_lock(&sdp->sd_ail_lock);
>   		if (wbc->nr_to_write <= 0)
>   			break;
> @@ -148,7 +172,8 @@ __acquires(&sdp->sd_ail_lock)
>    * writeback control structure
>    */
>   
> -void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
> +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc,
> +		     int start)
>   {
>   	struct list_head *head = &sdp->sd_ail1_list;
>   	struct gfs2_trans *tr;
> @@ -162,7 +187,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
>   	list_for_each_entry_reverse(tr, head, tr_list) {
>   		if (wbc->nr_to_write <= 0)
>   			break;
> -		if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw))
> +		if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw, start))
>   			goto restart;
>   	}
>   	spin_unlock(&sdp->sd_ail_lock);
> @@ -186,7 +211,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
>   		.range_end = LLONG_MAX,
>   	};
>   
> -	return gfs2_ail1_flush(sdp, &wbc);
> +	return gfs2_ail1_flush(sdp, &wbc, 1);
>   }
>   
>   /**
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index 1bc9bd444b28..3ebad7e505e4 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -74,7 +74,8 @@ extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
>   extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
>   			   u32 type);
>   extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
> -extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
> +extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc,
> +			    int start);
>   
>   extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
>   extern int gfs2_logd(void *data);
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index d4b11c903971..31e7948c3c0c 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -762,7 +762,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
>   			       GFS2_LOG_HEAD_FLUSH_NORMAL |
>   			       GFS2_LFC_WRITE_INODE);
>   	if (bdi->wb.dirty_exceeded)
> -		gfs2_ail1_flush(sdp, wbc);
> +		gfs2_ail1_flush(sdp, wbc, 0);
>   	else
>   		filemap_fdatawrite(metamapping);
>   	if (flush_all)




More information about the Cluster-devel mailing list