[Cluster-devel] [GFS2 PATCH] gfs2: Implement special writepage for ail start operations
Steven Whitehouse
swhiteho at redhat.com
Thu Jan 3 09:09:17 UTC 2019
Hi,
On 02/01/2019 21:12, Bob Peterson wrote:
> Hi,
>
> This patch is a working prototype to fix the hangs that result from
> doing certain jdata I/O, primarily xfstests/269.
>
> My earlier prototypes used a special "fs_specific" wbc flag that I suspect
> the upstream community wouldn't like. So this version is a bit bigger and
> more convoluted but accomplishes the same basic thing without the special
> wbc flag.
>
> It works by implementing a special new version of writepage that's used
> for writing pages from an ail sync operation, as opposed to writes done
> on behalf of an inode write operation. So far I've done more than 125
> iterations of test 269 which consistently failed on the first iteration
> before the patch.
>
> Since jdata and ordered pages can both be put on the ail lists, I had
> to add a special check in function start_writepage to see how to handle
> the page.
>
> It may not be perfect, but I wanted to get reactions from developers
> to see if I'm off base or forgetting something.
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
> fs/gfs2/aops.c | 28 +++++++++++++++++++++++++++-
> fs/gfs2/aops.h | 4 ++++
> fs/gfs2/log.c | 37 +++++++++++++++++++++++++++++++------
> fs/gfs2/log.h | 3 ++-
> fs/gfs2/super.c | 2 +-
> 5 files changed, 65 insertions(+), 9 deletions(-)
>
> diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
> index 05dd78f4b2b3..bd6cb49f3b9a 100644
> --- a/fs/gfs2/aops.c
> +++ b/fs/gfs2/aops.c
> @@ -192,6 +192,32 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
> return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
> }
>
> +/**
> + * gfs2_ail_writepage - Write complete page from the ail list
> + * @page: Page to write
> + * @wbc: The writeback control
> + *
> + * Returns: errno
> + *
> + */
> +
> +int gfs2_ail_writepage(struct page *page, struct writeback_control *wbc)
> +{
> + struct inode *inode = page->mapping->host;
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_sbd *sdp = GFS2_SB(inode);
> +
> + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
> + goto out;
> + if (current->journal_info == NULL)
> + return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
> +
> + redirty_page_for_writepage(wbc, page);
> +out:
> + unlock_page(page);
> + return 0;
> +}
> +
Since this is only called from the ail flushing code, we cannot be in a
transaction and in any case we do not want to defer the writeback, so
this whole thing can be replaced by a simple call to gfs2_write_full_page()
> /**
> * gfs2_jdata_writepage - Write complete page
> * @page: Page to write
> @@ -201,7 +227,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
> *
> */
>
> -static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
> +int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
> {
> struct inode *inode = page->mapping->host;
> struct gfs2_inode *ip = GFS2_I(inode);
> diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
> index fa8e5d0144dd..cbaaa372edc4 100644
> --- a/fs/gfs2/aops.h
> +++ b/fs/gfs2/aops.h
> @@ -15,5 +15,9 @@ extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
> extern void adjust_fs_space(struct inode *inode);
> extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
> unsigned int from, unsigned int len);
> +extern int gfs2_jdata_writepage(struct page *page,
> + struct writeback_control *wbc);
> +extern int gfs2_ail_writepage(struct page *page,
> + struct writeback_control *wbc);
>
> #endif /* __AOPS_DOT_H__ */
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 5bfaf381921a..1c70471fb07d 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -23,6 +23,7 @@
> #include <linux/writeback.h>
> #include <linux/list_sort.h>
>
> +#include "aops.h"
> #include "gfs2.h"
> #include "incore.h"
> #include "bmap.h"
> @@ -82,18 +83,40 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
> brelse(bd->bd_bh);
> }
>
> +/*
> + * Function used by generic_writepages to call the real writepage
> + * function and set the mapping flags on error
> + */
> +static int start_writepage(struct page *page, struct writeback_control *wbc,
> + void *data)
> +{
> + struct address_space *mapping = page->mapping;
> + int ail_start = *(int *)data;
> + int ret;
> + int (*writepage) (struct page *page, struct writeback_control *wbc);
> +
> + writepage = mapping->a_ops->writepage;
> +
> + if (ail_start && writepage == gfs2_jdata_writepage)
> + writepage = gfs2_ail_writepage;
> + ret = writepage(page, wbc);
> + mapping_set_error(mapping, ret);
> + return ret;
> +}
> +
If you are passing this directly to write_cache_pages() when we need it,
then why do we need the switch to a different writepage here? In both
cases we are doing the same thing - writing back in-place data after it
has been through the journal, so we should be able to use the same function.
> /**
> * gfs2_ail1_start_one - Start I/O on a part of the AIL
> * @sdp: the filesystem
> * @wbc: The writeback control structure
> * @ai: The ail structure
> + * @start: True if called from gfs2_ail1_start
> *
> */
>
> static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
> struct writeback_control *wbc,
> - struct gfs2_trans *tr,
> - bool *withdraw)
> + struct gfs2_trans *tr, bool *withdraw,
> + int ail_start)
> __releases(&sdp->sd_ail_lock)
> __acquires(&sdp->sd_ail_lock)
> {
> @@ -128,7 +151,8 @@ __acquires(&sdp->sd_ail_lock)
> if (!mapping)
> continue;
> spin_unlock(&sdp->sd_ail_lock);
> - generic_writepages(mapping, wbc);
> + write_cache_pages(mapping, wbc, start_writepage,
> + &ail_start);
> spin_lock(&sdp->sd_ail_lock);
> if (wbc->nr_to_write <= 0)
> break;
> @@ -148,7 +172,8 @@ __acquires(&sdp->sd_ail_lock)
> * writeback control structure
> */
>
> -void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
> +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc,
> + int start)
> {
> struct list_head *head = &sdp->sd_ail1_list;
> struct gfs2_trans *tr;
> @@ -162,7 +187,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
> list_for_each_entry_reverse(tr, head, tr_list) {
> if (wbc->nr_to_write <= 0)
> break;
> - if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw))
> + if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw, start))
> goto restart;
> }
> spin_unlock(&sdp->sd_ail_lock);
> @@ -186,7 +211,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
> .range_end = LLONG_MAX,
> };
>
> - return gfs2_ail1_flush(sdp, &wbc);
> + return gfs2_ail1_flush(sdp, &wbc, 1);
> }
>
> /**
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index 1bc9bd444b28..3ebad7e505e4 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -74,7 +74,8 @@ extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
> extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
> u32 type);
> extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
> -extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
> +extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc,
> + int start);
>
> extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
> extern int gfs2_logd(void *data);
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index d4b11c903971..31e7948c3c0c 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -762,7 +762,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
> GFS2_LOG_HEAD_FLUSH_NORMAL |
> GFS2_LFC_WRITE_INODE);
> if (bdi->wb.dirty_exceeded)
> - gfs2_ail1_flush(sdp, wbc);
> + gfs2_ail1_flush(sdp, wbc, 0);
> else
> filemap_fdatawrite(metamapping);
> if (flush_all)
More information about the Cluster-devel
mailing list