[Cluster-devel] [PATCH 01/19] gfs2: log error reform
Andreas Gruenbacher
agruenba at redhat.com
Tue Apr 9 13:46:17 UTC 2019
Bob,
On Wed, 27 Mar 2019 at 13:36, Bob Peterson <rpeterso at redhat.com> wrote:
> Before this patch, gfs2 kept track of journal io errors in two
> places (sd_log_error and the SDF_AIL1_IO_ERROR flag in sd_flags.
> This patch consolidates the two by eliminating the SDF_AIL1_IO_ERROR
> flag in favor of an atomic count of journal errors, sd_log_errors.
> When the first io error occurs and its value is incremented to 1,
> sd_log_error is set. Thus, sd_log_error reflects the first error
> we encountered writing to the journal. In future patches, we will
> take advantage of this by checking a single value (sd_log_errors)
> rather than having to check both the flag and the sd_log_error
> when reacting to io errors.
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
> fs/gfs2/incore.h | 4 ++--
> fs/gfs2/log.c | 7 ++++---
> fs/gfs2/lops.c | 7 +++++--
> fs/gfs2/ops_fstype.c | 1 +
> fs/gfs2/quota.c | 6 ++++--
> 5 files changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index cdf07b408f54..76336b592030 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -620,7 +620,6 @@ enum {
> SDF_RORECOVERY = 7, /* read only recovery */
> SDF_SKIP_DLM_UNLOCK = 8,
> SDF_FORCE_AIL_FLUSH = 9,
> - SDF_AIL1_IO_ERROR = 10,
> };
>
> enum gfs2_freeze_state {
> @@ -829,7 +828,8 @@ struct gfs2_sbd {
> atomic_t sd_log_in_flight;
> struct bio *sd_log_bio;
> wait_queue_head_t sd_log_flush_wait;
> - int sd_log_error;
> + int sd_log_error; /* First log error */
> + atomic_t sd_log_errors; /* Count of log errors */
>
> atomic_t sd_reserving_log;
> wait_queue_head_t sd_reserving_log_wait;
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index b8830fda51e8..0717b4d4828b 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -109,8 +109,8 @@ __acquires(&sdp->sd_ail_lock)
>
> if (!buffer_busy(bh)) {
> if (!buffer_uptodate(bh) &&
> - !test_and_set_bit(SDF_AIL1_IO_ERROR,
> - &sdp->sd_flags)) {
> + atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> + sdp->sd_log_error = -EIO;
> gfs2_io_error_bh(sdp, bh);
> *withdraw = true;
> }
> @@ -209,7 +209,8 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
> if (buffer_busy(bh))
> continue;
> if (!buffer_uptodate(bh) &&
> - !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
> + atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> + sdp->sd_log_error = -EIO;
> gfs2_io_error_bh(sdp, bh);
> *withdraw = true;
> }
> diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> index 8722c60b11fe..627738cfe6bb 100644
> --- a/fs/gfs2/lops.c
> +++ b/fs/gfs2/lops.c
> @@ -211,8 +211,11 @@ static void gfs2_end_log_write(struct bio *bio)
> struct bvec_iter_all iter_all;
>
> if (bio->bi_status) {
> - fs_err(sdp, "Error %d writing to journal, jid=%u\n",
> - bio->bi_status, sdp->sd_jdesc->jd_jid);
> + if (atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> + sdp->sd_log_error = bio->bi_status;
> + fs_err(sdp, "Error %d writing to journal, jid=%u\n",
> + bio->bi_status, sdp->sd_jdesc->jd_jid);
> + }
> wake_up(&sdp->sd_logd_waitq);
> }
>
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index b041cb8ae383..77610be6c918 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -129,6 +129,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
>
> init_rwsem(&sdp->sd_log_flush_lock);
> atomic_set(&sdp->sd_log_in_flight, 0);
> + atomic_set(&sdp->sd_log_errors, 0);
> atomic_set(&sdp->sd_reserving_log, 0);
> init_waitqueue_head(&sdp->sd_reserving_log_wait);
> init_waitqueue_head(&sdp->sd_log_flush_wait);
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 2ae5a109eea7..009172ef4dfe 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1479,8 +1479,10 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
> if (error == 0 || error == -EROFS)
> return;
> if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
> - fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
> - sdp->sd_log_error = error;
> + if (atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> + sdp->sd_log_error = error;
> + fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
> + }
> wake_up(&sdp->sd_logd_waitq);
> }
> }
> --
> 2.20.1
>
this looks like the typical use case for cmpxchg, which won't require
an additional counter.
Andreas
More information about the Cluster-devel
mailing list