[Cluster-devel] [PATCH 01/19] gfs2: log error reform

Andreas Gruenbacher agruenba at redhat.com
Tue Apr 9 13:46:17 UTC 2019


Bob,

On Wed, 27 Mar 2019 at 13:36, Bob Peterson <rpeterso at redhat.com> wrote:
> Before this patch, gfs2 kept track of journal io errors in two
> places (sd_log_error and the SDF_AIL1_IO_ERROR flag in sd_flags.
> This patch consolidates the two by eliminating the SDF_AIL1_IO_ERROR
> flag in favor of an atomic count of journal errors, sd_log_errors.
> When the first io error occurs and its value is incremented to 1,
> sd_log_error is set. Thus, sd_log_error reflects the first error
> we encountered writing to the journal. In future patches, we will
> take advantage of this by checking a single value (sd_log_errors)
> rather than having to check both the flag and the sd_log_error
> when reacting to io errors.
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
>  fs/gfs2/incore.h     | 4 ++--
>  fs/gfs2/log.c        | 7 ++++---
>  fs/gfs2/lops.c       | 7 +++++--
>  fs/gfs2/ops_fstype.c | 1 +
>  fs/gfs2/quota.c      | 6 ++++--
>  5 files changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index cdf07b408f54..76336b592030 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -620,7 +620,6 @@ enum {
>         SDF_RORECOVERY          = 7, /* read only recovery */
>         SDF_SKIP_DLM_UNLOCK     = 8,
>         SDF_FORCE_AIL_FLUSH     = 9,
> -       SDF_AIL1_IO_ERROR       = 10,
>  };
>
>  enum gfs2_freeze_state {
> @@ -829,7 +828,8 @@ struct gfs2_sbd {
>         atomic_t sd_log_in_flight;
>         struct bio *sd_log_bio;
>         wait_queue_head_t sd_log_flush_wait;
> -       int sd_log_error;
> +       int sd_log_error; /* First log error */
> +       atomic_t sd_log_errors; /* Count of log errors */
>
>         atomic_t sd_reserving_log;
>         wait_queue_head_t sd_reserving_log_wait;
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index b8830fda51e8..0717b4d4828b 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -109,8 +109,8 @@ __acquires(&sdp->sd_ail_lock)
>
>                 if (!buffer_busy(bh)) {
>                         if (!buffer_uptodate(bh) &&
> -                           !test_and_set_bit(SDF_AIL1_IO_ERROR,
> -                                             &sdp->sd_flags)) {
> +                           atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> +                               sdp->sd_log_error = -EIO;
>                                 gfs2_io_error_bh(sdp, bh);
>                                 *withdraw = true;
>                         }
> @@ -209,7 +209,8 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
>                 if (buffer_busy(bh))
>                         continue;
>                 if (!buffer_uptodate(bh) &&
> -                   !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
> +                   atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> +                       sdp->sd_log_error = -EIO;
>                         gfs2_io_error_bh(sdp, bh);
>                         *withdraw = true;
>                 }
> diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> index 8722c60b11fe..627738cfe6bb 100644
> --- a/fs/gfs2/lops.c
> +++ b/fs/gfs2/lops.c
> @@ -211,8 +211,11 @@ static void gfs2_end_log_write(struct bio *bio)
>         struct bvec_iter_all iter_all;
>
>         if (bio->bi_status) {
> -               fs_err(sdp, "Error %d writing to journal, jid=%u\n",
> -                      bio->bi_status, sdp->sd_jdesc->jd_jid);
> +               if (atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> +                       sdp->sd_log_error = bio->bi_status;
> +                       fs_err(sdp, "Error %d writing to journal, jid=%u\n",
> +                              bio->bi_status, sdp->sd_jdesc->jd_jid);
> +               }
>                 wake_up(&sdp->sd_logd_waitq);
>         }
>
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index b041cb8ae383..77610be6c918 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -129,6 +129,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
>
>         init_rwsem(&sdp->sd_log_flush_lock);
>         atomic_set(&sdp->sd_log_in_flight, 0);
> +       atomic_set(&sdp->sd_log_errors, 0);
>         atomic_set(&sdp->sd_reserving_log, 0);
>         init_waitqueue_head(&sdp->sd_reserving_log_wait);
>         init_waitqueue_head(&sdp->sd_log_flush_wait);
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 2ae5a109eea7..009172ef4dfe 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1479,8 +1479,10 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
>         if (error == 0 || error == -EROFS)
>                 return;
>         if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
> -               fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
> -               sdp->sd_log_error = error;
> +               if (atomic_add_return(1, &sdp->sd_log_errors) == 1) {
> +                       sdp->sd_log_error = error;
> +                       fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
> +               }
>                 wake_up(&sdp->sd_logd_waitq);
>         }
>  }
> --
> 2.20.1
>

this looks like the typical use case for cmpxchg, which won't require
an additional counter.

Andreas




More information about the Cluster-devel mailing list