[dm-devel] [PATCH RFC] btrfs: sysfs: add <uuid>/debug/io_accounting/ directory
Anand Jain
anand.jain at oracle.com
Fri Jan 21 09:14:53 UTC 2022
On 21/01/2022 13:24, Qu Wenruo wrote:
> [BACKGROUND]
> There is a series of btrfs autodefrag bugs in v5.16, that would cause
> way more IO than previous kernels.
>
> Unfortunately there isn't any test case covering autodefrag, nor there
> is anyway to show the io accounting of a btrfs.
>
> [WORKAROUND]
> I originally want to implement a dm target to do the io accounting for
> all filesystems, but can not find a good enough interface (the status
> interface has only 3 pre-defined workload).
>
> Thus I turned to btrfs specific io accounting first.
> One thing specific to btrfs is its integrated volume management/RAID.
>
> Without proper profiles specification, default profile will cause
> metadata IO to be accounted twice (DUP profile) and only data IO is
> accounted correctly.
>
> So for btrfs this patch will introduce a new sysfs directory in
> /sys/fs/btrfs/<uuid>/debug/io_accounting/
>
> And have the following files:
>
> - meta_read: Metadata bytes read
> - meta_write: Metadata bytes written
> - data_read: Data bytes read
> - data_write: Data bytes written
> (including both zoned append and regular write)
>
> And all these accounting is in logical address space, meaning profile
> will not affect the values.
>
> All those values can be reset by simply "echo 0".
>
> Signed-off-by: Qu Wenruo <wqu at suse.com>
> ---
> Reason for RFC:
>
> - (To DM guys) Is there any good way to implement a dm target to do the IO
> accounting?
> A more generic one can help more filesystems.
>
> - (To Btrfs guys) Is the sysfs interface fine?
I am in for it. It can be a non debug feature IMO.
More below.
> ---
> fs/btrfs/ctree.h | 11 +++++++
> fs/btrfs/disk-io.c | 1 +
> fs/btrfs/sysfs.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/volumes.c | 24 +++++++++++++++
> 4 files changed, 113 insertions(+)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index b4a9b1c58d22..3983bceaef7f 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1035,10 +1035,21 @@ struct btrfs_fs_info {
> #ifdef CONFIG_BTRFS_DEBUG
> struct kobject *debug_kobj;
> struct kobject *discard_debug_kobj;
> + struct kobject *io_accounting_debug_kobj;
> struct list_head allocated_roots;
>
> spinlock_t eb_leak_lock;
> struct list_head allocated_ebs;
> +
> + spinlock_t io_accounting_lock;
> + /*
> + * The IO accounting unit are all in byte, and are in logical address
> + * space, which is before the RAID/DUP mapping.
> + */
> + u64 meta_read;
> + u64 meta_write;
> + u64 data_read;
> + u64 data_write;
> #endif
> };
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 87a5addbedf6..41b56fde6e97 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3065,6 +3065,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
> INIT_LIST_HEAD(&fs_info->allocated_roots);
> INIT_LIST_HEAD(&fs_info->allocated_ebs);
> spin_lock_init(&fs_info->eb_leak_lock);
> + spin_lock_init(&fs_info->io_accounting_lock);
> #endif
> extent_map_tree_init(&fs_info->mapping_tree);
> btrfs_init_block_rsv(&fs_info->global_block_rsv,
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index beb7f72d50b8..dfdef93bdeab 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -40,6 +40,7 @@
> * btrfs_debug_feature_attrs /sys/fs/btrfs/debug
> * btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug
> * discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard
> + * io_accounting_debug_attrs /sys/fs/btrfs/<uuid>/debug/io_accounting
> */
>
> struct btrfs_feature_attr {
> @@ -616,6 +617,62 @@ static const struct attribute_group btrfs_debug_feature_attr_group = {
> .attrs = btrfs_debug_feature_attrs,
> };
>
> +/* IO accounting */
> +#define io_accounting_to_fs_info(_kobj) to_fs_info((_kobj)->parent->parent)
> +
> +#define DECLARE_IO_ACCOUNTING_OPS(name) \
> +static ssize_t io_accounting_##name##_show(struct kobject *kobj, \
> + struct kobj_attribute *a, \
> + char *buf) \
> +{ \
> + struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj); \
> + u64 result; \
> + \
> + spin_lock(&fs_info->io_accounting_lock); \
> + result = fs_info->name; \
> + spin_unlock(&fs_info->io_accounting_lock); \
> + return sysfs_emit(buf, "%llu\n", result); \
> +} \
> +static ssize_t io_accounting_##name##_store(struct kobject *kobj, \
> + struct kobj_attribute *a, \
> + const char *buf, \
> + size_t count) \
> +{ \
> + struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj); \
> + u64 value; \
> + int ret; \
> + \
> + ret = kstrtoull(skip_spaces(buf), 0, &value); \
> + if (ret) \
> + return ret; \
> + spin_lock(&fs_info->io_accounting_lock); \
> + fs_info->name = value; \
> + spin_unlock(&fs_info->io_accounting_lock); \
> + return count; \
> +}
> +
> +DECLARE_IO_ACCOUNTING_OPS(meta_read);
> +DECLARE_IO_ACCOUNTING_OPS(meta_write);
> +DECLARE_IO_ACCOUNTING_OPS(data_read);
> +DECLARE_IO_ACCOUNTING_OPS(data_write);
> +
> +BTRFS_ATTR_RW(io_accounting, meta_read, io_accounting_meta_read_show,
> + io_accounting_meta_read_store);
> +BTRFS_ATTR_RW(io_accounting, meta_write, io_accounting_meta_write_show,
> + io_accounting_meta_write_store);
> +BTRFS_ATTR_RW(io_accounting, data_read, io_accounting_data_read_show,
> + io_accounting_data_read_store);
> +BTRFS_ATTR_RW(io_accounting, data_write, io_accounting_data_write_show,
> + io_accounting_data_write_store);
> +
> +static const struct attribute *io_accounting_debug_attrs[] = {
> + BTRFS_ATTR_PTR(io_accounting, meta_read),
> + BTRFS_ATTR_PTR(io_accounting, meta_write),
> + BTRFS_ATTR_PTR(io_accounting, data_read),
> + BTRFS_ATTR_PTR(io_accounting, data_write),
> + NULL,
> +};
> +
> #endif
>
> static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
> @@ -1219,6 +1276,12 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
> kobject_del(fs_info->discard_debug_kobj);
> kobject_put(fs_info->discard_debug_kobj);
> }
> + if (fs_info->io_accounting_debug_kobj) {
> + sysfs_remove_files(fs_info->io_accounting_debug_kobj,
> + io_accounting_debug_attrs);
> + kobject_del(fs_info->io_accounting_debug_kobj);
> + kobject_put(fs_info->io_accounting_debug_kobj);
> + }
> if (fs_info->debug_kobj) {
> sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
> kobject_del(fs_info->debug_kobj);
> @@ -1804,6 +1867,20 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
> discard_debug_attrs);
> if (error)
> goto failure;
> +
> + /* io_accounting directory */
> + fs_info->io_accounting_debug_kobj =
> + kobject_create_and_add("io_accounting",fs_info->debug_kobj);
> + if (!fs_info->io_accounting_debug_kobj) {
> + error = -ENOMEM;
> + goto failure;
> + }
> +
> + error = sysfs_create_files(fs_info->io_accounting_debug_kobj,
> + io_accounting_debug_attrs);
> + if (error)
> + goto failure;
> +
> #endif
>
> error = addrm_unknown_feature_attrs(fs_info, true);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2332e3026efa..58f2ec0a611a 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6763,6 +6763,29 @@ static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logic
> }
> }
>
> +static void update_io_accounting(struct btrfs_fs_info *fs_info, struct bio *bio)
> +{
> + u32 length = bio->bi_iter.bi_size;
> + bool metadata = bio->bi_opf & REQ_META;
> +
> +#ifdef CONFIG_BTRFS_DEBUG
> + spin_lock(&fs_info->io_accounting_lock);
> + if (bio_op(bio) == REQ_OP_READ) {
> + if (metadata)
> + fs_info->meta_read += length;
> + else
> + fs_info->data_read += length;
> + } else {
> + if (metadata)
> + fs_info->meta_write += length;
> + else
> + fs_info->data_write += length;
> + }
> + spin_unlock(&fs_info->io_accounting_lock);
> +#endif
> + return;
> +}
> +
> blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
> int mirror_num)
> {
> @@ -6776,6 +6799,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>
> int total_devs;
> struct btrfs_io_context *bioc = NULL;
>
> + update_io_accounting(fs_info, bio);
Generally, speaking accounting should go at the end of IO.
Other than that looks good.
Also, need a way to reset the values.
Thanks, Anand
> length = bio->bi_iter.bi_size;
> map_length = length;
>
More information about the dm-devel
mailing list