[dm-devel] [PATCH RFC] btrfs: sysfs: add <uuid>/debug/io_accounting/ directory
Qu Wenruo
quwenruo.btrfs at gmx.com
Fri Jan 21 09:20:24 UTC 2022
On 2022/1/21 17:14, Anand Jain wrote:
> On 21/01/2022 13:24, Qu Wenruo wrote:
>> [BACKGROUND]
>> There is a series of btrfs autodefrag bugs in v5.16, that would cause
>> way more IO than previous kernels.
>>
>> Unfortunately there isn't any test case covering autodefrag, nor there
>> is anyway to show the io accounting of a btrfs.
>>
>> [WORKAROUND]
>> I originally want to implement a dm target to do the io accounting for
>> all filesystems, but can not find a good enough interface (the status
>> interface has only 3 pre-defined workload).
>>
>> Thus I turned to btrfs specific io accounting first.
>> One thing specific to btrfs is its integrated volume management/RAID.
>>
>> Without proper profiles specification, default profile will cause
>> metadata IO to be accounted twice (DUP profile) and only data IO is
>> accounted correctly.
>>
>> So for btrfs this patch will introduce a new sysfs directory in
>> /sys/fs/btrfs/<uuid>/debug/io_accounting/
>>
>> And have the following files:
>>
>> - meta_read: Metadata bytes read
>> - meta_write: Metadata bytes written
>> - data_read: Data bytes read
>> - data_write: Data bytes written
>> (including both zoned append and regular write)
>>
>> And all these accounting is in logical address space, meaning profile
>> will not affect the values.
>>
>> All those values can be reset by simply "echo 0".
>>
>> Signed-off-by: Qu Wenruo <wqu at suse.com>
>> ---
>> Reason for RFC:
>>
>> - (To DM guys) Is there any good way to implement a dm target to do
>> the IO
>> accounting?
>> A more generic one can help more filesystems.
>>
>> - (To Btrfs guys) Is the sysfs interface fine?
>
> I am in for it. It can be a non debug feature IMO.
> More below.
>
>> ---
>> fs/btrfs/ctree.h | 11 +++++++
>> fs/btrfs/disk-io.c | 1 +
>> fs/btrfs/sysfs.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++
>> fs/btrfs/volumes.c | 24 +++++++++++++++
>> 4 files changed, 113 insertions(+)
>>
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index b4a9b1c58d22..3983bceaef7f 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -1035,10 +1035,21 @@ struct btrfs_fs_info {
>> #ifdef CONFIG_BTRFS_DEBUG
>> struct kobject *debug_kobj;
>> struct kobject *discard_debug_kobj;
>> + struct kobject *io_accounting_debug_kobj;
>> struct list_head allocated_roots;
>> spinlock_t eb_leak_lock;
>> struct list_head allocated_ebs;
>> +
>> + spinlock_t io_accounting_lock;
>> + /*
>> + * The IO accounting unit are all in byte, and are in logical
>> address
>> + * space, which is before the RAID/DUP mapping.
>> + */
>> + u64 meta_read;
>> + u64 meta_write;
>> + u64 data_read;
>> + u64 data_write;
>> #endif
>> };
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 87a5addbedf6..41b56fde6e97 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -3065,6 +3065,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info
>> *fs_info)
>> INIT_LIST_HEAD(&fs_info->allocated_roots);
>> INIT_LIST_HEAD(&fs_info->allocated_ebs);
>> spin_lock_init(&fs_info->eb_leak_lock);
>> + spin_lock_init(&fs_info->io_accounting_lock);
>> #endif
>> extent_map_tree_init(&fs_info->mapping_tree);
>> btrfs_init_block_rsv(&fs_info->global_block_rsv,
>> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
>> index beb7f72d50b8..dfdef93bdeab 100644
>> --- a/fs/btrfs/sysfs.c
>> +++ b/fs/btrfs/sysfs.c
>> @@ -40,6 +40,7 @@
>> * btrfs_debug_feature_attrs /sys/fs/btrfs/debug
>> * btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug
>> * discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard
>> + * io_accounting_debug_attrs
>> /sys/fs/btrfs/<uuid>/debug/io_accounting
>> */
>> struct btrfs_feature_attr {
>> @@ -616,6 +617,62 @@ static const struct attribute_group
>> btrfs_debug_feature_attr_group = {
>> .attrs = btrfs_debug_feature_attrs,
>> };
>> +/* IO accounting */
>> +#define io_accounting_to_fs_info(_kobj)
>> to_fs_info((_kobj)->parent->parent)
>> +
>> +#define DECLARE_IO_ACCOUNTING_OPS(name) \
>> +static ssize_t io_accounting_##name##_show(struct kobject *kobj, \
>> + struct kobj_attribute *a, \
>> + char *buf) \
>> +{ \
>> + struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj); \
>> + u64 result; \
>> + \
>> + spin_lock(&fs_info->io_accounting_lock); \
>> + result = fs_info->name; \
>> + spin_unlock(&fs_info->io_accounting_lock); \
>> + return sysfs_emit(buf, "%llu\n", result); \
>> +} \
>> +static ssize_t io_accounting_##name##_store(struct kobject *kobj, \
>> + struct kobj_attribute *a, \
>> + const char *buf, \
>> + size_t count) \
>> +{ \
>> + struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj); \
>> + u64 value; \
>> + int ret; \
>> + \
>> + ret = kstrtoull(skip_spaces(buf), 0, &value); \
>> + if (ret) \
>> + return ret; \
>> + spin_lock(&fs_info->io_accounting_lock); \
>> + fs_info->name = value; \
>> + spin_unlock(&fs_info->io_accounting_lock); \
>> + return count; \
>> +}
>> +
>> +DECLARE_IO_ACCOUNTING_OPS(meta_read);
>> +DECLARE_IO_ACCOUNTING_OPS(meta_write);
>> +DECLARE_IO_ACCOUNTING_OPS(data_read);
>> +DECLARE_IO_ACCOUNTING_OPS(data_write);
>> +
>> +BTRFS_ATTR_RW(io_accounting, meta_read, io_accounting_meta_read_show,
>> + io_accounting_meta_read_store);
>> +BTRFS_ATTR_RW(io_accounting, meta_write, io_accounting_meta_write_show,
>> + io_accounting_meta_write_store);
>> +BTRFS_ATTR_RW(io_accounting, data_read, io_accounting_data_read_show,
>> + io_accounting_data_read_store);
>> +BTRFS_ATTR_RW(io_accounting, data_write, io_accounting_data_write_show,
>> + io_accounting_data_write_store);
>> +
>> +static const struct attribute *io_accounting_debug_attrs[] = {
>> + BTRFS_ATTR_PTR(io_accounting, meta_read),
>> + BTRFS_ATTR_PTR(io_accounting, meta_write),
>> + BTRFS_ATTR_PTR(io_accounting, data_read),
>> + BTRFS_ATTR_PTR(io_accounting, data_write),
>> + NULL,
>> +};
>> +
>> #endif
>> static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char
>> *buf)
>> @@ -1219,6 +1276,12 @@ void btrfs_sysfs_remove_mounted(struct
>> btrfs_fs_info *fs_info)
>> kobject_del(fs_info->discard_debug_kobj);
>> kobject_put(fs_info->discard_debug_kobj);
>> }
>> + if (fs_info->io_accounting_debug_kobj) {
>> + sysfs_remove_files(fs_info->io_accounting_debug_kobj,
>> + io_accounting_debug_attrs);
>> + kobject_del(fs_info->io_accounting_debug_kobj);
>> + kobject_put(fs_info->io_accounting_debug_kobj);
>> + }
>> if (fs_info->debug_kobj) {
>> sysfs_remove_files(fs_info->debug_kobj,
>> btrfs_debug_mount_attrs);
>> kobject_del(fs_info->debug_kobj);
>> @@ -1804,6 +1867,20 @@ int btrfs_sysfs_add_mounted(struct
>> btrfs_fs_info *fs_info)
>> discard_debug_attrs);
>> if (error)
>> goto failure;
>> +
>> + /* io_accounting directory */
>> + fs_info->io_accounting_debug_kobj =
>> + kobject_create_and_add("io_accounting",fs_info->debug_kobj);
>> + if (!fs_info->io_accounting_debug_kobj) {
>> + error = -ENOMEM;
>> + goto failure;
>> + }
>> +
>> + error = sysfs_create_files(fs_info->io_accounting_debug_kobj,
>> + io_accounting_debug_attrs);
>> + if (error)
>> + goto failure;
>> +
>> #endif
>> error = addrm_unknown_feature_attrs(fs_info, true);
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 2332e3026efa..58f2ec0a611a 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -6763,6 +6763,29 @@ static void bioc_error(struct btrfs_io_context
>> *bioc, struct bio *bio, u64 logic
>> }
>> }
>> +static void update_io_accounting(struct btrfs_fs_info *fs_info,
>> struct bio *bio)
>> +{
>> + u32 length = bio->bi_iter.bi_size;
>> + bool metadata = bio->bi_opf & REQ_META;
>> +
>> +#ifdef CONFIG_BTRFS_DEBUG
>> + spin_lock(&fs_info->io_accounting_lock);
>> + if (bio_op(bio) == REQ_OP_READ) {
>> + if (metadata)
>> + fs_info->meta_read += length;
>> + else
>> + fs_info->data_read += length;
>> + } else {
>> + if (metadata)
>> + fs_info->meta_write += length;
>> + else
>> + fs_info->data_write += length;
>> + }
>> + spin_unlock(&fs_info->io_accounting_lock);
>> +#endif
>> + return;
>> +}
>> +
>> blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio
>> *bio,
>> int mirror_num)
>> {
>> @@ -6776,6 +6799,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info
>> *fs_info, struct bio *bio,
> >
>> int total_devs;
>> struct btrfs_io_context *bioc = NULL;
>> + update_io_accounting(fs_info, bio);
>
> Generally, speaking accounting should go at the end of IO.
> Other than that looks good.
That is a little more complex than this solution.
At endio time, we no longer have reliable bi_iter->bi_size.
And if we go bio_for_each_segment(), it will go conflicts with recent
bio split patchset.
Thus I prefer to do the accounting before the IO.
>
> Also, need a way to reset the values.
Check io_accounting_##name##_store(), which will reset the value to the
provided one.
Normally we only need to "echo 0" to reset.
Thanks,
Qu
>
> Thanks, Anand
>
>> length = bio->bi_iter.bi_size;
>> map_length = length;
>
More information about the dm-devel
mailing list