[dm-devel] [PATCH RFC] btrfs: sysfs: add <uuid>/debug/io_accounting/ directory

Anand Jain anand.jain at oracle.com
Fri Jan 21 09:14:53 UTC 2022


On 21/01/2022 13:24, Qu Wenruo wrote:
> [BACKGROUND]
> There is a series of btrfs autodefrag bugs in v5.16, that would cause
> way more IO than previous kernels.
> 
> Unfortunately there isn't any test case covering autodefrag, nor there
> is anyway to show the io accounting of a btrfs.
> 
> [WORKAROUND]
> I originally want to implement a dm target to do the io accounting for
> all filesystems, but can not find a good enough interface (the status
> interface has only 3 pre-defined workload).
> 
> Thus I turned to btrfs specific io accounting first.
> One thing specific to btrfs is its integrated volume management/RAID.
> 
> Without proper profiles specification, default profile will cause
> metadata IO to be accounted twice (DUP profile) and only data IO is
> accounted correctly.
> 
> So for btrfs this patch will introduce a new sysfs directory in
> /sys/fs/btrfs/<uuid>/debug/io_accounting/
> 
> And have the following files:
> 
> - meta_read:	Metadata bytes read
> - meta_write:	Metadata bytes written
> - data_read:	Data bytes read
> - data_write:	Data bytes written
> 		(including both zoned append and regular write)
> 
> And all these accounting is in logical address space, meaning profile
> will not affect the values.
> 
> All those values can be reset by simply "echo 0".
> 
> Signed-off-by: Qu Wenruo <wqu at suse.com>
> ---
> Reason for RFC:
> 
> - (To DM guys) Is there any good way to implement a dm target to do the IO
>    accounting?
>    A more generic one can help more filesystems.
> 
> - (To Btrfs guys) Is the sysfs interface fine?

  I am in for it. It can be a non debug feature IMO.
  More below.

> ---
>   fs/btrfs/ctree.h   | 11 +++++++
>   fs/btrfs/disk-io.c |  1 +
>   fs/btrfs/sysfs.c   | 77 ++++++++++++++++++++++++++++++++++++++++++++++
>   fs/btrfs/volumes.c | 24 +++++++++++++++
>   4 files changed, 113 insertions(+)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index b4a9b1c58d22..3983bceaef7f 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1035,10 +1035,21 @@ struct btrfs_fs_info {
>   #ifdef CONFIG_BTRFS_DEBUG
>   	struct kobject *debug_kobj;
>   	struct kobject *discard_debug_kobj;
> +	struct kobject *io_accounting_debug_kobj;
>   	struct list_head allocated_roots;
>   
>   	spinlock_t eb_leak_lock;
>   	struct list_head allocated_ebs;
> +
> +	spinlock_t io_accounting_lock;
> +	/*
> +	 * The IO accounting unit are all in byte, and are in logical address
> +	 * space, which is before the RAID/DUP mapping.
> +	 */
> +	u64 meta_read;
> +	u64 meta_write;
> +	u64 data_read;
> +	u64 data_write;
>   #endif
>   };
>   
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 87a5addbedf6..41b56fde6e97 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3065,6 +3065,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
>   	INIT_LIST_HEAD(&fs_info->allocated_roots);
>   	INIT_LIST_HEAD(&fs_info->allocated_ebs);
>   	spin_lock_init(&fs_info->eb_leak_lock);
> +	spin_lock_init(&fs_info->io_accounting_lock);
>   #endif
>   	extent_map_tree_init(&fs_info->mapping_tree);
>   	btrfs_init_block_rsv(&fs_info->global_block_rsv,
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index beb7f72d50b8..dfdef93bdeab 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -40,6 +40,7 @@
>    * btrfs_debug_feature_attrs		/sys/fs/btrfs/debug
>    * btrfs_debug_mount_attrs		/sys/fs/btrfs/<uuid>/debug
>    * discard_debug_attrs			/sys/fs/btrfs/<uuid>/debug/discard
> + * io_accounting_debug_attrs		/sys/fs/btrfs/<uuid>/debug/io_accounting
>    */
>   
>   struct btrfs_feature_attr {
> @@ -616,6 +617,62 @@ static const struct attribute_group btrfs_debug_feature_attr_group = {
>   	.attrs = btrfs_debug_feature_attrs,
>   };
>   
> +/* IO accounting */
> +#define io_accounting_to_fs_info(_kobj)	to_fs_info((_kobj)->parent->parent)
> +
> +#define DECLARE_IO_ACCOUNTING_OPS(name)					\
> +static ssize_t io_accounting_##name##_show(struct kobject *kobj,	\
> +					   struct kobj_attribute *a,	\
> +					   char *buf)			\
> +{									\
> +	struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj);	\
> +	u64 result;							\
> +									\
> +	spin_lock(&fs_info->io_accounting_lock);			\
> +	result = fs_info->name;						\
> +	spin_unlock(&fs_info->io_accounting_lock);			\
> +	return sysfs_emit(buf, "%llu\n", result);			\
> +}									\
> +static ssize_t io_accounting_##name##_store(struct kobject *kobj,	\
> +					    struct kobj_attribute *a,	\
> +					    const char *buf,		\
> +					    size_t count) 		\
> +{									\
> +	struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj);	\
> +	u64 value;							\
> +	int ret;							\
> +									\
> +	ret = kstrtoull(skip_spaces(buf), 0, &value);			\
> +	if (ret)							\
> +		return ret;						\
> +	spin_lock(&fs_info->io_accounting_lock);			\
> +	fs_info->name = value;						\
> +	spin_unlock(&fs_info->io_accounting_lock);			\
> +	return count;							\
> +}
> +
> +DECLARE_IO_ACCOUNTING_OPS(meta_read);
> +DECLARE_IO_ACCOUNTING_OPS(meta_write);
> +DECLARE_IO_ACCOUNTING_OPS(data_read);
> +DECLARE_IO_ACCOUNTING_OPS(data_write);
> +
> +BTRFS_ATTR_RW(io_accounting, meta_read, io_accounting_meta_read_show,
> +	      io_accounting_meta_read_store);
> +BTRFS_ATTR_RW(io_accounting, meta_write, io_accounting_meta_write_show,
> +	      io_accounting_meta_write_store);
> +BTRFS_ATTR_RW(io_accounting, data_read, io_accounting_data_read_show,
> +	      io_accounting_data_read_store);
> +BTRFS_ATTR_RW(io_accounting, data_write, io_accounting_data_write_show,
> +	      io_accounting_data_write_store);
> +
> +static const struct attribute *io_accounting_debug_attrs[] = {
> +	BTRFS_ATTR_PTR(io_accounting, meta_read),
> +	BTRFS_ATTR_PTR(io_accounting, meta_write),
> +	BTRFS_ATTR_PTR(io_accounting, data_read),
> +	BTRFS_ATTR_PTR(io_accounting, data_write),
> +	NULL,
> +};
> +
>   #endif
>   
>   static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
> @@ -1219,6 +1276,12 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
>   		kobject_del(fs_info->discard_debug_kobj);
>   		kobject_put(fs_info->discard_debug_kobj);
>   	}
> +	if (fs_info->io_accounting_debug_kobj) {
> +		sysfs_remove_files(fs_info->io_accounting_debug_kobj,
> +				   io_accounting_debug_attrs);
> +		kobject_del(fs_info->io_accounting_debug_kobj);
> +		kobject_put(fs_info->io_accounting_debug_kobj);
> +	}
>   	if (fs_info->debug_kobj) {
>   		sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
>   		kobject_del(fs_info->debug_kobj);
> @@ -1804,6 +1867,20 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
>   				   discard_debug_attrs);
>   	if (error)
>   		goto failure;
> +
> +	/* io_accounting directory */
> +	fs_info->io_accounting_debug_kobj =
> +		kobject_create_and_add("io_accounting",fs_info->debug_kobj);
> +	if (!fs_info->io_accounting_debug_kobj) {
> +		error = -ENOMEM;
> +		goto failure;
> +	}
> +
> +	error = sysfs_create_files(fs_info->io_accounting_debug_kobj,
> +				   io_accounting_debug_attrs);
> +	if (error)
> +		goto failure;
> +
>   #endif
>   
>   	error = addrm_unknown_feature_attrs(fs_info, true);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2332e3026efa..58f2ec0a611a 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6763,6 +6763,29 @@ static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logic
>   	}
>   }
>   
> +static void update_io_accounting(struct btrfs_fs_info *fs_info, struct bio *bio)
> +{
> +	u32 length = bio->bi_iter.bi_size;
> +	bool metadata = bio->bi_opf & REQ_META;
> +
> +#ifdef	CONFIG_BTRFS_DEBUG
> +	spin_lock(&fs_info->io_accounting_lock);
> +	if (bio_op(bio) == REQ_OP_READ) {
> +		if (metadata)
> +			fs_info->meta_read += length;
> +		else
> +			fs_info->data_read += length;
> +	} else {
> +		if (metadata)
> +			fs_info->meta_write += length;
> +		else
> +			fs_info->data_write += length;
> +	}
> +	spin_unlock(&fs_info->io_accounting_lock);
> +#endif
> +	return;
> +}
> +
>   blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   			   int mirror_num)
>   {
> @@ -6776,6 +6799,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 >
>   	int total_devs;
>   	struct btrfs_io_context *bioc = NULL;
>   
> +	update_io_accounting(fs_info, bio);

  Generally, speaking accounting should go at the end of IO.
  Other than that looks good.

  Also, need a way to reset the values.

Thanks, Anand

>   	length = bio->bi_iter.bi_size;
>   	map_length = length;
>   




More information about the dm-devel mailing list