[Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap

Steven Whitehouse swhiteho at redhat.com
Thu Aug 11 15:26:04 UTC 2016


Hi,

On 11/08/16 15:59, Bob Peterson wrote:
> Hi,
>
> This patch replaces the GFS2 fiemap implementation that used vfs
> function __generic_block_fiemap with a new implementation that uses
> the new iomap-based fiemap interface. This allows GFS2's fiemap to
> skip holes. The time to do filefrag on a file with a 1 petabyte hole
> is reduced from several days or weeks, to milliseconds. Note that
> there are Kconfig changes that affect everyone.
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> ---
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 2bc7ad7..d601aeb 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -11,7 +11,7 @@ config DCACHE_WORD_ACCESS
>   if BLOCK
>   
>   config FS_IOMAP
> -	bool
> +	bool "File System IOMAP Support"
>   
>   source "fs/ext2/Kconfig"
>   source "fs/ext4/Kconfig"
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index 90c6a8f..f8fa955 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -25,6 +25,7 @@ config GFS2_FS
>   config GFS2_FS_LOCKING_DLM
>   	bool "GFS2 DLM locking"
>   	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		FS_IOMAP && \
>   		CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
>   	help
>   	  Multiple node locking module for GFS2
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 6e2bec1..685f1ed 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -588,6 +588,155 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
>   }
>   
>   /**
> + * hole_size - figure out the size of a hole
> + * @inode: The inode
> + * @lblock: The logical starting block number
> + * @mp: The metapath
> + *
> + * Returns: The hole size in bytes
> + *
> + */
> +static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_sbd *sdp = GFS2_SB(inode);
> +	struct metapath mp_eof;
> +	unsigned int end_of_metadata = ip->i_height - 1;
> +	u64 factor = 1;
> +	int hgt = end_of_metadata;
> +	u64 holesz = 0, holestep;
> +	const __be64 *first, *end, *ptr;
> +	const struct buffer_head *bh;
> +	u64 isize = i_size_read(inode);
> +	int zeroptrs;
> +	bool done = false;
> +
> +	/* Get another metapath, to the very last byte */
> +	find_metapath(sdp, (isize - 1) >> inode->i_blkbits, &mp_eof,
> +		      ip->i_height);
> +	for (hgt = end_of_metadata; hgt >= 0 && !done; hgt--) {
> +		bh = mp->mp_bh[hgt];
> +		if (bh) {
> +			zeroptrs = 0;
> +			first = metapointer(hgt, mp);
> +			end = (const __be64 *)(bh->b_data + bh->b_size);
> +
> +			for (ptr = first; ptr < end; ptr++) {
> +				if (*ptr) {
> +					done = true;
> +					break;
> +				} else {
> +					zeroptrs++;
> +				}
> +			}
> +		} else {
> +			zeroptrs = sdp->sd_inptrs;
> +		}
> +		holestep = min(factor * zeroptrs,
> +			       isize - (lblock + (zeroptrs * holesz)));
> +		holesz += holestep;
> +		if (lblock + holesz >= isize)
> +			return holesz << inode->i_blkbits;
> +
> +		factor *= sdp->sd_inptrs;
> +		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
> +			(mp->mp_list[hgt - 1])++;
> +	}
> +	return holesz << inode->i_blkbits;
> +}
> +
> +/**
> + * gfs2_get_iomap - Map blocks from an inode to disk blocks
> + * @inode: The inode
> + * @pos: Starting position in bytes
> + * @length: Length to map, in bytes
> + * @iomap: The iomap structure
> + *
> + * Returns: errno
> + */
> +
> +int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
> +		   struct iomap *iomap)
I think this function should be merged with gfs2_block_map(), so that
gfs2_block_map() just becomes a wrapper (which will eventually go away)
for this function. Otherwise we will have two parallel but slightly
different implementations of block mapping to maintain.

[snip]
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index e4da0ec..0d705ef 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -17,7 +17,7 @@
>   #include <linux/posix_acl.h>
>   #include <linux/gfs2_ondisk.h>
>   #include <linux/crc32.h>
> -#include <linux/fiemap.h>
> +#include <linux/iomap.h>
>   #include <linux/security.h>
>   #include <asm/uaccess.h>
>   
> @@ -1990,28 +1990,65 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
>   	return 0;
>   }
>   
> +static int gfs2_iomap_fiemap_begin(struct inode *inode, loff_t offset,
> +				   loff_t length, unsigned flags,
> +				   struct iomap *iomap)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_holder *gh;
> +	int ret;
> +
> +	gh = kzalloc(sizeof(struct gfs2_holder), GFP_NOFS);
> +	if (!gh)
> +		return -ENOMEM;
> +
> +	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, gh);
> +	if (ret) {
> +		kfree(gh);
> +		return ret;
> +	}
> +	ret = gfs2_get_iomap(inode, offset, length, iomap);
> +	if (ret)
> +		gfs2_glock_dq_uninit(gh);
> +	return ret;
> +}
> +
> +static int gfs2_iomap_fiemap_end(struct inode *inode, loff_t offset,
> +				 loff_t length, ssize_t written,
> +				 unsigned flags, struct iomap *iomap)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_holder *gh;
> +
> +	gh = gfs2_glock_is_locked_by_me(ip->i_gl);
Is there a better way to pass the gh from the begin function to the end
function? I'm sure the current approach will work, but it is the kind of
thing that might trip up the unwary in the future.

Otherwise I think it looks good,

Steve.




More information about the Cluster-devel mailing list