[Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure

Fri May 11 09:36:51 UTC 2012

Hi,

On Wed, 2012-05-09 at 12:22 -0400, Bob Peterson wrote:
> Hi,
> 
> This is a second attempt at this patch. Explanation is below.
> 
> The idea here is to extend the lifespan of the block reservations
> structure. This has three benefits: First, for performance, GFS2
> spends less time allocating and freeing memory for this purpose
> and can re-use the structure already attached to the inode.
> Second, it allows us to re-combine the quota structure in a
> to save the time we spend allocating and freeing that, in a future
> patch. Third, it sets us up for the forthcoming multi-block
> reservations patch.
> 
> There was an issue with the predecessor patch not doing any locking
> during the allocating and freeing of the reservations for an inode.
> This patch re-purposes the gfs2 i_rw_mutex (to save memory) to
> protect the allocations and deletes. I couldn't see any places where
> this could conflict or deadlock with existing uses of the mutex.
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems
> 
> Signed-off-by: Bob Peterson <rpeterso at redhat.com> 
> ---
> Author: Bob Peterson <rpeterso at redhat.com>
> Date:   Thu Apr 12 11:37:24 2012 -0500
> 
> GFS2: Extend the life of the reservations structure
> 
> This patch lengthens the lifespan of the reservations structure for
> inodes. Before, they were allocated and deallocated for every write
> operation. With this patch, they are allocated when the first write
> occurs, and deallocated when the last process closes the file.
> It's more efficient to do it this way because it saves GFS2 a lot of
> unnecessary allocates and frees. It also gives us more flexibility
> for the future: (1) we can now fold the qadata structure back into
> the structure and save those alloc/frees, (2) we can use this for
> multi-block reservations.
> 
> diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
> index e80a464..aba77b5 100644
> --- a/fs/gfs2/aops.c
> +++ b/fs/gfs2/aops.c
> @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
>  	brelse(dibh);
>  failed:
>  	gfs2_trans_end(sdp);
> -	if (ip->i_res)
> +	if (gfs2_mb_reserved(ip))
>  		gfs2_inplace_release(ip);
>  	if (qa) {
>  		gfs2_quota_unlock(ip);
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 31b199f..3790617 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	 */
>  	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
>  
> +	ret = gfs2_rs_alloc(ip);
> +	if (ret)
> +		return ret;
> +
>  	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
>  	ret = gfs2_glock_nq(&gh);
>  	if (ret)
> @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file)
>  {
>  	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
>  	struct gfs2_file *fp;
> +	struct gfs2_inode *ip = GFS2_I(inode);
>  
>  	fp = file->private_data;
>  	file->private_data = NULL;
>  
> +	if ((file->f_mode & FMODE_WRITE) && ip->i_res &&
> +	    (atomic_read(&inode->i_writecount) == 1))
> +		gfs2_rs_delete(ip);
> +
>  	if (gfs2_assert_warn(sdp, fp))
>  		return -EIO;
>  
> @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
>  				   unsigned long nr_segs, loff_t pos)
>  {
>  	struct file *file = iocb->ki_filp;
> +	struct dentry *dentry = file->f_dentry;
> +	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
> +	int ret;
> +
> +	ret = gfs2_rs_alloc(ip);
> +	if (ret)
> +		return ret;
>  
>  	if (file->f_flags & O_APPEND) {
> -		struct dentry *dentry = file->f_dentry;
> -		struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
>  		struct gfs2_holder gh;
> -		int ret;
>  
>  		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
>  		if (ret)
> @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
>  	if (bytes == 0)
>  		bytes = sdp->sd_sb.sb_bsize;
>  
> +	error = gfs2_rs_alloc(ip);
> +	if (error)
> +		return error;
> +
>  	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
>  	error = gfs2_glock_nq(&ip->i_gh);
>  	if (unlikely(error))
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index a9ba244..6a46417 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>  	if (!name->len || name->len > GFS2_FNAMESIZE)
>  		return -ENAMETOOLONG;
>  
> +	error = gfs2_rs_alloc(dip);
> +	if (error)
> +		return error;
> +
>  	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
>  	if (error)
>  		goto fail;
> @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>  	if (error)
>  		goto fail_gunlock2;
>  
> +	/* the new inode needs a reservation so it can allocate xattrs. */
> +	error = gfs2_rs_alloc(GFS2_I(inode));
> +	if (error)
> +		goto fail_gunlock2;
> +
>  	error = gfs2_acl_create(dip, inode);
>  	if (error)
>  		goto fail_gunlock2;
> @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>  	gfs2_trans_end(sdp);
>  	/* Check if we reserved space in the rgrp. Function link_dinode may
>  	   not, depending on whether alloc is required. */
> -	if (dip->i_res)
> +	if (gfs2_mb_reserved(dip))
>  		gfs2_inplace_release(dip);
>  	gfs2_quota_unlock(dip);
>  	gfs2_qadata_put(dip);
> @@ -740,6 +749,7 @@ fail_gunlock:
>  		iput(inode);
>  	}
>  fail:
> +	gfs2_rs_delete(dip);

Should we really be dropping the ip->i_res here I wonder? I'm not sure
that we want to forget this information just because (for example)
someone has tried to create a new inode and it has been failed for some
reason or other.

Otherwise I think this looks good,

Steve.