[Cluster-devel] [PATCH] [GFS2] bz288581 alternate gfs2_iget to avoid looking up inodes being freed

Steven Whitehouse swhiteho at redhat.com
Wed Sep 19 12:22:50 UTC 2007


Hi,

Now in the -nmw git tree. Thanks,

Steve.

On Tue, 2007-09-18 at 13:33 -0500, Benjamin Marzinski wrote:
> There is a possible deadlock between two processes on the same node, where one
> process is deleting an inode, and another process is looking for allocated but
> unused inodes to delete in order to create more space.
> 
> process A does an iput() on inode X, and it's i_count drops to 0. This causes
> iput_final() to be called, which puts an inode into state I_FREEING at
> generic_delete_inode(). There no point between when iput_final() is called, and
> when I_FREEING is set where GFS2 could acquire any glocks. Once I_FREEING is
> set, no other process on that node can successfully look up that inode until
> the delete finishes.
> 
> process B locks the the resource group for the same inode in get_local_rgrp(),
> which is called by gfs2_inplace_reserve_i()
> 
> process A tries to lock the resource group for the inode in
> gfs2_dinode_dealloc(), but it's already locked by process B
> 
> process B waits in find_inode for the inode to have the I_FREEING state cleared.
> 
> Deadlock.
> 
> This patch solves the problem by adding an alternative to gfs2_iget(),
> gfs2_iget_skip(), that simply skips any inodes that are in the I_FREEING
> state.o The alternate test function is just like the original one, except that
> it fails if the inode is being freed, and sets a skipped flag. The alternate
> set function is just like the original, except that it fails if the skipped
> flag is set. Only try_rgrp_unlink() calls gfs2_iget_skip() instead of
> gfs2_iget().
> 
> Signed-off-by: Benjamin E. Marzinski <bmarzins at redhat.com>
> plain text document attachment (upstream_288581.patch)
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/dir.c gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c
> --- gfs2-2.6-nmw/fs/gfs2/dir.c	2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c	2007-09-18 11:42:04.000000000 -0500
> @@ -1502,7 +1502,7 @@ struct inode *gfs2_dir_search(struct ino
>  		inode = gfs2_inode_lookup(dir->i_sb, 
>  				be16_to_cpu(dent->de_type),
>  				be64_to_cpu(dent->de_inum.no_addr),
> -				be64_to_cpu(dent->de_inum.no_formal_ino));
> +				be64_to_cpu(dent->de_inum.no_formal_ino), 0);
>  		brelse(bh);
>  		return inode;
>  	}
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.c gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c
> --- gfs2-2.6-nmw/fs/gfs2/inode.c	2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c	2007-09-18 11:42:04.000000000 -0500
> @@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct su
>  	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
>  }
>  
> +struct gfs2_skip_data {
> +	u64	no_addr;
> +	int	skipped;
> +};
> +
> +static int iget_skip_test(struct inode *inode, void *opaque)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_skip_data *data = opaque;
> +
> +	if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){
> +		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
> +			data->skipped = 1;
> +			return 0;
> +		}
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static int iget_skip_set(struct inode *inode, void *opaque)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_skip_data *data = opaque;
> +
> +	if (data->skipped)
> +		return 1;
> +	inode->i_ino = (unsigned long)(data->no_addr);
> +	ip->i_no_addr = data->no_addr;
> +	return 0;
> +}
> +
> +static struct inode *gfs2_iget_skip(struct super_block *sb,
> +				    u64 no_addr)
> +{
> +	struct gfs2_skip_data data;
> +	unsigned long hash = (unsigned long)no_addr;
> +
> +	data.no_addr = no_addr;
> +	data.skipped = 0;
> +	return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
> +}
> +
>  /**
>   * GFS2 lookup code fills in vfs inode contents based on info obtained
>   * from directory entry inside gfs2_inode_lookup(). This has caused issues
> @@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
>   * @sb: The super block
>   * @no_addr: The inode number
>   * @type: The type of the inode
> + * @skip_freeing: set this not return an inode if it is currently being freed.
>   *
>   * Returns: A VFS inode, or an error
>   */
> @@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
>  struct inode *gfs2_inode_lookup(struct super_block *sb, 
>  				unsigned int type,
>  				u64 no_addr,
> -				u64 no_formal_ino)
> +				u64 no_formal_ino, int skip_freeing)
>  {
> -	struct inode *inode = gfs2_iget(sb, no_addr);
> -	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct inode *inode;
> +	struct gfs2_inode *ip;
>  	struct gfs2_glock *io_gl;
>  	int error;
>  
> +	if (skip_freeing)
> +		inode = gfs2_iget_skip(sb, no_addr);
> +	else
> +		inode = gfs2_iget(sb, no_addr);
> +	ip = GFS2_I(inode);
> +
>  	if (!inode)
>  		return ERR_PTR(-ENOBUFS);
>  
> @@ -949,7 +999,7 @@ struct inode *gfs2_createi(struct gfs2_h
>  
>  	inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
>  					inum.no_addr,
> -					inum.no_formal_ino);
> +					inum.no_formal_ino, 0);
>  	if (IS_ERR(inode))
>  		goto fail_gunlock2;
>  
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.h gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h
> --- gfs2-2.6-nmw/fs/gfs2/inode.h	2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h	2007-09-18 13:03:27.000000000 -0500
> @@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const s
>  void gfs2_inode_attr_in(struct gfs2_inode *ip);
>  void gfs2_set_iop(struct inode *inode);
>  struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
> -				u64 no_addr, u64 no_formal_ino);
> +				u64 no_addr, u64 no_formal_ino,
> +				int skip_freeing);
>  struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
>  
>  int gfs2_inode_refresh(struct gfs2_inode *ip);
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_export.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c
> --- gfs2-2.6-nmw/fs/gfs2/ops_export.c	2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c	2007-09-18 11:42:04.000000000 -0500
> @@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(st
>  
>  	inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
>  					inum->no_addr,
> -					0);
> +					0, 0);
>  	if (!inode)
>  		goto fail;
>  	if (IS_ERR(inode)) {
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_fstype.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c
> --- gfs2-2.6-nmw/fs/gfs2/ops_fstype.c	2007-09-18 11:24:03.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c	2007-09-18 11:42:04.000000000 -0500
> @@ -230,7 +230,7 @@ fail:
>  static inline struct inode *gfs2_lookup_root(struct super_block *sb,
>  					     u64 no_addr)
>  {
> -	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
> +	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
>  }
>  
>  static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/rgrp.c gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c
> --- gfs2-2.6-nmw/fs/gfs2/rgrp.c	2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c	2007-09-18 11:42:04.000000000 -0500
> @@ -884,7 +884,7 @@ static struct inode *try_rgrp_unlink(str
>  			continue;
>  		*last_unlinked = no_addr;
>  		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
> -					  no_addr, -1);
> +					  no_addr, -1, 1);
>  		if (!IS_ERR(inode))
>  			return inode;
>  	}




More information about the Cluster-devel mailing list