[Cluster-devel] [PATCH] [GFS2] bz288581 alternate gfs2_iget to avoid looking up inodes being freed
Steven Whitehouse
swhiteho at redhat.com
Wed Sep 19 12:22:50 UTC 2007
Hi,
Now in the -nmw git tree. Thanks,
Steve.
On Tue, 2007-09-18 at 13:33 -0500, Benjamin Marzinski wrote:
> There is a possible deadlock between two processes on the same node, where one
> process is deleting an inode, and another process is looking for allocated but
> unused inodes to delete in order to create more space.
>
> process A does an iput() on inode X, and it's i_count drops to 0. This causes
> iput_final() to be called, which puts an inode into state I_FREEING at
> generic_delete_inode(). There no point between when iput_final() is called, and
> when I_FREEING is set where GFS2 could acquire any glocks. Once I_FREEING is
> set, no other process on that node can successfully look up that inode until
> the delete finishes.
>
> process B locks the the resource group for the same inode in get_local_rgrp(),
> which is called by gfs2_inplace_reserve_i()
>
> process A tries to lock the resource group for the inode in
> gfs2_dinode_dealloc(), but it's already locked by process B
>
> process B waits in find_inode for the inode to have the I_FREEING state cleared.
>
> Deadlock.
>
> This patch solves the problem by adding an alternative to gfs2_iget(),
> gfs2_iget_skip(), that simply skips any inodes that are in the I_FREEING
> state.o The alternate test function is just like the original one, except that
> it fails if the inode is being freed, and sets a skipped flag. The alternate
> set function is just like the original, except that it fails if the skipped
> flag is set. Only try_rgrp_unlink() calls gfs2_iget_skip() instead of
> gfs2_iget().
>
> Signed-off-by: Benjamin E. Marzinski <bmarzins at redhat.com>
> plain text document attachment (upstream_288581.patch)
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/dir.c gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c
> --- gfs2-2.6-nmw/fs/gfs2/dir.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c 2007-09-18 11:42:04.000000000 -0500
> @@ -1502,7 +1502,7 @@ struct inode *gfs2_dir_search(struct ino
> inode = gfs2_inode_lookup(dir->i_sb,
> be16_to_cpu(dent->de_type),
> be64_to_cpu(dent->de_inum.no_addr),
> - be64_to_cpu(dent->de_inum.no_formal_ino));
> + be64_to_cpu(dent->de_inum.no_formal_ino), 0);
> brelse(bh);
> return inode;
> }
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.c gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c
> --- gfs2-2.6-nmw/fs/gfs2/inode.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c 2007-09-18 11:42:04.000000000 -0500
> @@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct su
> return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
> }
>
> +struct gfs2_skip_data {
> + u64 no_addr;
> + int skipped;
> +};
> +
> +static int iget_skip_test(struct inode *inode, void *opaque)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_skip_data *data = opaque;
> +
> + if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){
> + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
> + data->skipped = 1;
> + return 0;
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +static int iget_skip_set(struct inode *inode, void *opaque)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_skip_data *data = opaque;
> +
> + if (data->skipped)
> + return 1;
> + inode->i_ino = (unsigned long)(data->no_addr);
> + ip->i_no_addr = data->no_addr;
> + return 0;
> +}
> +
> +static struct inode *gfs2_iget_skip(struct super_block *sb,
> + u64 no_addr)
> +{
> + struct gfs2_skip_data data;
> + unsigned long hash = (unsigned long)no_addr;
> +
> + data.no_addr = no_addr;
> + data.skipped = 0;
> + return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
> +}
> +
> /**
> * GFS2 lookup code fills in vfs inode contents based on info obtained
> * from directory entry inside gfs2_inode_lookup(). This has caused issues
> @@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
> * @sb: The super block
> * @no_addr: The inode number
> * @type: The type of the inode
> + * @skip_freeing: set this not return an inode if it is currently being freed.
> *
> * Returns: A VFS inode, or an error
> */
> @@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
> struct inode *gfs2_inode_lookup(struct super_block *sb,
> unsigned int type,
> u64 no_addr,
> - u64 no_formal_ino)
> + u64 no_formal_ino, int skip_freeing)
> {
> - struct inode *inode = gfs2_iget(sb, no_addr);
> - struct gfs2_inode *ip = GFS2_I(inode);
> + struct inode *inode;
> + struct gfs2_inode *ip;
> struct gfs2_glock *io_gl;
> int error;
>
> + if (skip_freeing)
> + inode = gfs2_iget_skip(sb, no_addr);
> + else
> + inode = gfs2_iget(sb, no_addr);
> + ip = GFS2_I(inode);
> +
> if (!inode)
> return ERR_PTR(-ENOBUFS);
>
> @@ -949,7 +999,7 @@ struct inode *gfs2_createi(struct gfs2_h
>
> inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
> inum.no_addr,
> - inum.no_formal_ino);
> + inum.no_formal_ino, 0);
> if (IS_ERR(inode))
> goto fail_gunlock2;
>
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.h gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h
> --- gfs2-2.6-nmw/fs/gfs2/inode.h 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h 2007-09-18 13:03:27.000000000 -0500
> @@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const s
> void gfs2_inode_attr_in(struct gfs2_inode *ip);
> void gfs2_set_iop(struct inode *inode);
> struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
> - u64 no_addr, u64 no_formal_ino);
> + u64 no_addr, u64 no_formal_ino,
> + int skip_freeing);
> struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
>
> int gfs2_inode_refresh(struct gfs2_inode *ip);
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_export.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c
> --- gfs2-2.6-nmw/fs/gfs2/ops_export.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c 2007-09-18 11:42:04.000000000 -0500
> @@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(st
>
> inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
> inum->no_addr,
> - 0);
> + 0, 0);
> if (!inode)
> goto fail;
> if (IS_ERR(inode)) {
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_fstype.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c
> --- gfs2-2.6-nmw/fs/gfs2/ops_fstype.c 2007-09-18 11:24:03.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c 2007-09-18 11:42:04.000000000 -0500
> @@ -230,7 +230,7 @@ fail:
> static inline struct inode *gfs2_lookup_root(struct super_block *sb,
> u64 no_addr)
> {
> - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
> + return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
> }
>
> static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/rgrp.c gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c
> --- gfs2-2.6-nmw/fs/gfs2/rgrp.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c 2007-09-18 11:42:04.000000000 -0500
> @@ -884,7 +884,7 @@ static struct inode *try_rgrp_unlink(str
> continue;
> *last_unlinked = no_addr;
> inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
> - no_addr, -1);
> + no_addr, -1, 1);
> if (!IS_ERR(inode))
> return inode;
> }
More information about the Cluster-devel
mailing list