[Cluster-devel] Re: [PATCH 1/2] NLM failover unlock commands

J. Bruce Fields bfields at fieldses.org
Mon Jan 14 23:07:42 UTC 2008


On Sat, Jan 12, 2008 at 02:03:55AM -0500, Wendy Cheng wrote:
> This is a combined patch that has:
>
> * changes made by Christoph Hellwig
> * code segment that handles f_locks so we would not walk inode->i_flock  
> list twice.
>
> If agreed, please re-add your "ack-by" and "signed-off" lines  
> respectively. Thanks ...
>
> -- Wendy

Comments below--

> Two new NFSD procfs files are added:
>   /proc/fs/nfsd/unlock_ip
>   /proc/fs/nfsd/unlock_filesystem
> 
> They are intended to allow admin or user mode script to release NLM locks
> based on either a path name or a server in-bound ip address (ipv4 for now)
> as;
> 
> shell> echo 10.1.1.2 > /proc/fs/nfsd/unlock_ip
> shell> echo /mnt/sfs1 > /proc/fs/nfsd/unlock_filesystem
> 
> Signed-off-by: S. Wendy Cheng <wcheng at redhat.com>
> Signed-off-by: Lon Hohberger  <lhh at redhat.com>
> 
>  fs/lockd/svcsubs.c          |   68 ++++++++++++++++++++++++++++++++++++++------
>  fs/nfsd/nfsctl.c            |   62 ++++++++++++++++++++++++++++++++++++++++
>  include/linux/lockd/lockd.h |    7 ++++
>  3 files changed, 129 insertions(+), 8 deletions(-)
> 
> --- linux-o/fs/nfsd/nfsctl.c	2008-01-04 10:01:08.000000000 -0500
> +++ linux/fs/nfsd/nfsctl.c	2008-01-11 19:08:02.000000000 -0500
> @@ -22,6 +22,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/pagemap.h>
>  #include <linux/init.h>
> +#include <linux/inet.h>
>  #include <linux/string.h>
>  #include <linux/smp_lock.h>
>  #include <linux/ctype.h>
> @@ -35,6 +36,7 @@
>  #include <linux/nfsd/cache.h>
>  #include <linux/nfsd/xdr.h>
>  #include <linux/nfsd/syscall.h>
> +#include <linux/lockd/lockd.h>
>  
>  #include <asm/uaccess.h>
>  
> @@ -52,6 +54,8 @@ enum {
>  	NFSD_Getfs,
>  	NFSD_List,
>  	NFSD_Fh,
> +	NFSD_FO_UnlockIP,
> +	NFSD_FO_UnlockFS,
>  	NFSD_Threads,
>  	NFSD_Pool_Threads,
>  	NFSD_Versions,
> @@ -88,6 +92,9 @@ static ssize_t write_leasetime(struct fi
>  static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
>  #endif
>  
> +static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size);
> +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size);
> +
>  static ssize_t (*write_op[])(struct file *, char *, size_t) = {
>  	[NFSD_Svc] = write_svc,
>  	[NFSD_Add] = write_add,
> @@ -97,6 +104,8 @@ static ssize_t (*write_op[])(struct file
>  	[NFSD_Getfd] = write_getfd,
>  	[NFSD_Getfs] = write_getfs,
>  	[NFSD_Fh] = write_filehandle,
> +	[NFSD_FO_UnlockIP] = failover_unlock_ip,
> +	[NFSD_FO_UnlockFS] = failover_unlock_fs,
>  	[NFSD_Threads] = write_threads,
>  	[NFSD_Pool_Threads] = write_pool_threads,
>  	[NFSD_Versions] = write_versions,
> @@ -288,6 +297,55 @@ static ssize_t write_getfd(struct file *
>  	return err;
>  }
>  
> +static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
> +{
> +	__be32 server_ip;
> +	char *fo_path;
> +	char *mesg;
> +
> +	/* sanity check */
> +	if (size <= 0)
> +		return -EINVAL;

Not only is size never negative, it's actually an unsigned type here, so
this is a no-op.

> +
> +	if (buf[size-1] == '\n')
> +		buf[size-1] = 0;

The other write methods in this file actually just do

	if (buf[size-1] != '\n')
		return -EINVAL;

I don't know why.  But absent some reason, I'd rather these two new
files behaved the same as existing ones.

> +
> +	fo_path = mesg = buf;
> +	if (qword_get(&mesg, fo_path, size) < 0)
> +		return -EINVAL;

"mesg" is unneeded here, right?  You can just do:

	fo_path = buf;
	if (qword_get(&buf, buf, size) < 0)

> +
> +	server_ip = in_aton(fo_path);

It'd be nice if we could sanity-check this.  (Is there code already in
the kernel someplace to do this?)

And, this isn't your problem for now, but eventually I guess this will
have to accept an ivp6 address as well?

> +	return nlmsvc_failover_ip(server_ip);
> +}
> +
> +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
> +{
> +	struct nameidata nd;
> +	char *fo_path;
> +	char *mesg;
> +	int error;
> +
> +	/* sanity check */
> +	if (size <= 0)
> +		return -EINVAL;
> +
> +	if (buf[size-1] == '\n')
> +		buf[size-1] = 0;
> +
> +	fo_path = mesg = buf;
> +	if (qword_get(&mesg, fo_path, size) < 0)
> +		return -EINVAL;

Same comments as above.

> +
> +	error = path_lookup(fo_path, 0, &nd);
> +	if (error)
> +		return error;
> +
> +	error = nlmsvc_failover_path(&nd);
> +
> +	path_release(&nd);
> +	return error;
> +}
> +
>  static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
>  {
>  	/* request is:
> @@ -646,6 +704,10 @@ static int nfsd_fill_super(struct super_
>  		[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
>  		[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
>  		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
> +		[NFSD_FO_UnlockIP] = {"unlock_ip",
> +					&transaction_ops, S_IWUSR|S_IRUSR},
> +		[NFSD_FO_UnlockFS] = {"unlock_filesystem",
> +					&transaction_ops, S_IWUSR|S_IRUSR},
>  		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
>  		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
>  		[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
> --- linux-o/fs/lockd/svcsubs.c	2008-01-04 10:01:08.000000000 -0500
> +++ linux/fs/lockd/svcsubs.c	2008-01-11 19:08:28.000000000 -0500
> @@ -18,6 +18,8 @@
>  #include <linux/lockd/lockd.h>
>  #include <linux/lockd/share.h>
>  #include <linux/lockd/sm_inter.h>
> +#include <linux/module.h>
> +#include <linux/mount.h>
>  
>  #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
>  
> @@ -87,7 +89,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, 
>  	unsigned int	hash;
>  	__be32		nfserr;
>  
> -	nlm_debug_print_fh("nlm_file_lookup", f);
> +	nlm_debug_print_fh("nlm_lookup_file", f);
>  
>  	hash = file_hash(f);
>  
> @@ -123,6 +125,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, 
>  
>  	hlist_add_head(&file->f_list, &nlm_files[hash]);
>  
> +	/* fill in f_iaddr for nlm lock failover */
> +	file->f_iaddr = rqstp->rq_daddr;
> +
>  found:
>  	dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
>  	*result = file;
> @@ -194,6 +199,12 @@ again:
>  	return 0;
>  }
>  
> +static int
> +nlmsvc_always_match(struct nlm_host *dummy1, struct nlm_host *dummy2)
> +{
> +	return 1;
> +}
> +
>  /*
>   * Inspect a single file
>   */
> @@ -230,27 +241,37 @@ nlm_file_inuse(struct nlm_file *file)
>   * Loop over all files in the file table.
>   */
>  static int
> -nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
> +nlm_traverse_files(void *data, nlm_host_match_fn_t match,
> +		int (*failover)(void *data, struct nlm_file *file))
>  {
>  	struct hlist_node *pos, *next;
>  	struct nlm_file	*file;
> -	int i, ret = 0;
> +	int i, ret = 0, inspect_file;
>  
>  	mutex_lock(&nlm_file_mutex);
>  	for (i = 0; i < FILE_NRHASH; i++) {
>  		hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
>  			file->f_count++;
>  			mutex_unlock(&nlm_file_mutex);
> +			inspect_file = 1;
>  
>  			/* Traverse locks, blocks and shares of this file
>  			 * and update file->f_locks count */
> -			if (nlm_inspect_file(host, file, match))
> +
> +			if (unlikely(failover)) {
> +				if (!failover(data, file)) {
> +					inspect_file = 0;
> +					file->f_locks = nlm_file_inuse(file);
> +				}
> +			}
> +
> +			if (inspect_file && nlm_inspect_file(data, file, match))
>  				ret = 1;

This seems quite complicated!  I don't have an alternative suggestion,
though.

--b.


>  
>  			mutex_lock(&nlm_file_mutex);
>  			file->f_count--;
>  			/* No more references to this file. Let go of it. */
> -			if (list_empty(&file->f_blocks) && !file->f_locks
> +			if (!file->f_locks && list_empty(&file->f_blocks)
>  			 && !file->f_shares && !file->f_count) {
>  				hlist_del(&file->f_list);
>  				nlmsvc_ops->fclose(file->f_file);
> @@ -337,7 +358,7 @@ void
>  nlmsvc_mark_resources(void)
>  {
>  	dprintk("lockd: nlmsvc_mark_resources\n");
> -	nlm_traverse_files(NULL, nlmsvc_mark_host);
> +	nlm_traverse_files(NULL, nlmsvc_mark_host, NULL);
>  }
>  
>  /*
> @@ -348,7 +369,7 @@ nlmsvc_free_host_resources(struct nlm_ho
>  {
>  	dprintk("lockd: nlmsvc_free_host_resources\n");
>  
> -	if (nlm_traverse_files(host, nlmsvc_same_host)) {
> +	if (nlm_traverse_files(host, nlmsvc_same_host, NULL)) {
>  		printk(KERN_WARNING
>  			"lockd: couldn't remove all locks held by %s\n",
>  			host->h_name);
> @@ -368,5 +389,36 @@ nlmsvc_invalidate_all(void)
>  	 * turn, which is about as inefficient as it gets.
>  	 * Now we just do it once in nlm_traverse_files.
>  	 */
> -	nlm_traverse_files(NULL, nlmsvc_is_client);
> +	nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
> +}
> +
> +static int
> +nlmsvc_failover_file_ok_path(void *datap, struct nlm_file *file)
> +{
> +	struct nameidata *nd = datap;
> +	return nd->mnt == file->f_file->f_path.mnt;
> +}
> +
> +int
> +nlmsvc_failover_path(struct nameidata *nd)
> +{
> +	return nlm_traverse_files(nd, nlmsvc_always_match,
> +			nlmsvc_failover_file_ok_path);
> +}
> +EXPORT_SYMBOL_GPL(nlmsvc_failover_path);
> +
> +static int
> +nlmsvc_failover_file_ok_ip(void *datap, struct nlm_file *file)
> +{
> +	__be32 *server_addr = datap;
> +
> +	return file->f_iaddr.addr.s_addr == *server_addr;
> +}
> +
> +int
> +nlmsvc_failover_ip(__be32 server_addr)
> +{
> +	return nlm_traverse_files(&server_addr, nlmsvc_always_match,
> +			nlmsvc_failover_file_ok_ip);
>  }
> +EXPORT_SYMBOL_GPL(nlmsvc_failover_ip);
> --- linux-o/include/linux/lockd/lockd.h	2008-01-04 10:01:08.000000000 -0500
> +++ linux/include/linux/lockd/lockd.h	2008-01-11 18:24:45.000000000 -0500
> @@ -113,6 +113,7 @@ struct nlm_file {
>  	unsigned int		f_locks;	/* guesstimate # of locks */
>  	unsigned int		f_count;	/* reference count */
>  	struct mutex		f_mutex;	/* avoid concurrent access */
> +	union svc_addr_u	f_iaddr;	/* server ip for failover */
>  };
>  
>  /*
> @@ -214,6 +215,12 @@ void		  nlmsvc_mark_resources(void);
>  void		  nlmsvc_free_host_resources(struct nlm_host *);
>  void		  nlmsvc_invalidate_all(void);
>  
> +/*
> + * Cluster failover support
> + */
> +int           nlmsvc_failover_path(struct nameidata *nd);
> +int           nlmsvc_failover_ip(__be32 server_addr);
> +
>  static __inline__ struct inode *
>  nlmsvc_file_inode(struct nlm_file *file)
>  {




More information about the Cluster-devel mailing list