[Cluster-devel] Re: [PATCH] dlm: timeout fixes

Steven Whitehouse swhiteho at redhat.com
Tue May 29 15:34:54 UTC 2007


Hi,

These four are now all in the -nmw git tree. Thanks,

Steve.

On Tue, 2007-05-29 at 08:44 -0500, David Teigland wrote:
> Various fixes related to the new timeout feature:
> - add_timeout() missed setting the TIMEWARN flag on lkbs when the
>   TIMEOUT flag was already set
> - clear_proc_locks() should remove a dead process's locks from the
>   timeout list
> - the end-of-life calculation for user locks needs to consider that
>   -ETIMEDOUT is equivalent to -DLM_ECANCEL
> - make initial default timewarn_cs config value visible in configfs
> - change bit position of TIMEOUT_CANCEL flag so it's not copied to
>   a remote master node
> - set timestamp on remote lkb's so a lock dump will display the time
>   they've been waiting
> 
> Signed-off-by: David Teigland <teigland at redhat.com>
> 
> Index: linux-quilt/fs/dlm/lock.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/lock.c	2007-05-25 14:29:56.000000000 -0500
> +++ linux-quilt/fs/dlm/lock.c	2007-05-25 14:40:59.000000000 -0500
> @@ -1010,17 +1010,18 @@
>  {
>  	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
>  
> -	if (is_master_copy(lkb))
> +	if (is_master_copy(lkb)) {
> +		lkb->lkb_timestamp = jiffies;
>  		return;
> -
> -	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
> -		goto add_it;
> +	}
>  
>  	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
>  	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
>  		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
>  		goto add_it;
>  	}
> +	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
> +		goto add_it;
>  	return;
>  
>   add_it:
> @@ -3510,8 +3511,7 @@
>  	case -DLM_ECANCEL:
>  		receive_flags_reply(lkb, ms);
>  		revert_lock_pc(r, lkb);
> -		if (ms->m_result)
> -			queue_cast(r, lkb, -DLM_ECANCEL);
> +		queue_cast(r, lkb, -DLM_ECANCEL);
>  		break;
>  	case 0:
>  		break;
> @@ -4534,6 +4534,7 @@
>  		lkb = del_proc_lock(ls, proc);
>  		if (!lkb)
>  			break;
> +		del_timeout(lkb);
>  		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
>  			orphan_proc_lock(ls, lkb);
>  		else
> Index: linux-quilt/fs/dlm/user.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/user.c	2007-05-25 14:29:51.000000000 -0500
> +++ linux-quilt/fs/dlm/user.c	2007-05-25 14:40:59.000000000 -0500
> @@ -138,6 +138,35 @@
>  }
>  #endif
>  
> +/* Figure out if this lock is at the end of its life and no longer
> +   available for the application to use.  The lkb still exists until
> +   the final ast is read.  A lock becomes EOL in three situations:
> +     1. a noqueue request fails with EAGAIN
> +     2. an unlock completes with EUNLOCK
> +     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
> +   An EOL lock needs to be removed from the process's list of locks.
> +   And we can't allow any new operation on an EOL lock.  This is
> +   not related to the lifetime of the lkb struct which is managed
> +   entirely by refcount. */
> +
> +static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
> +{
> +	switch (sb_status) {
> +	case -DLM_EUNLOCK:
> +		return 1;
> +	case -DLM_ECANCEL:
> +	case -ETIMEDOUT:
> +		if (lkb->lkb_grmode == DLM_LOCK_IV)
> +			return 1;
> +		break;
> +	case -EAGAIN:
> +		if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
> +			return 1;
> +		break;
> +	}
> +	return 0;
> +}
> +
>  /* we could possibly check if the cancel of an orphan has resulted in the lkb
>     being removed and then remove that lkb from the orphans list and free it */
>  
> @@ -184,25 +213,7 @@
>  		log_debug(ls, "ast overlap %x status %x %x",
>  			  lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
>  
> -	/* Figure out if this lock is at the end of its life and no longer
> -	   available for the application to use.  The lkb still exists until
> -	   the final ast is read.  A lock becomes EOL in three situations:
> -	     1. a noqueue request fails with EAGAIN
> -	     2. an unlock completes with EUNLOCK
> -	     3. a cancel of a waiting request completes with ECANCEL
> -	   An EOL lock needs to be removed from the process's list of locks.
> -	   And we can't allow any new operation on an EOL lock.  This is
> -	   not related to the lifetime of the lkb struct which is managed
> -	   entirely by refcount. */
> -
> -	if (type == AST_COMP &&
> -	    lkb->lkb_grmode == DLM_LOCK_IV &&
> -	    ua->lksb.sb_status == -EAGAIN)
> -		eol = 1;
> -	else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
> -	    (ua->lksb.sb_status == -DLM_ECANCEL &&
> -	     lkb->lkb_grmode == DLM_LOCK_IV))
> -		eol = 1;
> +	eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
>  	if (eol) {
>  		lkb->lkb_ast_type &= ~AST_BAST;
>  		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
> Index: linux-quilt/fs/dlm/config.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/config.c	2007-05-25 14:29:51.000000000 -0500
> +++ linux-quilt/fs/dlm/config.c	2007-05-25 14:29:56.000000000 -0500
> @@ -433,6 +433,7 @@
>  	cl->cl_toss_secs = dlm_config.ci_toss_secs;
>  	cl->cl_scan_secs = dlm_config.ci_scan_secs;
>  	cl->cl_log_debug = dlm_config.ci_log_debug;
> +	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
>  
>  	space_list = &sps->ss_group;
>  	comm_list = &cms->cs_group;
> Index: linux-quilt/fs/dlm/netlink.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/netlink.c	2007-05-25 14:29:51.000000000 -0500
> +++ linux-quilt/fs/dlm/netlink.c	2007-05-25 14:29:56.000000000 -0500
> @@ -133,8 +133,6 @@
>  	size_t size;
>  	int rv;
>  
> -	log_debug(lkb->lkb_resource->res_ls, "timeout_warn %x", lkb->lkb_id);
> -
>  	size = nla_total_size(sizeof(struct dlm_lock_data)) +
>  	       nla_total_size(0); /* why this? */
>  
> Index: linux-quilt/fs/dlm/dlm_internal.h
> ===================================================================
> --- linux-quilt.orig/fs/dlm/dlm_internal.h	2007-05-25 14:29:56.000000000 -0500
> +++ linux-quilt/fs/dlm/dlm_internal.h	2007-05-25 14:40:59.000000000 -0500
> @@ -215,9 +215,9 @@
>  #define DLM_IFL_OVERLAP_CANCEL  0x00100000
>  #define DLM_IFL_ENDOFLIFE	0x00200000
>  #define DLM_IFL_WATCH_TIMEWARN	0x00400000
> +#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
>  #define DLM_IFL_USER		0x00000001
>  #define DLM_IFL_ORPHAN		0x00000002
> -#define DLM_IFL_TIMEOUT_CANCEL	0x00000004
>  
>  struct dlm_lkb {
>  	struct dlm_rsb		*lkb_resource;	/* the rsb */




More information about the Cluster-devel mailing list