[Cluster-devel] [PATCH 02/18] [try #2] DLM: Eliminate CF_WRITE_PENDING flag

tsutomu.owa at toshiba.co.jp tsutomu.owa at toshiba.co.jp
Tue Sep 12 08:55:14 UTC 2017


---
This version implements Steve Whitehouse's suggestion to call
cond_resched() after queue_work() in send_to_sock(). The earlier
ordering was simply an oversight; it makes more sense to do it afterward.

Before this patch, the CF_WRITE_PENDING flag was used to indicate
when writes to the socket were pending. This caused race conditions
whereby one process set the bit and another cleared it. Instead,
we now just check whether the connection's write queue has anything
to send. This makes the code more intuitive and robust.

Signed-off-by: Bob Peterson <rpeterso at redhat.com>
Reviewed-by: Tadashi Miyauchi <miyauchi at toshiba-tops.co.jp>
---
 fs/dlm/lowcomms.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 41bf93a..4a34254 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -106,7 +106,6 @@ struct connection {
 	struct mutex sock_mutex;
 	unsigned long flags;
 #define CF_READ_PENDING 1
-#define CF_WRITE_PENDING 2
 #define CF_INIT_PENDING 4
 #define CF_IS_OTHERCON 5
 #define CF_CLOSE 6
@@ -426,8 +425,7 @@ static void lowcomms_write_space(struct sock *sk)
 		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
 	}
 
-	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
-		queue_work(send_workqueue, &con->swork);
+	queue_work(send_workqueue, &con->swork);
 }
 
 static inline void lowcomms_connect_sock(struct connection *con)
@@ -578,7 +576,6 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 static void close_connection(struct connection *con, bool and_other,
 			     bool tx, bool rx)
 {
-	clear_bit(CF_WRITE_PENDING, &con->flags);
 	if (tx && cancel_work_sync(&con->swork))
 		log_print("canceled swork for node %d", con->nodeid);
 	if (rx && cancel_work_sync(&con->rwork))
@@ -1077,7 +1074,6 @@ static void sctp_connect_to_sock(struct connection *con)
 	if (result == 0)
 		goto out;
 
-
 bind_err:
 	con->sock = NULL;
 	sock_release(sock);
@@ -1102,7 +1098,6 @@ static void sctp_connect_to_sock(struct connection *con)
 
 out:
 	mutex_unlock(&con->sock_mutex);
-	set_bit(CF_WRITE_PENDING, &con->flags);
 }
 
 /* Connect a new socket to its peer */
@@ -1196,7 +1191,6 @@ static void tcp_connect_to_sock(struct connection *con)
 	}
 out:
 	mutex_unlock(&con->sock_mutex);
-	set_bit(CF_WRITE_PENDING, &con->flags);
 	return;
 }
 
@@ -1452,9 +1446,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	e->len = e->end - e->offset;
 	spin_unlock(&con->writequeue_lock);
 
-	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
-		queue_work(send_workqueue, &con->swork);
-	}
+	queue_work(send_workqueue, &con->swork);
 	return;
 
 out:
@@ -1524,12 +1516,15 @@ static void send_to_sock(struct connection *con)
 send_error:
 	mutex_unlock(&con->sock_mutex);
 	close_connection(con, false, false, true);
-	lowcomms_connect_sock(con);
+	/* Requeue the send work. When the work daemon runs again, it will try
+	   a new connection, then call this function again. */
+	queue_work(send_workqueue, &con->swork);
 	return;
 
 out_connect:
 	mutex_unlock(&con->sock_mutex);
-	lowcomms_connect_sock(con);
+	queue_work(send_workqueue, &con->swork);
+	cond_resched();
 }
 
 static void clean_one_writequeue(struct connection *con)
@@ -1591,7 +1586,7 @@ static void process_send_sockets(struct work_struct *work)
 
 	if (con->sock == NULL) /* not mutex protected so check it inside too */
 		con->connect_action(con);
-	if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
+	if (!list_empty(&con->writequeue))
 		send_to_sock(con);
 }
 
-- 
2.7.4








More information about the Cluster-devel mailing list