Commit 063c4c99630c0b06afad080d2a18bda64172c1a2

Authored by Lars Marowsky-Bree
Committed by David Teigland
1 parent b5711b8e5a

dlm: fix connection close handling

Closing a connection to a node can cause problems if there are
outstanding messages for that node.  The problems include dlm_send
spinning while attempting to reconnect, or a BUG from
tcp_connect_to_sock() when it attempts to use a partially closed
connection.

To cleanly close a connection, we now first attempt to send any pending
messages, cancel any remaining workqueue work, and flag the connection
as closed (CF_CLOSE) to avoid reconnect attempts.

Signed-off-by: Lars Marowsky-Bree <lmb@suse.de>
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>

Showing 1 changed file with 13 additions and 2 deletions Side-by-side Diff

... ... @@ -106,6 +106,7 @@
106 106 #define CF_CONNECT_PENDING 3
107 107 #define CF_INIT_PENDING 4
108 108 #define CF_IS_OTHERCON 5
  109 +#define CF_CLOSE 6
109 110 struct list_head writequeue; /* List of outgoing writequeue_entries */
110 111 spinlock_t writequeue_lock;
111 112 int (*rx_action) (struct connection *); /* What to do when active */
... ... @@ -299,6 +300,8 @@
299 300  
300 301 static inline void lowcomms_connect_sock(struct connection *con)
301 302 {
  303 + if (test_bit(CF_CLOSE, &con->flags))
  304 + return;
302 305 if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
303 306 queue_work(send_workqueue, &con->swork);
304 307 }
... ... @@ -1368,6 +1371,13 @@
1368 1371 log_print("closing connection to node %d", nodeid);
1369 1372 con = nodeid2con(nodeid, 0);
1370 1373 if (con) {
  1374 + clear_bit(CF_CONNECT_PENDING, &con->flags);
  1375 + clear_bit(CF_WRITE_PENDING, &con->flags);
  1376 + set_bit(CF_CLOSE, &con->flags);
  1377 + if (cancel_work_sync(&con->swork))
  1378 + log_print("canceled swork for node %d", nodeid);
  1379 + if (cancel_work_sync(&con->rwork))
  1380 + log_print("canceled rwork for node %d", nodeid);
1371 1381 clean_one_writequeue(con);
1372 1382 close_connection(con, true);
1373 1383 }
1374 1384  
... ... @@ -1393,9 +1403,10 @@
1393 1403  
1394 1404 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
1395 1405 con->connect_action(con);
  1406 + set_bit(CF_WRITE_PENDING, &con->flags);
1396 1407 }
1397   - clear_bit(CF_WRITE_PENDING, &con->flags);
1398   - send_to_sock(con);
  1408 + if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
  1409 + send_to_sock(con);
1399 1410 }
1400 1411  
1401 1412