Commit 063c4c99630c0b06afad080d2a18bda64172c1a2
Committed by
David Teigland
1 parent
b5711b8e5a
Exists in
master
and in
7 other branches
dlm: fix connection close handling
Closing a connection to a node can create problems if there are outstanding messages for that node. The problems include dlm_send spinning while attempting to reconnect, and a BUG from tcp_connect_to_sock() attempting to use a partially closed connection. To cleanly close a connection, we now first attempt to send any pending messages, cancel any remaining workqueue work, and flag the connection as closed to avoid reconnect attempts.

Signed-off-by: Lars Marowsky-Bree <lmb@suse.de>
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
Showing 1 changed file with 13 additions and 2 deletions Side-by-side Diff
fs/dlm/lowcomms.c
... | ... | @@ -106,6 +106,7 @@ |
106 | 106 | #define CF_CONNECT_PENDING 3 |
107 | 107 | #define CF_INIT_PENDING 4 |
108 | 108 | #define CF_IS_OTHERCON 5 |
109 | +#define CF_CLOSE 6 | |
109 | 110 | struct list_head writequeue; /* List of outgoing writequeue_entries */ |
110 | 111 | spinlock_t writequeue_lock; |
111 | 112 | int (*rx_action) (struct connection *); /* What to do when active */ |
... | ... | @@ -299,6 +300,8 @@ |
299 | 300 | |
300 | 301 | static inline void lowcomms_connect_sock(struct connection *con) |
301 | 302 | { |
303 | + if (test_bit(CF_CLOSE, &con->flags)) | |
304 | + return; | |
302 | 305 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) |
303 | 306 | queue_work(send_workqueue, &con->swork); |
304 | 307 | } |
... | ... | @@ -1368,6 +1371,13 @@ |
1368 | 1371 | log_print("closing connection to node %d", nodeid); |
1369 | 1372 | con = nodeid2con(nodeid, 0); |
1370 | 1373 | if (con) { |
1374 | + clear_bit(CF_CONNECT_PENDING, &con->flags); | |
1375 | + clear_bit(CF_WRITE_PENDING, &con->flags); | |
1376 | + set_bit(CF_CLOSE, &con->flags); | |
1377 | + if (cancel_work_sync(&con->swork)) | |
1378 | + log_print("canceled swork for node %d", nodeid); | |
1379 | + if (cancel_work_sync(&con->rwork)) | |
1380 | + log_print("canceled rwork for node %d", nodeid); | |
1371 | 1381 | clean_one_writequeue(con); |
1372 | 1382 | close_connection(con, true); |
1373 | 1383 | } |
1374 | 1384 | |
... | ... | @@ -1393,9 +1403,10 @@ |
1393 | 1403 | |
1394 | 1404 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { |
1395 | 1405 | con->connect_action(con); |
1406 | + set_bit(CF_WRITE_PENDING, &con->flags); | |
1396 | 1407 | } |
1397 | - clear_bit(CF_WRITE_PENDING, &con->flags); | |
1398 | - send_to_sock(con); | |
1408 | + if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags)) | |
1409 | + send_to_sock(con); | |
1399 | 1410 | } |
1400 | 1411 | |
1401 | 1412 |