Commit 0da5d70369e87f80adf794080cfff1ca15a34198

Authored by Sage Weil
1 parent 3b66378034

libceph: handle connection reopen race with callbacks

If a connection is closed and/or reopened (ceph_con_close, ceph_con_open),
it can race with a callback.  con_work does various state checks for
closed or reopened sockets at the beginning, but drops con->mutex before
making callbacks.  We need to check for state bit changes after retaking
the lock to ensure we restart con_work and execute those CLOSED/OPENING
tests, or else we may end up operating under stale assumptions.

In Jim's case, this was causing 'bad tag' errors.

There are four cases where we re-take the con->mutex inside con_work: catch
them all and return -EAGAIN from try_{read,write} so that we can restart
con_work.

Reported-by: Jim Schutt <jaschut@sandia.gov>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Sage Weil <sage@newdream.net>

Showing 1 changed file with 51 additions and 13 deletions Side-by-side Diff

net/ceph/messenger.c
... ... @@ -598,7 +598,7 @@
598 598 * Connection negotiation.
599 599 */
600 600  
601   -static void prepare_connect_authorizer(struct ceph_connection *con)
  601 +static int prepare_connect_authorizer(struct ceph_connection *con)
602 602 {
603 603 void *auth_buf;
604 604 int auth_len = 0;
... ... @@ -612,6 +612,10 @@
612 612 con->auth_retry);
613 613 mutex_lock(&con->mutex);
614 614  
  615 + if (test_bit(CLOSED, &con->state) ||
  616 + test_bit(OPENING, &con->state))
  617 + return -EAGAIN;
  618 +
615 619 con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
616 620 con->out_connect.authorizer_len = cpu_to_le32(auth_len);
617 621  
... ... @@ -619,6 +623,8 @@
619 623 con->out_kvec[con->out_kvec_left].iov_len = auth_len;
620 624 con->out_kvec_left++;
621 625 con->out_kvec_bytes += auth_len;
  626 +
  627 + return 0;
622 628 }
623 629  
624 630 /*
... ... @@ -640,9 +646,9 @@
640 646 set_bit(WRITE_PENDING, &con->state);
641 647 }
642 648  
643   -static void prepare_write_connect(struct ceph_messenger *msgr,
644   - struct ceph_connection *con,
645   - int after_banner)
  649 +static int prepare_write_connect(struct ceph_messenger *msgr,
  650 + struct ceph_connection *con,
  651 + int after_banner)
646 652 {
647 653 unsigned global_seq = get_global_seq(con->msgr, 0);
648 654 int proto;
... ... @@ -683,7 +689,7 @@
683 689 con->out_more = 0;
684 690 set_bit(WRITE_PENDING, &con->state);
685 691  
686   - prepare_connect_authorizer(con);
  692 + return prepare_connect_authorizer(con);
687 693 }
688 694  
689 695  
... ... @@ -1216,6 +1222,7 @@
1216 1222 u64 sup_feat = con->msgr->supported_features;
1217 1223 u64 req_feat = con->msgr->required_features;
1218 1224 u64 server_feat = le64_to_cpu(con->in_reply.features);
  1225 + int ret;
1219 1226  
1220 1227 dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
1221 1228  
... ... @@ -1250,7 +1257,9 @@
1250 1257 return -1;
1251 1258 }
1252 1259 con->auth_retry = 1;
1253   - prepare_write_connect(con->msgr, con, 0);
  1260 + ret = prepare_write_connect(con->msgr, con, 0);
  1261 + if (ret < 0)
  1262 + return ret;
1254 1263 prepare_read_connect(con);
1255 1264 break;
1256 1265  
... ... @@ -1277,6 +1286,9 @@
1277 1286 if (con->ops->peer_reset)
1278 1287 con->ops->peer_reset(con);
1279 1288 mutex_lock(&con->mutex);
  1289 + if (test_bit(CLOSED, &con->state) ||
  1290 + test_bit(OPENING, &con->state))
  1291 + return -EAGAIN;
1280 1292 break;
1281 1293  
1282 1294 case CEPH_MSGR_TAG_RETRY_SESSION:
... ... @@ -1810,6 +1822,17 @@
1810 1822 more:
1811 1823 dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
1812 1824 con->in_base_pos);
  1825 +
  1826 + /*
  1827 + * process_connect and process_message drop and re-take
  1828 + * con->mutex. make sure we handle a racing close or reopen.
  1829 + */
  1830 + if (test_bit(CLOSED, &con->state) ||
  1831 + test_bit(OPENING, &con->state)) {
  1832 + ret = -EAGAIN;
  1833 + goto out;
  1834 + }
  1835 +
1813 1836 if (test_bit(CONNECTING, &con->state)) {
1814 1837 if (!test_bit(NEGOTIATING, &con->state)) {
1815 1838 dout("try_read connecting\n");
1816 1839  
... ... @@ -1938,8 +1961,10 @@
1938 1961 {
1939 1962 struct ceph_connection *con = container_of(work, struct ceph_connection,
1940 1963 work.work);
  1964 + int ret;
1941 1965  
1942 1966 mutex_lock(&con->mutex);
  1967 +restart:
1943 1968 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 1969 dout("con_work %p backing off\n", con);
1945 1970 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1946 1971  
1947 1972  
... ... @@ -1969,18 +1994,31 @@
1969 1994 con_close_socket(con);
1970 1995 }
1971 1996  
1972   - if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
1973   - try_read(con) < 0 ||
1974   - try_write(con) < 0) {
1975   - mutex_unlock(&con->mutex);
1976   - ceph_fault(con); /* error/fault path */
1977   - goto done_unlocked;
1978   - }
  1997 + if (test_and_clear_bit(SOCK_CLOSED, &con->state))
  1998 + goto fault;
1979 1999  
  2000 + ret = try_read(con);
  2001 + if (ret == -EAGAIN)
  2002 + goto restart;
  2003 + if (ret < 0)
  2004 + goto fault;
  2005 +
  2006 + ret = try_write(con);
  2007 + if (ret == -EAGAIN)
  2008 + goto restart;
  2009 + if (ret < 0)
  2010 + goto fault;
  2011 +
1980 2012 done:
1981 2013 mutex_unlock(&con->mutex);
1982 2014 done_unlocked:
1983 2015 con->ops->put(con);
  2016 + return;
  2017 +
  2018 +fault:
  2019 + mutex_unlock(&con->mutex);
  2020 + ceph_fault(con); /* error/fault path */
  2021 + goto done_unlocked;
1984 2022 }
1985 2023  
1986 2024