Commit cd956502590d6db78e621052146f370a72f36dd9

Authored by Ilya Dryomov
Committed by Greg Kroah-Hartman
1 parent b3b3972a8b

libceph: fix double __remove_osd() problem

commit 7eb71e0351fbb1b242ae70abb7bb17107fe2f792 upstream.

It turns out it's possible to get __remove_osd() called twice on the
same OSD.  That doesn't sit well with rb_erase() - depending on the
shape of the tree we can get a NULL dereference, a soft lockup or
a random crash at some point in the future as we end up touching freed
memory.  One scenario that I was able to reproduce is as follows:

            <osd3 is idle, on the osd lru list>
<con reset - osd3>
con_fault_finish()
  osd_reset()
                              <osdmap - osd3 down>
                              ceph_osdc_handle_map()
                                <takes map_sem>
                                kick_requests()
                                  <takes request_mutex>
                                  reset_changed_osds()
                                    __reset_osd()
                                      __remove_osd()
                                  <releases request_mutex>
                                <releases map_sem>
    <takes map_sem>
    <takes request_mutex>
    __kick_osd_requests()
      __reset_osd()
        __remove_osd() <-- !!!

A case can be made that osd refcounting is imperfect and reworking it
would be a proper resolution, but for now Sage and I decided to fix
this by adding a safe guard around __remove_osd().

Fixes: http://tracker.ceph.com/issues/8087

Cc: Sage Weil <sage@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 18 additions and 8 deletions Side-by-side Diff

net/ceph/osd_client.c
... ... @@ -1006,16 +1006,26 @@
1006 1006 */
1007 1007 static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
1008 1008 {
1009   - dout("__remove_osd %p\n", osd);
  1009 + dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
1010 1010 WARN_ON(!list_empty(&osd->o_requests));
1011 1011 WARN_ON(!list_empty(&osd->o_linger_requests));
1012 1012  
1013   - rb_erase(&osd->o_node, &osdc->osds);
1014 1013 list_del_init(&osd->o_osd_lru);
1015   - ceph_con_close(&osd->o_con);
1016   - put_osd(osd);
  1014 + rb_erase(&osd->o_node, &osdc->osds);
  1015 + RB_CLEAR_NODE(&osd->o_node);
1017 1016 }
1018 1017  
  1018 +static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
  1019 +{
  1020 + dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
  1021 +
  1022 + if (!RB_EMPTY_NODE(&osd->o_node)) {
  1023 + ceph_con_close(&osd->o_con);
  1024 + __remove_osd(osdc, osd);
  1025 + put_osd(osd);
  1026 + }
  1027 +}
  1028 +
1019 1029 static void remove_all_osds(struct ceph_osd_client *osdc)
1020 1030 {
1021 1031 dout("%s %p\n", __func__, osdc);
... ... @@ -1023,7 +1033,7 @@
1023 1033 while (!RB_EMPTY_ROOT(&osdc->osds)) {
1024 1034 struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
1025 1035 struct ceph_osd, o_node);
1026   - __remove_osd(osdc, osd);
  1036 + remove_osd(osdc, osd);
1027 1037 }
1028 1038 mutex_unlock(&osdc->request_mutex);
1029 1039 }
... ... @@ -1064,7 +1074,7 @@
1064 1074 list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
1065 1075 if (time_before(jiffies, osd->lru_ttl))
1066 1076 break;
1067   - __remove_osd(osdc, osd);
  1077 + remove_osd(osdc, osd);
1068 1078 }
1069 1079 mutex_unlock(&osdc->request_mutex);
1070 1080 }
... ... @@ -1079,8 +1089,7 @@
1079 1089 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
1080 1090 if (list_empty(&osd->o_requests) &&
1081 1091 list_empty(&osd->o_linger_requests)) {
1082   - __remove_osd(osdc, osd);
1083   -
  1092 + remove_osd(osdc, osd);
1084 1093 return -ENODEV;
1085 1094 }
1086 1095  
... ... @@ -1884,6 +1893,7 @@
1884 1893 {
1885 1894 struct rb_node *p, *n;
1886 1895  
  1896 + dout("%s %p\n", __func__, osdc);
1887 1897 for (p = rb_first(&osdc->osds); p; p = n) {
1888 1898 struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
1889 1899