Commit ff513ace9b772e75e337f8e058cc7f12816843fe

Authored by Ilya Dryomov
Committed by Sage Weil
1 parent 0bbfdfe8d2

libceph: take map_sem for read in handle_reply()

Handling redirect replies requires both map_sem and request_mutex.
Taking map_sem unconditionally near the top of handle_reply() avoids
possible race conditions that arise from releasing request_mutex to be
able to acquire map_sem in the redirect reply case.  (Lock ordering is:
map_sem, request_mutex, crush_mutex.)

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>

Showing 1 changed file with 11 additions and 6 deletions Side-by-side Diff

net/ceph/osd_client.c
... ... @@ -1687,6 +1687,7 @@
1687 1687 osdmap_epoch = ceph_decode_32(&p);
1688 1688  
1689 1689 /* lookup */
  1690 + down_read(&osdc->map_sem);
1690 1691 mutex_lock(&osdc->request_mutex);
1691 1692 req = __lookup_request(osdc, tid);
1692 1693 if (req == NULL) {
... ... @@ -1743,7 +1744,6 @@
1743 1744 dout("redirect pool %lld\n", redir.oloc.pool);
1744 1745  
1745 1746 __unregister_request(osdc, req);
1746   - mutex_unlock(&osdc->request_mutex);
1747 1747  
1748 1748 req->r_target_oloc = redir.oloc; /* struct */
1749 1749  
1750 1750  
... ... @@ -1755,10 +1755,10 @@
1755 1755 * successfully. In the future we might want to follow
1756 1756 * original request's nofail setting here.
1757 1757 */
1758   - err = ceph_osdc_start_request(osdc, req, true);
  1758 + err = __ceph_osdc_start_request(osdc, req, true);
1759 1759 BUG_ON(err);
1760 1760  
1761   - goto done;
  1761 + goto out_unlock;
1762 1762 }
1763 1763  
1764 1764 already_completed = req->r_got_reply;
... ... @@ -1776,8 +1776,7 @@
1776 1776 req->r_got_reply = 1;
1777 1777 } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
1778 1778 dout("handle_reply tid %llu dup ack\n", tid);
1779   - mutex_unlock(&osdc->request_mutex);
1780   - goto done;
  1779 + goto out_unlock;
1781 1780 }
1782 1781  
1783 1782 dout("handle_reply tid %llu flags %d\n", tid, flags);
... ... @@ -1792,6 +1791,7 @@
1792 1791 __unregister_request(osdc, req);
1793 1792  
1794 1793 mutex_unlock(&osdc->request_mutex);
  1794 + up_read(&osdc->map_sem);
1795 1795  
1796 1796 if (!already_completed) {
1797 1797 if (req->r_unsafe_callback &&
1798 1798  
... ... @@ -1809,10 +1809,14 @@
1809 1809 complete_request(req);
1810 1810 }
1811 1811  
1812   -done:
  1812 +out:
1813 1813 dout("req=%p req->r_linger=%d\n", req, req->r_linger);
1814 1814 ceph_osdc_put_request(req);
1815 1815 return;
  1816 +out_unlock:
  1817 + mutex_unlock(&osdc->request_mutex);
  1818 + up_read(&osdc->map_sem);
  1819 + goto out;
1816 1820  
1817 1821 bad_put:
1818 1822 req->r_result = -EIO;
... ... @@ -1825,6 +1829,7 @@
1825 1829 ceph_osdc_put_request(req);
1826 1830 bad_mutex:
1827 1831 mutex_unlock(&osdc->request_mutex);
  1832 + up_read(&osdc->map_sem);
1828 1833 bad:
1829 1834 pr_err("corrupt osd_op_reply got %d %d\n",
1830 1835 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));