Commit 3738daa68a5121ad7dd0318bca931e2a6afb0e8c

Authored by Yan, Zheng
Committed by Ilya Dryomov
1 parent 01deead041

ceph: fetch inline data when getting Fcr cap refs

We can't use getattr to fetch inline data after getting Fcr caps,
because it can cause a deadlock. The solution is to try bringing the
inline data into the page cache while not holding any caps, and hope
the inline data page is still there after getting the Fcr caps. If the
page is still there, pin it in the page cache for later IO.

Signed-off-by: Yan, Zheng <zyan@redhat.com>

Showing 3 changed files with 63 additions and 18 deletions Side-by-side Diff

... ... @@ -1207,6 +1207,7 @@
1207 1207 struct inode *inode = file_inode(vma->vm_file);
1208 1208 struct ceph_inode_info *ci = ceph_inode(inode);
1209 1209 struct ceph_file_info *fi = vma->vm_file->private_data;
  1210 + struct page *pinned_page = NULL;
1210 1211 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
1211 1212 int want, got, ret;
1212 1213  
... ... @@ -1218,7 +1219,8 @@
1218 1219 want = CEPH_CAP_FILE_CACHE;
1219 1220 while (1) {
1220 1221 got = 0;
1221   - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
  1222 + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1,
  1223 + &got, &pinned_page);
1222 1224 if (ret == 0)
1223 1225 break;
1224 1226 if (ret != -ERESTARTSYS) {
... ... @@ -1233,6 +1235,8 @@
1233 1235  
1234 1236 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
1235 1237 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
  1238 + if (pinned_page)
  1239 + page_cache_release(pinned_page);
1236 1240 ceph_put_cap_refs(ci, got);
1237 1241  
1238 1242 return ret;
... ... @@ -1266,7 +1270,8 @@
1266 1270 want = CEPH_CAP_FILE_BUFFER;
1267 1271 while (1) {
1268 1272 got = 0;
1269   - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
  1273 + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
  1274 + &got, NULL);
1270 1275 if (ret == 0)
1271 1276 break;
1272 1277 if (ret != -ERESTARTSYS) {
... ... @@ -2057,15 +2057,17 @@
2057 2057 * requested from the MDS.
2058 2058 */
2059 2059 static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2060   - int *got, loff_t endoff, int *check_max, int *err)
  2060 + loff_t endoff, int *got, struct page **pinned_page,
  2061 + int *check_max, int *err)
2061 2062 {
2062 2063 struct inode *inode = &ci->vfs_inode;
2063 2064 int ret = 0;
2064   - int have, implemented;
  2065 + int have, implemented, _got = 0;
2065 2066 int file_wanted;
2066 2067  
2067 2068 dout("get_cap_refs %p need %s want %s\n", inode,
2068 2069 ceph_cap_string(need), ceph_cap_string(want));
  2070 +again:
2069 2071 spin_lock(&ci->i_ceph_lock);
2070 2072  
2071 2073 /* make sure file is actually open */
... ... @@ -2075,7 +2077,7 @@
2075 2077 ceph_cap_string(need), ceph_cap_string(file_wanted));
2076 2078 *err = -EBADF;
2077 2079 ret = 1;
2078   - goto out;
  2080 + goto out_unlock;
2079 2081 }
2080 2082  
2081 2083 /* finish pending truncate */
... ... @@ -2095,7 +2097,7 @@
2095 2097 *check_max = 1;
2096 2098 ret = 1;
2097 2099 }
2098   - goto out;
  2100 + goto out_unlock;
2099 2101 }
2100 2102 /*
2101 2103 * If a sync write is in progress, we must wait, so that we
... ... @@ -2103,7 +2105,7 @@
2103 2105 */
2104 2106 if (__ceph_have_pending_cap_snap(ci)) {
2105 2107 dout("get_cap_refs %p cap_snap_pending\n", inode);
2106   - goto out;
  2108 + goto out_unlock;
2107 2109 }
2108 2110 }
2109 2111  
2110 2112  
2111 2113  
2112 2114  
... ... @@ -2120,18 +2122,50 @@
2120 2122 inode, ceph_cap_string(have), ceph_cap_string(not),
2121 2123 ceph_cap_string(revoking));
2122 2124 if ((revoking & not) == 0) {
2123   - *got = need | (have & want);
2124   - __take_cap_refs(ci, *got);
  2125 + _got = need | (have & want);
  2126 + __take_cap_refs(ci, _got);
2125 2127 ret = 1;
2126 2128 }
2127 2129 } else {
2128 2130 dout("get_cap_refs %p have %s needed %s\n", inode,
2129 2131 ceph_cap_string(have), ceph_cap_string(need));
2130 2132 }
2131   -out:
  2133 +out_unlock:
2132 2134 spin_unlock(&ci->i_ceph_lock);
  2135 +
  2136 + if (ci->i_inline_version != CEPH_INLINE_NONE &&
  2137 + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
  2138 + i_size_read(inode) > 0) {
  2139 + int ret1;
  2140 + struct page *page = find_get_page(inode->i_mapping, 0);
  2141 + if (page) {
  2142 + if (PageUptodate(page)) {
  2143 + *pinned_page = page;
  2144 + goto out;
  2145 + }
  2146 + page_cache_release(page);
  2147 + }
  2148 + /*
  2149 + * drop cap refs first because getattr while holding
  2150 + * caps refs can cause deadlock.
  2151 + */
  2152 + ceph_put_cap_refs(ci, _got);
  2153 + _got = 0;
  2154 +
  2155 + /* getattr request will bring inline data into page cache */
  2156 + ret1 = __ceph_do_getattr(inode, NULL,
  2157 + CEPH_STAT_CAP_INLINE_DATA, true);
  2158 + if (ret1 >= 0) {
  2159 + ret = 0;
  2160 + goto again;
  2161 + }
  2162 + *err = ret1;
  2163 + ret = 1;
  2164 + }
  2165 +out:
2133 2166 dout("get_cap_refs %p ret %d got %s\n", inode,
2134   - ret, ceph_cap_string(*got));
  2167 + ret, ceph_cap_string(_got));
  2168 + *got = _got;
2135 2169 return ret;
2136 2170 }
2137 2171  
... ... @@ -2168,8 +2202,8 @@
2168 2202 * due to a small max_size, make sure we check_max_size (and possibly
2169 2203 * ask the mds) so we don't get hung up indefinitely.
2170 2204 */
2171   -int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
2172   - loff_t endoff)
  2205 +int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
  2206 + loff_t endoff, int *got, struct page **pinned_page)
2173 2207 {
2174 2208 int check_max, ret, err;
2175 2209  
... ... @@ -2179,8 +2213,8 @@
2179 2213 check_max = 0;
2180 2214 err = 0;
2181 2215 ret = wait_event_interruptible(ci->i_cap_wq,
2182   - try_get_cap_refs(ci, need, want,
2183   - got, endoff,
  2216 + try_get_cap_refs(ci, need, want, endoff,
  2217 + got, pinned_page,
2184 2218 &check_max, &err));
2185 2219 if (err)
2186 2220 ret = err;
... ... @@ -805,6 +805,7 @@
805 805 size_t len = iocb->ki_nbytes;
806 806 struct inode *inode = file_inode(filp);
807 807 struct ceph_inode_info *ci = ceph_inode(inode);
  808 + struct page *pinned_page = NULL;
808 809 ssize_t ret;
809 810 int want, got = 0;
810 811 int checkeof = 0, read = 0;
... ... @@ -817,7 +818,7 @@
817 818 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
818 819 else
819 820 want = CEPH_CAP_FILE_CACHE;
820   - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
  821 + ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
821 822 if (ret < 0)
822 823 return ret;
823 824  
... ... @@ -840,6 +841,10 @@
840 841 }
841 842 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
842 843 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
  844 + if (pinned_page) {
  845 + page_cache_release(pinned_page);
  846 + pinned_page = NULL;
  847 + }
843 848 ceph_put_cap_refs(ci, got);
844 849  
845 850 if (checkeof && ret >= 0) {
... ... @@ -924,7 +929,8 @@
924 929 else
925 930 want = CEPH_CAP_FILE_BUFFER;
926 931 got = 0;
927   - err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
  932 + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
  933 + &got, NULL);
928 934 if (err < 0)
929 935 goto out;
930 936  
... ... @@ -1225,7 +1231,7 @@
1225 1231 else
1226 1232 want = CEPH_CAP_FILE_BUFFER;
1227 1233  
1228   - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
  1234 + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
1229 1235 if (ret < 0)
1230 1236 goto unlock;
1231 1237