Commit a05b0855fd15504972dba2358e5faa172a1e50ba
Committed by
Linus Torvalds
1 parent
f5bf18fa22
Exists in
master
and in
20 other branches
hugetlbfs: avoid taking i_mutex from hugetlbfs_read()
Taking i_mutex in hugetlbfs_read() can result in deadlock with mmap, as explained below.

Thread A: read() on hugetlbfs
  hugetlbfs_read() called
  i_mutex grabbed
  hugetlbfs_read_actor() called
  __copy_to_user() called
  page fault is triggered

Thread B, sharing address space with A: mmap() the same file
  ->mmap_sem is grabbed on task_B->mm->mmap_sem
  hugetlbfs_file_mmap() is called
  attempt to grab ->i_mutex and block waiting for A to give it up

Thread A: page fault handled
  blocked on attempt to grab task_A->mm->mmap_sem, which happens to be the same thing as task_B->mm->mmap_sem. Block waiting for B to give it up.

AFAIU the i_mutex locking was added to hugetlbfs_read() as per http://lkml.indiana.edu/hypermail/linux/kernel/0707.2/3066.html to take care of the race between truncate and read.

This patch fixes this by looking at page->mapping under lock_page() (find_lock_page()) to ensure that the inode didn't get truncated in the range during a parallel read.

Ideally we can extend the patch to make sure we don't increase i_size in mmap. But that will break userspace, because applications will now have to use truncate(2) to increase i_size in hugetlbfs.

Based on the original patch from Hillf Danton.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@kernel.org> [everything after 2007 :)]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 9 additions and 16 deletions Side-by-side Diff
fs/hugetlbfs/inode.c
... | ... | @@ -245,17 +245,10 @@ |
245 | 245 | loff_t isize; |
246 | 246 | ssize_t retval = 0; |
247 | 247 | |
248 | - mutex_lock(&inode->i_mutex); | |
249 | - | |
250 | 248 | /* validate length */ |
251 | 249 | if (len == 0) |
252 | 250 | goto out; |
253 | 251 | |
254 | - isize = i_size_read(inode); | |
255 | - if (!isize) | |
256 | - goto out; | |
257 | - | |
258 | - end_index = (isize - 1) >> huge_page_shift(h); | |
259 | 252 | for (;;) { |
260 | 253 | struct page *page; |
261 | 254 | unsigned long nr, ret; |
262 | 255 | |
263 | 256 | |
264 | 257 | |
... | ... | @@ -263,18 +256,21 @@ |
263 | 256 | |
264 | 257 | /* nr is the maximum number of bytes to copy from this page */ |
265 | 258 | nr = huge_page_size(h); |
259 | + isize = i_size_read(inode); | |
260 | + if (!isize) | |
261 | + goto out; | |
262 | + end_index = (isize - 1) >> huge_page_shift(h); | |
266 | 263 | if (index >= end_index) { |
267 | 264 | if (index > end_index) |
268 | 265 | goto out; |
269 | 266 | nr = ((isize - 1) & ~huge_page_mask(h)) + 1; |
270 | - if (nr <= offset) { | |
267 | + if (nr <= offset) | |
271 | 268 | goto out; |
272 | - } | |
273 | 269 | } |
274 | 270 | nr = nr - offset; |
275 | 271 | |
276 | 272 | /* Find the page */ |
277 | - page = find_get_page(mapping, index); | |
273 | + page = find_lock_page(mapping, index); | |
278 | 274 | if (unlikely(page == NULL)) { |
279 | 275 | /* |
280 | 276 | * We have a HOLE, zero out the user-buffer for the |
281 | 277 | |
282 | 278 | |
... | ... | @@ -286,17 +282,18 @@ |
286 | 282 | else |
287 | 283 | ra = 0; |
288 | 284 | } else { |
285 | + unlock_page(page); | |
286 | + | |
289 | 287 | /* |
290 | 288 | * We have the page, copy it to user space buffer. |
291 | 289 | */ |
292 | 290 | ra = hugetlbfs_read_actor(page, offset, buf, len, nr); |
293 | 291 | ret = ra; |
292 | + page_cache_release(page); | |
294 | 293 | } |
295 | 294 | if (ra < 0) { |
296 | 295 | if (retval == 0) |
297 | 296 | retval = ra; |
298 | - if (page) | |
299 | - page_cache_release(page); | |
300 | 297 | goto out; |
301 | 298 | } |
302 | 299 | |
303 | 300 | |
... | ... | @@ -306,16 +303,12 @@ |
306 | 303 | index += offset >> huge_page_shift(h); |
307 | 304 | offset &= ~huge_page_mask(h); |
308 | 305 | |
309 | - if (page) | |
310 | - page_cache_release(page); | |
311 | - | |
312 | 306 | /* short read or no more work */ |
313 | 307 | if ((ret != nr) || (len == 0)) |
314 | 308 | break; |
315 | 309 | } |
316 | 310 | out: |
317 | 311 | *ppos = ((loff_t)index << huge_page_shift(h)) + offset; |
318 | - mutex_unlock(&inode->i_mutex); | |
319 | 312 | return retval; |
320 | 313 | } |
321 | 314 |