Commit 53e872681fed6a43047e71bf927f77d06f467988
Committed by
Theodore Ts'o
1 parent
4520fb3c36
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for page stradding i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Showing 3 changed files with 86 additions and 25 deletions Side-by-side Diff
fs/ext4/inode.c
... | ... | @@ -2894,8 +2894,8 @@ |
2894 | 2894 | block_invalidatepage(page, offset); |
2895 | 2895 | } |
2896 | 2896 | |
2897 | -static void ext4_journalled_invalidatepage(struct page *page, | |
2898 | - unsigned long offset) | |
2897 | +static int __ext4_journalled_invalidatepage(struct page *page, | |
2898 | + unsigned long offset) | |
2899 | 2899 | { |
2900 | 2900 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
2901 | 2901 | |
2902 | 2902 | |
... | ... | @@ -2907,9 +2907,16 @@ |
2907 | 2907 | if (offset == 0) |
2908 | 2908 | ClearPageChecked(page); |
2909 | 2909 | |
2910 | - jbd2_journal_invalidatepage(journal, page, offset); | |
2910 | + return jbd2_journal_invalidatepage(journal, page, offset); | |
2911 | 2911 | } |
2912 | 2912 | |
2913 | +/* Wrapper for aops... */ | |
2914 | +static void ext4_journalled_invalidatepage(struct page *page, | |
2915 | + unsigned long offset) | |
2916 | +{ | |
2917 | + WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | |
2918 | +} | |
2919 | + | |
2913 | 2920 | static int ext4_releasepage(struct page *page, gfp_t wait) |
2914 | 2921 | { |
2915 | 2922 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
... | ... | @@ -4314,6 +4321,47 @@ |
4314 | 4321 | } |
4315 | 4322 | |
4316 | 4323 | /* |
4324 | + * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate | |
4325 | + * buffers that are attached to a page stradding i_size and are undergoing | |
4326 | + * commit. In that case we have to wait for commit to finish and try again. | |
4327 | + */ | |
4328 | +static void ext4_wait_for_tail_page_commit(struct inode *inode) | |
4329 | +{ | |
4330 | + struct page *page; | |
4331 | + unsigned offset; | |
4332 | + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | |
4333 | + tid_t commit_tid = 0; | |
4334 | + int ret; | |
4335 | + | |
4336 | + offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | |
4337 | + /* | |
4338 | + * All buffers in the last page remain valid? Then there's nothing to | |
4339 | + * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == | |
4340 | + * blocksize case | |
4341 | + */ | |
4342 | + if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) | |
4343 | + return; | |
4344 | + while (1) { | |
4345 | + page = find_lock_page(inode->i_mapping, | |
4346 | + inode->i_size >> PAGE_CACHE_SHIFT); | |
4347 | + if (!page) | |
4348 | + return; | |
4349 | + ret = __ext4_journalled_invalidatepage(page, offset); | |
4350 | + unlock_page(page); | |
4351 | + page_cache_release(page); | |
4352 | + if (ret != -EBUSY) | |
4353 | + return; | |
4354 | + commit_tid = 0; | |
4355 | + read_lock(&journal->j_state_lock); | |
4356 | + if (journal->j_committing_transaction) | |
4357 | + commit_tid = journal->j_committing_transaction->t_tid; | |
4358 | + read_unlock(&journal->j_state_lock); | |
4359 | + if (commit_tid) | |
4360 | + jbd2_log_wait_commit(journal, commit_tid); | |
4361 | + } | |
4362 | +} | |
4363 | + | |
4364 | +/* | |
4317 | 4365 | * ext4_setattr() |
4318 | 4366 | * |
4319 | 4367 | * Called from notify_change. |
4320 | 4368 | |
4321 | 4369 | |
... | ... | @@ -4426,16 +4474,28 @@ |
4426 | 4474 | } |
4427 | 4475 | |
4428 | 4476 | if (attr->ia_valid & ATTR_SIZE) { |
4429 | - if (attr->ia_size != i_size_read(inode)) { | |
4430 | - truncate_setsize(inode, attr->ia_size); | |
4431 | - /* Inode size will be reduced, wait for dio in flight. | |
4432 | - * Temporarily disable dioread_nolock to prevent | |
4433 | - * livelock. */ | |
4477 | + if (attr->ia_size != inode->i_size) { | |
4478 | + loff_t oldsize = inode->i_size; | |
4479 | + | |
4480 | + i_size_write(inode, attr->ia_size); | |
4481 | + /* | |
4482 | + * Blocks are going to be removed from the inode. Wait | |
4483 | + * for dio in flight. Temporarily disable | |
4484 | + * dioread_nolock to prevent livelock. | |
4485 | + */ | |
4434 | 4486 | if (orphan) { |
4435 | - ext4_inode_block_unlocked_dio(inode); | |
4436 | - inode_dio_wait(inode); | |
4437 | - ext4_inode_resume_unlocked_dio(inode); | |
4487 | + if (!ext4_should_journal_data(inode)) { | |
4488 | + ext4_inode_block_unlocked_dio(inode); | |
4489 | + inode_dio_wait(inode); | |
4490 | + ext4_inode_resume_unlocked_dio(inode); | |
4491 | + } else | |
4492 | + ext4_wait_for_tail_page_commit(inode); | |
4438 | 4493 | } |
4494 | + /* | |
4495 | + * Truncate pagecache after we've waited for commit | |
4496 | + * in data=journal mode to make pages freeable. | |
4497 | + */ | |
4498 | + truncate_pagecache(inode, oldsize, inode->i_size); | |
4439 | 4499 | } |
4440 | 4500 | ext4_truncate(inode); |
4441 | 4501 | } |
fs/jbd2/transaction.c
... | ... | @@ -1840,7 +1840,6 @@ |
1840 | 1840 | |
1841 | 1841 | BUFFER_TRACE(bh, "entry"); |
1842 | 1842 | |
1843 | -retry: | |
1844 | 1843 | /* |
1845 | 1844 | * It is safe to proceed here without the j_list_lock because the |
1846 | 1845 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
1847 | 1846 | |
... | ... | @@ -1935,14 +1934,11 @@ |
1935 | 1934 | * for commit and try again. |
1936 | 1935 | */ |
1937 | 1936 | if (partial_page) { |
1938 | - tid_t tid = journal->j_committing_transaction->t_tid; | |
1939 | - | |
1940 | 1937 | jbd2_journal_put_journal_head(jh); |
1941 | 1938 | spin_unlock(&journal->j_list_lock); |
1942 | 1939 | jbd_unlock_bh_state(bh); |
1943 | 1940 | write_unlock(&journal->j_state_lock); |
1944 | - jbd2_log_wait_commit(journal, tid); | |
1945 | - goto retry; | |
1941 | + return -EBUSY; | |
1946 | 1942 | } |
1947 | 1943 | /* |
1948 | 1944 | * OK, buffer won't be reachable after truncate. We just set |
1949 | 1945 | |
1950 | 1946 | |
1951 | 1947 | |
... | ... | @@ -2003,21 +1999,23 @@ |
2003 | 1999 | * @page: page to flush |
2004 | 2000 | * @offset: length of page to invalidate. |
2005 | 2001 | * |
2006 | - * Reap page buffers containing data after offset in page. | |
2007 | - * | |
2002 | + * Reap page buffers containing data after offset in page. Can return -EBUSY | |
2003 | + * if buffers are part of the committing transaction and the page is straddling | |
2004 | + * i_size. Caller then has to wait for current commit and try again. | |
2008 | 2005 | */ |
2009 | -void jbd2_journal_invalidatepage(journal_t *journal, | |
2010 | - struct page *page, | |
2011 | - unsigned long offset) | |
2006 | +int jbd2_journal_invalidatepage(journal_t *journal, | |
2007 | + struct page *page, | |
2008 | + unsigned long offset) | |
2012 | 2009 | { |
2013 | 2010 | struct buffer_head *head, *bh, *next; |
2014 | 2011 | unsigned int curr_off = 0; |
2015 | 2012 | int may_free = 1; |
2013 | + int ret = 0; | |
2016 | 2014 | |
2017 | 2015 | if (!PageLocked(page)) |
2018 | 2016 | BUG(); |
2019 | 2017 | if (!page_has_buffers(page)) |
2020 | - return; | |
2018 | + return 0; | |
2021 | 2019 | |
2022 | 2020 | /* We will potentially be playing with lists other than just the |
2023 | 2021 | * data lists (especially for journaled data mode), so be |
2024 | 2022 | |
... | ... | @@ -2031,9 +2029,11 @@ |
2031 | 2029 | if (offset <= curr_off) { |
2032 | 2030 | /* This block is wholly outside the truncation point */ |
2033 | 2031 | lock_buffer(bh); |
2034 | - may_free &= journal_unmap_buffer(journal, bh, | |
2035 | - offset > 0); | |
2032 | + ret = journal_unmap_buffer(journal, bh, offset > 0); | |
2036 | 2033 | unlock_buffer(bh); |
2034 | + if (ret < 0) | |
2035 | + return ret; | |
2036 | + may_free &= ret; | |
2037 | 2037 | } |
2038 | 2038 | curr_off = next_off; |
2039 | 2039 | bh = next; |
... | ... | @@ -2044,6 +2044,7 @@ |
2044 | 2044 | if (may_free && try_to_free_buffers(page)) |
2045 | 2045 | J_ASSERT(!page_has_buffers(page)); |
2046 | 2046 | } |
2047 | + return 0; | |
2047 | 2048 | } |
2048 | 2049 | |
2049 | 2050 | /* |
include/linux/jbd2.h
... | ... | @@ -1098,7 +1098,7 @@ |
1098 | 1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); |
1099 | 1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); |
1100 | 1100 | extern void journal_sync_buffer (struct buffer_head *); |
1101 | -extern void jbd2_journal_invalidatepage(journal_t *, | |
1101 | +extern int jbd2_journal_invalidatepage(journal_t *, | |
1102 | 1102 | struct page *, unsigned long); |
1103 | 1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); |
1104 | 1104 | extern int jbd2_journal_stop(handle_t *); |