Commit e0e851cf30f1a9bd2e2a7624e9810378d6a2b072
Committed by
Linus Torvalds
1 parent
fc5cd582e9
[PATCH] reiserfs: reiserfs hang and performance fix for data=journal mode
In data=journal mode, reiserfs writepage needs to make sure not to trigger transactions while being run under PF_MEMALLOC. This patch makes sure to redirty the page instead of forcing a transaction start in this case.

Also, calling filemap_fdata* in order to trigger io on the block device can cause lock inversions on the page lock. Instead, do simple batching from flush_commit_list.

Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 2 changed files with 21 additions and 5 deletions Side-by-side Diff
fs/reiserfs/inode.c
... | ... | @@ -2363,6 +2363,13 @@ |
2363 | 2363 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; |
2364 | 2364 | th.t_trans_id = 0; |
2365 | 2365 | |
2366 | + /* no logging allowed when nonblocking or from PF_MEMALLOC */ | |
2367 | + if (checked && (current->flags & PF_MEMALLOC)) { | |
2368 | + redirty_page_for_writepage(wbc, page); | |
2369 | + unlock_page(page); | |
2370 | + return 0; | |
2371 | + } | |
2372 | + | |
2366 | 2373 | /* The page dirty bit is cleared before writepage is called, which |
2367 | 2374 | * means we have to tell create_empty_buffers to make dirty buffers |
2368 | 2375 | * The page really should be up to date at this point, so tossing |
fs/reiserfs/journal.c
... | ... | @@ -988,6 +988,7 @@ |
988 | 988 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
989 | 989 | int barrier = 0; |
990 | 990 | int retval = 0; |
991 | + int write_len; | |
991 | 992 | |
992 | 993 | reiserfs_check_lock_depth(s, "flush_commit_list"); |
993 | 994 | |
994 | 995 | |
995 | 996 | |
... | ... | @@ -1037,16 +1038,24 @@ |
1037 | 1038 | BUG_ON(!list_empty(&jl->j_bh_list)); |
1038 | 1039 | /* |
1039 | 1040 | * for the description block and all the log blocks, submit any buffers |
1040 | - * that haven't already reached the disk | |
1041 | + * that haven't already reached the disk. Try to write at least 256 | |
1042 | + * log blocks. later on, we will only wait on blocks that correspond | |
1043 | + * to this transaction, but while we're unplugging we might as well | |
1044 | + * get a chunk of data on there. | |
1041 | 1045 | */ |
1042 | 1046 | atomic_inc(&journal->j_async_throttle); |
1043 | - for (i = 0; i < (jl->j_len + 1); i++) { | |
1047 | + write_len = jl->j_len + 1; | |
1048 | + if (write_len < 256) | |
1049 | + write_len = 256; | |
1050 | + for (i = 0 ; i < write_len ; i++) { | |
1044 | 1051 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % |
1045 | 1052 | SB_ONDISK_JOURNAL_SIZE(s); |
1046 | 1053 | tbh = journal_find_get_block(s, bn); |
1047 | - if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ | |
1048 | - ll_rw_block(SWRITE, 1, &tbh); | |
1049 | - put_bh(tbh); | |
1054 | + if (tbh) { | |
1055 | + if (buffer_dirty(tbh)) | |
1056 | + ll_rw_block(WRITE, 1, &tbh) ; | |
1057 | + put_bh(tbh) ; | |
1058 | + } | |
1050 | 1059 | } |
1051 | 1060 | atomic_dec(&journal->j_async_throttle); |
1052 | 1061 |