Commit d76a3a77113db020d9bb1e894822869410450bd9
1 parent
b10a44c369
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
ext4/jbd2: don't wait (forever) for stale tid caused by wraparound
In the case where an inode has a very stale transaction id (tid) in i_datasync_tid or i_sync_tid, it's possible that after a very large (2**31) number of transactions, the tid number space might wrap, causing tid_geq()'s calculations to fail. Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily", attempted to fix this problem, but it only avoided kjournald spinning forever by fixing the logic in jbd2_log_start_commit(). Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c that might call jbd2_log_start_commit() with a stale tid, those functions will subsequently call jbd2_log_wait_commit() with the same stale tid, and then wait for a very long time. To fix this, we replace the calls to jbd2_log_start_commit() and jbd2_log_wait_commit() with a call to a new function, jbd2_complete_transaction(), which will correctly handle stale tids. As a bonus, jbd2_complete_transaction() will avoid locking j_state_lock for writing unless a commit needs to be started. This should have a small (but probably not measurable) improvement for ext4's scalability. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Reported-by: Ben Hutchings <ben@decadent.org.uk> Reported-by: George Barnett <gbarnett@atlassian.com> Cc: stable@vger.kernel.org
Showing 4 changed files with 34 additions and 4 deletions Side-by-side Diff
fs/ext4/fsync.c
... | ... | @@ -166,8 +166,7 @@ |
166 | 166 | if (journal->j_flags & JBD2_BARRIER && |
167 | 167 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
168 | 168 | needs_barrier = true; |
169 | - jbd2_log_start_commit(journal, commit_tid); | |
170 | - ret = jbd2_log_wait_commit(journal, commit_tid); | |
169 | + ret = jbd2_complete_transaction(journal, commit_tid); | |
171 | 170 | if (needs_barrier) { |
172 | 171 | err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
173 | 172 | if (!ret) |
fs/ext4/inode.c
... | ... | @@ -210,8 +210,7 @@ |
210 | 210 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
211 | 211 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; |
212 | 212 | |
213 | - jbd2_log_start_commit(journal, commit_tid); | |
214 | - jbd2_log_wait_commit(journal, commit_tid); | |
213 | + jbd2_complete_transaction(journal, commit_tid); | |
215 | 214 | filemap_write_and_wait(&inode->i_data); |
216 | 215 | } |
217 | 216 | truncate_inode_pages(&inode->i_data, 0); |
fs/jbd2/journal.c
... | ... | @@ -710,6 +710,37 @@ |
710 | 710 | } |
711 | 711 | |
712 | 712 | /* |
713 | + * When this function returns the transaction corresponding to tid | |
714 | + * will be completed. If the transaction is currently running, start | |
715 | + * committing that transaction before waiting for it to complete. If | |
716 | + * the transaction id is stale, it is by definition already completed, | |
717 | + * so just return 0 (success). | |
718 | + */ | |
719 | +int jbd2_complete_transaction(journal_t *journal, tid_t tid) | |
720 | +{ | |
721 | + int need_to_wait = 1; | |
722 | + | |
723 | + read_lock(&journal->j_state_lock); | |
724 | + if (journal->j_running_transaction && | |
725 | + journal->j_running_transaction->t_tid == tid) { | |
726 | + if (journal->j_commit_request != tid) { | |
727 | + /* transaction not yet started, so request it */ | |
728 | + read_unlock(&journal->j_state_lock); | |
729 | + jbd2_log_start_commit(journal, tid); | |
730 | + goto wait_commit; | |
731 | + } | |
732 | + } else if (!(journal->j_committing_transaction && | |
733 | + journal->j_committing_transaction->t_tid == tid)) | |
734 | + need_to_wait = 0; | |
735 | + read_unlock(&journal->j_state_lock); | |
736 | + if (!need_to_wait) | |
737 | + return 0; | |
738 | +wait_commit: | |
739 | + return jbd2_log_wait_commit(journal, tid); | |
740 | +} | |
741 | +EXPORT_SYMBOL(jbd2_complete_transaction); | |
742 | + | |
743 | +/* | |
713 | 744 | * Log buffer allocation routines: |
714 | 745 | */ |
715 | 746 |
include/linux/jbd2.h
... | ... | @@ -1200,6 +1200,7 @@ |
1200 | 1200 | int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); |
1201 | 1201 | int jbd2_journal_force_commit_nested(journal_t *journal); |
1202 | 1202 | int jbd2_log_wait_commit(journal_t *journal, tid_t tid); |
1203 | +int jbd2_complete_transaction(journal_t *journal, tid_t tid); | |
1203 | 1204 | int jbd2_log_do_checkpoint(journal_t *journal); |
1204 | 1205 | int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); |
1205 | 1206 |