Commit 7f5aa215088b817add9c71914b83650bdd49f8a9
Committed by
Theodore Tso
1 parent
9eddacf9e9
Exists in
master
and in
7 other branches
jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()
If we race with commit code setting i_transaction to NULL, we could possibly dereference it. Proper locking requires the journal pointer (to access journal->j_list_lock), which we don't have. So we have to change the prototype of the function so that filesystem passes us the journal pointer. Also add a more detailed comment about why the function jbd2_journal_begin_ordered_truncate() does what it does and how it should be used. Thanks to Dan Carpenter <error27@gmail.com> for pointing to the suspitious code. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Acked-by: Joel Becker <joel.becker@oracle.com> CC: linux-ext4@vger.kernel.org CC: ocfs2-devel@oss.oracle.com CC: mfasheh@suse.de CC: Dan Carpenter <error27@gmail.com>
Showing 4 changed files with 41 additions and 16 deletions Side-by-side Diff
fs/ext4/inode.c
... | ... | @@ -47,8 +47,10 @@ |
47 | 47 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
48 | 48 | loff_t new_size) |
49 | 49 | { |
50 | - return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, | |
51 | - new_size); | |
50 | + return jbd2_journal_begin_ordered_truncate( | |
51 | + EXT4_SB(inode->i_sb)->s_journal, | |
52 | + &EXT4_I(inode)->jinode, | |
53 | + new_size); | |
52 | 54 | } |
53 | 55 | |
54 | 56 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
fs/jbd2/transaction.c
... | ... | @@ -2129,26 +2129,46 @@ |
2129 | 2129 | } |
2130 | 2130 | |
2131 | 2131 | /* |
2132 | - * This function must be called when inode is journaled in ordered mode | |
2133 | - * before truncation happens. It starts writeout of truncated part in | |
2134 | - * case it is in the committing transaction so that we stand to ordered | |
2135 | - * mode consistency guarantees. | |
2132 | + * File truncate and transaction commit interact with each other in a | |
2133 | + * non-trivial way. If a transaction writing data block A is | |
2134 | + * committing, we cannot discard the data by truncate until we have | |
2135 | + * written them. Otherwise if we crashed after the transaction with | |
2136 | + * write has committed but before the transaction with truncate has | |
2137 | + * committed, we could see stale data in block A. This function is a | |
2138 | + * helper to solve this problem. It starts writeout of the truncated | |
2139 | + * part in case it is in the committing transaction. | |
2140 | + * | |
2141 | + * Filesystem code must call this function when inode is journaled in | |
2142 | + * ordered mode before truncation happens and after the inode has been | |
2143 | + * placed on orphan list with the new inode size. The second condition | |
2144 | + * avoids the race that someone writes new data and we start | |
2145 | + * committing the transaction after this function has been called but | |
2146 | + * before a transaction for truncate is started (and furthermore it | |
2147 | + * allows us to optimize the case where the addition to orphan list | |
2148 | + * happens in the same transaction as write --- we don't have to write | |
2149 | + * any data in such case). | |
2136 | 2150 | */ |
2137 | -int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | |
2151 | +int jbd2_journal_begin_ordered_truncate(journal_t *journal, | |
2152 | + struct jbd2_inode *jinode, | |
2138 | 2153 | loff_t new_size) |
2139 | 2154 | { |
2140 | - journal_t *journal; | |
2141 | - transaction_t *commit_trans; | |
2155 | + transaction_t *inode_trans, *commit_trans; | |
2142 | 2156 | int ret = 0; |
2143 | 2157 | |
2144 | - if (!inode->i_transaction && !inode->i_next_transaction) | |
2158 | + /* This is a quick check to avoid locking if not necessary */ | |
2159 | + if (!jinode->i_transaction) | |
2145 | 2160 | goto out; |
2146 | - journal = inode->i_transaction->t_journal; | |
2161 | + /* Locks are here just to force reading of recent values, it is | |
2162 | + * enough that the transaction was not committing before we started | |
2163 | + * a transaction adding the inode to orphan list */ | |
2147 | 2164 | spin_lock(&journal->j_state_lock); |
2148 | 2165 | commit_trans = journal->j_committing_transaction; |
2149 | 2166 | spin_unlock(&journal->j_state_lock); |
2150 | - if (inode->i_transaction == commit_trans) { | |
2151 | - ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, | |
2167 | + spin_lock(&journal->j_list_lock); | |
2168 | + inode_trans = jinode->i_transaction; | |
2169 | + spin_unlock(&journal->j_list_lock); | |
2170 | + if (inode_trans == commit_trans) { | |
2171 | + ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, | |
2152 | 2172 | new_size, LLONG_MAX); |
2153 | 2173 | if (ret) |
2154 | 2174 | jbd2_journal_abort(journal, ret); |
fs/ocfs2/journal.h
... | ... | @@ -513,8 +513,10 @@ |
513 | 513 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, |
514 | 514 | loff_t new_size) |
515 | 515 | { |
516 | - return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, | |
517 | - new_size); | |
516 | + return jbd2_journal_begin_ordered_truncate( | |
517 | + OCFS2_SB(inode->i_sb)->journal->j_journal, | |
518 | + &OCFS2_I(inode)->ip_jinode, | |
519 | + new_size); | |
518 | 520 | } |
519 | 521 | |
520 | 522 | #endif /* OCFS2_JOURNAL_H */ |
include/linux/jbd2.h
... | ... | @@ -1150,7 +1150,8 @@ |
1150 | 1150 | extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); |
1151 | 1151 | extern int jbd2_journal_force_commit(journal_t *); |
1152 | 1152 | extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); |
1153 | -extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); | |
1153 | +extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, | |
1154 | + struct jbd2_inode *inode, loff_t new_size); | |
1154 | 1155 | extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); |
1155 | 1156 | extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); |
1156 | 1157 |