Commit 7f5aa215088b817add9c71914b83650bdd49f8a9

Authored by Jan Kara
Committed by Theodore Tso
1 parent 9eddacf9e9

jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()

If we race with commit code setting i_transaction to NULL, we could
possibly dereference it.  Proper locking requires the journal pointer
(to access journal->j_list_lock), which we don't have.  So we have to
change the prototype of the function so that filesystem passes us the
journal pointer.  Also add a more detailed comment about why the
function jbd2_journal_begin_ordered_truncate() does what it does and
how it should be used.

Thanks to Dan Carpenter <error27@gmail.com> for pointing to the
suspitious code.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Joel Becker <joel.becker@oracle.com>
CC: linux-ext4@vger.kernel.org
CC: ocfs2-devel@oss.oracle.com
CC: mfasheh@suse.de
CC: Dan Carpenter <error27@gmail.com>

Showing 4 changed files with 41 additions and 16 deletions Side-by-side Diff

... ... @@ -47,8 +47,10 @@
47 47 static inline int ext4_begin_ordered_truncate(struct inode *inode,
48 48 loff_t new_size)
49 49 {
50   - return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
51   - new_size);
  50 + return jbd2_journal_begin_ordered_truncate(
  51 + EXT4_SB(inode->i_sb)->s_journal,
  52 + &EXT4_I(inode)->jinode,
  53 + new_size);
52 54 }
53 55  
54 56 static void ext4_invalidatepage(struct page *page, unsigned long offset);
fs/jbd2/transaction.c
... ... @@ -2129,26 +2129,46 @@
2129 2129 }
2130 2130  
2131 2131 /*
2132   - * This function must be called when inode is journaled in ordered mode
2133   - * before truncation happens. It starts writeout of truncated part in
2134   - * case it is in the committing transaction so that we stand to ordered
2135   - * mode consistency guarantees.
  2132 + * File truncate and transaction commit interact with each other in a
  2133 + * non-trivial way. If a transaction writing data block A is
  2134 + * committing, we cannot discard the data by truncate until we have
  2135 + * written them. Otherwise if we crashed after the transaction with
  2136 + * write has committed but before the transaction with truncate has
  2137 + * committed, we could see stale data in block A. This function is a
  2138 + * helper to solve this problem. It starts writeout of the truncated
  2139 + * part in case it is in the committing transaction.
  2140 + *
  2141 + * Filesystem code must call this function when inode is journaled in
  2142 + * ordered mode before truncation happens and after the inode has been
  2143 + * placed on orphan list with the new inode size. The second condition
  2144 + * avoids the race that someone writes new data and we start
  2145 + * committing the transaction after this function has been called but
  2146 + * before a transaction for truncate is started (and furthermore it
  2147 + * allows us to optimize the case where the addition to orphan list
  2148 + * happens in the same transaction as write --- we don't have to write
  2149 + * any data in such case).
2136 2150 */
2137   -int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
  2151 +int jbd2_journal_begin_ordered_truncate(journal_t *journal,
  2152 + struct jbd2_inode *jinode,
2138 2153 loff_t new_size)
2139 2154 {
2140   - journal_t *journal;
2141   - transaction_t *commit_trans;
  2155 + transaction_t *inode_trans, *commit_trans;
2142 2156 int ret = 0;
2143 2157  
2144   - if (!inode->i_transaction && !inode->i_next_transaction)
  2158 + /* This is a quick check to avoid locking if not necessary */
  2159 + if (!jinode->i_transaction)
2145 2160 goto out;
2146   - journal = inode->i_transaction->t_journal;
  2161 + /* Locks are here just to force reading of recent values, it is
  2162 + * enough that the transaction was not committing before we started
  2163 + * a transaction adding the inode to orphan list */
2147 2164 spin_lock(&journal->j_state_lock);
2148 2165 commit_trans = journal->j_committing_transaction;
2149 2166 spin_unlock(&journal->j_state_lock);
2150   - if (inode->i_transaction == commit_trans) {
2151   - ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
  2167 + spin_lock(&journal->j_list_lock);
  2168 + inode_trans = jinode->i_transaction;
  2169 + spin_unlock(&journal->j_list_lock);
  2170 + if (inode_trans == commit_trans) {
  2171 + ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2152 2172 new_size, LLONG_MAX);
2153 2173 if (ret)
2154 2174 jbd2_journal_abort(journal, ret);
... ... @@ -513,8 +513,10 @@
513 513 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
514 514 loff_t new_size)
515 515 {
516   - return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
517   - new_size);
  516 + return jbd2_journal_begin_ordered_truncate(
  517 + OCFS2_SB(inode->i_sb)->journal->j_journal,
  518 + &OCFS2_I(inode)->ip_jinode,
  519 + new_size);
518 520 }
519 521  
520 522 #endif /* OCFS2_JOURNAL_H */
include/linux/jbd2.h
... ... @@ -1150,7 +1150,8 @@
1150 1150 extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
1151 1151 extern int jbd2_journal_force_commit(journal_t *);
1152 1152 extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
1153   -extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
  1153 +extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
  1154 + struct jbd2_inode *inode, loff_t new_size);
1154 1155 extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
1155 1156 extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
1156 1157