Commit 50d9aa99bd35c77200e0e3dd7a72274f8304701f

Authored by Josef Bacik
Committed by Chris Mason
1 parent 9dba8cf128

Btrfs: make sure logged extents complete in the current transaction V3

Liu Bo pointed out that my previous fix would lose the generation update in the
scenario I described.  It is actually much worse than that: we could lose the
entire extent if we lose power right after the transaction commits.  Consider
the following:

write extent 0-4k
log extent in log tree
commit transaction
	< power fail happens here
ordered extent completes

We would lose the 0-4k extent because the ordered extent hasn't updated the
actual fs tree yet, and the transaction commit will reset the log so it isn't
replayed.  If we lose power before the transaction commit we are safe;
otherwise we are not.

Fix this by keeping track of all extents we logged in this transaction.  Then
when we go to commit the transaction, make sure we wait for all of those
ordered extents to complete before proceeding.  This will make sure that if we
lose power after the transaction commit we still have our data.  This also
fixes the problem of the improperly updated extent generation.  Thanks,

cc: stable@vger.kernel.org
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>

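The commit-side half of the change boils down to the helper below.  This is a
lightly commented copy of the btrfs_wait_pending_ordered() hunk from
fs/btrfs/transaction.c further down, not new code: the logging side hands each
ordered extent over to trans->ordered in btrfs_wait_logged_extents(), that list
is spliced into cur_trans->pending_ordered when the handle ends or commits, and
the helper drains it during the commit.

static inline void
btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
			   struct btrfs_fs_info *fs_info)
{
	struct btrfs_ordered_extent *ordered;

	spin_lock(&fs_info->trans_lock);
	while (!list_empty(&cur_trans->pending_ordered)) {
		/* Grab the next ordered extent the log tree referenced. */
		ordered = list_first_entry(&cur_trans->pending_ordered,
					   struct btrfs_ordered_extent,
					   trans_list);
		list_del_init(&ordered->trans_list);
		/* wait_event() may sleep, so drop the spinlock first. */
		spin_unlock(&fs_info->trans_lock);

		/* Block the commit until this extent is in the fs tree. */
		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
						   &ordered->flags));
		btrfs_put_ordered_extent(ordered);
		spin_lock(&fs_info->trans_lock);
	}
	spin_unlock(&fs_info->trans_lock);
}

btrfs_commit_transaction() calls this right after btrfs_wait_delalloc_flush(),
so the commit cannot complete (and reset the log) until every logged extent has
made it into the fs tree.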
Showing 6 changed files with 72 additions and 6 deletions

fs/btrfs/disk-io.c
... ... @@ -4129,6 +4129,25 @@
4129 4129 return 0;
4130 4130 }
4131 4131  
  4132 +static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
  4133 + struct btrfs_fs_info *fs_info)
  4134 +{
  4135 + struct btrfs_ordered_extent *ordered;
  4136 +
  4137 + spin_lock(&fs_info->trans_lock);
  4138 + while (!list_empty(&cur_trans->pending_ordered)) {
  4139 + ordered = list_first_entry(&cur_trans->pending_ordered,
  4140 + struct btrfs_ordered_extent,
  4141 + trans_list);
  4142 + list_del_init(&ordered->trans_list);
  4143 + spin_unlock(&fs_info->trans_lock);
  4144 +
  4145 + btrfs_put_ordered_extent(ordered);
  4146 + spin_lock(&fs_info->trans_lock);
  4147 + }
  4148 + spin_unlock(&fs_info->trans_lock);
  4149 +}
  4150 +
4132 4151 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4133 4152 struct btrfs_root *root)
4134 4153 {
... ... @@ -4140,6 +4159,7 @@
4140 4159 cur_trans->state = TRANS_STATE_UNBLOCKED;
4141 4160 wake_up(&root->fs_info->transaction_wait);
4142 4161  
  4162 + btrfs_free_pending_ordered(cur_trans, root->fs_info);
4143 4163 btrfs_destroy_delayed_inodes(root);
4144 4164 btrfs_assert_delayed_root_empty(root);
4145 4165  
fs/btrfs/ordered-data.c
... ... @@ -220,6 +220,7 @@
220 220 INIT_LIST_HEAD(&entry->work_list);
221 221 init_completion(&entry->completion);
222 222 INIT_LIST_HEAD(&entry->log_list);
  223 + INIT_LIST_HEAD(&entry->trans_list);
223 224  
224 225 trace_btrfs_ordered_extent_add(inode, entry);
225 226  
... ... @@ -443,6 +444,8 @@
443 444 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
444 445 if (!list_empty(&ordered->log_list))
445 446 continue;
  447 + if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
  448 + continue;
446 449 list_add_tail(&ordered->log_list, logged_list);
447 450 atomic_inc(&ordered->refs);
448 451 }
... ... @@ -472,7 +475,8 @@
472 475 spin_unlock_irq(&log->log_extents_lock[index]);
473 476 }
474 477  
475   -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
  478 +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
  479 + struct btrfs_root *log, u64 transid)
476 480 {
477 481 struct btrfs_ordered_extent *ordered;
478 482 int index = transid % 2;
... ... @@ -497,7 +501,8 @@
497 501 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
498 502 &ordered->flags));
499 503  
500   - btrfs_put_ordered_extent(ordered);
  504 + if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
  505 + list_add_tail(&ordered->trans_list, &trans->ordered);
501 506 spin_lock_irq(&log->log_extents_lock[index]);
502 507 }
503 508 spin_unlock_irq(&log->log_extents_lock[index]);
fs/btrfs/ordered-data.h
... ... @@ -71,6 +71,8 @@
71 71 ordered extent */
72 72 #define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
73 73  
  74 +#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
  75 + * in the logging code. */
74 76 struct btrfs_ordered_extent {
75 77 /* logical offset in the file */
76 78 u64 file_offset;
... ... @@ -121,6 +123,9 @@
121 123 /* If we need to wait on this to be done */
122 124 struct list_head log_list;
123 125  
  126 + /* If the transaction needs to wait on this ordered extent */
  127 + struct list_head trans_list;
  128 +
124 129 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
125 130 wait_queue_head_t wait;
126 131  
... ... @@ -197,7 +202,8 @@
197 202 void btrfs_put_logged_extents(struct list_head *logged_list);
198 203 void btrfs_submit_logged_extents(struct list_head *logged_list,
199 204 struct btrfs_root *log);
200   -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
  205 +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
  206 + struct btrfs_root *log, u64 transid);
201 207 void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
202 208 int __init ordered_data_init(void);
203 209 void ordered_data_exit(void);
fs/btrfs/transaction.c
... ... @@ -247,6 +247,7 @@
247 247 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
248 248 INIT_LIST_HEAD(&cur_trans->pending_chunks);
249 249 INIT_LIST_HEAD(&cur_trans->switch_commits);
  250 + INIT_LIST_HEAD(&cur_trans->pending_ordered);
250 251 list_add_tail(&cur_trans->list, &fs_info->trans_list);
251 252 extent_io_tree_init(&cur_trans->dirty_pages,
252 253 fs_info->btree_inode->i_mapping);
... ... @@ -515,6 +516,7 @@
515 516 h->sync = false;
516 517 INIT_LIST_HEAD(&h->qgroup_ref_list);
517 518 INIT_LIST_HEAD(&h->new_bgs);
  519 + INIT_LIST_HEAD(&h->ordered);
518 520  
519 521 smp_mb();
520 522 if (cur_trans->state >= TRANS_STATE_BLOCKED &&
... ... @@ -746,6 +748,12 @@
746 748 if (!list_empty(&trans->new_bgs))
747 749 btrfs_create_pending_block_groups(trans, root);
748 750  
  751 + if (!list_empty(&trans->ordered)) {
  752 + spin_lock(&info->trans_lock);
  753 + list_splice(&trans->ordered, &cur_trans->pending_ordered);
  754 + spin_unlock(&info->trans_lock);
  755 + }
  756 +
749 757 trans->delayed_ref_updates = 0;
750 758 if (!trans->sync) {
751 759 must_run_delayed_refs =
... ... @@ -1715,6 +1723,28 @@
1715 1723 btrfs_wait_ordered_roots(fs_info, -1);
1716 1724 }
1717 1725  
  1726 +static inline void
  1727 +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
  1728 + struct btrfs_fs_info *fs_info)
  1729 +{
  1730 + struct btrfs_ordered_extent *ordered;
  1731 +
  1732 + spin_lock(&fs_info->trans_lock);
  1733 + while (!list_empty(&cur_trans->pending_ordered)) {
  1734 + ordered = list_first_entry(&cur_trans->pending_ordered,
  1735 + struct btrfs_ordered_extent,
  1736 + trans_list);
  1737 + list_del_init(&ordered->trans_list);
  1738 + spin_unlock(&fs_info->trans_lock);
  1739 +
  1740 + wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
  1741 + &ordered->flags));
  1742 + btrfs_put_ordered_extent(ordered);
  1743 + spin_lock(&fs_info->trans_lock);
  1744 + }
  1745 + spin_unlock(&fs_info->trans_lock);
  1746 +}
  1747 +
1718 1748 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1719 1749 struct btrfs_root *root)
1720 1750 {
... ... @@ -1765,6 +1795,7 @@
1765 1795 }
1766 1796  
1767 1797 spin_lock(&root->fs_info->trans_lock);
  1798 + list_splice(&trans->ordered, &cur_trans->pending_ordered);
1768 1799 if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
1769 1800 spin_unlock(&root->fs_info->trans_lock);
1770 1801 atomic_inc(&cur_trans->use_count);
... ... @@ -1816,6 +1847,8 @@
1816 1847 goto cleanup_transaction;
1817 1848  
1818 1849 btrfs_wait_delalloc_flush(root->fs_info);
  1850 +
  1851 + btrfs_wait_pending_ordered(cur_trans, root->fs_info);
1819 1852  
1820 1853 btrfs_scrub_pause(root);
1821 1854 /*
fs/btrfs/transaction.h
... ... @@ -56,6 +56,7 @@
56 56 wait_queue_head_t commit_wait;
57 57 struct list_head pending_snapshots;
58 58 struct list_head pending_chunks;
  59 + struct list_head pending_ordered;
59 60 struct list_head switch_commits;
60 61 struct btrfs_delayed_ref_root delayed_refs;
61 62 int aborted;
... ... @@ -105,6 +106,7 @@
105 106 */
106 107 struct btrfs_root *root;
107 108 struct seq_list delayed_ref_elem;
  109 + struct list_head ordered;
108 110 struct list_head qgroup_ref_list;
109 111 struct list_head new_bgs;
110 112 };
fs/btrfs/tree-log.c
... ... @@ -2600,7 +2600,7 @@
2600 2600 if (atomic_read(&log_root_tree->log_commit[index2])) {
2601 2601 blk_finish_plug(&plug);
2602 2602 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2603   - btrfs_wait_logged_extents(log, log_transid);
  2603 + btrfs_wait_logged_extents(trans, log, log_transid);
2604 2604 wait_log_commit(trans, log_root_tree,
2605 2605 root_log_ctx.log_transid);
2606 2606 mutex_unlock(&log_root_tree->log_mutex);
... ... @@ -2645,7 +2645,7 @@
2645 2645 btrfs_wait_marked_extents(log_root_tree,
2646 2646 &log_root_tree->dirty_log_pages,
2647 2647 EXTENT_NEW | EXTENT_DIRTY);
2648   - btrfs_wait_logged_extents(log, log_transid);
  2648 + btrfs_wait_logged_extents(trans, log, log_transid);
2649 2649  
2650 2650 btrfs_set_super_log_root(root->fs_info->super_for_commit,
2651 2651 log_root_tree->node->start);
... ... @@ -3766,7 +3766,7 @@
3766 3766 fi = btrfs_item_ptr(leaf, path->slots[0],
3767 3767 struct btrfs_file_extent_item);
3768 3768  
3769   - btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
  3769 + btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
3770 3770 &token);
3771 3771 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3772 3772 btrfs_set_token_file_extent_type(leaf, fi,