Commit 50d9aa99bd35c77200e0e3dd7a72274f8304701f
Btrfs: make sure logged extents complete in the current transaction V3
Liu Bo pointed out that my previous fix would lose the generation update in the scenario I described. It is actually much worse than that: we could lose the entire extent if we lose power right after the transaction commits. Consider the following sequence:

    write extent 0-4k
    log extent in log tree
    commit transaction
        < power fail happens here
    ordered extent completes

We would lose the 0-4k extent because it hasn't updated the actual fs tree, and the transaction commit will reset the log so it isn't replayed. If we lose power before the transaction commit we are safe, otherwise we are not.

Fix this by keeping track of all extents we logged in this transaction. Then when we go to commit the transaction make sure we wait for all of those ordered extents to complete before proceeding. This will make sure that if we lose power after the transaction commit we still have our data. This also fixes the problem of the improperly updated extent generation. Thanks,

cc: stable@vger.kernel.org
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Showing 6 changed files with 72 additions and 6 deletions Side-by-side Diff
... | ... | @@ -4129,6 +4129,25 @@ |
4129 | 4129 | return 0; |
4130 | 4130 | } |
4131 | 4131 | |
4132 | +static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans, | |
4133 | + struct btrfs_fs_info *fs_info) | |
4134 | +{ | |
4135 | + struct btrfs_ordered_extent *ordered; | |
4136 | + | |
4137 | + spin_lock(&fs_info->trans_lock); | |
4138 | + while (!list_empty(&cur_trans->pending_ordered)) { | |
4139 | + ordered = list_first_entry(&cur_trans->pending_ordered, | |
4140 | + struct btrfs_ordered_extent, | |
4141 | + trans_list); | |
4142 | + list_del_init(&ordered->trans_list); | |
4143 | + spin_unlock(&fs_info->trans_lock); | |
4144 | + | |
4145 | + btrfs_put_ordered_extent(ordered); | |
4146 | + spin_lock(&fs_info->trans_lock); | |
4147 | + } | |
4148 | + spin_unlock(&fs_info->trans_lock); | |
4149 | +} | |
4150 | + | |
4132 | 4151 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, |
4133 | 4152 | struct btrfs_root *root) |
4134 | 4153 | { |
... | ... | @@ -4140,6 +4159,7 @@ |
4140 | 4159 | cur_trans->state = TRANS_STATE_UNBLOCKED; |
4141 | 4160 | wake_up(&root->fs_info->transaction_wait); |
4142 | 4161 | |
4162 | + btrfs_free_pending_ordered(cur_trans, root->fs_info); | |
4143 | 4163 | btrfs_destroy_delayed_inodes(root); |
4144 | 4164 | btrfs_assert_delayed_root_empty(root); |
4145 | 4165 |
... | ... | @@ -220,6 +220,7 @@ |
220 | 220 | INIT_LIST_HEAD(&entry->work_list); |
221 | 221 | init_completion(&entry->completion); |
222 | 222 | INIT_LIST_HEAD(&entry->log_list); |
223 | + INIT_LIST_HEAD(&entry->trans_list); | |
223 | 224 | |
224 | 225 | trace_btrfs_ordered_extent_add(inode, entry); |
225 | 226 | |
... | ... | @@ -443,6 +444,8 @@ |
443 | 444 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
444 | 445 | if (!list_empty(&ordered->log_list)) |
445 | 446 | continue; |
447 | + if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) | |
448 | + continue; | |
446 | 449 | list_add_tail(&ordered->log_list, logged_list); |
447 | 450 | atomic_inc(&ordered->refs); |
448 | 451 | } |
... | ... | @@ -472,7 +475,8 @@ |
472 | 475 | spin_unlock_irq(&log->log_extents_lock[index]); |
473 | 476 | } |
474 | 477 | |
475 | -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | |
478 | +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, | |
479 | + struct btrfs_root *log, u64 transid) | |
476 | 480 | { |
477 | 481 | struct btrfs_ordered_extent *ordered; |
478 | 482 | int index = transid % 2; |
... | ... | @@ -497,7 +501,8 @@ |
497 | 501 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, |
498 | 502 | &ordered->flags)); |
499 | 503 | |
500 | - btrfs_put_ordered_extent(ordered); | |
504 | + if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) | |
505 | + list_add_tail(&ordered->trans_list, &trans->ordered); | |
501 | 506 | spin_lock_irq(&log->log_extents_lock[index]); |
502 | 507 | } |
503 | 508 | spin_unlock_irq(&log->log_extents_lock[index]); |
... | ... | @@ -71,6 +71,8 @@ |
71 | 71 | ordered extent */ |
72 | 72 | #define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ |
73 | 73 | |
74 | +#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent | |
75 | + * in the logging code. */ | |
74 | 76 | struct btrfs_ordered_extent { |
75 | 77 | /* logical offset in the file */ |
76 | 78 | u64 file_offset; |
... | ... | @@ -121,6 +123,9 @@ |
121 | 123 | /* If we need to wait on this to be done */ |
122 | 124 | struct list_head log_list; |
123 | 125 | |
126 | + /* If the transaction needs to wait on this ordered extent */ | |
127 | + struct list_head trans_list; | |
128 | + | |
124 | 129 | /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ |
125 | 130 | wait_queue_head_t wait; |
126 | 131 | |
... | ... | @@ -197,7 +202,8 @@ |
197 | 202 | void btrfs_put_logged_extents(struct list_head *logged_list); |
198 | 203 | void btrfs_submit_logged_extents(struct list_head *logged_list, |
199 | 204 | struct btrfs_root *log); |
200 | -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | |
205 | +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, | |
206 | + struct btrfs_root *log, u64 transid); | |
201 | 207 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
202 | 208 | int __init ordered_data_init(void); |
203 | 209 | void ordered_data_exit(void); |
... | ... | @@ -247,6 +247,7 @@ |
247 | 247 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
248 | 248 | INIT_LIST_HEAD(&cur_trans->pending_chunks); |
249 | 249 | INIT_LIST_HEAD(&cur_trans->switch_commits); |
250 | + INIT_LIST_HEAD(&cur_trans->pending_ordered); | |
250 | 251 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
251 | 252 | extent_io_tree_init(&cur_trans->dirty_pages, |
252 | 253 | fs_info->btree_inode->i_mapping); |
... | ... | @@ -515,6 +516,7 @@ |
515 | 516 | h->sync = false; |
516 | 517 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
517 | 518 | INIT_LIST_HEAD(&h->new_bgs); |
519 | + INIT_LIST_HEAD(&h->ordered); | |
518 | 520 | |
519 | 521 | smp_mb(); |
520 | 522 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
... | ... | @@ -746,6 +748,12 @@ |
746 | 748 | if (!list_empty(&trans->new_bgs)) |
747 | 749 | btrfs_create_pending_block_groups(trans, root); |
748 | 750 | |
751 | + if (!list_empty(&trans->ordered)) { | |
752 | + spin_lock(&info->trans_lock); | |
753 | + list_splice(&trans->ordered, &cur_trans->pending_ordered); | |
754 | + spin_unlock(&info->trans_lock); | |
755 | + } | |
756 | + | |
749 | 757 | trans->delayed_ref_updates = 0; |
750 | 758 | if (!trans->sync) { |
751 | 759 | must_run_delayed_refs = |
... | ... | @@ -1715,6 +1723,28 @@ |
1715 | 1723 | btrfs_wait_ordered_roots(fs_info, -1); |
1716 | 1724 | } |
1717 | 1725 | |
1726 | +static inline void | |
1727 | +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, | |
1728 | + struct btrfs_fs_info *fs_info) | |
1729 | +{ | |
1730 | + struct btrfs_ordered_extent *ordered; | |
1731 | + | |
1732 | + spin_lock(&fs_info->trans_lock); | |
1733 | + while (!list_empty(&cur_trans->pending_ordered)) { | |
1734 | + ordered = list_first_entry(&cur_trans->pending_ordered, | |
1735 | + struct btrfs_ordered_extent, | |
1736 | + trans_list); | |
1737 | + list_del_init(&ordered->trans_list); | |
1738 | + spin_unlock(&fs_info->trans_lock); | |
1739 | + | |
1740 | + wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, | |
1741 | + &ordered->flags)); | |
1742 | + btrfs_put_ordered_extent(ordered); | |
1743 | + spin_lock(&fs_info->trans_lock); | |
1744 | + } | |
1745 | + spin_unlock(&fs_info->trans_lock); | |
1746 | +} | |
1747 | + | |
1718 | 1748 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
1719 | 1749 | struct btrfs_root *root) |
1720 | 1750 | { |
... | ... | @@ -1765,6 +1795,7 @@ |
1765 | 1795 | } |
1766 | 1796 | |
1767 | 1797 | spin_lock(&root->fs_info->trans_lock); |
1798 | + list_splice(&trans->ordered, &cur_trans->pending_ordered); | |
1768 | 1799 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
1769 | 1800 | spin_unlock(&root->fs_info->trans_lock); |
1770 | 1801 | atomic_inc(&cur_trans->use_count); |
... | ... | @@ -1816,6 +1847,8 @@ |
1816 | 1847 | goto cleanup_transaction; |
1817 | 1848 | |
1818 | 1849 | btrfs_wait_delalloc_flush(root->fs_info); |
1850 | + | |
1851 | + btrfs_wait_pending_ordered(cur_trans, root->fs_info); | |
1819 | 1852 | |
1820 | 1853 | btrfs_scrub_pause(root); |
1821 | 1854 | /* |
... | ... | @@ -56,6 +56,7 @@ |
56 | 56 | wait_queue_head_t commit_wait; |
57 | 57 | struct list_head pending_snapshots; |
58 | 58 | struct list_head pending_chunks; |
59 | + struct list_head pending_ordered; | |
59 | 60 | struct list_head switch_commits; |
60 | 61 | struct btrfs_delayed_ref_root delayed_refs; |
61 | 62 | int aborted; |
... | ... | @@ -105,6 +106,7 @@ |
105 | 106 | */ |
106 | 107 | struct btrfs_root *root; |
107 | 108 | struct seq_list delayed_ref_elem; |
109 | + struct list_head ordered; | |
108 | 110 | struct list_head qgroup_ref_list; |
109 | 111 | struct list_head new_bgs; |
110 | 112 | }; |
... | ... | @@ -2600,7 +2600,7 @@ |
2600 | 2600 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2601 | 2601 | blk_finish_plug(&plug); |
2602 | 2602 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2603 | - btrfs_wait_logged_extents(log, log_transid); | |
2603 | + btrfs_wait_logged_extents(trans, log, log_transid); | |
2604 | 2604 | wait_log_commit(trans, log_root_tree, |
2605 | 2605 | root_log_ctx.log_transid); |
2606 | 2606 | mutex_unlock(&log_root_tree->log_mutex); |
... | ... | @@ -2645,7 +2645,7 @@ |
2645 | 2645 | btrfs_wait_marked_extents(log_root_tree, |
2646 | 2646 | &log_root_tree->dirty_log_pages, |
2647 | 2647 | EXTENT_NEW | EXTENT_DIRTY); |
2648 | - btrfs_wait_logged_extents(log, log_transid); | |
2648 | + btrfs_wait_logged_extents(trans, log, log_transid); | |
2649 | 2649 | |
2650 | 2650 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
2651 | 2651 | log_root_tree->node->start); |
... | ... | @@ -3766,7 +3766,7 @@ |
3766 | 3766 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3767 | 3767 | struct btrfs_file_extent_item); |
3768 | 3768 | |
3769 | - btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | |
3769 | + btrfs_set_token_file_extent_generation(leaf, fi, trans->transid, | |
3770 | 3770 | &token); |
3771 | 3771 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
3772 | 3772 | btrfs_set_token_file_extent_type(leaf, fi, |
-
mentioned in commit 4d884f
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit 98f7bf
-
mentioned in commit 4d884f
-
mentioned in commit d3efe0
-
mentioned in commit 4d884f
-
mentioned in commit d3efe0
-
mentioned in commit 4d884f
-
mentioned in commit d3efe0
-
mentioned in commit 4d884f
-
mentioned in commit 4d884f
-
mentioned in commit d3efe0
-
mentioned in commit d3efe0