Commit 569e0f358c0c37f6733702d4a5d2c412860f7169
1 parent
dde5740fdd
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
Btrfs: place ordered operations on a per transaction list
Miao made the ordered operations code run asynchronously, which introduced a deadlock: somebody (e.g. sync) could race in and try to commit the transaction while a commit was already happening. The new committer would try to flush ordered operations, which would hang waiting for the commit to finish, because the flush is done asynchronously and no longer inherits the caller's trans handle. To fix this we need to make the ordered operations list a per-transaction list. New inodes can get added to the ordered operations list by truncating them and then having another process write to them, so this makes it so that anybody trying to add an ordered operation _must_ start a transaction in order to add itself to the list, which will keep new inodes from getting added to the ordered operations list after we start committing. This should fix the deadlock and also keeps us from doing a lot more work than we need to during commit. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Showing 7 changed files with 34 additions and 21 deletions Side-by-side Diff
fs/btrfs/ctree.h
... | ... | @@ -1408,13 +1408,6 @@ |
1408 | 1408 | struct list_head delalloc_inodes; |
1409 | 1409 | |
1410 | 1410 | /* |
1411 | - * special rename and truncate targets that must be on disk before | |
1412 | - * we're allowed to commit. This is basically the ext3 style | |
1413 | - * data=ordered list. | |
1414 | - */ | |
1415 | - struct list_head ordered_operations; | |
1416 | - | |
1417 | - /* | |
1418 | 1411 | * there is a pool of worker threads for checksumming during writes |
1419 | 1412 | * and a pool for checksumming after reads. This is because readers |
1420 | 1413 | * can run with FS locks held, and the writers may be waiting for |
fs/btrfs/disk-io.c
... | ... | @@ -56,7 +56,8 @@ |
56 | 56 | static void free_fs_root(struct btrfs_root *root); |
57 | 57 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
58 | 58 | int read_only); |
59 | -static void btrfs_destroy_ordered_operations(struct btrfs_root *root); | |
59 | +static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, | |
60 | + struct btrfs_root *root); | |
60 | 61 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root); |
61 | 62 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
62 | 63 | struct btrfs_root *root); |
... | ... | @@ -2029,7 +2030,6 @@ |
2029 | 2030 | INIT_LIST_HEAD(&fs_info->dead_roots); |
2030 | 2031 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
2031 | 2032 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
2032 | - INIT_LIST_HEAD(&fs_info->ordered_operations); | |
2033 | 2033 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
2034 | 2034 | spin_lock_init(&fs_info->delalloc_lock); |
2035 | 2035 | spin_lock_init(&fs_info->trans_lock); |
... | ... | @@ -3538,7 +3538,8 @@ |
3538 | 3538 | btrfs_cleanup_transaction(root); |
3539 | 3539 | } |
3540 | 3540 | |
3541 | -static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | |
3541 | +static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, | |
3542 | + struct btrfs_root *root) | |
3542 | 3543 | { |
3543 | 3544 | struct btrfs_inode *btrfs_inode; |
3544 | 3545 | struct list_head splice; |
... | ... | @@ -3548,7 +3549,7 @@ |
3548 | 3549 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
3549 | 3550 | spin_lock(&root->fs_info->ordered_extent_lock); |
3550 | 3551 | |
3551 | - list_splice_init(&root->fs_info->ordered_operations, &splice); | |
3552 | + list_splice_init(&t->ordered_operations, &splice); | |
3552 | 3553 | while (!list_empty(&splice)) { |
3553 | 3554 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
3554 | 3555 | ordered_operations); |
... | ... | @@ -3829,7 +3830,7 @@ |
3829 | 3830 | while (!list_empty(&list)) { |
3830 | 3831 | t = list_entry(list.next, struct btrfs_transaction, list); |
3831 | 3832 | |
3832 | - btrfs_destroy_ordered_operations(root); | |
3833 | + btrfs_destroy_ordered_operations(t, root); | |
3833 | 3834 | |
3834 | 3835 | btrfs_destroy_ordered_extents(root); |
3835 | 3836 |
fs/btrfs/file.c
... | ... | @@ -1628,7 +1628,20 @@ |
1628 | 1628 | */ |
1629 | 1629 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
1630 | 1630 | &BTRFS_I(inode)->runtime_flags)) { |
1631 | - btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | |
1631 | + struct btrfs_trans_handle *trans; | |
1632 | + struct btrfs_root *root = BTRFS_I(inode)->root; | |
1633 | + | |
1634 | + /* | |
1635 | + * We need to block on a committing transaction to keep us from | |
1636 | + * throwing an ordered operation on to the list and causing | |
1637 | + * something like sync to deadlock trying to flush out this | |
1638 | + * inode. | |
1639 | + */ | |
1640 | + trans = btrfs_start_transaction(root, 0); | |
1641 | + if (IS_ERR(trans)) | |
1642 | + return PTR_ERR(trans); | |
1643 | + btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode); | |
1644 | + btrfs_end_transaction(trans, root); | |
1632 | 1645 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
1633 | 1646 | filemap_flush(inode->i_mapping); |
1634 | 1647 | } |
fs/btrfs/ordered-data.c
... | ... | @@ -612,10 +612,12 @@ |
612 | 612 | * extra check to make sure the ordered operation list really is empty |
613 | 613 | * before we return |
614 | 614 | */ |
615 | -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | |
615 | +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |
616 | + struct btrfs_root *root, int wait) | |
616 | 617 | { |
617 | 618 | struct btrfs_inode *btrfs_inode; |
618 | 619 | struct inode *inode; |
620 | + struct btrfs_transaction *cur_trans = trans->transaction; | |
619 | 621 | struct list_head splice; |
620 | 622 | struct list_head works; |
621 | 623 | struct btrfs_delalloc_work *work, *next; |
... | ... | @@ -626,7 +628,7 @@ |
626 | 628 | |
627 | 629 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
628 | 630 | spin_lock(&root->fs_info->ordered_extent_lock); |
629 | - list_splice_init(&root->fs_info->ordered_operations, &splice); | |
631 | + list_splice_init(&cur_trans->ordered_operations, &splice); | |
630 | 632 | while (!list_empty(&splice)) { |
631 | 633 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
632 | 634 | ordered_operations); |
... | ... | @@ -643,7 +645,7 @@ |
643 | 645 | |
644 | 646 | if (!wait) |
645 | 647 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
646 | - &root->fs_info->ordered_operations); | |
648 | + &cur_trans->ordered_operations); | |
647 | 649 | spin_unlock(&root->fs_info->ordered_extent_lock); |
648 | 650 | |
649 | 651 | work = btrfs_alloc_delalloc_work(inode, wait, 1); |
... | ... | @@ -653,7 +655,7 @@ |
653 | 655 | list_add_tail(&btrfs_inode->ordered_operations, |
654 | 656 | &splice); |
655 | 657 | list_splice_tail(&splice, |
656 | - &root->fs_info->ordered_operations); | |
658 | + &cur_trans->ordered_operations); | |
657 | 659 | spin_unlock(&root->fs_info->ordered_extent_lock); |
658 | 660 | ret = -ENOMEM; |
659 | 661 | goto out; |
... | ... | @@ -1033,6 +1035,7 @@ |
1033 | 1035 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
1034 | 1036 | struct btrfs_root *root, struct inode *inode) |
1035 | 1037 | { |
1038 | + struct btrfs_transaction *cur_trans = trans->transaction; | |
1036 | 1039 | u64 last_mod; |
1037 | 1040 | |
1038 | 1041 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); |
... | ... | @@ -1047,7 +1050,7 @@ |
1047 | 1050 | spin_lock(&root->fs_info->ordered_extent_lock); |
1048 | 1051 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { |
1049 | 1052 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
1050 | - &root->fs_info->ordered_operations); | |
1053 | + &cur_trans->ordered_operations); | |
1051 | 1054 | } |
1052 | 1055 | spin_unlock(&root->fs_info->ordered_extent_lock); |
1053 | 1056 | } |
fs/btrfs/ordered-data.h
... | ... | @@ -197,7 +197,8 @@ |
197 | 197 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
198 | 198 | struct btrfs_ordered_extent *ordered); |
199 | 199 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
200 | -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | |
200 | +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |
201 | + struct btrfs_root *root, int wait); | |
201 | 202 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
202 | 203 | struct btrfs_root *root, |
203 | 204 | struct inode *inode); |
fs/btrfs/transaction.c
... | ... | @@ -157,6 +157,7 @@ |
157 | 157 | spin_lock_init(&cur_trans->delayed_refs.lock); |
158 | 158 | |
159 | 159 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
160 | + INIT_LIST_HEAD(&cur_trans->ordered_operations); | |
160 | 161 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
161 | 162 | extent_io_tree_init(&cur_trans->dirty_pages, |
162 | 163 | fs_info->btree_inode->i_mapping); |
... | ... | @@ -1456,7 +1457,7 @@ |
1456 | 1457 | * it here and know for sure that nothing new will be added |
1457 | 1458 | * to the list |
1458 | 1459 | */ |
1459 | - ret = btrfs_run_ordered_operations(root, 1); | |
1460 | + ret = btrfs_run_ordered_operations(trans, root, 1); | |
1460 | 1461 | |
1461 | 1462 | return ret; |
1462 | 1463 | } |
... | ... | @@ -1479,7 +1480,7 @@ |
1479 | 1480 | int should_grow = 0; |
1480 | 1481 | unsigned long now = get_seconds(); |
1481 | 1482 | |
1482 | - ret = btrfs_run_ordered_operations(root, 0); | |
1483 | + ret = btrfs_run_ordered_operations(trans, root, 0); | |
1483 | 1484 | if (ret) { |
1484 | 1485 | btrfs_abort_transaction(trans, root, ret); |
1485 | 1486 | btrfs_end_transaction(trans, root); |
fs/btrfs/transaction.h