Commit 569e0f358c0c37f6733702d4a5d2c412860f7169

Authored by Josef Bacik
1 parent dde5740fdd

Btrfs: place ordered operations on a per transaction list

Miao made the ordered operations stuff run async, which introduced a
deadlock where we could get somebody (sync) racing in and committing the
transaction while a commit was already happening.  The new committer would
try to flush ordered operations, which would hang waiting for the commit to
finish because the flush is done asynchronously and no longer inherits the
caller's trans handle.  To fix this we need to make the ordered operations list a
per-transaction list.  We can get new inodes added to the ordered operation list
by truncating them and then having another process write to them, so this
makes it so that anybody trying to add an ordered operation _must_ start a
transaction in order to add itself to the list, which will keep new inodes
from getting added to the ordered operations list after we start committing.
This should fix the deadlock and also keeps us from doing a lot more work
than we need to during commit.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>

Showing 7 changed files with 34 additions and 21 deletions Side-by-side Diff

... ... @@ -1408,13 +1408,6 @@
1408 1408 struct list_head delalloc_inodes;
1409 1409  
1410 1410 /*
1411   - * special rename and truncate targets that must be on disk before
1412   - * we're allowed to commit. This is basically the ext3 style
1413   - * data=ordered list.
1414   - */
1415   - struct list_head ordered_operations;
1416   -
1417   - /*
1418 1411 * there is a pool of worker threads for checksumming during writes
1419 1412 * and a pool for checksumming after reads. This is because readers
1420 1413 * can run with FS locks held, and the writers may be waiting for
... ... @@ -56,7 +56,8 @@
56 56 static void free_fs_root(struct btrfs_root *root);
57 57 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
58 58 int read_only);
59   -static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
  59 +static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
  60 + struct btrfs_root *root);
60 61 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
61 62 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
62 63 struct btrfs_root *root);
... ... @@ -2029,7 +2030,6 @@
2029 2030 INIT_LIST_HEAD(&fs_info->dead_roots);
2030 2031 INIT_LIST_HEAD(&fs_info->delayed_iputs);
2031 2032 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
2032   - INIT_LIST_HEAD(&fs_info->ordered_operations);
2033 2033 INIT_LIST_HEAD(&fs_info->caching_block_groups);
2034 2034 spin_lock_init(&fs_info->delalloc_lock);
2035 2035 spin_lock_init(&fs_info->trans_lock);
... ... @@ -3538,7 +3538,8 @@
3538 3538 btrfs_cleanup_transaction(root);
3539 3539 }
3540 3540  
3541   -static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
  3541 +static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
  3542 + struct btrfs_root *root)
3542 3543 {
3543 3544 struct btrfs_inode *btrfs_inode;
3544 3545 struct list_head splice;
... ... @@ -3548,7 +3549,7 @@
3548 3549 mutex_lock(&root->fs_info->ordered_operations_mutex);
3549 3550 spin_lock(&root->fs_info->ordered_extent_lock);
3550 3551  
3551   - list_splice_init(&root->fs_info->ordered_operations, &splice);
  3552 + list_splice_init(&t->ordered_operations, &splice);
3552 3553 while (!list_empty(&splice)) {
3553 3554 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3554 3555 ordered_operations);
... ... @@ -3829,7 +3830,7 @@
3829 3830 while (!list_empty(&list)) {
3830 3831 t = list_entry(list.next, struct btrfs_transaction, list);
3831 3832  
3832   - btrfs_destroy_ordered_operations(root);
  3833 + btrfs_destroy_ordered_operations(t, root);
3833 3834  
3834 3835 btrfs_destroy_ordered_extents(root);
3835 3836  
... ... @@ -1628,7 +1628,20 @@
1628 1628 */
1629 1629 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1630 1630 &BTRFS_I(inode)->runtime_flags)) {
1631   - btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
  1631 + struct btrfs_trans_handle *trans;
  1632 + struct btrfs_root *root = BTRFS_I(inode)->root;
  1633 +
  1634 + /*
  1635 + * We need to block on a committing transaction to keep us from
  1636 + * throwing an ordered operation onto the list and causing
  1637 + * something like sync to deadlock trying to flush out this
  1638 + * inode.
  1639 + */
  1640 + trans = btrfs_start_transaction(root, 0);
  1641 + if (IS_ERR(trans))
  1642 + return PTR_ERR(trans);
  1643 + btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
  1644 + btrfs_end_transaction(trans, root);
1632 1645 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1633 1646 filemap_flush(inode->i_mapping);
1634 1647 }
fs/btrfs/ordered-data.c
... ... @@ -612,10 +612,12 @@
612 612 * extra check to make sure the ordered operation list really is empty
613 613 * before we return
614 614 */
615   -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
  615 +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
  616 + struct btrfs_root *root, int wait)
616 617 {
617 618 struct btrfs_inode *btrfs_inode;
618 619 struct inode *inode;
  620 + struct btrfs_transaction *cur_trans = trans->transaction;
619 621 struct list_head splice;
620 622 struct list_head works;
621 623 struct btrfs_delalloc_work *work, *next;
... ... @@ -626,7 +628,7 @@
626 628  
627 629 mutex_lock(&root->fs_info->ordered_operations_mutex);
628 630 spin_lock(&root->fs_info->ordered_extent_lock);
629   - list_splice_init(&root->fs_info->ordered_operations, &splice);
  631 + list_splice_init(&cur_trans->ordered_operations, &splice);
630 632 while (!list_empty(&splice)) {
631 633 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
632 634 ordered_operations);
... ... @@ -643,7 +645,7 @@
643 645  
644 646 if (!wait)
645 647 list_add_tail(&BTRFS_I(inode)->ordered_operations,
646   - &root->fs_info->ordered_operations);
  648 + &cur_trans->ordered_operations);
647 649 spin_unlock(&root->fs_info->ordered_extent_lock);
648 650  
649 651 work = btrfs_alloc_delalloc_work(inode, wait, 1);
... ... @@ -653,7 +655,7 @@
653 655 list_add_tail(&btrfs_inode->ordered_operations,
654 656 &splice);
655 657 list_splice_tail(&splice,
656   - &root->fs_info->ordered_operations);
  658 + &cur_trans->ordered_operations);
657 659 spin_unlock(&root->fs_info->ordered_extent_lock);
658 660 ret = -ENOMEM;
659 661 goto out;
... ... @@ -1033,6 +1035,7 @@
1033 1035 void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1034 1036 struct btrfs_root *root, struct inode *inode)
1035 1037 {
  1038 + struct btrfs_transaction *cur_trans = trans->transaction;
1036 1039 u64 last_mod;
1037 1040  
1038 1041 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
... ... @@ -1047,7 +1050,7 @@
1047 1050 spin_lock(&root->fs_info->ordered_extent_lock);
1048 1051 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
1049 1052 list_add_tail(&BTRFS_I(inode)->ordered_operations,
1050   - &root->fs_info->ordered_operations);
  1053 + &cur_trans->ordered_operations);
1051 1054 }
1052 1055 spin_unlock(&root->fs_info->ordered_extent_lock);
1053 1056 }
fs/btrfs/ordered-data.h
... ... @@ -197,7 +197,8 @@
197 197 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
198 198 struct btrfs_ordered_extent *ordered);
199 199 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
200   -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
  200 +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
  201 + struct btrfs_root *root, int wait);
201 202 void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
202 203 struct btrfs_root *root,
203 204 struct inode *inode);
fs/btrfs/transaction.c
... ... @@ -157,6 +157,7 @@
157 157 spin_lock_init(&cur_trans->delayed_refs.lock);
158 158  
159 159 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
  160 + INIT_LIST_HEAD(&cur_trans->ordered_operations);
160 161 list_add_tail(&cur_trans->list, &fs_info->trans_list);
161 162 extent_io_tree_init(&cur_trans->dirty_pages,
162 163 fs_info->btree_inode->i_mapping);
... ... @@ -1456,7 +1457,7 @@
1456 1457 * it here and know for sure that nothing new will be added
1457 1458 * to the list
1458 1459 */
1459   - ret = btrfs_run_ordered_operations(root, 1);
  1460 + ret = btrfs_run_ordered_operations(trans, root, 1);
1460 1461  
1461 1462 return ret;
1462 1463 }
... ... @@ -1479,7 +1480,7 @@
1479 1480 int should_grow = 0;
1480 1481 unsigned long now = get_seconds();
1481 1482  
1482   - ret = btrfs_run_ordered_operations(root, 0);
  1483 + ret = btrfs_run_ordered_operations(trans, root, 0);
1483 1484 if (ret) {
1484 1485 btrfs_abort_transaction(trans, root, ret);
1485 1486 btrfs_end_transaction(trans, root);
fs/btrfs/transaction.h
... ... @@ -43,6 +43,7 @@
43 43 wait_queue_head_t writer_wait;
44 44 wait_queue_head_t commit_wait;
45 45 struct list_head pending_snapshots;
  46 + struct list_head ordered_operations;
46 47 struct btrfs_delayed_ref_root delayed_refs;
47 48 int aborted;
48 49 };