Commit 68ce9682a4bb95d6be5529cb57214bf2a1b7d20e

Authored by Stefan Behrens
Committed by Chris Mason
1 parent ae1e206b80

Btrfs: remove superblock writing after fatal error

With commit acce952b0, btrfs was changed to flag the filesystem with
BTRFS_SUPER_FLAG_ERROR and switch to read-only mode after a fatal
error, such as write I/O errors on all mirrors.
In such situations, on unmount, the superblock is written in
btrfs_error_commit_super(). This is done so that the error flag can be
evaluated on the next mount. In this case, a warning is printed
during the next mount and the log tree is then ignored.

The issue is that it is possible that the superblock points to a root
that was not written (due to write I/O errors).
The result is that the filesystem cannot be mounted. btrfsck also does
not start and all the other btrfs-progs tools fail to start as well.
However, mount -o recovery works well and does the right things
to recover the filesystem (i.e., it does not use the log root, clears
the free space cache and uses the next mountable root that is stored
in the root backup array).

This patch removes the writing of the superblock when
BTRFS_SUPER_FLAG_ERROR is set, and removes the handling of the error
flag in the mount function.

These lines can be used to reproduce the issue (using /dev/sdm):
SCRATCH_DEV=/dev/sdm
SCRATCH_MNT=/mnt
echo 0 25165824 linear $SCRATCH_DEV 0 | dmsetup create foo
ls -alLF /dev/mapper/foo
mkfs.btrfs /dev/mapper/foo
mount /dev/mapper/foo $SCRATCH_MNT
echo bar > $SCRATCH_MNT/foo
sync
echo 0 25165824 error | dmsetup reload foo
dmsetup resume foo
ls -alF $SCRATCH_MNT
touch $SCRATCH_MNT/1
ls -alF $SCRATCH_MNT
sleep 35
echo 0 25165824 linear $SCRATCH_DEV 0 | dmsetup reload foo
dmsetup resume foo
sleep 1
umount $SCRATCH_MNT
btrfsck /dev/mapper/foo
dmsetup remove foo

Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>

Showing 2 changed files with 5 additions and 33 deletions Side-by-side Diff

... ... @@ -2527,8 +2527,7 @@
2527 2527 goto fail_trans_kthread;
2528 2528  
2529 2529 /* do not make disk changes in broken FS */
2530   - if (btrfs_super_log_root(disk_super) != 0 &&
2531   - !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
  2530 + if (btrfs_super_log_root(disk_super) != 0) {
2532 2531 u64 bytenr = btrfs_super_log_root(disk_super);
2533 2532  
2534 2533 if (fs_devices->rw_devices == 0) {
2535 2534  
... ... @@ -3188,30 +3187,14 @@
3188 3187 /* clear out the rbtree of defraggable inodes */
3189 3188 btrfs_run_defrag_inodes(fs_info);
3190 3189  
3191   - /*
3192   - * Here come 2 situations when btrfs is broken to flip readonly:
3193   - *
3194   - * 1. when btrfs flips readonly somewhere else before
3195   - * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
3196   - * and btrfs will skip to write sb directly to keep
3197   - * ERROR state on disk.
3198   - *
3199   - * 2. when btrfs flips readonly just in btrfs_commit_super,
3200   - * and in such case, btrfs cannot write sb via btrfs_commit_super,
3201   - * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
3202   - * btrfs will cleanup all FS resources first and write sb then.
3203   - */
3204 3190 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3205 3191 ret = btrfs_commit_super(root);
3206 3192 if (ret)
3207 3193 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3208 3194 }
3209 3195  
3210   - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3211   - ret = btrfs_error_commit_super(root);
3212   - if (ret)
3213   - printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3214   - }
  3196 + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
  3197 + btrfs_error_commit_super(root);
3215 3198  
3216 3199 btrfs_put_block_group_cache(fs_info);
3217 3200  
3218 3201  
3219 3202  
... ... @@ -3433,18 +3416,11 @@
3433 3416 if (read_only)
3434 3417 return 0;
3435 3418  
3436   - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3437   - printk(KERN_WARNING "warning: mount fs with errors, "
3438   - "running btrfsck is recommended\n");
3439   - }
3440   -
3441 3419 return 0;
3442 3420 }
3443 3421  
3444   -int btrfs_error_commit_super(struct btrfs_root *root)
  3422 +void btrfs_error_commit_super(struct btrfs_root *root)
3445 3423 {
3446   - int ret;
3447   -
3448 3424 mutex_lock(&root->fs_info->cleaner_mutex);
3449 3425 btrfs_run_delayed_iputs(root);
3450 3426 mutex_unlock(&root->fs_info->cleaner_mutex);
... ... @@ -3454,10 +3430,6 @@
3454 3430  
3455 3431 /* cleanup FS via transaction */
3456 3432 btrfs_cleanup_transaction(root);
3457   -
3458   - ret = write_ctree_super(NULL, root, 0);
3459   -
3460   - return ret;
3461 3433 }
3462 3434  
3463 3435 static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
... ... @@ -54,7 +54,7 @@
54 54 struct btrfs_root *root, int max_mirrors);
55 55 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
56 56 int btrfs_commit_super(struct btrfs_root *root);
57   -int btrfs_error_commit_super(struct btrfs_root *root);
  57 +void btrfs_error_commit_super(struct btrfs_root *root);
58 58 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
59 59 u64 bytenr, u32 blocksize);
60 60 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,