Commit ba1bf4818baf68d914ef9e3b06fbea6acb674fe4

Authored by Josef Bacik
Committed by Chris Mason
1 parent 1fb58a6051

Btrfs: make balance code choose more wisely when relocating

Currently, we can panic the box if the first block group we go to move is of a
type where there is no space left to move those extents.  For example, if we
fill the disk up with data, and then we try to balance and we have no room to
move the data nor room to allocate new chunks, we will panic.  Change this by
checking to see if we have room to move this chunk around, and if not, return
-ENOSPC and move on to the next chunk.  This will make sure we remove block
groups that are movable, for example when we have a lot of empty metadata block
groups, and that way we make room to be able to balance our data chunks as well.
Tested this with an fs that would panic on btrfs-vol -b normally, but no longer
panics with this patch.

V1->V2:
-actually search for a free extent on the device to make sure we can allocate a
chunk if need be.

-fix btrfs_shrink_device to make sure we actually try to relocate all the
chunks and, if we can't, return -ENOSPC, so that when we are doing a
btrfs-vol -r we don't remove the device with data still on it.

-check to make sure the block group we are going to relocate isn't the last one
in that particular space

-fix a bug in btrfs_shrink_device where we would change the device's size and
not restore it if the relocation fails

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

Showing 4 changed files with 148 additions and 18 deletions Side-by-side Diff

... ... @@ -2006,6 +2006,7 @@
2006 2006 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
2007 2007 int btrfs_free_block_groups(struct btrfs_fs_info *info);
2008 2008 int btrfs_read_block_groups(struct btrfs_root *root);
  2009 +int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
2009 2010 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2010 2011 struct btrfs_root *root, u64 bytes_used,
2011 2012 u64 type, u64 chunk_objectid, u64 chunk_offset,
fs/btrfs/extent-tree.c
... ... @@ -7402,6 +7402,93 @@
7402 7402 }
7403 7403 #endif
7404 7404  
  7405 +/*
  7406 + * Checks whether it is even possible to relocate the block group at @bytenr.
  7407 + *
  7408 + * @return - -1 if it's not a good idea to relocate this block group (lookup
  7409 + * failed, last group in its space, or no room found), 0 if it's ok to try.
  7410 + */
  7411 +int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
  7412 +{
  7413 + struct btrfs_block_group_cache *block_group;
  7414 + struct btrfs_space_info *space_info;
  7415 + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
  7416 + struct btrfs_device *device;
  7417 + int full = 0;
  7418 + int ret = 0;
  7419 +
  7420 + block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
  7421 +
  7422 + /* odd, couldn't find the block group, leave it alone */
  7423 + if (!block_group)
  7424 + return -1;
  7425 +
  7426 + /* no bytes used, so nothing has to be moved, we're good */
  7427 + if (!btrfs_block_group_used(&block_group->item))
  7428 + goto out;
  7429 +
  7430 + space_info = block_group->space_info;
  7431 + spin_lock(&space_info->lock);
  7432 +
  7433 + full = space_info->full;
  7434 +
  7435 + /*
  7436 + * if this is the last block group we have in this space, we can't
  7437 + * relocate it.
  7438 + */
  7439 + if (space_info->total_bytes == block_group->key.offset) {
  7440 + ret = -1;
  7441 + spin_unlock(&space_info->lock);
  7442 + goto out;
  7443 + }
  7444 +
  7445 + /*
  7446 + * need to make sure we have room in the space to handle all of the
  7447 + * extents from this block group. If we do, we're good
  7448 + */
  7449 + if (space_info->bytes_used + space_info->bytes_reserved +
  7450 + space_info->bytes_pinned + space_info->bytes_readonly +
  7451 + btrfs_block_group_used(&block_group->item) <
  7452 + space_info->total_bytes) {
  7453 + spin_unlock(&space_info->lock);
  7454 + goto out;
  7455 + }
  7456 + spin_unlock(&space_info->lock);
  7457 +
  7458 + /*
  7459 + * ok we don't have enough space, but maybe we have free space on our
  7460 + * devices to allocate new chunks for relocation, so loop through our
  7461 + * alloc devices and guess if we have enough space. However, if the
  7462 + * space was marked as full, then we know there aren't enough chunks,
  7463 + * and we can just return.
  7464 + */
  7465 + ret = -1;
  7466 + if (full)
  7467 + goto out;
  7468 +
  7469 + mutex_lock(&root->fs_info->chunk_mutex);
  7470 + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
  7471 + u64 min_free = btrfs_block_group_used(&block_group->item);
  7472 + u64 dev_offset, max_avail;
  7473 +
  7474 + /*
  7475 + * check to make sure we can actually find a free device extent
  7476 + * big enough to hold the used bytes of our block group.
  7477 + */
  7478 + if (device->total_bytes > device->bytes_used + min_free) {
  7479 + ret = find_free_dev_extent(NULL, device, min_free,
  7480 + &dev_offset, &max_avail);
  7481 + if (!ret)
  7482 + break;
  7483 + ret = -1;
  7484 + }
  7485 + }
  7486 + mutex_unlock(&root->fs_info->chunk_mutex);
  7487 +out:
  7488 + btrfs_put_block_group(block_group);
  7489 + return ret;
  7490 +}
  7491 +
7405 7492 static int find_first_block_group(struct btrfs_root *root,
7406 7493 struct btrfs_path *path, struct btrfs_key *key)
7407 7494 {
... ... @@ -719,10 +719,9 @@
719 719 * called very infrequently and that a given device has a small number
720 720 * of extents
721 721 */
722   -static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
723   - struct btrfs_device *device,
724   - u64 num_bytes, u64 *start,
725   - u64 *max_avail)
  722 +int find_free_dev_extent(struct btrfs_trans_handle *trans,
  723 + struct btrfs_device *device, u64 num_bytes,
  724 + u64 *start, u64 *max_avail)
726 725 {
727 726 struct btrfs_key key;
728 727 struct btrfs_root *root = device->dev_root;
... ... @@ -1736,6 +1735,10 @@
1736 1735 extent_root = root->fs_info->extent_root;
1737 1736 em_tree = &root->fs_info->mapping_tree.map_tree;
1738 1737  
  1738 + ret = btrfs_can_relocate(extent_root, chunk_offset);
  1739 + if (ret)
  1740 + return -ENOSPC;
  1741 +
1739 1742 /* step one, relocate all the extents inside this chunk */
1740 1743 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
1741 1744 BUG_ON(ret);
1742 1745  
... ... @@ -1807,12 +1810,15 @@
1807 1810 struct btrfs_key found_key;
1808 1811 u64 chunk_tree = chunk_root->root_key.objectid;
1809 1812 u64 chunk_type;
  1813 + bool retried = false;
  1814 + int failed = 0;
1810 1815 int ret;
1811 1816  
1812 1817 path = btrfs_alloc_path();
1813 1818 if (!path)
1814 1819 return -ENOMEM;
1815 1820  
  1821 +again:
1816 1822 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1817 1823 key.offset = (u64)-1;
1818 1824 key.type = BTRFS_CHUNK_ITEM_KEY;
... ... @@ -1842,7 +1848,10 @@
1842 1848 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1843 1849 found_key.objectid,
1844 1850 found_key.offset);
1845   - BUG_ON(ret);
  1851 + if (ret == -ENOSPC)
  1852 + failed++;
  1853 + else if (ret)
  1854 + BUG();
1846 1855 }
1847 1856  
1848 1857 if (found_key.offset == 0)
... ... @@ -1850,6 +1859,14 @@
1850 1859 key.offset = found_key.offset - 1;
1851 1860 }
1852 1861 ret = 0;
  1862 + if (failed && !retried) {
  1863 + failed = 0;
  1864 + retried = true;
  1865 + goto again;
  1866 + } else if (failed && retried) {
  1867 + WARN_ON(1);
  1868 + ret = -ENOSPC;
  1869 + }
1853 1870 error:
1854 1871 btrfs_free_path(path);
1855 1872 return ret;
... ... @@ -1894,6 +1911,8 @@
1894 1911 continue;
1895 1912  
1896 1913 ret = btrfs_shrink_device(device, old_size - size_to_free);
  1914 + if (ret == -ENOSPC)
  1915 + break;
1897 1916 BUG_ON(ret);
1898 1917  
1899 1918 trans = btrfs_start_transaction(dev_root, 1);
1900 1919  
... ... @@ -1938,9 +1957,8 @@
1938 1957 chunk = btrfs_item_ptr(path->nodes[0],
1939 1958 path->slots[0],
1940 1959 struct btrfs_chunk);
1941   - key.offset = found_key.offset;
1942 1960 /* chunk zero is special */
1943   - if (key.offset == 0)
  1961 + if (found_key.offset == 0)
1944 1962 break;
1945 1963  
1946 1964 btrfs_release_path(chunk_root, path);
... ... @@ -1948,7 +1966,8 @@
1948 1966 chunk_root->root_key.objectid,
1949 1967 found_key.objectid,
1950 1968 found_key.offset);
1951   - BUG_ON(ret);
  1969 + BUG_ON(ret && ret != -ENOSPC);
  1970 + key.offset = found_key.offset - 1;
1952 1971 }
1953 1972 ret = 0;
1954 1973 error:
1955 1974  
... ... @@ -1974,10 +1993,13 @@
1974 1993 u64 chunk_offset;
1975 1994 int ret;
1976 1995 int slot;
  1996 + int failed = 0;
  1997 + bool retried = false;
1977 1998 struct extent_buffer *l;
1978 1999 struct btrfs_key key;
1979 2000 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1980 2001 u64 old_total = btrfs_super_total_bytes(super_copy);
  2002 + u64 old_size = device->total_bytes;
1981 2003 u64 diff = device->total_bytes - new_size;
1982 2004  
1983 2005 if (new_size >= device->total_bytes)
... ... @@ -1987,12 +2009,6 @@
1987 2009 if (!path)
1988 2010 return -ENOMEM;
1989 2011  
1990   - trans = btrfs_start_transaction(root, 1);
1991   - if (!trans) {
1992   - ret = -ENOMEM;
1993   - goto done;
1994   - }
1995   -
1996 2012 path->reada = 2;
1997 2013  
1998 2014 lock_chunks(root);
1999 2015  
... ... @@ -2001,8 +2017,8 @@
2001 2017 if (device->writeable)
2002 2018 device->fs_devices->total_rw_bytes -= diff;
2003 2019 unlock_chunks(root);
2004   - btrfs_end_transaction(trans, root);
2005 2020  
  2021 +again:
2006 2022 key.objectid = device->devid;
2007 2023 key.offset = (u64)-1;
2008 2024 key.type = BTRFS_DEV_EXTENT_KEY;
... ... @@ -2017,6 +2033,7 @@
2017 2033 goto done;
2018 2034 if (ret) {
2019 2035 ret = 0;
  2036 + btrfs_release_path(root, path);
2020 2037 break;
2021 2038 }
2022 2039  
2023 2040  
2024 2041  
2025 2042  
... ... @@ -2024,14 +2041,18 @@
2024 2041 slot = path->slots[0];
2025 2042 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2026 2043  
2027   - if (key.objectid != device->devid)
  2044 + if (key.objectid != device->devid) {
  2045 + btrfs_release_path(root, path);
2028 2046 break;
  2047 + }
2029 2048  
2030 2049 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2031 2050 length = btrfs_dev_extent_length(l, dev_extent);
2032 2051  
2033   - if (key.offset + length <= new_size)
  2052 + if (key.offset + length <= new_size) {
  2053 + btrfs_release_path(root, path);
2034 2054 break;
  2055 + }
2035 2056  
2036 2057 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2037 2058 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2038 2059  
... ... @@ -2040,8 +2061,26 @@
2040 2061  
2041 2062 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2042 2063 chunk_offset);
2043   - if (ret)
  2064 + if (ret && ret != -ENOSPC)
2044 2065 goto done;
  2066 + if (ret == -ENOSPC)
  2067 + failed++;
  2068 + key.offset -= 1;
  2069 + }
  2070 +
  2071 + if (failed && !retried) {
  2072 + failed = 0;
  2073 + retried = true;
  2074 + goto again;
  2075 + } else if (failed && retried) {
  2076 + ret = -ENOSPC;
  2077 + lock_chunks(root);
  2078 +
  2079 + device->total_bytes = old_size;
  2080 + if (device->writeable)
  2081 + device->fs_devices->total_rw_bytes += diff;
  2082 + unlock_chunks(root);
  2083 + goto done;
2045 2084 }
2046 2085  
2047 2086 /* Shrinking succeeded, else we would be at "done". */
... ... @@ -181,5 +181,8 @@
181 181 void btrfs_unlock_volumes(void);
182 182 void btrfs_lock_volumes(void);
183 183 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
  184 +int find_free_dev_extent(struct btrfs_trans_handle *trans,
  185 + struct btrfs_device *device, u64 num_bytes,
  186 + u64 *start, u64 *max_avail);
184 187 #endif