Commit 28623c2f5b0dca3c3ea34fd6108940661352e276

Authored by Theodore Ts'o
1 parent 117fff10d7

ext4: grow the s_group_info array as needed

Previously we allocated the s_group_info array with enough space for
any possible future growth of the file system via online resize.  This
is unfortunate because it wastes memory, and it doesn't work for the
meta_bg scheme, where growth is not limited by the number of
reserved gdt blocks.  So add code to grow the s_group_info array
as needed.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 3 changed files with 50 additions and 40 deletions Side-by-side Diff

... ... @@ -1233,6 +1233,7 @@
1233 1233 spinlock_t s_md_lock;
1234 1234 unsigned short *s_mb_offsets;
1235 1235 unsigned int *s_mb_maxs;
  1236 + unsigned int s_group_info_size;
1236 1237  
1237 1238 /* tunables */
1238 1239 unsigned long s_stripe;
... ... @@ -1971,6 +1972,8 @@
1971 1972 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1972 1973 struct buffer_head *bh, ext4_fsblk_t block,
1973 1974 unsigned long count, int flags);
  1975 +extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
  1976 + ext4_group_t ngroups);
1974 1977 extern int ext4_mb_add_groupinfo(struct super_block *sb,
1975 1978 ext4_group_t i, struct ext4_group_desc *desc);
1976 1979 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
... ... @@ -24,6 +24,7 @@
24 24 #include "ext4_jbd2.h"
25 25 #include "mballoc.h"
26 26 #include <linux/debugfs.h>
  27 +#include <linux/log2.h>
27 28 #include <linux/slab.h>
28 29 #include <trace/events/ext4.h>
29 30  
... ... @@ -2163,6 +2164,39 @@
2163 2164 return cachep;
2164 2165 }
2165 2166  
  2167 +/*
  2168 + * Allocate the top-level s_group_info array for the specified number
  2169 + * of groups
  2170 + */
  2171 +int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
  2172 +{
  2173 + struct ext4_sb_info *sbi = EXT4_SB(sb);
  2174 + unsigned size;
  2175 + struct ext4_group_info ***new_groupinfo;
  2176 +
  2177 + size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
  2178 + EXT4_DESC_PER_BLOCK_BITS(sb);
  2179 + if (size <= sbi->s_group_info_size)
  2180 + return 0;
  2181 +
  2182 + size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
  2183 + new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
  2184 + if (!new_groupinfo) {
  2185 + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
  2186 + return -ENOMEM;
  2187 + }
  2188 + if (sbi->s_group_info) {
  2189 + memcpy(new_groupinfo, sbi->s_group_info,
  2190 + sbi->s_group_info_size * sizeof(*sbi->s_group_info));
  2191 + ext4_kvfree(sbi->s_group_info);
  2192 + }
  2193 + sbi->s_group_info = new_groupinfo;
  2194 + sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
  2195 + ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
  2196 + sbi->s_group_info_size);
  2197 + return 0;
  2198 +}
  2199 +
2166 2200 /* Create and initialize ext4_group_info data for the given group. */
2167 2201 int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2168 2202 struct ext4_group_desc *desc)
2169 2203  
2170 2204  
... ... @@ -2252,49 +2286,14 @@
2252 2286 ext4_group_t ngroups = ext4_get_groups_count(sb);
2253 2287 ext4_group_t i;
2254 2288 struct ext4_sb_info *sbi = EXT4_SB(sb);
2255   - struct ext4_super_block *es = sbi->s_es;
2256   - int num_meta_group_infos;
2257   - int num_meta_group_infos_max;
2258   - int array_size;
  2289 + int err;
2259 2290 struct ext4_group_desc *desc;
2260 2291 struct kmem_cache *cachep;
2261 2292  
2262   - /* This is the number of blocks used by GDT */
2263   - num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2264   - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
  2293 + err = ext4_mb_alloc_groupinfo(sb, ngroups);
  2294 + if (err)
  2295 + return err;
2265 2296  
2266   - /*
2267   - * This is the total number of blocks used by GDT including
2268   - * the number of reserved blocks for GDT.
2269   - * The s_group_info array is allocated with this value
2270   - * to allow a clean online resize without a complex
2271   - * manipulation of pointer.
2272   - * The drawback is the unused memory when no resize
2273   - * occurs but it's very low in terms of pages
2274   - * (see comments below)
2275   - * Need to handle this properly when META_BG resizing is allowed
2276   - */
2277   - num_meta_group_infos_max = num_meta_group_infos +
2278   - le16_to_cpu(es->s_reserved_gdt_blocks);
2279   -
2280   - /*
2281   - * array_size is the size of s_group_info array. We round it
2282   - * to the next power of two because this approximation is done
2283   - * internally by kmalloc so we can have some more memory
2284   - * for free here (e.g. may be used for META_BG resize).
2285   - */
2286   - array_size = 1;
2287   - while (array_size < sizeof(*sbi->s_group_info) *
2288   - num_meta_group_infos_max)
2289   - array_size = array_size << 1;
2290   - /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2291   - * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2292   - * So a two level scheme suffices for now. */
2293   - sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2294   - if (sbi->s_group_info == NULL) {
2295   - ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2296   - return -ENOMEM;
2297   - }
2298 2297 sbi->s_buddy_cache = new_inode(sb);
2299 2298 if (sbi->s_buddy_cache == NULL) {
2300 2299 ext4_msg(sb, KERN_ERR, "can't get new inode");
... ... @@ -2322,7 +2321,7 @@
2322 2321 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2323 2322 while (i-- > 0)
2324 2323 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2325   - i = num_meta_group_infos;
  2324 + i = sbi->s_group_info_size;
2326 2325 while (i-- > 0)
2327 2326 kfree(sbi->s_group_info[i]);
2328 2327 iput(sbi->s_buddy_cache);
... ... @@ -1507,6 +1507,10 @@
1507 1507 if (err)
1508 1508 return err;
1509 1509  
  1510 + err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
  1511 + if (err)
  1512 + goto out;
  1513 +
1510 1514 flex_gd.count = 1;
1511 1515 flex_gd.groups = input;
1512 1516 flex_gd.bg_flags = &bg_flags;
... ... @@ -1731,6 +1735,10 @@
1731 1735 err = ext4_alloc_flex_bg_array(sb, n_group + 1);
1732 1736 if (err)
1733 1737 return err;
  1738 +
  1739 + err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
  1740 + if (err)
  1741 + goto out;
1734 1742  
1735 1743 flex_gd = alloc_flex_gd(flexbg_size);
1736 1744 if (flex_gd == NULL) {