Commit a02908f19c819aeec5e3dcf238adaa6deddd70b0

Authored by Mingming Cao
Committed by Theodore Ts'o
1 parent c001077f40

ext4: journal credits calculation cleanup and fix for non-extent writepage

When considering how many journal credits are needed to modify a
chunk of data, we need to account for the super block, inode block,
quota blocks, xattr block and indirect/index blocks, as well as the
group bitmap and group descriptor blocks for any new allocation
(including data and indirect/index blocks). Many places in ext4 do this
calculation on their own and often miss one or two metadata blocks; they
also often assume a single block allocation and do not consider the
case where multiple chunks are allocated.

This patch cleans up the current journal credit code and provides
common helper functions to calculate the journal credits, to be used
for writepage, writepages, DIO, fallocate, migration and defrag, for
both nonextent and extent files.

This patch modifies the writepage/write_begin credit calculation for
nonextent files to use the new helper function. It also fixes the
problem that writepage on nonextent files did not consider the case
blocksize < pagesize, in which a single transaction could possibly need
multiple block allocations.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 3 changed files with 108 additions and 34 deletions Side-by-side Diff

... ... @@ -1072,6 +1072,7 @@
1072 1072 extern void ext4_get_inode_flags(struct ext4_inode_info *);
1073 1073 extern void ext4_set_aops(struct inode *inode);
1074 1074 extern int ext4_writepage_trans_blocks(struct inode *);
  1075 +extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1075 1076 extern int ext4_block_truncate_page(handle_t *handle,
1076 1077 struct address_space *mapping, loff_t from);
1077 1078 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
... ... @@ -1227,6 +1228,8 @@
1227 1228 /* extents.c */
1228 1229 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1229 1230 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
  1231 +extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
  1232 + int chunk);
1230 1233 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1231 1234 ext4_lblk_t iblock,
1232 1235 unsigned long max_blocks, struct buffer_head *bh_result,
... ... @@ -51,6 +51,14 @@
51 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53 53  
  54 +/*
  55 + * Define the number of metadata blocks we need to account to modify data.
  56 + *
  57 + * This include super block, inode block, quota blocks and xattr blocks
  58 + */
  59 +#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
  60 + 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
  61 +
54 62 /* Delete operations potentially hit one directory's namespace plus an
55 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 64 * generous. We can grow the delete transaction later if necessary. */
... ... @@ -4354,56 +4354,119 @@
4354 4354 return 0;
4355 4355 }
4356 4356  
  4357 +static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
  4358 + int chunk)
  4359 +{
  4360 + int indirects;
  4361 +
  4362 + /* if nrblocks are contiguous */
  4363 + if (chunk) {
  4364 + /*
  4365 + * With N contiguous data blocks, it need at most
  4366 + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
  4367 + * 2 dindirect blocks
  4368 + * 1 tindirect block
  4369 + */
  4370 + indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
  4371 + return indirects + 3;
  4372 + }
  4373 + /*
  4374 + * if nrblocks are not contiguous, worse case, each block touch
  4375 + * a indirect block, and each indirect block touch a double indirect
  4376 + * block, plus a triple indirect block
  4377 + */
  4378 + indirects = nrblocks * 2 + 1;
  4379 + return indirects;
  4380 +}
  4381 +
  4382 +static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  4383 +{
  4384 + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
  4385 + return ext4_indirect_trans_blocks(inode, nrblocks, 0);
  4386 + return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
  4387 +}
4357 4388 /*
4358   - * How many blocks doth make a writepage()?
  4389 + * Account for index blocks, block groups bitmaps and block group
  4390 + * descriptor blocks if modify datablocks and index blocks
  4391 + * worse case, the indexs blocks spread over different block groups
4359 4392 *
4360   - * With N blocks per page, it may be:
4361   - * N data blocks
4362   - * 2 indirect block
4363   - * 2 dindirect
4364   - * 1 tindirect
4365   - * N+5 bitmap blocks (from the above)
4366   - * N+5 group descriptor summary blocks
4367   - * 1 inode block
4368   - * 1 superblock.
4369   - * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
  4393 + * If datablocks are discontiguous, they are possible to spread over
  4394 + * different block groups too. If they are contiugous, with flexbg,
  4395 + * they could still across block group boundary.
4370 4396 *
4371   - * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
  4397 + * Also account for superblock, inode, quota and xattr blocks
  4398 + */
  4399 +int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  4400 +{
  4401 + int groups, gdpblocks;
  4402 + int idxblocks;
  4403 + int ret = 0;
  4404 +
  4405 + /*
  4406 + * How many index blocks need to touch to modify nrblocks?
  4407 + * The "Chunk" flag indicating whether the nrblocks is
  4408 + * physically contiguous on disk
  4409 + *
  4410 + * For Direct IO and fallocate, they calls get_block to allocate
  4411 + * one single extent at a time, so they could set the "Chunk" flag
  4412 + */
  4413 + idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
  4414 +
  4415 + ret = idxblocks;
  4416 +
  4417 + /*
  4418 + * Now let's see how many group bitmaps and group descriptors need
  4419 + * to account
  4420 + */
  4421 + groups = idxblocks;
  4422 + if (chunk)
  4423 + groups += 1;
  4424 + else
  4425 + groups += nrblocks;
  4426 +
  4427 + gdpblocks = groups;
  4428 + if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
  4429 + groups = EXT4_SB(inode->i_sb)->s_groups_count;
  4430 + if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
  4431 + gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
  4432 +
  4433 + /* bitmaps and block group descriptor blocks */
  4434 + ret += groups + gdpblocks;
  4435 +
  4436 + /* Blocks for super block, inode, quota and xattr blocks */
  4437 + ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
  4438 +
  4439 + return ret;
  4440 +}
  4441 +
  4442 +/*
  4443 + * Calulate the total number of credits to reserve to fit
  4444 + * the modification of a single pages into a single transaction
4372 4445 *
4373   - * With ordered or writeback data it's the same, less the N data blocks.
  4446 + * This could be called via ext4_write_begin() or later
  4447 + * ext4_da_writepages() in delalyed allocation case.
4374 4448 *
4375   - * If the inode's direct blocks can hold an integral number of pages then a
4376   - * page cannot straddle two indirect blocks, and we can only touch one indirect
4377   - * and dindirect block, and the "5" above becomes "3".
  4449 + * In both case it's possible that we could allocating multiple
  4450 + * chunks of blocks. We need to consider the worse case, when
  4451 + * one new block per extent.
4378 4452 *
4379   - * This still overestimates under most circumstances. If we were to pass the
4380   - * start and end offsets in here as well we could do block_to_path() on each
4381   - * block and work out the exact number of indirects which are touched. Pah.
  4453 + * For Direct IO and fallocate, the journal credits reservation
  4454 + * is based on one single extent allocation, so they could use
  4455 + * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single
  4456 + * chunk of allocation needs.
4382 4457 */
4383   -
4384 4458 int ext4_writepage_trans_blocks(struct inode *inode)
4385 4459 {
4386 4460 int bpp = ext4_journal_blocks_per_page(inode);
4387   - int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
4388 4461 int ret;
4389 4462  
4390   - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
4391   - return ext4_ext_writepage_trans_blocks(inode, bpp);
  4463 + ret = ext4_meta_trans_blocks(inode, bpp, 0);
4392 4464  
  4465 + /* Account for data blocks for journalled mode */
4393 4466 if (ext4_should_journal_data(inode))
4394   - ret = 3 * (bpp + indirects) + 2;
4395   - else
4396   - ret = 2 * (bpp + indirects) + 2;
4397   -
4398   -#ifdef CONFIG_QUOTA
4399   - /* We know that structure was already allocated during DQUOT_INIT so
4400   - * we will be updating only the data blocks + inodes */
4401   - ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
4402   -#endif
4403   -
  4467 + ret += bpp;
4404 4468 return ret;
4405 4469 }
4406   -
4407 4470 /*
4408 4471 * The caller must have previously called ext4_reserve_inode_write().
4409 4472 * Give this, we know that the caller already has write access to iloc->bh.