Commit c174e6d6979a04b7b77b93f244396be4b81f8bfb

Authored by Dmitry Monakhov
Committed by Theodore Ts'o
1 parent 69dc953640

ext4: fix transaction issues for ext4_fallocate and ext4_zero_range

After commit f282ac19d86f we use different transactions for
preallocation and the i_disksize update, which results in complaints
from fsck after a power failure.  Spotted by generic/019.  IMHO this is
a regression because the fs becomes inconsistent; even worse,
'e2fsck -p' will no longer work (which drives admins crazy).  The
same-transaction requirement also applies to the ctime and mtime updates.

testcase: xfstest generic/019

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org

Showing 1 changed file with 35 additions and 33 deletions Side-by-side Diff

... ... @@ -4665,7 +4665,8 @@
4665 4665 }
4666 4666  
4667 4667 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4668   - ext4_lblk_t len, int flags, int mode)
  4668 + ext4_lblk_t len, loff_t new_size,
  4669 + int flags, int mode)
4669 4670 {
4670 4671 struct inode *inode = file_inode(file);
4671 4672 handle_t *handle;
4672 4673  
... ... @@ -4674,8 +4675,10 @@
4674 4675 int retries = 0;
4675 4676 struct ext4_map_blocks map;
4676 4677 unsigned int credits;
  4678 + loff_t epos;
4677 4679  
4678 4680 map.m_lblk = offset;
  4681 + map.m_len = len;
4679 4682 /*
4680 4683 * Don't normalize the request if it can fit in one extent so
4681 4684 * that it doesn't get unnecessarily split into multiple
... ... @@ -4690,9 +4693,7 @@
4690 4693 credits = ext4_chunk_trans_blocks(inode, len);
4691 4694  
4692 4695 retry:
4693   - while (ret >= 0 && ret < len) {
4694   - map.m_lblk = map.m_lblk + ret;
4695   - map.m_len = len = len - ret;
  4696 + while (ret >= 0 && len) {
4696 4697 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4697 4698 credits);
4698 4699 if (IS_ERR(handle)) {
... ... @@ -4709,6 +4710,21 @@
4709 4710 ret2 = ext4_journal_stop(handle);
4710 4711 break;
4711 4712 }
  4713 + map.m_lblk += ret;
  4714 + map.m_len = len = len - ret;
  4715 + epos = (loff_t)map.m_lblk << inode->i_blkbits;
  4716 + inode->i_ctime = ext4_current_time(inode);
  4717 + if (new_size) {
  4718 + if (epos > new_size)
  4719 + epos = new_size;
  4720 + if (ext4_update_inode_size(inode, epos) & 0x1)
  4721 + inode->i_mtime = inode->i_ctime;
  4722 + } else {
  4723 + if (epos > inode->i_size)
  4724 + ext4_set_inode_flag(inode,
  4725 + EXT4_INODE_EOFBLOCKS);
  4726 + }
  4727 + ext4_mark_inode_dirty(handle, inode);
4712 4728 ret2 = ext4_journal_stop(handle);
4713 4729 if (ret2)
4714 4730 break;
... ... @@ -4732,7 +4748,7 @@
4732 4748 int ret = 0;
4733 4749 int flags;
4734 4750 int credits;
4735   - int partial;
  4751 + int partial_begin, partial_end;
4736 4752 loff_t start, end;
4737 4753 ext4_lblk_t lblk;
4738 4754 struct address_space *mapping = inode->i_mapping;
... ... @@ -4772,7 +4788,8 @@
4772 4788  
4773 4789 if (start < offset || end > offset + len)
4774 4790 return -EINVAL;
4775   - partial = (offset + len) & ((1 << blkbits) - 1);
  4791 + partial_begin = offset & ((1 << blkbits) - 1);
  4792 + partial_end = (offset + len) & ((1 << blkbits) - 1);
4776 4793  
4777 4794 lblk = start >> blkbits;
4778 4795 max_blocks = (end >> blkbits);
... ... @@ -4806,7 +4823,7 @@
4806 4823 * If we have a partial block after EOF we have to allocate
4807 4824 * the entire block.
4808 4825 */
4809   - if (partial)
  4826 + if (partial_end)
4810 4827 max_blocks += 1;
4811 4828 }
4812 4829  
... ... @@ -4814,6 +4831,7 @@
4814 4831  
4815 4832 /* Now release the pages and zero block aligned part of pages*/
4816 4833 truncate_pagecache_range(inode, start, end - 1);
  4834 + inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4817 4835  
4818 4836 /* Wait all existing dio workers, newcomers will block on i_mutex */
4819 4837 ext4_inode_block_unlocked_dio(inode);
4820 4838  
... ... @@ -4826,11 +4844,14 @@
4826 4844 if (ret)
4827 4845 goto out_dio;
4828 4846  
4829   - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
4830   - mode);
  4847 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
  4848 + flags, mode);
4831 4849 if (ret)
4832 4850 goto out_dio;
4833 4851 }
  4852 + if (!partial_begin && !partial_end)
  4853 + goto out_dio;
  4854 +
4834 4855 /*
4835 4856 * In worst case we have to writeout two nonadjacent unwritten
4836 4857 * blocks and update the inode
... ... @@ -4856,7 +4877,6 @@
4856 4877 if ((offset + len) > i_size_read(inode))
4857 4878 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4858 4879 }
4859   -
4860 4880 ext4_mark_inode_dirty(handle, inode);
4861 4881  
4862 4882 /* Zero out partial block at the edges of the range */
... ... @@ -4883,7 +4903,6 @@
4883 4903 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4884 4904 {
4885 4905 struct inode *inode = file_inode(file);
4886   - handle_t *handle;
4887 4906 loff_t new_size = 0;
4888 4907 unsigned int max_blocks;
4889 4908 int ret = 0;
4890 4909  
4891 4910  
... ... @@ -4939,32 +4958,15 @@
4939 4958 goto out;
4940 4959 }
4941 4960  
4942   - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
  4961 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
  4962 + flags, mode);
4943 4963 if (ret)
4944 4964 goto out;
4945 4965  
4946   - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4947   - if (IS_ERR(handle))
4948   - goto out;
4949   -
4950   - inode->i_ctime = ext4_current_time(inode);
4951   -
4952   - if (new_size) {
4953   - if (ext4_update_inode_size(inode, new_size) & 0x1)
4954   - inode->i_mtime = inode->i_ctime;
4955   - } else {
4956   - /*
4957   - * Mark that we allocate beyond EOF so the subsequent truncate
4958   - * can proceed even if the new size is the same as i_size.
4959   - */
4960   - if ((offset + len) > i_size_read(inode))
4961   - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
  4966 + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
  4967 + ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
  4968 + EXT4_I(inode)->i_sync_tid);
4962 4969 }
4963   - ext4_mark_inode_dirty(handle, inode);
4964   - if (file->f_flags & O_SYNC)
4965   - ext4_handle_sync(handle);
4966   -
4967   - ext4_journal_stop(handle);
4968 4970 out:
4969 4971 mutex_unlock(&inode->i_mutex);
4970 4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);