Commit d89b2f764e9f9eb6c1ac90d25e6fc5b43b676a48

Authored by Dmitry Monakhov
Committed by Greg Kroah-Hartman
1 parent 69fa118cd6

ext4: fix transaction issues for ext4_fallocate and ext4_zero_range

commit c174e6d6979a04b7b77b93f244396be4b81f8bfb upstream.

After commit f282ac19d86f we use different transactions for
preallocation and the i_disksize update, which results in complaints
from fsck after a power failure.  Spotted by generic/019.  IMHO this is
a regression because the fs becomes inconsistent; even worse,
'e2fsck -p' will no longer work (which drives admins crazy).  The same
transaction requirement applies to ctime and mtime updates.

testcase: xfstest generic/019

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 35 additions and 33 deletions Side-by-side Diff

... ... @@ -4664,7 +4664,8 @@
4664 4664 }
4665 4665  
4666 4666 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4667   - ext4_lblk_t len, int flags, int mode)
  4667 + ext4_lblk_t len, loff_t new_size,
  4668 + int flags, int mode)
4668 4669 {
4669 4670 struct inode *inode = file_inode(file);
4670 4671 handle_t *handle;
4671 4672  
... ... @@ -4673,8 +4674,10 @@
4673 4674 int retries = 0;
4674 4675 struct ext4_map_blocks map;
4675 4676 unsigned int credits;
  4677 + loff_t epos;
4676 4678  
4677 4679 map.m_lblk = offset;
  4680 + map.m_len = len;
4678 4681 /*
4679 4682 * Don't normalize the request if it can fit in one extent so
4680 4683 * that it doesn't get unnecessarily split into multiple
... ... @@ -4689,9 +4692,7 @@
4689 4692 credits = ext4_chunk_trans_blocks(inode, len);
4690 4693  
4691 4694 retry:
4692   - while (ret >= 0 && ret < len) {
4693   - map.m_lblk = map.m_lblk + ret;
4694   - map.m_len = len = len - ret;
  4695 + while (ret >= 0 && len) {
4695 4696 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4696 4697 credits);
4697 4698 if (IS_ERR(handle)) {
... ... @@ -4708,6 +4709,21 @@
4708 4709 ret2 = ext4_journal_stop(handle);
4709 4710 break;
4710 4711 }
  4712 + map.m_lblk += ret;
  4713 + map.m_len = len = len - ret;
  4714 + epos = (loff_t)map.m_lblk << inode->i_blkbits;
  4715 + inode->i_ctime = ext4_current_time(inode);
  4716 + if (new_size) {
  4717 + if (epos > new_size)
  4718 + epos = new_size;
  4719 + if (ext4_update_inode_size(inode, epos) & 0x1)
  4720 + inode->i_mtime = inode->i_ctime;
  4721 + } else {
  4722 + if (epos > inode->i_size)
  4723 + ext4_set_inode_flag(inode,
  4724 + EXT4_INODE_EOFBLOCKS);
  4725 + }
  4726 + ext4_mark_inode_dirty(handle, inode);
4711 4727 ret2 = ext4_journal_stop(handle);
4712 4728 if (ret2)
4713 4729 break;
... ... @@ -4731,7 +4747,7 @@
4731 4747 int ret = 0;
4732 4748 int flags;
4733 4749 int credits;
4734   - int partial;
  4750 + int partial_begin, partial_end;
4735 4751 loff_t start, end;
4736 4752 ext4_lblk_t lblk;
4737 4753 struct address_space *mapping = inode->i_mapping;
... ... @@ -4771,7 +4787,8 @@
4771 4787  
4772 4788 if (start < offset || end > offset + len)
4773 4789 return -EINVAL;
4774   - partial = (offset + len) & ((1 << blkbits) - 1);
  4790 + partial_begin = offset & ((1 << blkbits) - 1);
  4791 + partial_end = (offset + len) & ((1 << blkbits) - 1);
4775 4792  
4776 4793 lblk = start >> blkbits;
4777 4794 max_blocks = (end >> blkbits);
... ... @@ -4805,7 +4822,7 @@
4805 4822 * If we have a partial block after EOF we have to allocate
4806 4823 * the entire block.
4807 4824 */
4808   - if (partial)
  4825 + if (partial_end)
4809 4826 max_blocks += 1;
4810 4827 }
4811 4828  
... ... @@ -4813,6 +4830,7 @@
4813 4830  
4814 4831 /* Now release the pages and zero block aligned part of pages*/
4815 4832 truncate_pagecache_range(inode, start, end - 1);
  4833 + inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4816 4834  
4817 4835 /* Wait all existing dio workers, newcomers will block on i_mutex */
4818 4836 ext4_inode_block_unlocked_dio(inode);
4819 4837  
... ... @@ -4825,11 +4843,14 @@
4825 4843 if (ret)
4826 4844 goto out_dio;
4827 4845  
4828   - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
4829   - mode);
  4846 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
  4847 + flags, mode);
4830 4848 if (ret)
4831 4849 goto out_dio;
4832 4850 }
  4851 + if (!partial_begin && !partial_end)
  4852 + goto out_dio;
  4853 +
4833 4854 /*
4834 4855 * In worst case we have to writeout two nonadjacent unwritten
4835 4856 * blocks and update the inode
... ... @@ -4855,7 +4876,6 @@
4855 4876 if ((offset + len) > i_size_read(inode))
4856 4877 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4857 4878 }
4858   -
4859 4879 ext4_mark_inode_dirty(handle, inode);
4860 4880  
4861 4881 /* Zero out partial block at the edges of the range */
... ... @@ -4882,7 +4902,6 @@
4882 4902 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4883 4903 {
4884 4904 struct inode *inode = file_inode(file);
4885   - handle_t *handle;
4886 4905 loff_t new_size = 0;
4887 4906 unsigned int max_blocks;
4888 4907 int ret = 0;
4889 4908  
4890 4909  
... ... @@ -4938,32 +4957,15 @@
4938 4957 goto out;
4939 4958 }
4940 4959  
4941   - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
  4960 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
  4961 + flags, mode);
4942 4962 if (ret)
4943 4963 goto out;
4944 4964  
4945   - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4946   - if (IS_ERR(handle))
4947   - goto out;
4948   -
4949   - inode->i_ctime = ext4_current_time(inode);
4950   -
4951   - if (new_size) {
4952   - if (ext4_update_inode_size(inode, new_size) & 0x1)
4953   - inode->i_mtime = inode->i_ctime;
4954   - } else {
4955   - /*
4956   - * Mark that we allocate beyond EOF so the subsequent truncate
4957   - * can proceed even if the new size is the same as i_size.
4958   - */
4959   - if ((offset + len) > i_size_read(inode))
4960   - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
  4965 + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
  4966 + ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
  4967 + EXT4_I(inode)->i_sync_tid);
4961 4968 }
4962   - ext4_mark_inode_dirty(handle, inode);
4963   - if (file->f_flags & O_SYNC)
4964   - ext4_handle_sync(handle);
4965   -
4966   - ext4_journal_stop(handle);
4967 4969 out:
4968 4970 mutex_unlock(&inode->i_mutex);
4969 4971 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);