Commit d89b2f764e9f9eb6c1ac90d25e6fc5b43b676a48
Committed by
Greg Kroah-Hartman
1 parent
69fa118cd6
ext4: fix transaction issues for ext4_fallocate and ext4_zero_range
commit c174e6d6979a04b7b77b93f244396be4b81f8bfb upstream. After commit f282ac19d86f we use different transactions for preallocation and the i_disksize update, which results in complaints from fsck after a power failure. Spotted by generic/019. IMHO this is a regression because the fs becomes inconsistent; even worse, 'e2fsck -p' will no longer work (which drives admins crazy). The same-transaction requirement applies to ctime and mtime updates. Testcase: xfstest generic/019 Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Showing 1 changed file with 35 additions and 33 deletions Side-by-side Diff
fs/ext4/extents.c
... | ... | @@ -4664,7 +4664,8 @@ |
4664 | 4664 | } |
4665 | 4665 | |
4666 | 4666 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, |
4667 | - ext4_lblk_t len, int flags, int mode) | |
4667 | + ext4_lblk_t len, loff_t new_size, | |
4668 | + int flags, int mode) | |
4668 | 4669 | { |
4669 | 4670 | struct inode *inode = file_inode(file); |
4670 | 4671 | handle_t *handle; |
4671 | 4672 | |
... | ... | @@ -4673,8 +4674,10 @@ |
4673 | 4674 | int retries = 0; |
4674 | 4675 | struct ext4_map_blocks map; |
4675 | 4676 | unsigned int credits; |
4677 | + loff_t epos; | |
4676 | 4678 | |
4677 | 4679 | map.m_lblk = offset; |
4680 | + map.m_len = len; | |
4678 | 4681 | /* |
4679 | 4682 | * Don't normalize the request if it can fit in one extent so |
4680 | 4683 | * that it doesn't get unnecessarily split into multiple |
... | ... | @@ -4689,9 +4692,7 @@ |
4689 | 4692 | credits = ext4_chunk_trans_blocks(inode, len); |
4690 | 4693 | |
4691 | 4694 | retry: |
4692 | - while (ret >= 0 && ret < len) { | |
4693 | - map.m_lblk = map.m_lblk + ret; | |
4694 | - map.m_len = len = len - ret; | |
4695 | + while (ret >= 0 && len) { | |
4695 | 4696 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
4696 | 4697 | credits); |
4697 | 4698 | if (IS_ERR(handle)) { |
... | ... | @@ -4708,6 +4709,21 @@ |
4708 | 4709 | ret2 = ext4_journal_stop(handle); |
4709 | 4710 | break; |
4710 | 4711 | } |
4712 | + map.m_lblk += ret; | |
4713 | + map.m_len = len = len - ret; | |
4714 | + epos = (loff_t)map.m_lblk << inode->i_blkbits; | |
4715 | + inode->i_ctime = ext4_current_time(inode); | |
4716 | + if (new_size) { | |
4717 | + if (epos > new_size) | |
4718 | + epos = new_size; | |
4719 | + if (ext4_update_inode_size(inode, epos) & 0x1) | |
4720 | + inode->i_mtime = inode->i_ctime; | |
4721 | + } else { | |
4722 | + if (epos > inode->i_size) | |
4723 | + ext4_set_inode_flag(inode, | |
4724 | + EXT4_INODE_EOFBLOCKS); | |
4725 | + } | |
4726 | + ext4_mark_inode_dirty(handle, inode); | |
4711 | 4727 | ret2 = ext4_journal_stop(handle); |
4712 | 4728 | if (ret2) |
4713 | 4729 | break; |
... | ... | @@ -4731,7 +4747,7 @@ |
4731 | 4747 | int ret = 0; |
4732 | 4748 | int flags; |
4733 | 4749 | int credits; |
4734 | - int partial; | |
4750 | + int partial_begin, partial_end; | |
4735 | 4751 | loff_t start, end; |
4736 | 4752 | ext4_lblk_t lblk; |
4737 | 4753 | struct address_space *mapping = inode->i_mapping; |
... | ... | @@ -4771,7 +4787,8 @@ |
4771 | 4787 | |
4772 | 4788 | if (start < offset || end > offset + len) |
4773 | 4789 | return -EINVAL; |
4774 | - partial = (offset + len) & ((1 << blkbits) - 1); | |
4790 | + partial_begin = offset & ((1 << blkbits) - 1); | |
4791 | + partial_end = (offset + len) & ((1 << blkbits) - 1); | |
4775 | 4792 | |
4776 | 4793 | lblk = start >> blkbits; |
4777 | 4794 | max_blocks = (end >> blkbits); |
... | ... | @@ -4805,7 +4822,7 @@ |
4805 | 4822 | * If we have a partial block after EOF we have to allocate |
4806 | 4823 | * the entire block. |
4807 | 4824 | */ |
4808 | - if (partial) | |
4825 | + if (partial_end) | |
4809 | 4826 | max_blocks += 1; |
4810 | 4827 | } |
4811 | 4828 | |
... | ... | @@ -4813,6 +4830,7 @@ |
4813 | 4830 | |
4814 | 4831 | /* Now release the pages and zero block aligned part of pages*/ |
4815 | 4832 | truncate_pagecache_range(inode, start, end - 1); |
4833 | + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | |
4816 | 4834 | |
4817 | 4835 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4818 | 4836 | ext4_inode_block_unlocked_dio(inode); |
4819 | 4837 | |
... | ... | @@ -4825,11 +4843,14 @@ |
4825 | 4843 | if (ret) |
4826 | 4844 | goto out_dio; |
4827 | 4845 | |
4828 | - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, | |
4829 | - mode); | |
4846 | + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | |
4847 | + flags, mode); | |
4830 | 4848 | if (ret) |
4831 | 4849 | goto out_dio; |
4832 | 4850 | } |
4851 | + if (!partial_begin && !partial_end) | |
4852 | + goto out_dio; | |
4853 | + | |
4833 | 4854 | /* |
4834 | 4855 | * In worst case we have to writeout two nonadjacent unwritten |
4835 | 4856 | * blocks and update the inode |
... | ... | @@ -4855,7 +4876,6 @@ |
4855 | 4876 | if ((offset + len) > i_size_read(inode)) |
4856 | 4877 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4857 | 4878 | } |
4858 | - | |
4859 | 4879 | ext4_mark_inode_dirty(handle, inode); |
4860 | 4880 | |
4861 | 4881 | /* Zero out partial block at the edges of the range */ |
... | ... | @@ -4882,7 +4902,6 @@ |
4882 | 4902 | long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
4883 | 4903 | { |
4884 | 4904 | struct inode *inode = file_inode(file); |
4885 | - handle_t *handle; | |
4886 | 4905 | loff_t new_size = 0; |
4887 | 4906 | unsigned int max_blocks; |
4888 | 4907 | int ret = 0; |
4889 | 4908 | |
4890 | 4909 | |
... | ... | @@ -4938,32 +4957,15 @@ |
4938 | 4957 | goto out; |
4939 | 4958 | } |
4940 | 4959 | |
4941 | - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); | |
4960 | + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | |
4961 | + flags, mode); | |
4942 | 4962 | if (ret) |
4943 | 4963 | goto out; |
4944 | 4964 | |
4945 | - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | |
4946 | - if (IS_ERR(handle)) | |
4947 | - goto out; | |
4948 | - | |
4949 | - inode->i_ctime = ext4_current_time(inode); | |
4950 | - | |
4951 | - if (new_size) { | |
4952 | - if (ext4_update_inode_size(inode, new_size) & 0x1) | |
4953 | - inode->i_mtime = inode->i_ctime; | |
4954 | - } else { | |
4955 | - /* | |
4956 | - * Mark that we allocate beyond EOF so the subsequent truncate | |
4957 | - * can proceed even if the new size is the same as i_size. | |
4958 | - */ | |
4959 | - if ((offset + len) > i_size_read(inode)) | |
4960 | - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | |
4965 | + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { | |
4966 | + ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal, | |
4967 | + EXT4_I(inode)->i_sync_tid); | |
4961 | 4968 | } |
4962 | - ext4_mark_inode_dirty(handle, inode); | |
4963 | - if (file->f_flags & O_SYNC) | |
4964 | - ext4_handle_sync(handle); | |
4965 | - | |
4966 | - ext4_journal_stop(handle); | |
4967 | 4969 | out: |
4968 | 4970 | mutex_unlock(&inode->i_mutex); |
4969 | 4971 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |