Commit c174e6d6979a04b7b77b93f244396be4b81f8bfb
Committed by
Theodore Ts'o
1 parent
69dc953640
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
ext4: fix transaction issues for ext4_fallocate and ext4_zero_range
After commit f282ac19d86f we use different transactions for preallocation and the i_disksize update, which results in complaints from fsck after a power failure. Spotted by generic/019. IMHO this is a regression because the fs becomes inconsistent; even worse, 'e2fsck -p' will no longer work (which drives admins crazy). The same transaction requirement applies to ctime and mtime updates. Testcase: xfstest generic/019. Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@vger.kernel.org
Showing 1 changed file with 35 additions and 33 deletions Side-by-side Diff
fs/ext4/extents.c
... | ... | @@ -4665,7 +4665,8 @@ |
4665 | 4665 | } |
4666 | 4666 | |
4667 | 4667 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, |
4668 | - ext4_lblk_t len, int flags, int mode) | |
4668 | + ext4_lblk_t len, loff_t new_size, | |
4669 | + int flags, int mode) | |
4669 | 4670 | { |
4670 | 4671 | struct inode *inode = file_inode(file); |
4671 | 4672 | handle_t *handle; |
4672 | 4673 | |
... | ... | @@ -4674,8 +4675,10 @@ |
4674 | 4675 | int retries = 0; |
4675 | 4676 | struct ext4_map_blocks map; |
4676 | 4677 | unsigned int credits; |
4678 | + loff_t epos; | |
4677 | 4679 | |
4678 | 4680 | map.m_lblk = offset; |
4681 | + map.m_len = len; | |
4679 | 4682 | /* |
4680 | 4683 | * Don't normalize the request if it can fit in one extent so |
4681 | 4684 | * that it doesn't get unnecessarily split into multiple |
... | ... | @@ -4690,9 +4693,7 @@ |
4690 | 4693 | credits = ext4_chunk_trans_blocks(inode, len); |
4691 | 4694 | |
4692 | 4695 | retry: |
4693 | - while (ret >= 0 && ret < len) { | |
4694 | - map.m_lblk = map.m_lblk + ret; | |
4695 | - map.m_len = len = len - ret; | |
4696 | + while (ret >= 0 && len) { | |
4696 | 4697 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
4697 | 4698 | credits); |
4698 | 4699 | if (IS_ERR(handle)) { |
... | ... | @@ -4709,6 +4710,21 @@ |
4709 | 4710 | ret2 = ext4_journal_stop(handle); |
4710 | 4711 | break; |
4711 | 4712 | } |
4713 | + map.m_lblk += ret; | |
4714 | + map.m_len = len = len - ret; | |
4715 | + epos = (loff_t)map.m_lblk << inode->i_blkbits; | |
4716 | + inode->i_ctime = ext4_current_time(inode); | |
4717 | + if (new_size) { | |
4718 | + if (epos > new_size) | |
4719 | + epos = new_size; | |
4720 | + if (ext4_update_inode_size(inode, epos) & 0x1) | |
4721 | + inode->i_mtime = inode->i_ctime; | |
4722 | + } else { | |
4723 | + if (epos > inode->i_size) | |
4724 | + ext4_set_inode_flag(inode, | |
4725 | + EXT4_INODE_EOFBLOCKS); | |
4726 | + } | |
4727 | + ext4_mark_inode_dirty(handle, inode); | |
4712 | 4728 | ret2 = ext4_journal_stop(handle); |
4713 | 4729 | if (ret2) |
4714 | 4730 | break; |
... | ... | @@ -4732,7 +4748,7 @@ |
4732 | 4748 | int ret = 0; |
4733 | 4749 | int flags; |
4734 | 4750 | int credits; |
4735 | - int partial; | |
4751 | + int partial_begin, partial_end; | |
4736 | 4752 | loff_t start, end; |
4737 | 4753 | ext4_lblk_t lblk; |
4738 | 4754 | struct address_space *mapping = inode->i_mapping; |
... | ... | @@ -4772,7 +4788,8 @@ |
4772 | 4788 | |
4773 | 4789 | if (start < offset || end > offset + len) |
4774 | 4790 | return -EINVAL; |
4775 | - partial = (offset + len) & ((1 << blkbits) - 1); | |
4791 | + partial_begin = offset & ((1 << blkbits) - 1); | |
4792 | + partial_end = (offset + len) & ((1 << blkbits) - 1); | |
4776 | 4793 | |
4777 | 4794 | lblk = start >> blkbits; |
4778 | 4795 | max_blocks = (end >> blkbits); |
... | ... | @@ -4806,7 +4823,7 @@ |
4806 | 4823 | * If we have a partial block after EOF we have to allocate |
4807 | 4824 | * the entire block. |
4808 | 4825 | */ |
4809 | - if (partial) | |
4826 | + if (partial_end) | |
4810 | 4827 | max_blocks += 1; |
4811 | 4828 | } |
4812 | 4829 | |
... | ... | @@ -4814,6 +4831,7 @@ |
4814 | 4831 | |
4815 | 4832 | /* Now release the pages and zero block aligned part of pages*/ |
4816 | 4833 | truncate_pagecache_range(inode, start, end - 1); |
4834 | + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | |
4817 | 4835 | |
4818 | 4836 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4819 | 4837 | ext4_inode_block_unlocked_dio(inode); |
4820 | 4838 | |
... | ... | @@ -4826,11 +4844,14 @@ |
4826 | 4844 | if (ret) |
4827 | 4845 | goto out_dio; |
4828 | 4846 | |
4829 | - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, | |
4830 | - mode); | |
4847 | + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | |
4848 | + flags, mode); | |
4831 | 4849 | if (ret) |
4832 | 4850 | goto out_dio; |
4833 | 4851 | } |
4852 | + if (!partial_begin && !partial_end) | |
4853 | + goto out_dio; | |
4854 | + | |
4834 | 4855 | /* |
4835 | 4856 | * In worst case we have to writeout two nonadjacent unwritten |
4836 | 4857 | * blocks and update the inode |
... | ... | @@ -4856,7 +4877,6 @@ |
4856 | 4877 | if ((offset + len) > i_size_read(inode)) |
4857 | 4878 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4858 | 4879 | } |
4859 | - | |
4860 | 4880 | ext4_mark_inode_dirty(handle, inode); |
4861 | 4881 | |
4862 | 4882 | /* Zero out partial block at the edges of the range */ |
... | ... | @@ -4883,7 +4903,6 @@ |
4883 | 4903 | long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
4884 | 4904 | { |
4885 | 4905 | struct inode *inode = file_inode(file); |
4886 | - handle_t *handle; | |
4887 | 4906 | loff_t new_size = 0; |
4888 | 4907 | unsigned int max_blocks; |
4889 | 4908 | int ret = 0; |
4890 | 4909 | |
4891 | 4910 | |
... | ... | @@ -4939,32 +4958,15 @@ |
4939 | 4958 | goto out; |
4940 | 4959 | } |
4941 | 4960 | |
4942 | - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); | |
4961 | + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | |
4962 | + flags, mode); | |
4943 | 4963 | if (ret) |
4944 | 4964 | goto out; |
4945 | 4965 | |
4946 | - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | |
4947 | - if (IS_ERR(handle)) | |
4948 | - goto out; | |
4949 | - | |
4950 | - inode->i_ctime = ext4_current_time(inode); | |
4951 | - | |
4952 | - if (new_size) { | |
4953 | - if (ext4_update_inode_size(inode, new_size) & 0x1) | |
4954 | - inode->i_mtime = inode->i_ctime; | |
4955 | - } else { | |
4956 | - /* | |
4957 | - * Mark that we allocate beyond EOF so the subsequent truncate | |
4958 | - * can proceed even if the new size is the same as i_size. | |
4959 | - */ | |
4960 | - if ((offset + len) > i_size_read(inode)) | |
4961 | - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | |
4966 | + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { | |
4967 | + ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal, | |
4968 | + EXT4_I(inode)->i_sync_tid); | |
4962 | 4969 | } |
4963 | - ext4_mark_inode_dirty(handle, inode); | |
4964 | - if (file->f_flags & O_SYNC) | |
4965 | - ext4_handle_sync(handle); | |
4966 | - | |
4967 | - ext4_journal_stop(handle); | |
4968 | 4970 | out: |
4969 | 4971 | mutex_unlock(&inode->i_mutex); |
4970 | 4972 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |