Commit adb2355104b2109e06ba5276485d187d023b2fd2

Authored by Zheng Liu
Committed by Theodore Ts'o
1 parent cdee78433c

ext4: update extent status tree after an extent is zeroed out

When we try to split an extent, this extent could be zeroed out and marked
as initialized.  But we don't know this in ext4_map_blocks because it
only returns the length of the allocated extent.  Meanwhile we will mark
this extent as uninitialized because we only check m_flags.

This commit updates the extent status tree when we try to split an
unwritten extent.  We don't need to worry about the status of this extent
because we always mark it as initialized.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>

Showing 4 changed files with 61 additions and 4 deletions Side-by-side Diff

... ... @@ -2925,7 +2925,7 @@
2925 2925 {
2926 2926 ext4_fsblk_t newblock;
2927 2927 ext4_lblk_t ee_block;
2928   - struct ext4_extent *ex, newex, orig_ex;
  2928 + struct ext4_extent *ex, newex, orig_ex, zero_ex;
2929 2929 struct ext4_extent *ex2 = NULL;
2930 2930 unsigned int ee_len, depth;
2931 2931 int err = 0;
2932 2932  
2933 2933  
2934 2934  
... ... @@ -2996,12 +2996,26 @@
2996 2996 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2997 2997 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2998 2998 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
2999   - if (split_flag & EXT4_EXT_DATA_VALID1)
  2999 + if (split_flag & EXT4_EXT_DATA_VALID1) {
3000 3000 err = ext4_ext_zeroout(inode, ex2);
3001   - else
  3001 + zero_ex.ee_block = ex2->ee_block;
  3002 + zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
  3003 + ext4_ext_store_pblock(&zero_ex,
  3004 + ext4_ext_pblock(ex2));
  3005 + } else {
3002 3006 err = ext4_ext_zeroout(inode, ex);
3003   - } else
  3007 + zero_ex.ee_block = ex->ee_block;
  3008 + zero_ex.ee_len = ext4_ext_get_actual_len(ex);
  3009 + ext4_ext_store_pblock(&zero_ex,
  3010 + ext4_ext_pblock(ex));
  3011 + }
  3012 + } else {
3004 3013 err = ext4_ext_zeroout(inode, &orig_ex);
  3014 + zero_ex.ee_block = orig_ex.ee_block;
  3015 + zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
  3016 + ext4_ext_store_pblock(&zero_ex,
  3017 + ext4_ext_pblock(&orig_ex));
  3018 + }
3005 3019  
3006 3020 if (err)
3007 3021 goto fix_extent_len;
... ... @@ -3009,6 +3023,12 @@
3009 3023 ex->ee_len = cpu_to_le16(ee_len);
3010 3024 ext4_ext_try_to_merge(handle, inode, path, ex);
3011 3025 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
  3026 + if (err)
  3027 + goto fix_extent_len;
  3028 +
  3029 + /* update extent status tree */
  3030 + err = ext4_es_zeroout(inode, &zero_ex);
  3031 +
3012 3032 goto out;
3013 3033 } else if (err)
3014 3034 goto fix_extent_len;
... ... @@ -3150,6 +3170,7 @@
3150 3170 ee_block = le32_to_cpu(ex->ee_block);
3151 3171 ee_len = ext4_ext_get_actual_len(ex);
3152 3172 allocated = ee_len - (map->m_lblk - ee_block);
  3173 + zero_ex.ee_len = 0;
3153 3174  
3154 3175 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3155 3176  
... ... @@ -3247,6 +3268,9 @@
3247 3268 err = ext4_ext_zeroout(inode, ex);
3248 3269 if (err)
3249 3270 goto out;
  3271 + zero_ex.ee_block = ex->ee_block;
  3272 + zero_ex.ee_len = ext4_ext_get_actual_len(ex);
  3273 + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
3250 3274  
3251 3275 err = ext4_ext_get_access(handle, inode, path + depth);
3252 3276 if (err)
... ... @@ -3305,6 +3329,9 @@
3305 3329 err = allocated;
3306 3330  
3307 3331 out:
  3332 + /* If we have gotten a failure, don't zero out status tree */
  3333 + if (!err)
  3334 + err = ext4_es_zeroout(inode, &zero_ex);
3308 3335 return err ? err : allocated;
3309 3336 }
3310 3337  
fs/ext4/extents_status.c
... ... @@ -854,6 +854,23 @@
854 854 return err;
855 855 }
856 856  
  857 +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
  858 +{
  859 + ext4_lblk_t ee_block;
  860 + ext4_fsblk_t ee_pblock;
  861 + unsigned int ee_len;
  862 +
  863 + ee_block = le32_to_cpu(ex->ee_block);
  864 + ee_len = ext4_ext_get_actual_len(ex);
  865 + ee_pblock = ext4_ext_pblock(ex);
  866 +
  867 + if (ee_len == 0)
  868 + return 0;
  869 +
  870 + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
  871 + EXTENT_STATUS_WRITTEN);
  872 +}
  873 +
857 874 static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
858 875 {
859 876 struct ext4_sb_info *sbi = container_of(shrink,
fs/ext4/extents_status.h
... ... @@ -39,6 +39,8 @@
39 39 EXTENT_STATUS_DELAYED | \
40 40 EXTENT_STATUS_HOLE)
41 41  
  42 +struct ext4_extent;
  43 +
42 44 struct extent_status {
43 45 struct rb_node rb_node;
44 46 ext4_lblk_t es_lblk; /* first logical block extent covers */
... ... @@ -64,6 +66,7 @@
64 66 struct extent_status *es);
65 67 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
66 68 struct extent_status *es);
  69 +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
67 70  
68 71 static inline int ext4_es_is_written(struct extent_status *es)
69 72 {
... ... @@ -722,6 +722,15 @@
722 722 }
723 723 #endif
724 724  
  725 + /*
  726 + * If the extent has been zeroed out, we don't need to update
  727 + * extent status tree.
  728 + */
  729 + if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
  730 + ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
  731 + if (ext4_es_is_written(&es))
  732 + goto has_zeroout;
  733 + }
725 734 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
726 735 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
727 736 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
... ... @@ -734,6 +743,7 @@
734 743 retval = ret;
735 744 }
736 745  
  746 +has_zeroout:
737 747 up_write((&EXT4_I(inode)->i_data_sem));
738 748 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
739 749 int ret = check_block_validity(inode, map);