Commit d3c926264a92e5ea448add3e883530e1edad3ce2

Authored by Linus Torvalds

Merge tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Fix a number of regression and other bugs in ext4, most of which were
  relatively obscure cornercases or races that were found using
  regression tests."

* tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (21 commits)
  ext4: fix data=journal fast mount/umount hang
  ext4: fix ext4_evict_inode() racing against workqueue processing code
  ext4: fix memory leakage in mext_check_coverage
  ext4: use s_extent_max_zeroout_kb value as number of kb
  ext4: use atomic64_t for the per-flexbg free_clusters count
  jbd2: fix use after free in jbd2_journal_dirty_metadata()
  ext4: reserve metadata block for every delayed write
  ext4: update reserved space after the 'correction'
  ext4: do not use yield()
  ext4: remove unused variable in ext4_free_blocks()
  ext4: fix WARN_ON from ext4_releasepage()
  ext4: fix the wrong number of the allocated blocks in ext4_split_extent()
  ext4: update extent status tree after an extent is zeroed out
  ext4: fix wrong m_len value after unwritten extent conversion
  ext4: add self-testing infrastructure to do a sanity check
  ext4: avoid a potential overflow in ext4_es_can_be_merged()
  ext4: invalidate extent status tree during extent migration
  ext4: remove unnecessary wait for extent conversion in ext4_fallocate()
  ext4: add warning to ext4_convert_unwritten_extents_endio
  ext4: disable merging of uninitialized extents
  ...

Showing 12 changed files

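fs/ext4/ext4.h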
... ... @@ -335,9 +335,9 @@
335 335 */
336 336  
337 337 struct flex_groups {
338   - atomic_t free_inodes;
339   - atomic_t free_clusters;
340   - atomic_t used_dirs;
  338 + atomic64_t free_clusters;
  339 + atomic_t free_inodes;
  340 + atomic_t used_dirs;
341 341 };
342 342  
343 343 #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
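The hunk above widens the per-flexbg free-cluster counter: on a large filesystem with a big flexbg size, the aggregated free-cluster count of one flex group can exceed what a 32-bit atomic_t holds. A minimal userspace sketch of the wrap-around, with illustrative numbers only (not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* e.g. 131072 groups per flex group, 32768 clusters per group */
        uint64_t free_clusters = 131072ULL * 32768ULL;      /* 2^32        */
        int32_t  as_atomic_t   = (int32_t)free_clusters;    /* wraps to 0  */
        int64_t  as_atomic64_t = (int64_t)free_clusters;    /* preserved   */

        printf("atomic_t view: %d, atomic64_t view: %lld\n",
               as_atomic_t, (long long)as_atomic64_t);
        return 0;
    }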
... ... @@ -2617,7 +2617,7 @@
2617 2617 extern int __init ext4_init_pageio(void);
2618 2618 extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2619 2619 extern void ext4_exit_pageio(void);
2620   -extern void ext4_ioend_wait(struct inode *);
  2620 +extern void ext4_ioend_shutdown(struct inode *);
2621 2621 extern void ext4_free_io_end(ext4_io_end_t *io);
2622 2622 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2623 2623 extern void ext4_end_io_work(struct work_struct *work);
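fs/ext4/extents.c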
... ... @@ -1584,10 +1584,12 @@
1584 1584 unsigned short ext1_ee_len, ext2_ee_len, max_len;
1585 1585  
1586 1586 /*
1587   - * Make sure that either both extents are uninitialized, or
1588   - * both are _not_.
  1587 + * Make sure that both extents are initialized. We don't merge
  1588 + * uninitialized extents so that we can be sure that end_io code has
  1589 + * the extent that was written properly split out and conversion to
  1590 + * initialized is trivial.
1589 1591 */
1590   - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
  1592 + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
1591 1593 return 0;
1592 1594  
1593 1595 if (ext4_ext_is_uninitialized(ex1))
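A standalone sketch of what the predicate change above does (truth table only, values are illustrative): with XOR the merge was refused only when exactly one extent was uninitialized, so two uninitialized extents could still be merged; with OR any uninitialized extent refuses the merge, which keeps the written part split out for the end_io conversion.

    #include <stdio.h>

    int main(void)
    {
        for (int u1 = 0; u1 <= 1; u1++)
            for (int u2 = 0; u2 <= 1; u2++)
                printf("uninit1=%d uninit2=%d  old refuses merge=%d  new refuses merge=%d\n",
                       u1, u2, u1 ^ u2, u1 || u2);
        return 0;
    }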
... ... @@ -2923,7 +2925,7 @@
2923 2925 {
2924 2926 ext4_fsblk_t newblock;
2925 2927 ext4_lblk_t ee_block;
2926   - struct ext4_extent *ex, newex, orig_ex;
  2928 + struct ext4_extent *ex, newex, orig_ex, zero_ex;
2927 2929 struct ext4_extent *ex2 = NULL;
2928 2930 unsigned int ee_len, depth;
2929 2931 int err = 0;
... ... @@ -2943,6 +2945,10 @@
2943 2945 newblock = split - ee_block + ext4_ext_pblock(ex);
2944 2946  
2945 2947 BUG_ON(split < ee_block || split >= (ee_block + ee_len));
  2948 + BUG_ON(!ext4_ext_is_uninitialized(ex) &&
  2949 + split_flag & (EXT4_EXT_MAY_ZEROOUT |
  2950 + EXT4_EXT_MARK_UNINIT1 |
  2951 + EXT4_EXT_MARK_UNINIT2));
2946 2952  
2947 2953 err = ext4_ext_get_access(handle, inode, path + depth);
2948 2954 if (err)
... ... @@ -2990,12 +2996,26 @@
2990 2996 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2991 2997 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2992 2998 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
2993   - if (split_flag & EXT4_EXT_DATA_VALID1)
  2999 + if (split_flag & EXT4_EXT_DATA_VALID1) {
2994 3000 err = ext4_ext_zeroout(inode, ex2);
2995   - else
  3001 + zero_ex.ee_block = ex2->ee_block;
  3002 + zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
  3003 + ext4_ext_store_pblock(&zero_ex,
  3004 + ext4_ext_pblock(ex2));
  3005 + } else {
2996 3006 err = ext4_ext_zeroout(inode, ex);
2997   - } else
  3007 + zero_ex.ee_block = ex->ee_block;
  3008 + zero_ex.ee_len = ext4_ext_get_actual_len(ex);
  3009 + ext4_ext_store_pblock(&zero_ex,
  3010 + ext4_ext_pblock(ex));
  3011 + }
  3012 + } else {
2998 3013 err = ext4_ext_zeroout(inode, &orig_ex);
  3014 + zero_ex.ee_block = orig_ex.ee_block;
  3015 + zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
  3016 + ext4_ext_store_pblock(&zero_ex,
  3017 + ext4_ext_pblock(&orig_ex));
  3018 + }
2999 3019  
3000 3020 if (err)
3001 3021 goto fix_extent_len;
... ... @@ -3003,6 +3023,12 @@
3003 3023 ex->ee_len = cpu_to_le16(ee_len);
3004 3024 ext4_ext_try_to_merge(handle, inode, path, ex);
3005 3025 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
  3026 + if (err)
  3027 + goto fix_extent_len;
  3028 +
  3029 + /* update extent status tree */
  3030 + err = ext4_es_zeroout(inode, &zero_ex);
  3031 +
3006 3032 goto out;
3007 3033 } else if (err)
3008 3034 goto fix_extent_len;
... ... @@ -3041,6 +3067,7 @@
3041 3067 int err = 0;
3042 3068 int uninitialized;
3043 3069 int split_flag1, flags1;
  3070 + int allocated = map->m_len;
3044 3071  
3045 3072 depth = ext_depth(inode);
3046 3073 ex = path[depth].p_ext;
... ... @@ -3060,20 +3087,29 @@
3060 3087 map->m_lblk + map->m_len, split_flag1, flags1);
3061 3088 if (err)
3062 3089 goto out;
  3090 + } else {
  3091 + allocated = ee_len - (map->m_lblk - ee_block);
3063 3092 }
3064   -
  3093 + /*
  3094 + * Update path is required because previous ext4_split_extent_at() may
  3095 + * result in split of original leaf or extent zeroout.
  3096 + */
3065 3097 ext4_ext_drop_refs(path);
3066 3098 path = ext4_ext_find_extent(inode, map->m_lblk, path);
3067 3099 if (IS_ERR(path))
3068 3100 return PTR_ERR(path);
  3101 + depth = ext_depth(inode);
  3102 + ex = path[depth].p_ext;
  3103 + uninitialized = ext4_ext_is_uninitialized(ex);
  3104 + split_flag1 = 0;
3069 3105  
3070 3106 if (map->m_lblk >= ee_block) {
3071   - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
3072   - EXT4_EXT_DATA_VALID2);
3073   - if (uninitialized)
  3107 + split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
  3108 + if (uninitialized) {
3074 3109 split_flag1 |= EXT4_EXT_MARK_UNINIT1;
3075   - if (split_flag & EXT4_EXT_MARK_UNINIT2)
3076   - split_flag1 |= EXT4_EXT_MARK_UNINIT2;
  3110 + split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
  3111 + EXT4_EXT_MARK_UNINIT2);
  3112 + }
3077 3113 err = ext4_split_extent_at(handle, inode, path,
3078 3114 map->m_lblk, split_flag1, flags);
3079 3115 if (err)
... ... @@ -3082,7 +3118,7 @@
3082 3118  
3083 3119 ext4_ext_show_leaf(inode, path);
3084 3120 out:
3085   - return err ? err : map->m_len;
  3121 + return err ? err : allocated;
3086 3122 }
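A worked example of the return-value fix above, using made-up block numbers in plain userspace arithmetic (not kernel code): when the extent being split ends before the end of the requested range, the split can only cover part of map->m_len, and that shorter length is what ext4_split_extent() now returns instead of the full request.

    #include <stdio.h>

    int main(void)
    {
        unsigned int m_lblk = 100, m_len = 8;    /* requested range   */
        unsigned int ee_block = 96, ee_len = 10; /* extent 96..105    */
        unsigned int allocated = m_len;

        if (m_lblk + m_len >= ee_block + ee_len) /* no tail split needed */
            allocated = ee_len - (m_lblk - ee_block);

        printf("requested %u blocks, actually covered %u blocks\n",
               m_len, allocated);               /* 8 vs 6 */
        return 0;
    }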
3087 3123  
3088 3124 /*
... ... @@ -3137,6 +3173,7 @@
3137 3173 ee_block = le32_to_cpu(ex->ee_block);
3138 3174 ee_len = ext4_ext_get_actual_len(ex);
3139 3175 allocated = ee_len - (map->m_lblk - ee_block);
  3176 + zero_ex.ee_len = 0;
3140 3177  
3141 3178 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3142 3179  
3143 3180  
... ... @@ -3227,13 +3264,16 @@
3227 3264  
3228 3265 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3229 3266 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3230   - inode->i_sb->s_blocksize_bits;
  3267 + (inode->i_sb->s_blocksize_bits - 10);
3231 3268  
3232 3269 /* If extent is less than s_max_zeroout_kb, zeroout directly */
3233 3270 if (max_zeroout && (ee_len <= max_zeroout)) {
3234 3271 err = ext4_ext_zeroout(inode, ex);
3235 3272 if (err)
3236 3273 goto out;
  3274 + zero_ex.ee_block = ex->ee_block;
  3275 + zero_ex.ee_len = ext4_ext_get_actual_len(ex);
  3276 + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
3237 3277  
3238 3278 err = ext4_ext_get_access(handle, inode, path + depth);
3239 3279 if (err)
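A quick arithmetic check of the shift fix above (userspace sketch, default tuning values assumed): s_extent_max_zeroout_kb is in KiB, and one block is 1 << (s_blocksize_bits - 10) KiB, so the old shift by the full s_blocksize_bits turned the 32 KiB default into zero blocks and effectively disabled the zeroout optimization.

    #include <stdio.h>

    int main(void)
    {
        unsigned int max_zeroout_kb = 32;   /* default s_extent_max_zeroout_kb */
        unsigned int blocksize_bits = 12;   /* 4 KiB blocks */

        printf("old: %u blocks\n", max_zeroout_kb >> blocksize_bits);         /* 0 */
        printf("new: %u blocks\n", max_zeroout_kb >> (blocksize_bits - 10));  /* 8 */
        return 0;
    }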
... ... @@ -3292,6 +3332,9 @@
3292 3332 err = allocated;
3293 3333  
3294 3334 out:
  3335 + /* If we have gotten a failure, don't zero out status tree */
  3336 + if (!err)
  3337 + err = ext4_es_zeroout(inode, &zero_ex);
3295 3338 return err ? err : allocated;
3296 3339 }
3297 3340  
3298 3341  
... ... @@ -3374,8 +3417,19 @@
3374 3417 "block %llu, max_blocks %u\n", inode->i_ino,
3375 3418 (unsigned long long)ee_block, ee_len);
3376 3419  
3377   - /* If extent is larger than requested then split is required */
  3420 + /* If extent is larger than requested it is a clear sign that we still
  3421 + * have some extent state machine issues left. So extent_split is still
  3422 + * required.
  3423 + * TODO: Once all related issues will be fixed this situation should be
  3424 + * illegal.
  3425 + */
3378 3426 if (ee_block != map->m_lblk || ee_len > map->m_len) {
  3427 +#ifdef EXT4_DEBUG
  3428 + ext4_warning("Inode (%ld) finished: extent logical block %llu,"
  3429 + " len %u; IO logical block %llu, len %u\n",
  3430 + inode->i_ino, (unsigned long long)ee_block, ee_len,
  3431 + (unsigned long long)map->m_lblk, map->m_len);
  3432 +#endif
3379 3433 err = ext4_split_unwritten_extents(handle, inode, map, path,
3380 3434 EXT4_GET_BLOCKS_CONVERT);
3381 3435 if (err < 0)
... ... @@ -3626,6 +3680,10 @@
3626 3680 path, map->m_len);
3627 3681 } else
3628 3682 err = ret;
  3683 + map->m_flags |= EXT4_MAP_MAPPED;
  3684 + if (allocated > map->m_len)
  3685 + allocated = map->m_len;
  3686 + map->m_len = allocated;
3629 3687 goto out2;
3630 3688 }
3631 3689 /* buffered IO case */
... ... @@ -3675,6 +3733,7 @@
3675 3733 allocated - map->m_len);
3676 3734 allocated = map->m_len;
3677 3735 }
  3736 + map->m_len = allocated;
3678 3737  
3679 3738 /*
3680 3739 * If we have done fallocate with the offset that is already
... ... @@ -4106,9 +4165,6 @@
4106 4165 }
4107 4166 } else {
4108 4167 BUG_ON(allocated_clusters < reserved_clusters);
4109   - /* We will claim quota for all newly allocated blocks.*/
4110   - ext4_da_update_reserve_space(inode, allocated_clusters,
4111   - 1);
4112 4168 if (reserved_clusters < allocated_clusters) {
4113 4169 struct ext4_inode_info *ei = EXT4_I(inode);
4114 4170 int reservation = allocated_clusters -
... ... @@ -4159,6 +4215,15 @@
4159 4215 ei->i_reserved_data_blocks += reservation;
4160 4216 spin_unlock(&ei->i_block_reservation_lock);
4161 4217 }
  4218 + /*
  4219 + * We will claim quota for all newly allocated blocks.
  4220 + * We're updating the reserved space *after* the
  4221 + * correction above so we do not accidentally free
  4222 + * all the metadata reservation because we might
  4223 + * actually need it later on.
  4224 + */
  4225 + ext4_da_update_reserve_space(inode, allocated_clusters,
  4226 + 1);
4162 4227 }
4163 4228 }
4164 4229  
... ... @@ -4368,8 +4433,6 @@
4368 4433 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4369 4434 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4370 4435  
4371   - /* Prevent race condition between unwritten */
4372   - ext4_flush_unwritten_io(inode);
4373 4436 retry:
4374 4437 while (ret >= 0 && ret < max_blocks) {
4375 4438 map.m_lblk = map.m_lblk + ret;
fs/ext4/extents_status.c
... ... @@ -333,17 +333,27 @@
333 333 static int ext4_es_can_be_merged(struct extent_status *es1,
334 334 struct extent_status *es2)
335 335 {
336   - if (es1->es_lblk + es1->es_len != es2->es_lblk)
  336 + if (ext4_es_status(es1) != ext4_es_status(es2))
337 337 return 0;
338 338  
339   - if (ext4_es_status(es1) != ext4_es_status(es2))
  339 + if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
340 340 return 0;
341 341  
342   - if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
343   - (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
  342 + if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
344 343 return 0;
345 344  
346   - return 1;
  345 + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
  346 + (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
  347 + return 1;
  348 +
  349 + if (ext4_es_is_hole(es1))
  350 + return 1;
  351 +
  352 + /* we need to check delayed extent is without unwritten status */
  353 + if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
  354 + return 1;
  355 +
  356 + return 0;
347 357 }
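The new length check in ext4_es_can_be_merged() relies on promoting one operand to 64 bits before the addition; a standalone sketch of why, with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t len1 = 0x80000000u, len2 = 0x80000001u;

        uint32_t sum32 = len1 + len2;             /* wraps to 1       */
        uint64_t sum64 = (uint64_t)len1 + len2;   /* 0x100000001      */

        printf("32-bit sum: %u, 64-bit sum: %llu\n",
               sum32, (unsigned long long)sum64);
        return 0;
    }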
348 358  
349 359 static struct extent_status *
... ... @@ -389,6 +399,179 @@
389 399 return es;
390 400 }
391 401  
  402 +#ifdef ES_AGGRESSIVE_TEST
  403 +static void ext4_es_insert_extent_ext_check(struct inode *inode,
  404 + struct extent_status *es)
  405 +{
  406 + struct ext4_ext_path *path = NULL;
  407 + struct ext4_extent *ex;
  408 + ext4_lblk_t ee_block;
  409 + ext4_fsblk_t ee_start;
  410 + unsigned short ee_len;
  411 + int depth, ee_status, es_status;
  412 +
  413 + path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
  414 + if (IS_ERR(path))
  415 + return;
  416 +
  417 + depth = ext_depth(inode);
  418 + ex = path[depth].p_ext;
  419 +
  420 + if (ex) {
  421 +
  422 + ee_block = le32_to_cpu(ex->ee_block);
  423 + ee_start = ext4_ext_pblock(ex);
  424 + ee_len = ext4_ext_get_actual_len(ex);
  425 +
  426 + ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
  427 + es_status = ext4_es_is_unwritten(es) ? 1 : 0;
  428 +
  429 + /*
  430 + * Make sure ex and es are not overlap when we try to insert
  431 + * a delayed/hole extent.
  432 + */
  433 + if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
  434 + if (in_range(es->es_lblk, ee_block, ee_len)) {
  435 + pr_warn("ES insert assertation failed for "
  436 + "inode: %lu we can find an extent "
  437 + "at block [%d/%d/%llu/%c], but we "
  438 + "want to add an delayed/hole extent "
  439 + "[%d/%d/%llu/%llx]\n",
  440 + inode->i_ino, ee_block, ee_len,
  441 + ee_start, ee_status ? 'u' : 'w',
  442 + es->es_lblk, es->es_len,
  443 + ext4_es_pblock(es), ext4_es_status(es));
  444 + }
  445 + goto out;
  446 + }
  447 +
  448 + /*
  449 + * We don't check ee_block == es->es_lblk, etc. because es
  450 + * might be a part of whole extent, vice versa.
  451 + */
  452 + if (es->es_lblk < ee_block ||
  453 + ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
  454 + pr_warn("ES insert assertation failed for inode: %lu "
  455 + "ex_status [%d/%d/%llu/%c] != "
  456 + "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
  457 + ee_block, ee_len, ee_start,
  458 + ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
  459 + ext4_es_pblock(es), es_status ? 'u' : 'w');
  460 + goto out;
  461 + }
  462 +
  463 + if (ee_status ^ es_status) {
  464 + pr_warn("ES insert assertation failed for inode: %lu "
  465 + "ex_status [%d/%d/%llu/%c] != "
  466 + "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
  467 + ee_block, ee_len, ee_start,
  468 + ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
  469 + ext4_es_pblock(es), es_status ? 'u' : 'w');
  470 + }
  471 + } else {
  472 + /*
  473 + * We can't find an extent on disk. So we need to make sure
  474 + * that we don't want to add an written/unwritten extent.
  475 + */
  476 + if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
  477 + pr_warn("ES insert assertation failed for inode: %lu "
  478 + "can't find an extent at block %d but we want "
  479 + "to add an written/unwritten extent "
  480 + "[%d/%d/%llu/%llx]\n", inode->i_ino,
  481 + es->es_lblk, es->es_lblk, es->es_len,
  482 + ext4_es_pblock(es), ext4_es_status(es));
  483 + }
  484 + }
  485 +out:
  486 + if (path) {
  487 + ext4_ext_drop_refs(path);
  488 + kfree(path);
  489 + }
  490 +}
  491 +
  492 +static void ext4_es_insert_extent_ind_check(struct inode *inode,
  493 + struct extent_status *es)
  494 +{
  495 + struct ext4_map_blocks map;
  496 + int retval;
  497 +
  498 + /*
  499 + * Here we call ext4_ind_map_blocks to lookup a block mapping because
  500 + * 'Indirect' structure is defined in indirect.c. So we couldn't
  501 + * access direct/indirect tree from outside. It is too dirty to define
  502 + * this function in indirect.c file.
  503 + */
  504 +
  505 + map.m_lblk = es->es_lblk;
  506 + map.m_len = es->es_len;
  507 +
  508 + retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
  509 + if (retval > 0) {
  510 + if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
  511 + /*
  512 + * We want to add a delayed/hole extent but this
  513 + * block has been allocated.
  514 + */
  515 + pr_warn("ES insert assertation failed for inode: %lu "
  516 + "We can find blocks but we want to add a "
  517 + "delayed/hole extent [%d/%d/%llu/%llx]\n",
  518 + inode->i_ino, es->es_lblk, es->es_len,
  519 + ext4_es_pblock(es), ext4_es_status(es));
  520 + return;
  521 + } else if (ext4_es_is_written(es)) {
  522 + if (retval != es->es_len) {
  523 + pr_warn("ES insert assertation failed for "
  524 + "inode: %lu retval %d != es_len %d\n",
  525 + inode->i_ino, retval, es->es_len);
  526 + return;
  527 + }
  528 + if (map.m_pblk != ext4_es_pblock(es)) {
  529 + pr_warn("ES insert assertation failed for "
  530 + "inode: %lu m_pblk %llu != "
  531 + "es_pblk %llu\n",
  532 + inode->i_ino, map.m_pblk,
  533 + ext4_es_pblock(es));
  534 + return;
  535 + }
  536 + } else {
  537 + /*
  538 + * We don't need to check unwritten extent because
  539 + * indirect-based file doesn't have it.
  540 + */
  541 + BUG_ON(1);
  542 + }
  543 + } else if (retval == 0) {
  544 + if (ext4_es_is_written(es)) {
  545 + pr_warn("ES insert assertation failed for inode: %lu "
  546 + "We can't find the block but we want to add "
  547 + "an written extent [%d/%d/%llu/%llx]\n",
  548 + inode->i_ino, es->es_lblk, es->es_len,
  549 + ext4_es_pblock(es), ext4_es_status(es));
  550 + return;
  551 + }
  552 + }
  553 +}
  554 +
  555 +static inline void ext4_es_insert_extent_check(struct inode *inode,
  556 + struct extent_status *es)
  557 +{
  558 + /*
  559 + * We don't need to worry about the race condition because
  560 + * caller takes i_data_sem locking.
  561 + */
  562 + BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
  563 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
  564 + ext4_es_insert_extent_ext_check(inode, es);
  565 + else
  566 + ext4_es_insert_extent_ind_check(inode, es);
  567 +}
  568 +#else
  569 +static inline void ext4_es_insert_extent_check(struct inode *inode,
  570 + struct extent_status *es)
  571 +{
  572 +}
  573 +#endif
  574 +
392 575 static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
393 576 {
394 577 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
... ... @@ -471,6 +654,8 @@
471 654 ext4_es_store_status(&newes, status);
472 655 trace_ext4_es_insert_extent(inode, &newes);
473 656  
  657 + ext4_es_insert_extent_check(inode, &newes);
  658 +
474 659 write_lock(&EXT4_I(inode)->i_es_lock);
475 660 err = __es_remove_extent(inode, lblk, end);
476 661 if (err != 0)
... ... @@ -667,6 +852,23 @@
667 852 write_unlock(&EXT4_I(inode)->i_es_lock);
668 853 ext4_es_print_tree(inode);
669 854 return err;
  855 +}
  856 +
  857 +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
  858 +{
  859 + ext4_lblk_t ee_block;
  860 + ext4_fsblk_t ee_pblock;
  861 + unsigned int ee_len;
  862 +
  863 + ee_block = le32_to_cpu(ex->ee_block);
  864 + ee_len = ext4_ext_get_actual_len(ex);
  865 + ee_pblock = ext4_ext_pblock(ex);
  866 +
  867 + if (ee_len == 0)
  868 + return 0;
  869 +
  870 + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
  871 + EXTENT_STATUS_WRITTEN);
670 872 }
671 873  
672 874 static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
fs/ext4/extents_status.h
... ... @@ -21,6 +21,12 @@
21 21 #endif
22 22  
23 23 /*
  24 + * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
  25 + * checked with old map_block's result.
  26 + */
  27 +#define ES_AGGRESSIVE_TEST__
  28 +
  29 +/*
24 30 * These flags live in the high bits of extent_status.es_pblk
25 31 */
26 32 #define EXTENT_STATUS_WRITTEN (1ULL << 63)
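Note the trailing double underscore in the new define above: ES_AGGRESSIVE_TEST itself stays undefined by default, so the self-test code is compiled out; removing the "__" suffix enables it at build time. A tiny sketch of that opt-in convention (illustrative, not kernel code):

    #include <stdio.h>

    #define ES_AGGRESSIVE_TEST__    /* default: self-tests disabled */

    int main(void)
    {
    #ifdef ES_AGGRESSIVE_TEST
        puts("extent status self-tests: enabled");
    #else
        puts("extent status self-tests: disabled");
    #endif
        return 0;
    }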
... ... @@ -33,6 +39,8 @@
33 39 EXTENT_STATUS_DELAYED | \
34 40 EXTENT_STATUS_HOLE)
35 41  
  42 +struct ext4_extent;
  43 +
36 44 struct extent_status {
37 45 struct rb_node rb_node;
38 46 ext4_lblk_t es_lblk; /* first logical block extent covers */
... ... @@ -58,6 +66,7 @@
58 66 struct extent_status *es);
59 67 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
60 68 struct extent_status *es);
  69 +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
61 70  
62 71 static inline int ext4_es_is_written(struct extent_status *es)
63 72 {
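fs/ext4/ialloc.c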
... ... @@ -324,8 +324,8 @@
324 324 }
325 325  
326 326 struct orlov_stats {
  327 + __u64 free_clusters;
327 328 __u32 free_inodes;
328   - __u32 free_clusters;
329 329 __u32 used_dirs;
330 330 };
331 331  
... ... @@ -342,7 +342,7 @@
342 342  
343 343 if (flex_size > 1) {
344 344 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
345   - stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
  345 + stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
346 346 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
347 347 return;
348 348 }
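fs/ext4/inode.c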
... ... @@ -185,8 +185,6 @@
185 185  
186 186 trace_ext4_evict_inode(inode);
187 187  
188   - ext4_ioend_wait(inode);
189   -
190 188 if (inode->i_nlink) {
191 189 /*
192 190 * When journalling data dirty buffers are tracked only in the
... ... @@ -207,7 +205,8 @@
207 205 * don't use page cache.
208 206 */
209 207 if (ext4_should_journal_data(inode) &&
210   - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
  208 + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
  209 + inode->i_ino != EXT4_JOURNAL_INO) {
211 210 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
212 211 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
213 212  
... ... @@ -216,6 +215,7 @@
216 215 filemap_write_and_wait(&inode->i_data);
217 216 }
218 217 truncate_inode_pages(&inode->i_data, 0);
  218 + ext4_ioend_shutdown(inode);
219 219 goto no_delete;
220 220 }
221 221  
... ... @@ -225,6 +225,7 @@
225 225 if (ext4_should_order_data(inode))
226 226 ext4_begin_ordered_truncate(inode, 0);
227 227 truncate_inode_pages(&inode->i_data, 0);
  228 + ext4_ioend_shutdown(inode);
228 229  
229 230 if (is_bad_inode(inode))
230 231 goto no_delete;
... ... @@ -482,6 +483,58 @@
482 483 return num;
483 484 }
484 485  
  486 +#ifdef ES_AGGRESSIVE_TEST
  487 +static void ext4_map_blocks_es_recheck(handle_t *handle,
  488 + struct inode *inode,
  489 + struct ext4_map_blocks *es_map,
  490 + struct ext4_map_blocks *map,
  491 + int flags)
  492 +{
  493 + int retval;
  494 +
  495 + map->m_flags = 0;
  496 + /*
  497 + * There is a race window that the result is not the same.
  498 + * e.g. xfstests #223 when dioread_nolock enables. The reason
  499 + * is that we lookup a block mapping in extent status tree with
  500 + * out taking i_data_sem. So at the time the unwritten extent
  501 + * could be converted.
  502 + */
  503 + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
  504 + down_read((&EXT4_I(inode)->i_data_sem));
  505 + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
  506 + retval = ext4_ext_map_blocks(handle, inode, map, flags &
  507 + EXT4_GET_BLOCKS_KEEP_SIZE);
  508 + } else {
  509 + retval = ext4_ind_map_blocks(handle, inode, map, flags &
  510 + EXT4_GET_BLOCKS_KEEP_SIZE);
  511 + }
  512 + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
  513 + up_read((&EXT4_I(inode)->i_data_sem));
  514 + /*
  515 + * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
  516 + * because it shouldn't be marked in es_map->m_flags.
  517 + */
  518 + map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
  519 +
  520 + /*
  521 + * We don't check m_len because extent will be collpased in status
  522 + * tree. So the m_len might not equal.
  523 + */
  524 + if (es_map->m_lblk != map->m_lblk ||
  525 + es_map->m_flags != map->m_flags ||
  526 + es_map->m_pblk != map->m_pblk) {
  527 + printk("ES cache assertation failed for inode: %lu "
  528 + "es_cached ex [%d/%d/%llu/%x] != "
  529 + "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
  530 + inode->i_ino, es_map->m_lblk, es_map->m_len,
  531 + es_map->m_pblk, es_map->m_flags, map->m_lblk,
  532 + map->m_len, map->m_pblk, map->m_flags,
  533 + retval, flags);
  534 + }
  535 +}
  536 +#endif /* ES_AGGRESSIVE_TEST */
  537 +
485 538 /*
486 539 * The ext4_map_blocks() function tries to look up the requested blocks,
487 540 * and returns if the blocks are already mapped.
488 541  
... ... @@ -509,7 +562,12 @@
509 562 {
510 563 struct extent_status es;
511 564 int retval;
  565 +#ifdef ES_AGGRESSIVE_TEST
  566 + struct ext4_map_blocks orig_map;
512 567  
  568 + memcpy(&orig_map, map, sizeof(*map));
  569 +#endif
  570 +
513 571 map->m_flags = 0;
514 572 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
515 573 "logical block %lu\n", inode->i_ino, flags, map->m_len,
... ... @@ -531,6 +589,10 @@
531 589 } else {
532 590 BUG_ON(1);
533 591 }
  592 +#ifdef ES_AGGRESSIVE_TEST
  593 + ext4_map_blocks_es_recheck(handle, inode, map,
  594 + &orig_map, flags);
  595 +#endif
534 596 goto found;
535 597 }
536 598  
... ... @@ -551,6 +613,15 @@
551 613 int ret;
552 614 unsigned long long status;
553 615  
  616 +#ifdef ES_AGGRESSIVE_TEST
  617 + if (retval != map->m_len) {
  618 + printk("ES len assertation failed for inode: %lu "
  619 + "retval %d != map->m_len %d "
  620 + "in %s (lookup)\n", inode->i_ino, retval,
  621 + map->m_len, __func__);
  622 + }
  623 +#endif
  624 +
554 625 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 626 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 627 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
... ... @@ -643,6 +714,24 @@
643 714 int ret;
644 715 unsigned long long status;
645 716  
  717 +#ifdef ES_AGGRESSIVE_TEST
  718 + if (retval != map->m_len) {
  719 + printk("ES len assertation failed for inode: %lu "
  720 + "retval %d != map->m_len %d "
  721 + "in %s (allocation)\n", inode->i_ino, retval,
  722 + map->m_len, __func__);
  723 + }
  724 +#endif
  725 +
  726 + /*
  727 + * If the extent has been zeroed out, we don't need to update
  728 + * extent status tree.
  729 + */
  730 + if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
  731 + ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
  732 + if (ext4_es_is_written(&es))
  733 + goto has_zeroout;
  734 + }
646 735 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
647 736 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
648 737 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
... ... @@ -655,6 +744,7 @@
655 744 retval = ret;
656 745 }
657 746  
  747 +has_zeroout:
658 748 up_write((&EXT4_I(inode)->i_data_sem));
659 749 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
660 750 int ret = check_block_validity(inode, map);
... ... @@ -1216,6 +1306,55 @@
1216 1306 }
1217 1307  
1218 1308 /*
  1309 + * Reserve a metadata for a single block located at lblock
  1310 + */
  1311 +static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
  1312 +{
  1313 + int retries = 0;
  1314 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  1315 + struct ext4_inode_info *ei = EXT4_I(inode);
  1316 + unsigned int md_needed;
  1317 + ext4_lblk_t save_last_lblock;
  1318 + int save_len;
  1319 +
  1320 + /*
  1321 + * recalculate the amount of metadata blocks to reserve
  1322 + * in order to allocate nrblocks
  1323 + * worse case is one extent per block
  1324 + */
  1325 +repeat:
  1326 + spin_lock(&ei->i_block_reservation_lock);
  1327 + /*
  1328 + * ext4_calc_metadata_amount() has side effects, which we have
  1329 + * to be prepared undo if we fail to claim space.
  1330 + */
  1331 + save_len = ei->i_da_metadata_calc_len;
  1332 + save_last_lblock = ei->i_da_metadata_calc_last_lblock;
  1333 + md_needed = EXT4_NUM_B2C(sbi,
  1334 + ext4_calc_metadata_amount(inode, lblock));
  1335 + trace_ext4_da_reserve_space(inode, md_needed);
  1336 +
  1337 + /*
  1338 + * We do still charge estimated metadata to the sb though;
  1339 + * we cannot afford to run out of free blocks.
  1340 + */
  1341 + if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
  1342 + ei->i_da_metadata_calc_len = save_len;
  1343 + ei->i_da_metadata_calc_last_lblock = save_last_lblock;
  1344 + spin_unlock(&ei->i_block_reservation_lock);
  1345 + if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
  1346 + cond_resched();
  1347 + goto repeat;
  1348 + }
  1349 + return -ENOSPC;
  1350 + }
  1351 + ei->i_reserved_meta_blocks += md_needed;
  1352 + spin_unlock(&ei->i_block_reservation_lock);
  1353 +
  1354 + return 0; /* success */
  1355 +}
  1356 +
  1357 +/*
1219 1358 * Reserve a single cluster located at lblock
1220 1359 */
1221 1360 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
... ... @@ -1263,7 +1402,7 @@
1263 1402 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1264 1403 spin_unlock(&ei->i_block_reservation_lock);
1265 1404 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1266   - yield();
  1405 + cond_resched();
1267 1406 goto repeat;
1268 1407 }
1269 1408 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1270 1409  
... ... @@ -1768,7 +1907,12 @@
1768 1907 struct extent_status es;
1769 1908 int retval;
1770 1909 sector_t invalid_block = ~((sector_t) 0xffff);
  1910 +#ifdef ES_AGGRESSIVE_TEST
  1911 + struct ext4_map_blocks orig_map;
1771 1912  
  1913 + memcpy(&orig_map, map, sizeof(*map));
  1914 +#endif
  1915 +
1772 1916 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1773 1917 invalid_block = ~0;
1774 1918  
... ... @@ -1809,6 +1953,9 @@
1809 1953 else
1810 1954 BUG_ON(1);
1811 1955  
  1956 +#ifdef ES_AGGRESSIVE_TEST
  1957 + ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
  1958 +#endif
1812 1959 return retval;
1813 1960 }
1814 1961  
... ... @@ -1843,8 +1990,11 @@
1843 1990 * XXX: __block_prepare_write() unmaps passed block,
1844 1991 * is it OK?
1845 1992 */
1846   - /* If the block was allocated from previously allocated cluster,
1847   - * then we dont need to reserve it again. */
  1993 + /*
  1994 + * If the block was allocated from previously allocated cluster,
  1995 + * then we don't need to reserve it again. However we still need
  1996 + * to reserve metadata for every block we're going to write.
  1997 + */
1848 1998 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1849 1999 ret = ext4_da_reserve_space(inode, iblock);
1850 2000 if (ret) {
... ... @@ -1852,6 +2002,13 @@
1852 2002 retval = ret;
1853 2003 goto out_unlock;
1854 2004 }
  2005 + } else {
  2006 + ret = ext4_da_reserve_metadata(inode, iblock);
  2007 + if (ret) {
  2008 + /* not enough space to reserve */
  2009 + retval = ret;
  2010 + goto out_unlock;
  2011 + }
1855 2012 }
1856 2013  
1857 2014 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
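A rough userspace model of the reservation decision above (made-up accounting, not the kernel's actual bookkeeping): with bigalloc, a delayed write into a cluster that already carries a reservation skips the data-cluster reserve, but it can still grow the extent tree, so the fix reserves metadata for it instead of reserving nothing at all.

    #include <stdbool.h>
    #include <stdio.h>

    struct rsv {
        unsigned int data_clusters; /* i_reserved_data_blocks (simplified) */
        unsigned int meta_blocks;   /* i_reserved_meta_blocks (simplified) */
    };

    static void delayed_write_block(struct rsv *r, bool from_reserved_cluster)
    {
        if (!from_reserved_cluster) {
            r->data_clusters += 1;  /* ext4_da_reserve_space() path */
            r->meta_blocks   += 1;  /* metadata estimate (simplified) */
        } else {
            r->meta_blocks   += 1;  /* new ext4_da_reserve_metadata() path */
        }
    }

    int main(void)
    {
        struct rsv r = { 0, 0 };

        delayed_write_block(&r, false); /* first block of the cluster  */
        delayed_write_block(&r, true);  /* another block, same cluster */
        printf("reserved: %u data clusters, %u metadata blocks\n",
               r.data_clusters, r.meta_blocks);
        return 0;
    }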
... ... @@ -1873,6 +2030,15 @@
1873 2030 int ret;
1874 2031 unsigned long long status;
1875 2032  
  2033 +#ifdef ES_AGGRESSIVE_TEST
  2034 + if (retval != map->m_len) {
  2035 + printk("ES len assertation failed for inode: %lu "
  2036 + "retval %d != map->m_len %d "
  2037 + "in %s (lookup)\n", inode->i_ino, retval,
  2038 + map->m_len, __func__);
  2039 + }
  2040 +#endif
  2041 +
1876 2042 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 2043 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 2044 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
... ... @@ -2908,8 +3074,8 @@
2908 3074  
2909 3075 trace_ext4_releasepage(page);
2910 3076  
2911   - WARN_ON(PageChecked(page));
2912   - if (!page_has_buffers(page))
  3077 + /* Page has dirty journalled data -> cannot release */
  3078 + if (PageChecked(page))
2913 3079 return 0;
2914 3080 if (journal)
2915 3081 return jbd2_journal_try_to_free_buffers(journal, page, wait);
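fs/ext4/mballoc.c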
... ... @@ -2804,8 +2804,8 @@
2804 2804 if (sbi->s_log_groups_per_flex) {
2805 2805 ext4_group_t flex_group = ext4_flex_group(sbi,
2806 2806 ac->ac_b_ex.fe_group);
2807   - atomic_sub(ac->ac_b_ex.fe_len,
2808   - &sbi->s_flex_groups[flex_group].free_clusters);
  2807 + atomic64_sub(ac->ac_b_ex.fe_len,
  2808 + &sbi->s_flex_groups[flex_group].free_clusters);
2809 2809 }
2810 2810  
2811 2811 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
... ... @@ -3692,11 +3692,7 @@
3692 3692 if (free < needed && busy) {
3693 3693 busy = 0;
3694 3694 ext4_unlock_group(sb, group);
3695   - /*
3696   - * Yield the CPU here so that we don't get soft lockup
3697   - * in non preempt case.
3698   - */
3699   - yield();
  3695 + cond_resched();
3700 3696 goto repeat;
3701 3697 }
3702 3698  
... ... @@ -4246,7 +4242,7 @@
4246 4242 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4247 4243  
4248 4244 /* let others to free the space */
4249   - yield();
  4245 + cond_resched();
4250 4246 ar->len = ar->len >> 1;
4251 4247 }
4252 4248 if (!ar->len) {
... ... @@ -4464,7 +4460,6 @@
4464 4460 struct buffer_head *bitmap_bh = NULL;
4465 4461 struct super_block *sb = inode->i_sb;
4466 4462 struct ext4_group_desc *gdp;
4467   - unsigned long freed = 0;
4468 4463 unsigned int overflow;
4469 4464 ext4_grpblk_t bit;
4470 4465 struct buffer_head *gd_bh;
4471 4466  
... ... @@ -4666,14 +4661,12 @@
4666 4661  
4667 4662 if (sbi->s_log_groups_per_flex) {
4668 4663 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4669   - atomic_add(count_clusters,
4670   - &sbi->s_flex_groups[flex_group].free_clusters);
  4664 + atomic64_add(count_clusters,
  4665 + &sbi->s_flex_groups[flex_group].free_clusters);
4671 4666 }
4672 4667  
4673 4668 ext4_mb_unload_buddy(&e4b);
4674 4669  
4675   - freed += count;
4676   -
4677 4670 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4678 4671 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4679 4672  
... ... @@ -4811,8 +4804,8 @@
4811 4804  
4812 4805 if (sbi->s_log_groups_per_flex) {
4813 4806 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4814   - atomic_add(EXT4_NUM_B2C(sbi, blocks_freed),
4815   - &sbi->s_flex_groups[flex_group].free_clusters);
  4807 + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
  4808 + &sbi->s_flex_groups[flex_group].free_clusters);
4816 4809 }
4817 4810  
4818 4811 ext4_mb_unload_buddy(&e4b);
fs/ext4/move_extent.c
... ... @@ -32,16 +32,18 @@
32 32 */
33 33 static inline int
34 34 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
35   - struct ext4_ext_path **path)
  35 + struct ext4_ext_path **orig_path)
36 36 {
37 37 int ret = 0;
  38 + struct ext4_ext_path *path;
38 39  
39   - *path = ext4_ext_find_extent(inode, lblock, *path);
40   - if (IS_ERR(*path)) {
41   - ret = PTR_ERR(*path);
42   - *path = NULL;
43   - } else if ((*path)[ext_depth(inode)].p_ext == NULL)
  40 + path = ext4_ext_find_extent(inode, lblock, *orig_path);
  41 + if (IS_ERR(path))
  42 + ret = PTR_ERR(path);
  43 + else if (path[ext_depth(inode)].p_ext == NULL)
44 44 ret = -ENODATA;
  45 + else
  46 + *orig_path = path;
45 47  
46 48 return ret;
47 49 }
... ... @@ -611,24 +613,25 @@
611 613 {
612 614 struct ext4_ext_path *path = NULL;
613 615 struct ext4_extent *ext;
  616 + int ret = 0;
614 617 ext4_lblk_t last = from + count;
615 618 while (from < last) {
616 619 *err = get_ext_path(inode, from, &path);
617 620 if (*err)
618   - return 0;
  621 + goto out;
619 622 ext = path[ext_depth(inode)].p_ext;
620   - if (!ext) {
621   - ext4_ext_drop_refs(path);
622   - return 0;
623   - }
624   - if (uninit != ext4_ext_is_uninitialized(ext)) {
625   - ext4_ext_drop_refs(path);
626   - return 0;
627   - }
  623 + if (uninit != ext4_ext_is_uninitialized(ext))
  624 + goto out;
628 625 from += ext4_ext_get_actual_len(ext);
629 626 ext4_ext_drop_refs(path);
630 627 }
631   - return 1;
  628 + ret = 1;
  629 +out:
  630 + if (path) {
  631 + ext4_ext_drop_refs(path);
  632 + kfree(path);
  633 + }
  634 + return ret;
632 635 }
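The rewritten loop above fixes a path leak by funnelling every exit through one cleanup point; a generic userspace sketch of that pattern (stand-in allocation, not the ext4 helpers):

    #include <stdlib.h>
    #include <stdio.h>

    static int check_range(int from, int last)
    {
        int ret = 0;
        char *path = NULL;

        while (from < last) {
            free(path);             /* drop the previous iteration's lookup */
            path = malloc(32);      /* stand-in for ext4_ext_find_extent()  */
            if (!path)
                goto out;           /* error exits share the cleanup below  */
            from++;
        }
        ret = 1;
    out:
        free(path);                 /* single place that releases the path  */
        return ret;
    }

    int main(void)
    {
        printf("coverage ok: %d\n", check_range(0, 4));
        return 0;
    }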
633 636  
634 637 /**
... ... @@ -665,6 +668,14 @@
665 668 int depth;
666 669 int replaced_count = 0;
667 670 int dext_alen;
  671 +
  672 + *err = ext4_es_remove_extent(orig_inode, from, count);
  673 + if (*err)
  674 + goto out;
  675 +
  676 + *err = ext4_es_remove_extent(donor_inode, from, count);
  677 + if (*err)
  678 + goto out;
668 679  
669 680 /* Get the original extent for the block "orig_off" */
670 681 *err = get_ext_path(orig_inode, orig_off, &orig_path);
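fs/ext4/page-io.c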
... ... @@ -50,11 +50,21 @@
50 50 kmem_cache_destroy(io_page_cachep);
51 51 }
52 52  
53   -void ext4_ioend_wait(struct inode *inode)
  53 +/*
  54 + * This function is called by ext4_evict_inode() to make sure there is
  55 + * no more pending I/O completion work left to do.
  56 + */
  57 +void ext4_ioend_shutdown(struct inode *inode)
54 58 {
55 59 wait_queue_head_t *wq = ext4_ioend_wq(inode);
56 60  
57 61 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
  62 + /*
  63 + * We need to make sure the work structure is finished being
  64 + * used before we let the inode get destroyed.
  65 + */
  66 + if (work_pending(&EXT4_I(inode)->i_unwritten_work))
  67 + cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
58 68 }
59 69  
60 70 static void put_io_page(struct ext4_io_page *io_page)
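fs/ext4/resize.c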
... ... @@ -1360,8 +1360,8 @@
1360 1360 sbi->s_log_groups_per_flex) {
1361 1361 ext4_group_t flex_group;
1362 1362 flex_group = ext4_flex_group(sbi, group_data[0].group);
1363   - atomic_add(EXT4_NUM_B2C(sbi, free_blocks),
1364   - &sbi->s_flex_groups[flex_group].free_clusters);
  1363 + atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
  1364 + &sbi->s_flex_groups[flex_group].free_clusters);
1365 1365 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1366 1366 &sbi->s_flex_groups[flex_group].free_inodes);
1367 1367 }
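fs/ext4/super.c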
... ... @@ -1927,8 +1927,8 @@
1927 1927 flex_group = ext4_flex_group(sbi, i);
1928 1928 atomic_add(ext4_free_inodes_count(sb, gdp),
1929 1929 &sbi->s_flex_groups[flex_group].free_inodes);
1930   - atomic_add(ext4_free_group_clusters(sb, gdp),
1931   - &sbi->s_flex_groups[flex_group].free_clusters);
  1930 + atomic64_add(ext4_free_group_clusters(sb, gdp),
  1931 + &sbi->s_flex_groups[flex_group].free_clusters);
1932 1932 atomic_add(ext4_used_dirs_count(sb, gdp),
1933 1933 &sbi->s_flex_groups[flex_group].used_dirs);
1934 1934 }
fs/jbd2/transaction.c
... ... @@ -1065,9 +1065,12 @@
1065 1065 void jbd2_journal_set_triggers(struct buffer_head *bh,
1066 1066 struct jbd2_buffer_trigger_type *type)
1067 1067 {
1068   - struct journal_head *jh = bh2jh(bh);
  1068 + struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
1069 1069  
  1070 + if (WARN_ON(!jh))
  1071 + return;
1070 1072 jh->b_triggers = type;
  1073 + jbd2_journal_put_journal_head(jh);
1071 1074 }
1072 1075  
1073 1076 void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
... ... @@ -1119,17 +1122,18 @@
1119 1122 {
1120 1123 transaction_t *transaction = handle->h_transaction;
1121 1124 journal_t *journal = transaction->t_journal;
1122   - struct journal_head *jh = bh2jh(bh);
  1125 + struct journal_head *jh;
1123 1126 int ret = 0;
1124 1127  
1125   - jbd_debug(5, "journal_head %p\n", jh);
1126   - JBUFFER_TRACE(jh, "entry");
1127 1128 if (is_handle_aborted(handle))
1128 1129 goto out;
1129   - if (!buffer_jbd(bh)) {
  1130 + jh = jbd2_journal_grab_journal_head(bh);
  1131 + if (!jh) {
1130 1132 ret = -EUCLEAN;
1131 1133 goto out;
1132 1134 }
  1135 + jbd_debug(5, "journal_head %p\n", jh);
  1136 + JBUFFER_TRACE(jh, "entry");
1133 1137  
1134 1138 jbd_lock_bh_state(bh);
1135 1139  
... ... @@ -1220,6 +1224,7 @@
1220 1224 spin_unlock(&journal->j_list_lock);
1221 1225 out_unlock_bh:
1222 1226 jbd_unlock_bh_state(bh);
  1227 + jbd2_journal_put_journal_head(jh);
1223 1228 out:
1224 1229 JBUFFER_TRACE(jh, "exit");
1225 1230 WARN_ON(ret); /* All errors are bugs, so dump the stack */
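The jbd2 change above takes its own reference on the journal_head (grab) before touching it and drops it (put) when done, so a concurrent operation can no longer free the structure mid-call. A much-simplified single-threaded userspace refcount sketch of the idea (names are illustrative, not the jbd2 implementation):

    #include <stdio.h>
    #include <stdlib.h>

    struct journal_head {
        int refcount;
    };

    static struct journal_head *grab(struct journal_head *jh)
    {
        if (!jh)
            return NULL;            /* buffer no longer journalled: bail out */
        jh->refcount++;
        return jh;
    }

    static void put(struct journal_head *jh)
    {
        if (jh && --jh->refcount == 0)
            free(jh);
    }

    int main(void)
    {
        struct journal_head *jh = calloc(1, sizeof(*jh));

        if (!jh)
            return 1;
        jh->refcount = 1;
        struct journal_head *held = grab(jh);   /* pin for the duration of the work  */
        put(jh);                                /* another path drops its reference  */
        printf("still valid, refcount=%d\n", held->refcount);
        put(held);                              /* our reference; object freed here  */
        return 0;
    }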