Commit d3c926264a92e5ea448add3e883530e1edad3ce2
Merge tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o:
 "Fix a number of regression and other bugs in ext4, most of which were
  relatively obscure cornercases or races that were found using
  regression tests."

* tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (21 commits)
  ext4: fix data=journal fast mount/umount hang
  ext4: fix ext4_evict_inode() racing against workqueue processing code
  ext4: fix memory leakage in mext_check_coverage
  ext4: use s_extent_max_zeroout_kb value as number of kb
  ext4: use atomic64_t for the per-flexbg free_clusters count
  jbd2: fix use after free in jbd2_journal_dirty_metadata()
  ext4: reserve metadata block for every delayed write
  ext4: update reserved space after the 'correction'
  ext4: do not use yield()
  ext4: remove unused variable in ext4_free_blocks()
  ext4: fix WARN_ON from ext4_releasepage()
  ext4: fix the wrong number of the allocated blocks in ext4_split_extent()
  ext4: update extent status tree after an extent is zeroed out
  ext4: fix wrong m_len value after unwritten extent conversion
  ext4: add self-testing infrastructure to do a sanity check
  ext4: avoid a potential overflow in ext4_es_can_be_merged()
  ext4: invalidate extent status tree during extent migration
  ext4: remove unnecessary wait for extent conversion in ext4_fallocate()
  ext4: add warning to ext4_convert_unwritten_extents_endio
  ext4: disable merging of uninitialized extents
  ...
12 changed files
fs/ext4/ext4.h
... | ... | @@ -335,9 +335,9 @@ |
335 | 335 | */ |
336 | 336 | |
337 | 337 | struct flex_groups { |
338 | - atomic_t free_inodes; | |
339 | - atomic_t free_clusters; | |
340 | - atomic_t used_dirs; | |
338 | + atomic64_t free_clusters; | |
339 | + atomic_t free_inodes; | |
340 | + atomic_t used_dirs; | |
341 | 341 | }; |
342 | 342 | |
343 | 343 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ |
... | ... | @@ -2617,7 +2617,7 @@ |
2617 | 2617 | extern int __init ext4_init_pageio(void); |
2618 | 2618 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); |
2619 | 2619 | extern void ext4_exit_pageio(void); |
2620 | -extern void ext4_ioend_wait(struct inode *); | |
2620 | +extern void ext4_ioend_shutdown(struct inode *); | |
2621 | 2621 | extern void ext4_free_io_end(ext4_io_end_t *io); |
2622 | 2622 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2623 | 2623 | extern void ext4_end_io_work(struct work_struct *work); |
fs/ext4/extents.c
... | ... | @@ -1584,10 +1584,12 @@ |
1584 | 1584 | unsigned short ext1_ee_len, ext2_ee_len, max_len; |
1585 | 1585 | |
1586 | 1586 | /* |
1587 | - * Make sure that either both extents are uninitialized, or | |
1588 | - * both are _not_. | |
1587 | + * Make sure that both extents are initialized. We don't merge | |
1588 | + * uninitialized extents so that we can be sure that end_io code has | |
1589 | + * the extent that was written properly split out and conversion to | |
1590 | + * initialized is trivial. | |
1589 | 1591 | */ |
1590 | - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) | |
1592 | + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) | |
1591 | 1593 | return 0; |
1592 | 1594 | |
1593 | 1595 | if (ext4_ext_is_uninitialized(ex1)) |
... | ... | @@ -2923,7 +2925,7 @@ |
2923 | 2925 | { |
2924 | 2926 | ext4_fsblk_t newblock; |
2925 | 2927 | ext4_lblk_t ee_block; |
2926 | - struct ext4_extent *ex, newex, orig_ex; | |
2928 | + struct ext4_extent *ex, newex, orig_ex, zero_ex; | |
2927 | 2929 | struct ext4_extent *ex2 = NULL; |
2928 | 2930 | unsigned int ee_len, depth; |
2929 | 2931 | int err = 0; |
... | ... | @@ -2943,6 +2945,10 @@ |
2943 | 2945 | newblock = split - ee_block + ext4_ext_pblock(ex); |
2944 | 2946 | |
2945 | 2947 | BUG_ON(split < ee_block || split >= (ee_block + ee_len)); |
2948 | + BUG_ON(!ext4_ext_is_uninitialized(ex) && | |
2949 | + split_flag & (EXT4_EXT_MAY_ZEROOUT | | |
2950 | + EXT4_EXT_MARK_UNINIT1 | | |
2951 | + EXT4_EXT_MARK_UNINIT2)); | |
2946 | 2952 | |
2947 | 2953 | err = ext4_ext_get_access(handle, inode, path + depth); |
2948 | 2954 | if (err) |
2949 | 2955 | |
2950 | 2956 | |
2951 | 2957 | |
... | ... | @@ -2990,12 +2996,26 @@ |
2990 | 2996 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
2991 | 2997 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
2992 | 2998 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { |
2993 | - if (split_flag & EXT4_EXT_DATA_VALID1) | |
2999 | + if (split_flag & EXT4_EXT_DATA_VALID1) { | |
2994 | 3000 | err = ext4_ext_zeroout(inode, ex2); |
2995 | - else | |
3001 | + zero_ex.ee_block = ex2->ee_block; | |
3002 | + zero_ex.ee_len = ext4_ext_get_actual_len(ex2); | |
3003 | + ext4_ext_store_pblock(&zero_ex, | |
3004 | + ext4_ext_pblock(ex2)); | |
3005 | + } else { | |
2996 | 3006 | err = ext4_ext_zeroout(inode, ex); |
2997 | - } else | |
3007 | + zero_ex.ee_block = ex->ee_block; | |
3008 | + zero_ex.ee_len = ext4_ext_get_actual_len(ex); | |
3009 | + ext4_ext_store_pblock(&zero_ex, | |
3010 | + ext4_ext_pblock(ex)); | |
3011 | + } | |
3012 | + } else { | |
2998 | 3013 | err = ext4_ext_zeroout(inode, &orig_ex); |
3014 | + zero_ex.ee_block = orig_ex.ee_block; | |
3015 | + zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex); | |
3016 | + ext4_ext_store_pblock(&zero_ex, | |
3017 | + ext4_ext_pblock(&orig_ex)); | |
3018 | + } | |
2999 | 3019 | |
3000 | 3020 | if (err) |
3001 | 3021 | goto fix_extent_len; |
... | ... | @@ -3003,6 +3023,12 @@ |
3003 | 3023 | ex->ee_len = cpu_to_le16(ee_len); |
3004 | 3024 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3005 | 3025 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3026 | + if (err) | |
3027 | + goto fix_extent_len; | |
3028 | + | |
3029 | + /* update extent status tree */ | |
3030 | + err = ext4_es_zeroout(inode, &zero_ex); | |
3031 | + | |
3006 | 3032 | goto out; |
3007 | 3033 | } else if (err) |
3008 | 3034 | goto fix_extent_len; |
... | ... | @@ -3041,6 +3067,7 @@ |
3041 | 3067 | int err = 0; |
3042 | 3068 | int uninitialized; |
3043 | 3069 | int split_flag1, flags1; |
3070 | + int allocated = map->m_len; | |
3044 | 3071 | |
3045 | 3072 | depth = ext_depth(inode); |
3046 | 3073 | ex = path[depth].p_ext; |
3047 | 3074 | |
3048 | 3075 | |
3049 | 3076 | |
3050 | 3077 | |
... | ... | @@ -3060,20 +3087,29 @@ |
3060 | 3087 | map->m_lblk + map->m_len, split_flag1, flags1); |
3061 | 3088 | if (err) |
3062 | 3089 | goto out; |
3090 | + } else { | |
3091 | + allocated = ee_len - (map->m_lblk - ee_block); | |
3063 | 3092 | } |
3064 | - | |
3093 | + /* | |
3094 | + * Update path is required because previous ext4_split_extent_at() may | |
3095 | + * result in split of original leaf or extent zeroout. | |
3096 | + */ | |
3065 | 3097 | ext4_ext_drop_refs(path); |
3066 | 3098 | path = ext4_ext_find_extent(inode, map->m_lblk, path); |
3067 | 3099 | if (IS_ERR(path)) |
3068 | 3100 | return PTR_ERR(path); |
3101 | + depth = ext_depth(inode); | |
3102 | + ex = path[depth].p_ext; | |
3103 | + uninitialized = ext4_ext_is_uninitialized(ex); | |
3104 | + split_flag1 = 0; | |
3069 | 3105 | |
3070 | 3106 | if (map->m_lblk >= ee_block) { |
3071 | - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | | |
3072 | - EXT4_EXT_DATA_VALID2); | |
3073 | - if (uninitialized) | |
3107 | + split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; | |
3108 | + if (uninitialized) { | |
3074 | 3109 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; |
3075 | - if (split_flag & EXT4_EXT_MARK_UNINIT2) | |
3076 | - split_flag1 |= EXT4_EXT_MARK_UNINIT2; | |
3110 | + split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | | |
3111 | + EXT4_EXT_MARK_UNINIT2); | |
3112 | + } | |
3077 | 3113 | err = ext4_split_extent_at(handle, inode, path, |
3078 | 3114 | map->m_lblk, split_flag1, flags); |
3079 | 3115 | if (err) |
... | ... | @@ -3082,7 +3118,7 @@ |
3082 | 3118 | |
3083 | 3119 | ext4_ext_show_leaf(inode, path); |
3084 | 3120 | out: |
3085 | - return err ? err : map->m_len; | |
3121 | + return err ? err : allocated; | |
3086 | 3122 | } |
3087 | 3123 | |
3088 | 3124 | /* |
... | ... | @@ -3137,6 +3173,7 @@ |
3137 | 3173 | ee_block = le32_to_cpu(ex->ee_block); |
3138 | 3174 | ee_len = ext4_ext_get_actual_len(ex); |
3139 | 3175 | allocated = ee_len - (map->m_lblk - ee_block); |
3176 | + zero_ex.ee_len = 0; | |
3140 | 3177 | |
3141 | 3178 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); |
3142 | 3179 | |
3143 | 3180 | |
... | ... | @@ -3227,13 +3264,16 @@ |
3227 | 3264 | |
3228 | 3265 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
3229 | 3266 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
3230 | - inode->i_sb->s_blocksize_bits; | |
3267 | + (inode->i_sb->s_blocksize_bits - 10); | |
3231 | 3268 | |
3232 | 3269 | /* If extent is less than s_max_zeroout_kb, zeroout directly */ |
3233 | 3270 | if (max_zeroout && (ee_len <= max_zeroout)) { |
3234 | 3271 | err = ext4_ext_zeroout(inode, ex); |
3235 | 3272 | if (err) |
3236 | 3273 | goto out; |
3274 | + zero_ex.ee_block = ex->ee_block; | |
3275 | + zero_ex.ee_len = ext4_ext_get_actual_len(ex); | |
3276 | + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); | |
3237 | 3277 | |
3238 | 3278 | err = ext4_ext_get_access(handle, inode, path + depth); |
3239 | 3279 | if (err) |
... | ... | @@ -3292,6 +3332,9 @@ |
3292 | 3332 | err = allocated; |
3293 | 3333 | |
3294 | 3334 | out: |
3335 | + /* If we have gotten a failure, don't zero out status tree */ | |
3336 | + if (!err) | |
3337 | + err = ext4_es_zeroout(inode, &zero_ex); | |
3295 | 3338 | return err ? err : allocated; |
3296 | 3339 | } |
3297 | 3340 | |
3298 | 3341 | |
... | ... | @@ -3374,8 +3417,19 @@ |
3374 | 3417 | "block %llu, max_blocks %u\n", inode->i_ino, |
3375 | 3418 | (unsigned long long)ee_block, ee_len); |
3376 | 3419 | |
3377 | - /* If extent is larger than requested then split is required */ | |
3420 | + /* If extent is larger than requested it is a clear sign that we still | |
3421 | + * have some extent state machine issues left. So extent_split is still | |
3422 | + * required. | |
3423 | + * TODO: Once all related issues will be fixed this situation should be | |
3424 | + * illegal. | |
3425 | + */ | |
3378 | 3426 | if (ee_block != map->m_lblk || ee_len > map->m_len) { |
3427 | +#ifdef EXT4_DEBUG | |
3428 | + ext4_warning("Inode (%ld) finished: extent logical block %llu," | |
3429 | + " len %u; IO logical block %llu, len %u\n", | |
3430 | + inode->i_ino, (unsigned long long)ee_block, ee_len, | |
3431 | + (unsigned long long)map->m_lblk, map->m_len); | |
3432 | +#endif | |
3379 | 3433 | err = ext4_split_unwritten_extents(handle, inode, map, path, |
3380 | 3434 | EXT4_GET_BLOCKS_CONVERT); |
3381 | 3435 | if (err < 0) |
... | ... | @@ -3626,6 +3680,10 @@ |
3626 | 3680 | path, map->m_len); |
3627 | 3681 | } else |
3628 | 3682 | err = ret; |
3683 | + map->m_flags |= EXT4_MAP_MAPPED; | |
3684 | + if (allocated > map->m_len) | |
3685 | + allocated = map->m_len; | |
3686 | + map->m_len = allocated; | |
3629 | 3687 | goto out2; |
3630 | 3688 | } |
3631 | 3689 | /* buffered IO case */ |
... | ... | @@ -3675,6 +3733,7 @@ |
3675 | 3733 | allocated - map->m_len); |
3676 | 3734 | allocated = map->m_len; |
3677 | 3735 | } |
3736 | + map->m_len = allocated; | |
3678 | 3737 | |
3679 | 3738 | /* |
3680 | 3739 | * If we have done fallocate with the offset that is already |
... | ... | @@ -4106,9 +4165,6 @@ |
4106 | 4165 | } |
4107 | 4166 | } else { |
4108 | 4167 | BUG_ON(allocated_clusters < reserved_clusters); |
4109 | - /* We will claim quota for all newly allocated blocks.*/ | |
4110 | - ext4_da_update_reserve_space(inode, allocated_clusters, | |
4111 | - 1); | |
4112 | 4168 | if (reserved_clusters < allocated_clusters) { |
4113 | 4169 | struct ext4_inode_info *ei = EXT4_I(inode); |
4114 | 4170 | int reservation = allocated_clusters - |
... | ... | @@ -4159,6 +4215,15 @@ |
4159 | 4215 | ei->i_reserved_data_blocks += reservation; |
4160 | 4216 | spin_unlock(&ei->i_block_reservation_lock); |
4161 | 4217 | } |
4218 | + /* | |
4219 | + * We will claim quota for all newly allocated blocks. | |
4220 | + * We're updating the reserved space *after* the | |
4221 | + * correction above so we do not accidentally free | |
4222 | + * all the metadata reservation because we might | |
4223 | + * actually need it later on. | |
4224 | + */ | |
4225 | + ext4_da_update_reserve_space(inode, allocated_clusters, | |
4226 | + 1); | |
4162 | 4227 | } |
4163 | 4228 | } |
4164 | 4229 | |
... | ... | @@ -4368,8 +4433,6 @@ |
4368 | 4433 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) |
4369 | 4434 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; |
4370 | 4435 | |
4371 | - /* Prevent race condition between unwritten */ | |
4372 | - ext4_flush_unwritten_io(inode); | |
4373 | 4436 | retry: |
4374 | 4437 | while (ret >= 0 && ret < max_blocks) { |
4375 | 4438 | map.m_lblk = map.m_lblk + ret; |
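One of the smaller fixes above ("ext4: use s_extent_max_zeroout_kb value as number of kb") corrects the shift that turns the s_extent_max_zeroout_kb tunable into a block count: a kilobyte value divided by the block size in kilobytes is kb >> (blocksize_bits - 10), not kb >> blocksize_bits. A worked example with illustrative values, not taken from the patch:

    unsigned int max_zeroout_kb = 32;    /* tunable value in KiB, e.g. the default */
    unsigned int blocksize_bits = 12;    /* 4096-byte blocks */

    /* correct: 32 KiB / 4 KiB per block = 32 >> (12 - 10) = 8 blocks */
    unsigned int max_zeroout = max_zeroout_kb >> (blocksize_bits - 10);

    /* the old shift, 32 >> 12, yields 0 and silently disables the
     * zeroout-instead-of-split optimization */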
fs/ext4/extents_status.c
... | ... | @@ -333,17 +333,27 @@ |
333 | 333 | static int ext4_es_can_be_merged(struct extent_status *es1, |
334 | 334 | struct extent_status *es2) |
335 | 335 | { |
336 | - if (es1->es_lblk + es1->es_len != es2->es_lblk) | |
336 | + if (ext4_es_status(es1) != ext4_es_status(es2)) | |
337 | 337 | return 0; |
338 | 338 | |
339 | - if (ext4_es_status(es1) != ext4_es_status(es2)) | |
339 | + if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) | |
340 | 340 | return 0; |
341 | 341 | |
342 | - if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && | |
343 | - (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2))) | |
342 | + if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) | |
344 | 343 | return 0; |
345 | 344 | |
346 | - return 1; | |
345 | + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && | |
346 | + (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2))) | |
347 | + return 1; | |
348 | + | |
349 | + if (ext4_es_is_hole(es1)) | |
350 | + return 1; | |
351 | + | |
352 | + /* we need to check delayed extent is without unwritten status */ | |
353 | + if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) | |
354 | + return 1; | |
355 | + | |
356 | + return 0; | |
347 | 357 | } |
348 | 358 | |
349 | 359 | static struct extent_status * |
... | ... | @@ -389,6 +399,179 @@ |
389 | 399 | return es; |
390 | 400 | } |
391 | 401 | |
402 | +#ifdef ES_AGGRESSIVE_TEST | |
403 | +static void ext4_es_insert_extent_ext_check(struct inode *inode, | |
404 | + struct extent_status *es) | |
405 | +{ | |
406 | + struct ext4_ext_path *path = NULL; | |
407 | + struct ext4_extent *ex; | |
408 | + ext4_lblk_t ee_block; | |
409 | + ext4_fsblk_t ee_start; | |
410 | + unsigned short ee_len; | |
411 | + int depth, ee_status, es_status; | |
412 | + | |
413 | + path = ext4_ext_find_extent(inode, es->es_lblk, NULL); | |
414 | + if (IS_ERR(path)) | |
415 | + return; | |
416 | + | |
417 | + depth = ext_depth(inode); | |
418 | + ex = path[depth].p_ext; | |
419 | + | |
420 | + if (ex) { | |
421 | + | |
422 | + ee_block = le32_to_cpu(ex->ee_block); | |
423 | + ee_start = ext4_ext_pblock(ex); | |
424 | + ee_len = ext4_ext_get_actual_len(ex); | |
425 | + | |
426 | + ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; | |
427 | + es_status = ext4_es_is_unwritten(es) ? 1 : 0; | |
428 | + | |
429 | + /* | |
430 | + * Make sure ex and es are not overlap when we try to insert | |
431 | + * a delayed/hole extent. | |
432 | + */ | |
433 | + if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { | |
434 | + if (in_range(es->es_lblk, ee_block, ee_len)) { | |
435 | + pr_warn("ES insert assertation failed for " | |
436 | + "inode: %lu we can find an extent " | |
437 | + "at block [%d/%d/%llu/%c], but we " | |
438 | + "want to add an delayed/hole extent " | |
439 | + "[%d/%d/%llu/%llx]\n", | |
440 | + inode->i_ino, ee_block, ee_len, | |
441 | + ee_start, ee_status ? 'u' : 'w', | |
442 | + es->es_lblk, es->es_len, | |
443 | + ext4_es_pblock(es), ext4_es_status(es)); | |
444 | + } | |
445 | + goto out; | |
446 | + } | |
447 | + | |
448 | + /* | |
449 | + * We don't check ee_block == es->es_lblk, etc. because es | |
450 | + * might be a part of whole extent, vice versa. | |
451 | + */ | |
452 | + if (es->es_lblk < ee_block || | |
453 | + ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { | |
454 | + pr_warn("ES insert assertation failed for inode: %lu " | |
455 | + "ex_status [%d/%d/%llu/%c] != " | |
456 | + "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | |
457 | + ee_block, ee_len, ee_start, | |
458 | + ee_status ? 'u' : 'w', es->es_lblk, es->es_len, | |
459 | + ext4_es_pblock(es), es_status ? 'u' : 'w'); | |
460 | + goto out; | |
461 | + } | |
462 | + | |
463 | + if (ee_status ^ es_status) { | |
464 | + pr_warn("ES insert assertation failed for inode: %lu " | |
465 | + "ex_status [%d/%d/%llu/%c] != " | |
466 | + "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | |
467 | + ee_block, ee_len, ee_start, | |
468 | + ee_status ? 'u' : 'w', es->es_lblk, es->es_len, | |
469 | + ext4_es_pblock(es), es_status ? 'u' : 'w'); | |
470 | + } | |
471 | + } else { | |
472 | + /* | |
473 | + * We can't find an extent on disk. So we need to make sure | |
474 | + * that we don't want to add an written/unwritten extent. | |
475 | + */ | |
476 | + if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | |
477 | + pr_warn("ES insert assertation failed for inode: %lu " | |
478 | + "can't find an extent at block %d but we want " | |
479 | + "to add an written/unwritten extent " | |
480 | + "[%d/%d/%llu/%llx]\n", inode->i_ino, | |
481 | + es->es_lblk, es->es_lblk, es->es_len, | |
482 | + ext4_es_pblock(es), ext4_es_status(es)); | |
483 | + } | |
484 | + } | |
485 | +out: | |
486 | + if (path) { | |
487 | + ext4_ext_drop_refs(path); | |
488 | + kfree(path); | |
489 | + } | |
490 | +} | |
491 | + | |
492 | +static void ext4_es_insert_extent_ind_check(struct inode *inode, | |
493 | + struct extent_status *es) | |
494 | +{ | |
495 | + struct ext4_map_blocks map; | |
496 | + int retval; | |
497 | + | |
498 | + /* | |
499 | + * Here we call ext4_ind_map_blocks to lookup a block mapping because | |
500 | + * 'Indirect' structure is defined in indirect.c. So we couldn't | |
501 | + * access direct/indirect tree from outside. It is too dirty to define | |
502 | + * this function in indirect.c file. | |
503 | + */ | |
504 | + | |
505 | + map.m_lblk = es->es_lblk; | |
506 | + map.m_len = es->es_len; | |
507 | + | |
508 | + retval = ext4_ind_map_blocks(NULL, inode, &map, 0); | |
509 | + if (retval > 0) { | |
510 | + if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) { | |
511 | + /* | |
512 | + * We want to add a delayed/hole extent but this | |
513 | + * block has been allocated. | |
514 | + */ | |
515 | + pr_warn("ES insert assertation failed for inode: %lu " | |
516 | + "We can find blocks but we want to add a " | |
517 | + "delayed/hole extent [%d/%d/%llu/%llx]\n", | |
518 | + inode->i_ino, es->es_lblk, es->es_len, | |
519 | + ext4_es_pblock(es), ext4_es_status(es)); | |
520 | + return; | |
521 | + } else if (ext4_es_is_written(es)) { | |
522 | + if (retval != es->es_len) { | |
523 | + pr_warn("ES insert assertation failed for " | |
524 | + "inode: %lu retval %d != es_len %d\n", | |
525 | + inode->i_ino, retval, es->es_len); | |
526 | + return; | |
527 | + } | |
528 | + if (map.m_pblk != ext4_es_pblock(es)) { | |
529 | + pr_warn("ES insert assertation failed for " | |
530 | + "inode: %lu m_pblk %llu != " | |
531 | + "es_pblk %llu\n", | |
532 | + inode->i_ino, map.m_pblk, | |
533 | + ext4_es_pblock(es)); | |
534 | + return; | |
535 | + } | |
536 | + } else { | |
537 | + /* | |
538 | + * We don't need to check unwritten extent because | |
539 | + * indirect-based file doesn't have it. | |
540 | + */ | |
541 | + BUG_ON(1); | |
542 | + } | |
543 | + } else if (retval == 0) { | |
544 | + if (ext4_es_is_written(es)) { | |
545 | + pr_warn("ES insert assertation failed for inode: %lu " | |
546 | + "We can't find the block but we want to add " | |
547 | + "an written extent [%d/%d/%llu/%llx]\n", | |
548 | + inode->i_ino, es->es_lblk, es->es_len, | |
549 | + ext4_es_pblock(es), ext4_es_status(es)); | |
550 | + return; | |
551 | + } | |
552 | + } | |
553 | +} | |
554 | + | |
555 | +static inline void ext4_es_insert_extent_check(struct inode *inode, | |
556 | + struct extent_status *es) | |
557 | +{ | |
558 | + /* | |
559 | + * We don't need to worry about the race condition because | |
560 | + * caller takes i_data_sem locking. | |
561 | + */ | |
562 | + BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); | |
563 | + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | |
564 | + ext4_es_insert_extent_ext_check(inode, es); | |
565 | + else | |
566 | + ext4_es_insert_extent_ind_check(inode, es); | |
567 | +} | |
568 | +#else | |
569 | +static inline void ext4_es_insert_extent_check(struct inode *inode, | |
570 | + struct extent_status *es) | |
571 | +{ | |
572 | +} | |
573 | +#endif | |
574 | + | |
392 | 575 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes) |
393 | 576 | { |
394 | 577 | struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; |
... | ... | @@ -471,6 +654,8 @@ |
471 | 654 | ext4_es_store_status(&newes, status); |
472 | 655 | trace_ext4_es_insert_extent(inode, &newes); |
473 | 656 | |
657 | + ext4_es_insert_extent_check(inode, &newes); | |
658 | + | |
474 | 659 | write_lock(&EXT4_I(inode)->i_es_lock); |
475 | 660 | err = __es_remove_extent(inode, lblk, end); |
476 | 661 | if (err != 0) |
... | ... | @@ -667,6 +852,23 @@ |
667 | 852 | write_unlock(&EXT4_I(inode)->i_es_lock); |
668 | 853 | ext4_es_print_tree(inode); |
669 | 854 | return err; |
855 | +} | |
856 | + | |
857 | +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | |
858 | +{ | |
859 | + ext4_lblk_t ee_block; | |
860 | + ext4_fsblk_t ee_pblock; | |
861 | + unsigned int ee_len; | |
862 | + | |
863 | + ee_block = le32_to_cpu(ex->ee_block); | |
864 | + ee_len = ext4_ext_get_actual_len(ex); | |
865 | + ee_pblock = ext4_ext_pblock(ex); | |
866 | + | |
867 | + if (ee_len == 0) | |
868 | + return 0; | |
869 | + | |
870 | + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, | |
871 | + EXTENT_STATUS_WRITTEN); | |
670 | 872 | } |
671 | 873 | |
672 | 874 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) |
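The reordered checks in ext4_es_can_be_merged() above add an explicit guard against the combined extent length overflowing the 32-bit es_len field ("ext4: avoid a potential overflow in ext4_es_can_be_merged()"). A minimal illustration of the arithmetic being guarded, with made-up lengths:

    /* es_len is an ext4_lblk_t (__u32); summed in 32-bit arithmetic,
     * 0xF0000000 + 0x20000000 wraps to 0x10000000, so a merged extent
     * would record far fewer blocks than it actually covers. */
    if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
            return 0;   /* refuse to merge rather than truncate */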
fs/ext4/extents_status.h
... | ... | @@ -21,6 +21,12 @@ |
21 | 21 | #endif |
22 | 22 | |
23 | 23 | /* |
24 | + * With ES_AGGRESSIVE_TEST defined, the result of es caching will be | |
25 | + * checked with old map_block's result. | |
26 | + */ | |
27 | +#define ES_AGGRESSIVE_TEST__ | |
28 | + | |
29 | +/* | |
24 | 30 | * These flags live in the high bits of extent_status.es_pblk |
25 | 31 | */ |
26 | 32 | #define EXTENT_STATUS_WRITTEN (1ULL << 63) |
... | ... | @@ -33,6 +39,8 @@ |
33 | 39 | EXTENT_STATUS_DELAYED | \ |
34 | 40 | EXTENT_STATUS_HOLE) |
35 | 41 | |
42 | +struct ext4_extent; | |
43 | + | |
36 | 44 | struct extent_status { |
37 | 45 | struct rb_node rb_node; |
38 | 46 | ext4_lblk_t es_lblk; /* first logical block extent covers */ |
... | ... | @@ -58,6 +66,7 @@ |
58 | 66 | struct extent_status *es); |
59 | 67 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
60 | 68 | struct extent_status *es); |
69 | +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex); | |
61 | 70 | |
62 | 71 | static inline int ext4_es_is_written(struct extent_status *es) |
63 | 72 | { |
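Note that the ES_AGGRESSIVE_TEST self-test infrastructure added by this series is compiled out by default: the header defines ES_AGGRESSIVE_TEST__ (with trailing underscores), so the #ifdef ES_AGGRESSIVE_TEST blocks in extents_status.c and inode.c are not built. A sketch of how a developer would switch the checks on, shown only as an illustration of the convention used here:

    /* fs/ext4/extents_status.h: drop the trailing underscores */
    #define ES_AGGRESSIVE_TEST

    /* with the macro defined, every extent-status insert is cross-checked
     * against the extent tree (or the indirect block map) and cached
     * lookups are re-done under i_data_sem; mismatches are reported via
     * pr_warn()/printk rather than failing the operation */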
fs/ext4/ialloc.c
... | ... | @@ -324,8 +324,8 @@ |
324 | 324 | } |
325 | 325 | |
326 | 326 | struct orlov_stats { |
327 | + __u64 free_clusters; | |
327 | 328 | __u32 free_inodes; |
328 | - __u32 free_clusters; | |
329 | 329 | __u32 used_dirs; |
330 | 330 | }; |
331 | 331 | |
... | ... | @@ -342,7 +342,7 @@ |
342 | 342 | |
343 | 343 | if (flex_size > 1) { |
344 | 344 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); |
345 | - stats->free_clusters = atomic_read(&flex_group[g].free_clusters); | |
345 | + stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); | |
346 | 346 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); |
347 | 347 | return; |
348 | 348 | } |
fs/ext4/inode.c
... | ... | @@ -185,8 +185,6 @@ |
185 | 185 | |
186 | 186 | trace_ext4_evict_inode(inode); |
187 | 187 | |
188 | - ext4_ioend_wait(inode); | |
189 | - | |
190 | 188 | if (inode->i_nlink) { |
191 | 189 | /* |
192 | 190 | * When journalling data dirty buffers are tracked only in the |
... | ... | @@ -207,7 +205,8 @@ |
207 | 205 | * don't use page cache. |
208 | 206 | */ |
209 | 207 | if (ext4_should_journal_data(inode) && |
210 | - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | |
208 | + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && | |
209 | + inode->i_ino != EXT4_JOURNAL_INO) { | |
211 | 210 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
212 | 211 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; |
213 | 212 | |
... | ... | @@ -216,6 +215,7 @@ |
216 | 215 | filemap_write_and_wait(&inode->i_data); |
217 | 216 | } |
218 | 217 | truncate_inode_pages(&inode->i_data, 0); |
218 | + ext4_ioend_shutdown(inode); | |
219 | 219 | goto no_delete; |
220 | 220 | } |
221 | 221 | |
... | ... | @@ -225,6 +225,7 @@ |
225 | 225 | if (ext4_should_order_data(inode)) |
226 | 226 | ext4_begin_ordered_truncate(inode, 0); |
227 | 227 | truncate_inode_pages(&inode->i_data, 0); |
228 | + ext4_ioend_shutdown(inode); | |
228 | 229 | |
229 | 230 | if (is_bad_inode(inode)) |
230 | 231 | goto no_delete; |
... | ... | @@ -482,6 +483,58 @@ |
482 | 483 | return num; |
483 | 484 | } |
484 | 485 | |
486 | +#ifdef ES_AGGRESSIVE_TEST | |
487 | +static void ext4_map_blocks_es_recheck(handle_t *handle, | |
488 | + struct inode *inode, | |
489 | + struct ext4_map_blocks *es_map, | |
490 | + struct ext4_map_blocks *map, | |
491 | + int flags) | |
492 | +{ | |
493 | + int retval; | |
494 | + | |
495 | + map->m_flags = 0; | |
496 | + /* | |
497 | + * There is a race window that the result is not the same. | |
498 | + * e.g. xfstests #223 when dioread_nolock enables. The reason | |
499 | + * is that we lookup a block mapping in extent status tree with | |
500 | + * out taking i_data_sem. So at the time the unwritten extent | |
501 | + * could be converted. | |
502 | + */ | |
503 | + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | |
504 | + down_read((&EXT4_I(inode)->i_data_sem)); | |
505 | + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | |
506 | + retval = ext4_ext_map_blocks(handle, inode, map, flags & | |
507 | + EXT4_GET_BLOCKS_KEEP_SIZE); | |
508 | + } else { | |
509 | + retval = ext4_ind_map_blocks(handle, inode, map, flags & | |
510 | + EXT4_GET_BLOCKS_KEEP_SIZE); | |
511 | + } | |
512 | + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | |
513 | + up_read((&EXT4_I(inode)->i_data_sem)); | |
514 | + /* | |
515 | + * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag | |
516 | + * because it shouldn't be marked in es_map->m_flags. | |
517 | + */ | |
518 | + map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY); | |
519 | + | |
520 | + /* | |
521 | + * We don't check m_len because extent will be collpased in status | |
522 | + * tree. So the m_len might not equal. | |
523 | + */ | |
524 | + if (es_map->m_lblk != map->m_lblk || | |
525 | + es_map->m_flags != map->m_flags || | |
526 | + es_map->m_pblk != map->m_pblk) { | |
527 | + printk("ES cache assertation failed for inode: %lu " | |
528 | + "es_cached ex [%d/%d/%llu/%x] != " | |
529 | + "found ex [%d/%d/%llu/%x] retval %d flags %x\n", | |
530 | + inode->i_ino, es_map->m_lblk, es_map->m_len, | |
531 | + es_map->m_pblk, es_map->m_flags, map->m_lblk, | |
532 | + map->m_len, map->m_pblk, map->m_flags, | |
533 | + retval, flags); | |
534 | + } | |
535 | +} | |
536 | +#endif /* ES_AGGRESSIVE_TEST */ | |
537 | + | |
485 | 538 | /* |
486 | 539 | * The ext4_map_blocks() function tries to look up the requested blocks, |
487 | 540 | * and returns if the blocks are already mapped. |
488 | 541 | |
... | ... | @@ -509,7 +562,12 @@ |
509 | 562 | { |
510 | 563 | struct extent_status es; |
511 | 564 | int retval; |
565 | +#ifdef ES_AGGRESSIVE_TEST | |
566 | + struct ext4_map_blocks orig_map; | |
512 | 567 | |
568 | + memcpy(&orig_map, map, sizeof(*map)); | |
569 | +#endif | |
570 | + | |
513 | 571 | map->m_flags = 0; |
514 | 572 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
515 | 573 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
... | ... | @@ -531,6 +589,10 @@ |
531 | 589 | } else { |
532 | 590 | BUG_ON(1); |
533 | 591 | } |
592 | +#ifdef ES_AGGRESSIVE_TEST | |
593 | + ext4_map_blocks_es_recheck(handle, inode, map, | |
594 | + &orig_map, flags); | |
595 | +#endif | |
534 | 596 | goto found; |
535 | 597 | } |
536 | 598 | |
... | ... | @@ -551,6 +613,15 @@ |
551 | 613 | int ret; |
552 | 614 | unsigned long long status; |
553 | 615 | |
616 | +#ifdef ES_AGGRESSIVE_TEST | |
617 | + if (retval != map->m_len) { | |
618 | + printk("ES len assertation failed for inode: %lu " | |
619 | + "retval %d != map->m_len %d " | |
620 | + "in %s (lookup)\n", inode->i_ino, retval, | |
621 | + map->m_len, __func__); | |
622 | + } | |
623 | +#endif | |
624 | + | |
554 | 625 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
555 | 626 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
556 | 627 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && |
... | ... | @@ -643,6 +714,24 @@ |
643 | 714 | int ret; |
644 | 715 | unsigned long long status; |
645 | 716 | |
717 | +#ifdef ES_AGGRESSIVE_TEST | |
718 | + if (retval != map->m_len) { | |
719 | + printk("ES len assertation failed for inode: %lu " | |
720 | + "retval %d != map->m_len %d " | |
721 | + "in %s (allocation)\n", inode->i_ino, retval, | |
722 | + map->m_len, __func__); | |
723 | + } | |
724 | +#endif | |
725 | + | |
726 | + /* | |
727 | + * If the extent has been zeroed out, we don't need to update | |
728 | + * extent status tree. | |
729 | + */ | |
730 | + if ((flags & EXT4_GET_BLOCKS_PRE_IO) && | |
731 | + ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | |
732 | + if (ext4_es_is_written(&es)) | |
733 | + goto has_zeroout; | |
734 | + } | |
646 | 735 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
647 | 736 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
648 | 737 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && |
... | ... | @@ -655,6 +744,7 @@ |
655 | 744 | retval = ret; |
656 | 745 | } |
657 | 746 | |
747 | +has_zeroout: | |
658 | 748 | up_write((&EXT4_I(inode)->i_data_sem)); |
659 | 749 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
660 | 750 | int ret = check_block_validity(inode, map); |
... | ... | @@ -1216,6 +1306,55 @@ |
1216 | 1306 | } |
1217 | 1307 | |
1218 | 1308 | /* |
1309 | + * Reserve a metadata for a single block located at lblock | |
1310 | + */ | |
1311 | +static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) | |
1312 | +{ | |
1313 | + int retries = 0; | |
1314 | + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | |
1315 | + struct ext4_inode_info *ei = EXT4_I(inode); | |
1316 | + unsigned int md_needed; | |
1317 | + ext4_lblk_t save_last_lblock; | |
1318 | + int save_len; | |
1319 | + | |
1320 | + /* | |
1321 | + * recalculate the amount of metadata blocks to reserve | |
1322 | + * in order to allocate nrblocks | |
1323 | + * worse case is one extent per block | |
1324 | + */ | |
1325 | +repeat: | |
1326 | + spin_lock(&ei->i_block_reservation_lock); | |
1327 | + /* | |
1328 | + * ext4_calc_metadata_amount() has side effects, which we have | |
1329 | + * to be prepared undo if we fail to claim space. | |
1330 | + */ | |
1331 | + save_len = ei->i_da_metadata_calc_len; | |
1332 | + save_last_lblock = ei->i_da_metadata_calc_last_lblock; | |
1333 | + md_needed = EXT4_NUM_B2C(sbi, | |
1334 | + ext4_calc_metadata_amount(inode, lblock)); | |
1335 | + trace_ext4_da_reserve_space(inode, md_needed); | |
1336 | + | |
1337 | + /* | |
1338 | + * We do still charge estimated metadata to the sb though; | |
1339 | + * we cannot afford to run out of free blocks. | |
1340 | + */ | |
1341 | + if (ext4_claim_free_clusters(sbi, md_needed, 0)) { | |
1342 | + ei->i_da_metadata_calc_len = save_len; | |
1343 | + ei->i_da_metadata_calc_last_lblock = save_last_lblock; | |
1344 | + spin_unlock(&ei->i_block_reservation_lock); | |
1345 | + if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | |
1346 | + cond_resched(); | |
1347 | + goto repeat; | |
1348 | + } | |
1349 | + return -ENOSPC; | |
1350 | + } | |
1351 | + ei->i_reserved_meta_blocks += md_needed; | |
1352 | + spin_unlock(&ei->i_block_reservation_lock); | |
1353 | + | |
1354 | + return 0; /* success */ | |
1355 | +} | |
1356 | + | |
1357 | +/* | |
1219 | 1358 | * Reserve a single cluster located at lblock |
1220 | 1359 | */ |
1221 | 1360 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
... | ... | @@ -1263,7 +1402,7 @@ |
1263 | 1402 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; |
1264 | 1403 | spin_unlock(&ei->i_block_reservation_lock); |
1265 | 1404 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1266 | - yield(); | |
1405 | + cond_resched(); | |
1267 | 1406 | goto repeat; |
1268 | 1407 | } |
1269 | 1408 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1270 | 1409 | |
... | ... | @@ -1768,7 +1907,12 @@ |
1768 | 1907 | struct extent_status es; |
1769 | 1908 | int retval; |
1770 | 1909 | sector_t invalid_block = ~((sector_t) 0xffff); |
1910 | +#ifdef ES_AGGRESSIVE_TEST | |
1911 | + struct ext4_map_blocks orig_map; | |
1771 | 1912 | |
1913 | + memcpy(&orig_map, map, sizeof(*map)); | |
1914 | +#endif | |
1915 | + | |
1772 | 1916 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) |
1773 | 1917 | invalid_block = ~0; |
1774 | 1918 | |
... | ... | @@ -1809,6 +1953,9 @@ |
1809 | 1953 | else |
1810 | 1954 | BUG_ON(1); |
1811 | 1955 | |
1956 | +#ifdef ES_AGGRESSIVE_TEST | |
1957 | + ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); | |
1958 | +#endif | |
1812 | 1959 | return retval; |
1813 | 1960 | } |
1814 | 1961 | |
... | ... | @@ -1843,8 +1990,11 @@ |
1843 | 1990 | * XXX: __block_prepare_write() unmaps passed block, |
1844 | 1991 | * is it OK? |
1845 | 1992 | */ |
1846 | - /* If the block was allocated from previously allocated cluster, | |
1847 | - * then we dont need to reserve it again. */ | |
1993 | + /* | |
1994 | + * If the block was allocated from previously allocated cluster, | |
1995 | + * then we don't need to reserve it again. However we still need | |
1996 | + * to reserve metadata for every block we're going to write. | |
1997 | + */ | |
1848 | 1998 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { |
1849 | 1999 | ret = ext4_da_reserve_space(inode, iblock); |
1850 | 2000 | if (ret) { |
... | ... | @@ -1852,6 +2002,13 @@ |
1852 | 2002 | retval = ret; |
1853 | 2003 | goto out_unlock; |
1854 | 2004 | } |
2005 | + } else { | |
2006 | + ret = ext4_da_reserve_metadata(inode, iblock); | |
2007 | + if (ret) { | |
2008 | + /* not enough space to reserve */ | |
2009 | + retval = ret; | |
2010 | + goto out_unlock; | |
2011 | + } | |
1855 | 2012 | } |
1856 | 2013 | |
1857 | 2014 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
... | ... | @@ -1873,6 +2030,15 @@ |
1873 | 2030 | int ret; |
1874 | 2031 | unsigned long long status; |
1875 | 2032 | |
2033 | +#ifdef ES_AGGRESSIVE_TEST | |
2034 | + if (retval != map->m_len) { | |
2035 | + printk("ES len assertation failed for inode: %lu " | |
2036 | + "retval %d != map->m_len %d " | |
2037 | + "in %s (lookup)\n", inode->i_ino, retval, | |
2038 | + map->m_len, __func__); | |
2039 | + } | |
2040 | +#endif | |
2041 | + | |
1876 | 2042 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
1877 | 2043 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
1878 | 2044 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
... | ... | @@ -2908,8 +3074,8 @@ |
2908 | 3074 | |
2909 | 3075 | trace_ext4_releasepage(page); |
2910 | 3076 | |
2911 | - WARN_ON(PageChecked(page)); | |
2912 | - if (!page_has_buffers(page)) | |
3077 | + /* Page has dirty journalled data -> cannot release */ | |
3078 | + if (PageChecked(page)) | |
2913 | 3079 | return 0; |
2914 | 3080 | if (journal) |
2915 | 3081 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
fs/ext4/mballoc.c
... | ... | @@ -2804,8 +2804,8 @@ |
2804 | 2804 | if (sbi->s_log_groups_per_flex) { |
2805 | 2805 | ext4_group_t flex_group = ext4_flex_group(sbi, |
2806 | 2806 | ac->ac_b_ex.fe_group); |
2807 | - atomic_sub(ac->ac_b_ex.fe_len, | |
2808 | - &sbi->s_flex_groups[flex_group].free_clusters); | |
2807 | + atomic64_sub(ac->ac_b_ex.fe_len, | |
2808 | + &sbi->s_flex_groups[flex_group].free_clusters); | |
2809 | 2809 | } |
2810 | 2810 | |
2811 | 2811 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
... | ... | @@ -3692,11 +3692,7 @@ |
3692 | 3692 | if (free < needed && busy) { |
3693 | 3693 | busy = 0; |
3694 | 3694 | ext4_unlock_group(sb, group); |
3695 | - /* | |
3696 | - * Yield the CPU here so that we don't get soft lockup | |
3697 | - * in non preempt case. | |
3698 | - */ | |
3699 | - yield(); | |
3695 | + cond_resched(); | |
3700 | 3696 | goto repeat; |
3701 | 3697 | } |
3702 | 3698 | |
... | ... | @@ -4246,7 +4242,7 @@ |
4246 | 4242 | ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { |
4247 | 4243 | |
4248 | 4244 | /* let others to free the space */ |
4249 | - yield(); | |
4245 | + cond_resched(); | |
4250 | 4246 | ar->len = ar->len >> 1; |
4251 | 4247 | } |
4252 | 4248 | if (!ar->len) { |
... | ... | @@ -4464,7 +4460,6 @@ |
4464 | 4460 | struct buffer_head *bitmap_bh = NULL; |
4465 | 4461 | struct super_block *sb = inode->i_sb; |
4466 | 4462 | struct ext4_group_desc *gdp; |
4467 | - unsigned long freed = 0; | |
4468 | 4463 | unsigned int overflow; |
4469 | 4464 | ext4_grpblk_t bit; |
4470 | 4465 | struct buffer_head *gd_bh; |
4471 | 4466 | |
... | ... | @@ -4666,14 +4661,12 @@ |
4666 | 4661 | |
4667 | 4662 | if (sbi->s_log_groups_per_flex) { |
4668 | 4663 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
4669 | - atomic_add(count_clusters, | |
4670 | - &sbi->s_flex_groups[flex_group].free_clusters); | |
4664 | + atomic64_add(count_clusters, | |
4665 | + &sbi->s_flex_groups[flex_group].free_clusters); | |
4671 | 4666 | } |
4672 | 4667 | |
4673 | 4668 | ext4_mb_unload_buddy(&e4b); |
4674 | 4669 | |
4675 | - freed += count; | |
4676 | - | |
4677 | 4670 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) |
4678 | 4671 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); |
4679 | 4672 | |
... | ... | @@ -4811,8 +4804,8 @@ |
4811 | 4804 | |
4812 | 4805 | if (sbi->s_log_groups_per_flex) { |
4813 | 4806 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
4814 | - atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), | |
4815 | - &sbi->s_flex_groups[flex_group].free_clusters); | |
4807 | + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), | |
4808 | + &sbi->s_flex_groups[flex_group].free_clusters); | |
4816 | 4809 | } |
4817 | 4810 | |
4818 | 4811 | ext4_mb_unload_buddy(&e4b); |
fs/ext4/move_extent.c
... | ... | @@ -32,16 +32,18 @@ |
32 | 32 | */ |
33 | 33 | static inline int |
34 | 34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, |
35 | - struct ext4_ext_path **path) | |
35 | + struct ext4_ext_path **orig_path) | |
36 | 36 | { |
37 | 37 | int ret = 0; |
38 | + struct ext4_ext_path *path; | |
38 | 39 | |
39 | - *path = ext4_ext_find_extent(inode, lblock, *path); | |
40 | - if (IS_ERR(*path)) { | |
41 | - ret = PTR_ERR(*path); | |
42 | - *path = NULL; | |
43 | - } else if ((*path)[ext_depth(inode)].p_ext == NULL) | |
40 | + path = ext4_ext_find_extent(inode, lblock, *orig_path); | |
41 | + if (IS_ERR(path)) | |
42 | + ret = PTR_ERR(path); | |
43 | + else if (path[ext_depth(inode)].p_ext == NULL) | |
44 | 44 | ret = -ENODATA; |
45 | + else | |
46 | + *orig_path = path; | |
45 | 47 | |
46 | 48 | return ret; |
47 | 49 | } |
48 | 50 | |
49 | 51 | |
50 | 52 | |
... | ... | @@ -611,24 +613,25 @@ |
611 | 613 | { |
612 | 614 | struct ext4_ext_path *path = NULL; |
613 | 615 | struct ext4_extent *ext; |
616 | + int ret = 0; | |
614 | 617 | ext4_lblk_t last = from + count; |
615 | 618 | while (from < last) { |
616 | 619 | *err = get_ext_path(inode, from, &path); |
617 | 620 | if (*err) |
618 | - return 0; | |
621 | + goto out; | |
619 | 622 | ext = path[ext_depth(inode)].p_ext; |
620 | - if (!ext) { | |
621 | - ext4_ext_drop_refs(path); | |
622 | - return 0; | |
623 | - } | |
624 | - if (uninit != ext4_ext_is_uninitialized(ext)) { | |
625 | - ext4_ext_drop_refs(path); | |
626 | - return 0; | |
627 | - } | |
623 | + if (uninit != ext4_ext_is_uninitialized(ext)) | |
624 | + goto out; | |
628 | 625 | from += ext4_ext_get_actual_len(ext); |
629 | 626 | ext4_ext_drop_refs(path); |
630 | 627 | } |
631 | - return 1; | |
628 | + ret = 1; | |
629 | +out: | |
630 | + if (path) { | |
631 | + ext4_ext_drop_refs(path); | |
632 | + kfree(path); | |
633 | + } | |
634 | + return ret; | |
632 | 635 | } |
633 | 636 | |
634 | 637 | /** |
... | ... | @@ -665,6 +668,14 @@ |
665 | 668 | int depth; |
666 | 669 | int replaced_count = 0; |
667 | 670 | int dext_alen; |
671 | + | |
672 | + *err = ext4_es_remove_extent(orig_inode, from, count); | |
673 | + if (*err) | |
674 | + goto out; | |
675 | + | |
676 | + *err = ext4_es_remove_extent(donor_inode, from, count); | |
677 | + if (*err) | |
678 | + goto out; | |
668 | 679 | |
669 | 680 | /* Get the original extent for the block "orig_off" */ |
670 | 681 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
fs/ext4/page-io.c
... | ... | @@ -50,11 +50,21 @@ |
50 | 50 | kmem_cache_destroy(io_page_cachep); |
51 | 51 | } |
52 | 52 | |
53 | -void ext4_ioend_wait(struct inode *inode) | |
53 | +/* | |
54 | + * This function is called by ext4_evict_inode() to make sure there is | |
55 | + * no more pending I/O completion work left to do. | |
56 | + */ | |
57 | +void ext4_ioend_shutdown(struct inode *inode) | |
54 | 58 | { |
55 | 59 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
56 | 60 | |
57 | 61 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); |
62 | + /* | |
63 | + * We need to make sure the work structure is finished being | |
64 | + * used before we let the inode get destroyed. | |
65 | + */ | |
66 | + if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | |
67 | + cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | |
58 | 68 | } |
59 | 69 | |
60 | 70 | static void put_io_page(struct ext4_io_page *io_page) |
fs/ext4/resize.c
... | ... | @@ -1360,8 +1360,8 @@ |
1360 | 1360 | sbi->s_log_groups_per_flex) { |
1361 | 1361 | ext4_group_t flex_group; |
1362 | 1362 | flex_group = ext4_flex_group(sbi, group_data[0].group); |
1363 | - atomic_add(EXT4_NUM_B2C(sbi, free_blocks), | |
1364 | - &sbi->s_flex_groups[flex_group].free_clusters); | |
1363 | + atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), | |
1364 | + &sbi->s_flex_groups[flex_group].free_clusters); | |
1365 | 1365 | atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, |
1366 | 1366 | &sbi->s_flex_groups[flex_group].free_inodes); |
1367 | 1367 | } |
fs/ext4/super.c
... | ... | @@ -1927,8 +1927,8 @@ |
1927 | 1927 | flex_group = ext4_flex_group(sbi, i); |
1928 | 1928 | atomic_add(ext4_free_inodes_count(sb, gdp), |
1929 | 1929 | &sbi->s_flex_groups[flex_group].free_inodes); |
1930 | - atomic_add(ext4_free_group_clusters(sb, gdp), | |
1931 | - &sbi->s_flex_groups[flex_group].free_clusters); | |
1930 | + atomic64_add(ext4_free_group_clusters(sb, gdp), | |
1931 | + &sbi->s_flex_groups[flex_group].free_clusters); | |
1932 | 1932 | atomic_add(ext4_used_dirs_count(sb, gdp), |
1933 | 1933 | &sbi->s_flex_groups[flex_group].used_dirs); |
1934 | 1934 | } |
fs/jbd2/transaction.c
... | ... | @@ -1065,9 +1065,12 @@ |
1065 | 1065 | void jbd2_journal_set_triggers(struct buffer_head *bh, |
1066 | 1066 | struct jbd2_buffer_trigger_type *type) |
1067 | 1067 | { |
1068 | - struct journal_head *jh = bh2jh(bh); | |
1068 | + struct journal_head *jh = jbd2_journal_grab_journal_head(bh); | |
1069 | 1069 | |
1070 | + if (WARN_ON(!jh)) | |
1071 | + return; | |
1070 | 1072 | jh->b_triggers = type; |
1073 | + jbd2_journal_put_journal_head(jh); | |
1071 | 1074 | } |
1072 | 1075 | |
1073 | 1076 | void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, |
1074 | 1077 | |
1075 | 1078 | |
1076 | 1079 | |
... | ... | @@ -1119,17 +1122,18 @@ |
1119 | 1122 | { |
1120 | 1123 | transaction_t *transaction = handle->h_transaction; |
1121 | 1124 | journal_t *journal = transaction->t_journal; |
1122 | - struct journal_head *jh = bh2jh(bh); | |
1125 | + struct journal_head *jh; | |
1123 | 1126 | int ret = 0; |
1124 | 1127 | |
1125 | - jbd_debug(5, "journal_head %p\n", jh); | |
1126 | - JBUFFER_TRACE(jh, "entry"); | |
1127 | 1128 | if (is_handle_aborted(handle)) |
1128 | 1129 | goto out; |
1129 | - if (!buffer_jbd(bh)) { | |
1130 | + jh = jbd2_journal_grab_journal_head(bh); | |
1131 | + if (!jh) { | |
1130 | 1132 | ret = -EUCLEAN; |
1131 | 1133 | goto out; |
1132 | 1134 | } |
1135 | + jbd_debug(5, "journal_head %p\n", jh); | |
1136 | + JBUFFER_TRACE(jh, "entry"); | |
1133 | 1137 | |
1134 | 1138 | jbd_lock_bh_state(bh); |
1135 | 1139 | |
... | ... | @@ -1220,6 +1224,7 @@ |
1220 | 1224 | spin_unlock(&journal->j_list_lock); |
1221 | 1225 | out_unlock_bh: |
1222 | 1226 | jbd_unlock_bh_state(bh); |
1227 | + jbd2_journal_put_journal_head(jh); | |
1223 | 1228 | out: |
1224 | 1229 | JBUFFER_TRACE(jh, "exit"); |
1225 | 1230 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ |
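The jbd2 hunks above close the use-after-free named in the merge ("jbd2: fix use after free in jbd2_journal_dirty_metadata()") by taking a counted reference on the journal_head instead of dereferencing the raw bh2jh() pointer. Reduced to a sketch of the pattern, with error paths trimmed:

    struct journal_head *jh;

    jh = jbd2_journal_grab_journal_head(bh);  /* takes a ref; NULL if bh has no jh */
    if (!jh)
            return -EUCLEAN;                  /* buffer is not journaled (anymore) */
    /* ... jh can be dereferenced safely here ... */
    jbd2_journal_put_journal_head(jh);        /* drop the reference when done */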