Commit 3e624fc72fba09b6f999a9fbb87b64efccd38036
1 parent
22359f5745
Exists in
master
and in
4 other branches
ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback
The multiblock allocator needs to be able to release blocks (and issue a blkdev discard request) when the transaction which freed those blocks is committed. Previously this was done via a polling mechanism whenever blocks were allocated or freed. A much better way of doing things is to create a jbd2 callback function and attach the list of blocks to be freed directly to the transaction structure. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Showing 6 changed files with 29 additions and 75 deletions Side-by-side Diff
fs/ext4/ext4_sb.h
... | ... | @@ -99,9 +99,6 @@ |
99 | 99 | struct inode *s_buddy_cache; |
100 | 100 | long s_blocks_reserved; |
101 | 101 | spinlock_t s_reserve_lock; |
102 | - struct list_head s_active_transaction; | |
103 | - struct list_head s_closed_transaction; | |
104 | - struct list_head s_committed_transaction; | |
105 | 102 | spinlock_t s_md_lock; |
106 | 103 | tid_t s_last_transaction; |
107 | 104 | unsigned short *s_mb_offsets, *s_mb_maxs; |
fs/ext4/mballoc.c
... | ... | @@ -2523,9 +2523,6 @@ |
2523 | 2523 | } |
2524 | 2524 | |
2525 | 2525 | spin_lock_init(&sbi->s_md_lock); |
2526 | - INIT_LIST_HEAD(&sbi->s_active_transaction); | |
2527 | - INIT_LIST_HEAD(&sbi->s_closed_transaction); | |
2528 | - INIT_LIST_HEAD(&sbi->s_committed_transaction); | |
2529 | 2526 | spin_lock_init(&sbi->s_bal_lock); |
2530 | 2527 | |
2531 | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
... | ... | @@ -2554,6 +2551,8 @@ |
2554 | 2551 | ext4_mb_init_per_dev_proc(sb); |
2555 | 2552 | ext4_mb_history_init(sb); |
2556 | 2553 | |
2554 | + sbi->s_journal->j_commit_callback = release_blocks_on_commit; | |
2555 | + | |
2557 | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2558 | 2557 | return 0; |
2559 | 2558 | } |
... | ... | @@ -2583,15 +2582,6 @@ |
2583 | 2582 | struct ext4_group_info *grinfo; |
2584 | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2585 | 2584 | |
2586 | - /* release freed, non-committed blocks */ | |
2587 | - spin_lock(&sbi->s_md_lock); | |
2588 | - list_splice_init(&sbi->s_closed_transaction, | |
2589 | - &sbi->s_committed_transaction); | |
2590 | - list_splice_init(&sbi->s_active_transaction, | |
2591 | - &sbi->s_committed_transaction); | |
2592 | - spin_unlock(&sbi->s_md_lock); | |
2593 | - ext4_mb_free_committed_blocks(sb); | |
2594 | - | |
2595 | 2585 | if (sbi->s_group_info) { |
2596 | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
2597 | 2587 | grinfo = ext4_get_group_info(sb, i); |
2598 | 2588 | |
2599 | 2589 | |
2600 | 2590 | |
2601 | 2591 | |
2602 | 2592 | |
2603 | 2593 | |
... | ... | @@ -2645,36 +2635,25 @@ |
2645 | 2635 | return 0; |
2646 | 2636 | } |
2647 | 2637 | |
2648 | -static noinline_for_stack void | |
2649 | -ext4_mb_free_committed_blocks(struct super_block *sb) | |
2638 | +/* | |
2639 | + * This function is called by the jbd2 layer once the commit has finished, | |
2640 | + * so we know we can free the blocks that were released with that commit. | |
2641 | + */ | |
2642 | +static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |
2650 | 2643 | { |
2644 | + struct super_block *sb = journal->j_private; | |
2651 | 2645 | struct ext4_buddy e4b; |
2652 | 2646 | struct ext4_group_info *db; |
2653 | - struct ext4_sb_info *sbi = EXT4_SB(sb); | |
2654 | 2647 | int err, count = 0, count2 = 0; |
2655 | 2648 | struct ext4_free_data *entry; |
2656 | 2649 | ext4_fsblk_t discard_block; |
2650 | + struct list_head *l, *ltmp; | |
2657 | 2651 | |
2658 | - if (list_empty(&sbi->s_committed_transaction)) | |
2659 | - return; | |
2652 | + list_for_each_safe(l, ltmp, &txn->t_private_list) { | |
2653 | + entry = list_entry(l, struct ext4_free_data, list); | |
2660 | 2654 | |
2661 | - /* there is committed blocks to be freed yet */ | |
2662 | - do { | |
2663 | - /* get next array of blocks */ | |
2664 | - entry = NULL; | |
2665 | - spin_lock(&sbi->s_md_lock); | |
2666 | - if (!list_empty(&sbi->s_committed_transaction)) { | |
2667 | - entry = list_entry(sbi->s_committed_transaction.next, | |
2668 | - struct ext4_free_data, list); | |
2669 | - list_del(&entry->list); | |
2670 | - } | |
2671 | - spin_unlock(&sbi->s_md_lock); | |
2672 | - | |
2673 | - if (entry == NULL) | |
2674 | - break; | |
2675 | - | |
2676 | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
2677 | - entry->count, entry->group, entry); | |
2656 | + entry->count, entry->group, entry); | |
2678 | 2657 | |
2679 | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2680 | 2659 | /* we expect to find existing buddy because it's pinned */ |
... | ... | @@ -2706,7 +2685,7 @@ |
2706 | 2685 | |
2707 | 2686 | kmem_cache_free(ext4_free_ext_cachep, entry); |
2708 | 2687 | ext4_mb_release_desc(&e4b); |
2709 | - } while (1); | |
2688 | + } | |
2710 | 2689 | |
2711 | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2712 | 2691 | } |
... | ... | @@ -4348,8 +4327,6 @@ |
4348 | 4327 | goto out1; |
4349 | 4328 | } |
4350 | 4329 | |
4351 | - ext4_mb_poll_new_transaction(sb, handle); | |
4352 | - | |
4353 | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
4354 | 4331 | if (*errp) { |
4355 | 4332 | ar->len = 0; |
4356 | 4333 | |
... | ... | @@ -4408,37 +4385,7 @@ |
4408 | 4385 | |
4409 | 4386 | return block; |
4410 | 4387 | } |
4411 | -static void ext4_mb_poll_new_transaction(struct super_block *sb, | |
4412 | - handle_t *handle) | |
4413 | -{ | |
4414 | - struct ext4_sb_info *sbi = EXT4_SB(sb); | |
4415 | 4388 | |
4416 | - if (sbi->s_last_transaction == handle->h_transaction->t_tid) | |
4417 | - return; | |
4418 | - | |
4419 | - /* new transaction! time to close last one and free blocks for | |
4420 | - * committed transaction. we know that only transaction can be | |
4421 | - * active, so previos transaction can be being logged and we | |
4422 | - * know that transaction before previous is known to be already | |
4423 | - * logged. this means that now we may free blocks freed in all | |
4424 | - * transactions before previous one. hope I'm clear enough ... */ | |
4425 | - | |
4426 | - spin_lock(&sbi->s_md_lock); | |
4427 | - if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | |
4428 | - mb_debug("new transaction %lu, old %lu\n", | |
4429 | - (unsigned long) handle->h_transaction->t_tid, | |
4430 | - (unsigned long) sbi->s_last_transaction); | |
4431 | - list_splice_init(&sbi->s_closed_transaction, | |
4432 | - &sbi->s_committed_transaction); | |
4433 | - list_splice_init(&sbi->s_active_transaction, | |
4434 | - &sbi->s_closed_transaction); | |
4435 | - sbi->s_last_transaction = handle->h_transaction->t_tid; | |
4436 | - } | |
4437 | - spin_unlock(&sbi->s_md_lock); | |
4438 | - | |
4439 | - ext4_mb_free_committed_blocks(sb); | |
4440 | -} | |
4441 | - | |
4442 | 4389 | /* |
4443 | 4390 | * We can merge two free data extents only if the physical blocks |
4444 | 4391 | * are contiguous, AND the extents were freed by the same transaction, |
4445 | 4392 | |
... | ... | @@ -4531,9 +4478,9 @@ |
4531 | 4478 | kmem_cache_free(ext4_free_ext_cachep, entry); |
4532 | 4479 | } |
4533 | 4480 | } |
4534 | - /* Add the extent to active_transaction list */ | |
4481 | + /* Add the extent to transaction's private list */ | |
4535 | 4482 | spin_lock(&sbi->s_md_lock); |
4536 | - list_add(&new_entry->list, &sbi->s_active_transaction); | |
4483 | + list_add(&new_entry->list, &handle->h_transaction->t_private_list); | |
4537 | 4484 | spin_unlock(&sbi->s_md_lock); |
4538 | 4485 | ext4_unlock_group(sb, group); |
4539 | 4486 | return 0; |
... | ... | @@ -4561,8 +4508,6 @@ |
4561 | 4508 | int ret; |
4562 | 4509 | |
4563 | 4510 | *freed = 0; |
4564 | - | |
4565 | - ext4_mb_poll_new_transaction(sb, handle); | |
4566 | 4511 | |
4567 | 4512 | sbi = EXT4_SB(sb); |
4568 | 4513 | es = EXT4_SB(sb)->s_es; |
fs/ext4/mballoc.h
... | ... | @@ -269,8 +269,6 @@ |
269 | 269 | |
270 | 270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
271 | 271 | ext4_group_t group); |
272 | -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | |
273 | -static void ext4_mb_free_committed_blocks(struct super_block *); | |
274 | 272 | static void ext4_mb_return_to_preallocation(struct inode *inode, |
275 | 273 | struct ext4_buddy *e4b, sector_t block, |
276 | 274 | int count); |
... | ... | @@ -278,6 +276,7 @@ |
278 | 276 | struct super_block *, struct ext4_prealloc_space *pa); |
279 | 277 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
280 | 278 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
279 | +static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | |
281 | 280 | |
282 | 281 | |
283 | 282 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
fs/jbd2/commit.c
... | ... | @@ -995,6 +995,9 @@ |
995 | 995 | } |
996 | 996 | spin_unlock(&journal->j_list_lock); |
997 | 997 | |
998 | + if (journal->j_commit_callback) | |
999 | + journal->j_commit_callback(journal, commit_transaction); | |
1000 | + | |
998 | 1001 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", |
999 | 1002 | journal->j_devname, commit_transaction->t_tid, |
1000 | 1003 | journal->j_tail_sequence); |
fs/jbd2/transaction.c
... | ... | @@ -52,6 +52,7 @@ |
52 | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | 53 | spin_lock_init(&transaction->t_handle_lock); |
54 | 54 | INIT_LIST_HEAD(&transaction->t_inode_list); |
55 | + INIT_LIST_HEAD(&transaction->t_private_list); | |
55 | 56 | |
56 | 57 | /* Set up the commit timer for the new transaction. */ |
57 | 58 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
include/linux/jbd2.h
... | ... | @@ -641,6 +641,11 @@ |
641 | 641 | */ |
642 | 642 | int t_handle_count; |
643 | 643 | |
644 | + /* | |
645 | + * For use by the filesystem to store fs-specific data | |
646 | + * structures associated with the transaction | |
647 | + */ | |
648 | + struct list_head t_private_list; | |
644 | 649 | }; |
645 | 650 | |
646 | 651 | struct transaction_run_stats_s { |
... | ... | @@ -934,6 +939,10 @@ |
934 | 939 | int j_wbufsize; |
935 | 940 | |
936 | 941 | pid_t j_last_sync_writer; |
942 | + | |
943 | + /* This function is called when a transaction has finished committing */ | |
944 | + void (*j_commit_callback)(journal_t *, | |
945 | + transaction_t *); | |
937 | 946 | |
938 | 947 | /* |
939 | 948 | * Journal statistics |