Commit 3e624fc72fba09b6f999a9fbb87b64efccd38036

Authored by Theodore Ts'o
1 parent 22359f5745

ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback

The multiblock allocator needs to be able to release blocks (and issue
a blkdev discard request) when the transaction which freed those
blocks is committed.  Previously this was done via a polling mechanism
that ran whenever blocks were allocated or freed.  A much better way of
doing things is to create a jbd2 callback function and attach the list
of blocks to be freed directly to the transaction structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

Showing 6 changed files with 29 additions and 75 deletions Side-by-side Diff

... ... @@ -99,9 +99,6 @@
99 99 struct inode *s_buddy_cache;
100 100 long s_blocks_reserved;
101 101 spinlock_t s_reserve_lock;
102   - struct list_head s_active_transaction;
103   - struct list_head s_closed_transaction;
104   - struct list_head s_committed_transaction;
105 102 spinlock_t s_md_lock;
106 103 tid_t s_last_transaction;
107 104 unsigned short *s_mb_offsets, *s_mb_maxs;
... ... @@ -2523,9 +2523,6 @@
2523 2523 }
2524 2524  
2525 2525 spin_lock_init(&sbi->s_md_lock);
2526   - INIT_LIST_HEAD(&sbi->s_active_transaction);
2527   - INIT_LIST_HEAD(&sbi->s_closed_transaction);
2528   - INIT_LIST_HEAD(&sbi->s_committed_transaction);
2529 2526 spin_lock_init(&sbi->s_bal_lock);
2530 2527  
2531 2528 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
... ... @@ -2554,6 +2551,8 @@
2554 2551 ext4_mb_init_per_dev_proc(sb);
2555 2552 ext4_mb_history_init(sb);
2556 2553  
  2554 + sbi->s_journal->j_commit_callback = release_blocks_on_commit;
  2555 +
2557 2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2558 2557 return 0;
2559 2558 }
... ... @@ -2583,15 +2582,6 @@
2583 2582 struct ext4_group_info *grinfo;
2584 2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2585 2584  
2586   - /* release freed, non-committed blocks */
2587   - spin_lock(&sbi->s_md_lock);
2588   - list_splice_init(&sbi->s_closed_transaction,
2589   - &sbi->s_committed_transaction);
2590   - list_splice_init(&sbi->s_active_transaction,
2591   - &sbi->s_committed_transaction);
2592   - spin_unlock(&sbi->s_md_lock);
2593   - ext4_mb_free_committed_blocks(sb);
2594   -
2595 2585 if (sbi->s_group_info) {
2596 2586 for (i = 0; i < sbi->s_groups_count; i++) {
2597 2587 grinfo = ext4_get_group_info(sb, i);
2598 2588  
2599 2589  
2600 2590  
2601 2591  
2602 2592  
2603 2593  
... ... @@ -2645,36 +2635,25 @@
2645 2635 return 0;
2646 2636 }
2647 2637  
2648   -static noinline_for_stack void
2649   -ext4_mb_free_committed_blocks(struct super_block *sb)
  2638 +/*
  2639 + * This function is called by the jbd2 layer once the commit has finished,
  2640 + * so we know we can free the blocks that were released with that commit.
  2641 + */
  2642 +static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2650 2643 {
  2644 + struct super_block *sb = journal->j_private;
2651 2645 struct ext4_buddy e4b;
2652 2646 struct ext4_group_info *db;
2653   - struct ext4_sb_info *sbi = EXT4_SB(sb);
2654 2647 int err, count = 0, count2 = 0;
2655 2648 struct ext4_free_data *entry;
2656 2649 ext4_fsblk_t discard_block;
  2650 + struct list_head *l, *ltmp;
2657 2651  
2658   - if (list_empty(&sbi->s_committed_transaction))
2659   - return;
  2652 + list_for_each_safe(l, ltmp, &txn->t_private_list) {
  2653 + entry = list_entry(l, struct ext4_free_data, list);
2660 2654  
2661   - /* there is committed blocks to be freed yet */
2662   - do {
2663   - /* get next array of blocks */
2664   - entry = NULL;
2665   - spin_lock(&sbi->s_md_lock);
2666   - if (!list_empty(&sbi->s_committed_transaction)) {
2667   - entry = list_entry(sbi->s_committed_transaction.next,
2668   - struct ext4_free_data, list);
2669   - list_del(&entry->list);
2670   - }
2671   - spin_unlock(&sbi->s_md_lock);
2672   -
2673   - if (entry == NULL)
2674   - break;
2675   -
2676 2655 mb_debug("gonna free %u blocks in group %lu (0x%p):",
2677   - entry->count, entry->group, entry);
  2656 + entry->count, entry->group, entry);
2678 2657  
2679 2658 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2680 2659 /* we expect to find existing buddy because it's pinned */
... ... @@ -2706,7 +2685,7 @@
2706 2685  
2707 2686 kmem_cache_free(ext4_free_ext_cachep, entry);
2708 2687 ext4_mb_release_desc(&e4b);
2709   - } while (1);
  2688 + }
2710 2689  
2711 2690 mb_debug("freed %u blocks in %u structures\n", count, count2);
2712 2691 }
... ... @@ -4348,8 +4327,6 @@
4348 4327 goto out1;
4349 4328 }
4350 4329  
4351   - ext4_mb_poll_new_transaction(sb, handle);
4352   -
4353 4330 *errp = ext4_mb_initialize_context(ac, ar);
4354 4331 if (*errp) {
4355 4332 ar->len = 0;
4356 4333  
... ... @@ -4408,37 +4385,7 @@
4408 4385  
4409 4386 return block;
4410 4387 }
4411   -static void ext4_mb_poll_new_transaction(struct super_block *sb,
4412   - handle_t *handle)
4413   -{
4414   - struct ext4_sb_info *sbi = EXT4_SB(sb);
4415 4388  
4416   - if (sbi->s_last_transaction == handle->h_transaction->t_tid)
4417   - return;
4418   -
4419   - /* new transaction! time to close last one and free blocks for
4420   - * committed transaction. we know that only transaction can be
4421   - * active, so previos transaction can be being logged and we
4422   - * know that transaction before previous is known to be already
4423   - * logged. this means that now we may free blocks freed in all
4424   - * transactions before previous one. hope I'm clear enough ... */
4425   -
4426   - spin_lock(&sbi->s_md_lock);
4427   - if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
4428   - mb_debug("new transaction %lu, old %lu\n",
4429   - (unsigned long) handle->h_transaction->t_tid,
4430   - (unsigned long) sbi->s_last_transaction);
4431   - list_splice_init(&sbi->s_closed_transaction,
4432   - &sbi->s_committed_transaction);
4433   - list_splice_init(&sbi->s_active_transaction,
4434   - &sbi->s_closed_transaction);
4435   - sbi->s_last_transaction = handle->h_transaction->t_tid;
4436   - }
4437   - spin_unlock(&sbi->s_md_lock);
4438   -
4439   - ext4_mb_free_committed_blocks(sb);
4440   -}
4441   -
4442 4389 /*
4443 4390 * We can merge two free data extents only if the physical blocks
4444 4391 * are contiguous, AND the extents were freed by the same transaction,
4445 4392  
... ... @@ -4531,9 +4478,9 @@
4531 4478 kmem_cache_free(ext4_free_ext_cachep, entry);
4532 4479 }
4533 4480 }
4534   - /* Add the extent to active_transaction list */
  4481 + /* Add the extent to transaction's private list */
4535 4482 spin_lock(&sbi->s_md_lock);
4536   - list_add(&new_entry->list, &sbi->s_active_transaction);
  4483 + list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4537 4484 spin_unlock(&sbi->s_md_lock);
4538 4485 ext4_unlock_group(sb, group);
4539 4486 return 0;
... ... @@ -4561,8 +4508,6 @@
4561 4508 int ret;
4562 4509  
4563 4510 *freed = 0;
4564   -
4565   - ext4_mb_poll_new_transaction(sb, handle);
4566 4511  
4567 4512 sbi = EXT4_SB(sb);
4568 4513 es = EXT4_SB(sb)->s_es;
... ... @@ -269,8 +269,6 @@
269 269  
270 270 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
271 271 ext4_group_t group);
272   -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
273   -static void ext4_mb_free_committed_blocks(struct super_block *);
274 272 static void ext4_mb_return_to_preallocation(struct inode *inode,
275 273 struct ext4_buddy *e4b, sector_t block,
276 274 int count);
... ... @@ -278,6 +276,7 @@
278 276 struct super_block *, struct ext4_prealloc_space *pa);
279 277 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
280 278 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
  279 +static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
281 280  
282 281  
283 282 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
... ... @@ -995,6 +995,9 @@
995 995 }
996 996 spin_unlock(&journal->j_list_lock);
997 997  
  998 + if (journal->j_commit_callback)
  999 + journal->j_commit_callback(journal, commit_transaction);
  1000 +
998 1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
999 1002 journal->j_devname, commit_transaction->t_tid,
1000 1003 journal->j_tail_sequence);
fs/jbd2/transaction.c
... ... @@ -52,6 +52,7 @@
52 52 transaction->t_expires = jiffies + journal->j_commit_interval;
53 53 spin_lock_init(&transaction->t_handle_lock);
54 54 INIT_LIST_HEAD(&transaction->t_inode_list);
  55 + INIT_LIST_HEAD(&transaction->t_private_list);
55 56  
56 57 /* Set up the commit timer for the new transaction. */
57 58 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
include/linux/jbd2.h
... ... @@ -641,6 +641,11 @@
641 641 */
642 642 int t_handle_count;
643 643  
  644 + /*
  645 + * For use by the filesystem to store fs-specific data
  646 + * structures associated with the transaction
  647 + */
  648 + struct list_head t_private_list;
644 649 };
645 650  
646 651 struct transaction_run_stats_s {
... ... @@ -934,6 +939,10 @@
934 939 int j_wbufsize;
935 940  
936 941 pid_t j_last_sync_writer;
  942 +
  943 + /* This function is called when a transaction is closed */
  944 + void (*j_commit_callback)(journal_t *,
  945 + transaction_t *);
937 946  
938 947 /*
939 948 * Journal statistics