Commit 8cef4e160d74920ad1725f58c89fd75ec4c4ac38

Authored by Yan, Zheng
Committed by Chris Mason
1 parent 22763c5cf3

Btrfs: Avoid superfluous tree-log writeout

We allow two log transactions at a time, but use the same flag
to mark dirty tree-log btree blocks. So we may flush dirty
blocks belonging to the newer log transaction when committing a
log transaction. This patch fixes the issue by using two
flags to mark dirty tree-log btree blocks.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

Showing 5 changed files with 47 additions and 31 deletions Side-by-side Diff

... ... @@ -980,12 +980,12 @@
980 980  
981 981 while (1) {
982 982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983   - 0, &start, &end, EXTENT_DIRTY);
  983 + 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 984 if (ret)
985 985 break;
986 986  
987   - clear_extent_dirty(&log_root_tree->dirty_log_pages,
988   - start, end, GFP_NOFS);
  987 + clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
  988 + EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 989 }
990 990 eb = fs_info->log_root_tree->node;
991 991  
fs/btrfs/extent-tree.c
... ... @@ -4919,8 +4919,16 @@
4919 4919 btrfs_set_buffer_uptodate(buf);
4920 4920  
4921 4921 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4922   - set_extent_dirty(&root->dirty_log_pages, buf->start,
4923   - buf->start + buf->len - 1, GFP_NOFS);
  4922 + /*
  4923 + * we allow two log transactions at a time, use different
  4924 + * EXTENT bit to differentiate dirty pages.
  4925 + */
  4926 + if (root->log_transid % 2 == 0)
  4927 + set_extent_dirty(&root->dirty_log_pages, buf->start,
  4928 + buf->start + buf->len - 1, GFP_NOFS);
  4929 + else
  4930 + set_extent_new(&root->dirty_log_pages, buf->start,
  4931 + buf->start + buf->len - 1, GFP_NOFS);
4924 4932 } else {
4925 4933 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4926 4934 buf->start + buf->len - 1, GFP_NOFS);
fs/btrfs/transaction.c
... ... @@ -354,7 +354,7 @@
354 354 * those extents are sent to disk but does not wait on them
355 355 */
356 356 int btrfs_write_marked_extents(struct btrfs_root *root,
357   - struct extent_io_tree *dirty_pages)
  357 + struct extent_io_tree *dirty_pages, int mark)
358 358 {
359 359 int ret;
360 360 int err = 0;
... ... @@ -367,7 +367,7 @@
367 367  
368 368 while (1) {
369 369 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
370   - EXTENT_DIRTY);
  370 + mark);
371 371 if (ret)
372 372 break;
373 373 while (start <= end) {
... ... @@ -413,7 +413,7 @@
413 413 * on all the pages and clear them from the dirty pages state tree
414 414 */
415 415 int btrfs_wait_marked_extents(struct btrfs_root *root,
416   - struct extent_io_tree *dirty_pages)
  416 + struct extent_io_tree *dirty_pages, int mark)
417 417 {
418 418 int ret;
419 419 int err = 0;
420 420  
... ... @@ -425,12 +425,12 @@
425 425 unsigned long index;
426 426  
427 427 while (1) {
428   - ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
429   - EXTENT_DIRTY);
  428 + ret = find_first_extent_bit(dirty_pages, start, &start, &end,
  429 + mark);
430 430 if (ret)
431 431 break;
432 432  
433   - clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
  433 + clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
434 434 while (start <= end) {
435 435 index = start >> PAGE_CACHE_SHIFT;
436 436 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
437 437  
... ... @@ -460,13 +460,13 @@
460 460 * those extents are on disk for transaction or log commit
461 461 */
462 462 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
463   - struct extent_io_tree *dirty_pages)
  463 + struct extent_io_tree *dirty_pages, int mark)
464 464 {
465 465 int ret;
466 466 int ret2;
467 467  
468   - ret = btrfs_write_marked_extents(root, dirty_pages);
469   - ret2 = btrfs_wait_marked_extents(root, dirty_pages);
  468 + ret = btrfs_write_marked_extents(root, dirty_pages, mark);
  469 + ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
470 470 return ret || ret2;
471 471 }
472 472  
... ... @@ -479,7 +479,8 @@
479 479 return filemap_write_and_wait(btree_inode->i_mapping);
480 480 }
481 481 return btrfs_write_and_wait_marked_extents(root,
482   - &trans->transaction->dirty_pages);
  482 + &trans->transaction->dirty_pages,
  483 + EXTENT_DIRTY);
483 484 }
484 485  
485 486 /*
fs/btrfs/transaction.h
... ... @@ -107,11 +107,11 @@
107 107 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 108 struct btrfs_root *root);
109 109 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110   - struct extent_io_tree *dirty_pages);
  110 + struct extent_io_tree *dirty_pages, int mark);
111 111 int btrfs_write_marked_extents(struct btrfs_root *root,
112   - struct extent_io_tree *dirty_pages);
  112 + struct extent_io_tree *dirty_pages, int mark);
113 113 int btrfs_wait_marked_extents(struct btrfs_root *root,
114   - struct extent_io_tree *dirty_pages);
  114 + struct extent_io_tree *dirty_pages, int mark);
115 115 int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116 116 #endif
... ... @@ -1977,10 +1977,11 @@
1977 1977 {
1978 1978 int index1;
1979 1979 int index2;
  1980 + int mark;
1980 1981 int ret;
1981 1982 struct btrfs_root *log = root->log_root;
1982 1983 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983   - u64 log_transid = 0;
  1984 + unsigned long log_transid = 0;
1984 1985  
1985 1986 mutex_lock(&root->log_mutex);
1986 1987 index1 = root->log_transid % 2;
1987 1988  
1988 1989  
1989 1990  
... ... @@ -2014,24 +2015,29 @@
2014 2015 goto out;
2015 2016 }
2016 2017  
  2018 + log_transid = root->log_transid;
  2019 + if (log_transid % 2 == 0)
  2020 + mark = EXTENT_DIRTY;
  2021 + else
  2022 + mark = EXTENT_NEW;
  2023 +
2017 2024 /* we start IO on all the marked extents here, but we don't actually
2018 2025 * wait for them until later.
2019 2026 */
2020   - ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
  2027 + ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 2028 BUG_ON(ret);
2022 2029  
2023 2030 btrfs_set_root_node(&log->root_item, log->node);
2024 2031  
2025 2032 root->log_batch = 0;
2026   - log_transid = root->log_transid;
2027 2033 root->log_transid++;
2028 2034 log->log_transid = root->log_transid;
2029 2035 root->log_start_pid = 0;
2030 2036 smp_mb();
2031 2037 /*
2032   - * log tree has been flushed to disk, new modifications of
2033   - * the log will be written to new positions. so it's safe to
2034   - * allow log writers to go in.
  2038 + * IO has been started, blocks of the log tree have WRITTEN flag set
  2039 + * in their headers. new modifications of the log will be written to
  2040 + * new positions. so it's safe to allow log writers to go in.
2035 2041 */
2036 2042 mutex_unlock(&root->log_mutex);
2037 2043  
... ... @@ -2052,7 +2058,7 @@
2052 2058  
2053 2059 index2 = log_root_tree->log_transid % 2;
2054 2060 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055   - btrfs_wait_marked_extents(log, &log->dirty_log_pages);
  2061 + btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 2062 wait_log_commit(trans, log_root_tree,
2057 2063 log_root_tree->log_transid);
2058 2064 mutex_unlock(&log_root_tree->log_mutex);
2059 2065  
2060 2066  
... ... @@ -2072,16 +2078,17 @@
2072 2078 * check the full commit flag again
2073 2079 */
2074 2080 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075   - btrfs_wait_marked_extents(log, &log->dirty_log_pages);
  2081 + btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 2082 mutex_unlock(&log_root_tree->log_mutex);
2077 2083 ret = -EAGAIN;
2078 2084 goto out_wake_log_root;
2079 2085 }
2080 2086  
2081 2087 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082   - &log_root_tree->dirty_log_pages);
  2088 + &log_root_tree->dirty_log_pages,
  2089 + EXTENT_DIRTY | EXTENT_NEW);
2083 2090 BUG_ON(ret);
2084   - btrfs_wait_marked_extents(log, &log->dirty_log_pages);
  2091 + btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2092  
2086 2093 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 2094 log_root_tree->node->start);
2088 2095  
... ... @@ -2147,12 +2154,12 @@
2147 2154  
2148 2155 while (1) {
2149 2156 ret = find_first_extent_bit(&log->dirty_log_pages,
2150   - 0, &start, &end, EXTENT_DIRTY);
  2157 + 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 2158 if (ret)
2152 2159 break;
2153 2160  
2154   - clear_extent_dirty(&log->dirty_log_pages,
2155   - start, end, GFP_NOFS);
  2161 + clear_extent_bits(&log->dirty_log_pages, start, end,
  2162 + EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 2163 }
2157 2164  
2158 2165 if (log->log_transid > 0) {