Commit b6e51316daede0633e9274e1e30391cfa4747877

Authored by Jens Axboe
1 parent bcddc3f01c

writeback: separate starting of sync vs opportunistic writeback

bdi_start_writeback() is currently split into two paths, one for
WB_SYNC_NONE and one for WB_SYNC_ALL. Add bdi_sync_writeback()
for WB_SYNC_ALL writeback and let bdi_start_writeback() handle
only WB_SYNC_NONE.

Push down the writeback_control allocation and only accept the
parameters that make sense for each function. This cleans up
the API considerably.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 5 changed files with 75 additions and 95 deletions Side-by-side Diff

... ... @@ -74,14 +74,10 @@
74 74 }
75 75  
76 76 static inline void bdi_work_init(struct bdi_work *work,
77   - struct writeback_control *wbc)
  77 + struct wb_writeback_args *args)
78 78 {
79 79 INIT_RCU_HEAD(&work->rcu_head);
80   - work->args.sb = wbc->sb;
81   - work->args.nr_pages = wbc->nr_to_write;
82   - work->args.sync_mode = wbc->sync_mode;
83   - work->args.range_cyclic = wbc->range_cyclic;
84   - work->args.for_kupdate = 0;
  80 + work->args = *args;
85 81 work->state = WS_USED;
86 82 }
87 83  
... ... @@ -194,7 +190,7 @@
194 190 }
195 191  
196 192 static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
197   - struct writeback_control *wbc)
  193 + struct wb_writeback_args *args)
198 194 {
199 195 struct bdi_work *work;
200 196  
... ... @@ -204,7 +200,7 @@
204 200 */
205 201 work = kmalloc(sizeof(*work), GFP_ATOMIC);
206 202 if (work) {
207   - bdi_work_init(work, wbc);
  203 + bdi_work_init(work, args);
208 204 bdi_queue_work(bdi, work);
209 205 } else {
210 206 struct bdi_writeback *wb = &bdi->wb;
211 207  
212 208  
213 209  
214 210  
... ... @@ -214,26 +210,56 @@
214 210 }
215 211 }
216 212  
217   -void bdi_start_writeback(struct writeback_control *wbc)
  213 +/**
  214 + * bdi_sync_writeback - start and wait for writeback
  215 + * @bdi: the backing device to write from
  216 + * @sb: write inodes from this super_block
  217 + *
  218 + * Description:
  219 + * This does WB_SYNC_ALL data integrity writeback and waits for the
  220 + * IO to complete. Callers must hold the sb s_umount semaphore for
  221 + * reading, to avoid having the super disappear before we are done.
  222 + */
  223 +static void bdi_sync_writeback(struct backing_dev_info *bdi,
  224 + struct super_block *sb)
218 225 {
219   - /*
220   - * WB_SYNC_NONE is opportunistic writeback. If this allocation fails,
221   - * bdi_queue_work() will wake up the thread and flush old data. This
222   - * should ensure some amount of progress in freeing memory.
223   - */
224   - if (wbc->sync_mode != WB_SYNC_ALL)
225   - bdi_alloc_queue_work(wbc->bdi, wbc);
226   - else {
227   - struct bdi_work work;
  226 + struct wb_writeback_args args = {
  227 + .sb = sb,
  228 + .sync_mode = WB_SYNC_ALL,
  229 + .nr_pages = LONG_MAX,
  230 + .range_cyclic = 0,
  231 + };
  232 + struct bdi_work work;
228 233  
229   - bdi_work_init(&work, wbc);
230   - work.state |= WS_ONSTACK;
  234 + bdi_work_init(&work, &args);
  235 + work.state |= WS_ONSTACK;
231 236  
232   - bdi_queue_work(wbc->bdi, &work);
233   - bdi_wait_on_work_clear(&work);
234   - }
  237 + bdi_queue_work(bdi, &work);
  238 + bdi_wait_on_work_clear(&work);
235 239 }
236 240  
  241 +/**
  242 + * bdi_start_writeback - start writeback
  243 + * @bdi: the backing device to write from
  244 + * @nr_pages: the number of pages to write
  245 + *
  246 + * Description:
  247 + * This does WB_SYNC_NONE opportunistic writeback. The IO is only
  248 + * started when this function returns; we make no guarantees on
  249 + * completion. The caller need not hold the sb s_umount semaphore.
  250 + *
  251 + */
  252 +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  253 +{
  254 + struct wb_writeback_args args = {
  255 + .sync_mode = WB_SYNC_NONE,
  256 + .nr_pages = nr_pages,
  257 + .range_cyclic = 1,
  258 + };
  259 +
  260 + bdi_alloc_queue_work(bdi, &args);
  261 +}
  262 +
237 263 /*
238 264 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
239 265 * furthest end of its superblock's dirty-inode list.
240 266  
241 267  
242 268  
243 269  
... ... @@ -863,23 +889,25 @@
863 889 }
864 890  
865 891 /*
866   - * Schedule writeback for all backing devices. Can only be used for
867   - * WB_SYNC_NONE writeback, WB_SYNC_ALL should use bdi_start_writeback()
868   - * and pass in the superblock.
  892 + * Schedule writeback for all backing devices. This does WB_SYNC_NONE
  893 + * writeback, for integrity writeback see bdi_sync_writeback().
869 894 */
870   -static void bdi_writeback_all(struct writeback_control *wbc)
  895 +static void bdi_writeback_all(struct super_block *sb, long nr_pages)
871 896 {
  897 + struct wb_writeback_args args = {
  898 + .sb = sb,
  899 + .nr_pages = nr_pages,
  900 + .sync_mode = WB_SYNC_NONE,
  901 + };
872 902 struct backing_dev_info *bdi;
873 903  
874   - WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
875   -
876 904 rcu_read_lock();
877 905  
878 906 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
879 907 if (!bdi_has_dirty_io(bdi))
880 908 continue;
881 909  
882   - bdi_alloc_queue_work(bdi, wbc);
  910 + bdi_alloc_queue_work(bdi, &args);
883 911 }
884 912  
885 913 rcu_read_unlock();
886 914  
... ... @@ -891,17 +919,10 @@
891 919 */
892 920 void wakeup_flusher_threads(long nr_pages)
893 921 {
894   - struct writeback_control wbc = {
895   - .sync_mode = WB_SYNC_NONE,
896   - .older_than_this = NULL,
897   - .range_cyclic = 1,
898   - };
899   -
900 922 if (nr_pages == 0)
901 923 nr_pages = global_page_state(NR_FILE_DIRTY) +
902 924 global_page_state(NR_UNSTABLE_NFS);
903   - wbc.nr_to_write = nr_pages;
904   - bdi_writeback_all(&wbc);
  925 + bdi_writeback_all(NULL, nr_pages);
905 926 }
906 927  
907 928 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
... ... @@ -1048,7 +1069,7 @@
1048 1069 * on the writer throttling path, and we get decent balancing between many
1049 1070 * throttled threads: we don't want them all piling up on inode_sync_wait.
1050 1071 */
1051   -static void wait_sb_inodes(struct writeback_control *wbc)
  1072 +static void wait_sb_inodes(struct super_block *sb)
1052 1073 {
1053 1074 struct inode *inode, *old_inode = NULL;
1054 1075  
... ... @@ -1056,7 +1077,7 @@
1056 1077 * We need to be protected against the filesystem going from
1057 1078 * r/o to r/w or vice versa.
1058 1079 */
1059   - WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
  1080 + WARN_ON(!rwsem_is_locked(&sb->s_umount));
1060 1081  
1061 1082 spin_lock(&inode_lock);
1062 1083  
... ... @@ -1067,7 +1088,7 @@
1067 1088 * In which case, the inode may not be on the dirty list, but
1068 1089 * we still have to wait for that writeout.
1069 1090 */
1070   - list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
  1091 + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1071 1092 struct address_space *mapping;
1072 1093  
1073 1094 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
1074 1095  
... ... @@ -1107,14 +1128,8 @@
1107 1128 * for IO completion of submitted IO. This function does not wait
1108 1129 * for the IO to complete.
1109 1130 */
1110   -long writeback_inodes_sb(struct super_block *sb)
  1131 +void writeback_inodes_sb(struct super_block *sb)
1111 1132 {
1112   - struct writeback_control wbc = {
1113   - .sb = sb,
1114   - .sync_mode = WB_SYNC_NONE,
1115   - .range_start = 0,
1116   - .range_end = LLONG_MAX,
1117   - };
1118 1133 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1119 1134 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1120 1135 long nr_to_write;
... ... @@ -1122,9 +1137,7 @@
1122 1137 nr_to_write = nr_dirty + nr_unstable +
1123 1138 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1124 1139  
1125   - wbc.nr_to_write = nr_to_write;
1126   - bdi_writeback_all(&wbc);
1127   - return nr_to_write - wbc.nr_to_write;
  1140 + bdi_writeback_all(sb, nr_to_write);
1128 1141 }
1129 1142 EXPORT_SYMBOL(writeback_inodes_sb);
1130 1143  
1131 1144  
... ... @@ -1135,21 +1148,10 @@
1135 1148 * This function writes and waits on any dirty inode belonging to this
1136 1149 * super_block. This is a data integrity operation.
1137 1150 */
1138   -long sync_inodes_sb(struct super_block *sb)
  1151 +void sync_inodes_sb(struct super_block *sb)
1139 1152 {
1140   - struct writeback_control wbc = {
1141   - .sb = sb,
1142   - .bdi = sb->s_bdi,
1143   - .sync_mode = WB_SYNC_ALL,
1144   - .range_start = 0,
1145   - .range_end = LLONG_MAX,
1146   - };
1147   - long nr_to_write = LONG_MAX; /* doesn't actually matter */
1148   -
1149   - wbc.nr_to_write = nr_to_write;
1150   - bdi_start_writeback(&wbc);
1151   - wait_sb_inodes(&wbc);
1152   - return nr_to_write - wbc.nr_to_write;
  1153 + bdi_sync_writeback(sb->s_bdi, sb);
  1154 + wait_sb_inodes(sb);
1153 1155 }
1154 1156 EXPORT_SYMBOL(sync_inodes_sb);
1155 1157  
... ... @@ -54,29 +54,15 @@
54 54 * @nr_to_write: how many dirty pages to write-back
55 55 *
56 56 * This function shrinks UBIFS liability by means of writing back some amount
57   - * of dirty inodes and their pages. Returns the amount of pages which were
58   - * written back. The returned value does not include dirty inodes which were
59   - * synchronized.
  57 + * of dirty inodes and their pages.
60 58 *
61 59 * Note, this function synchronizes even VFS inodes which are locked
62 60 * (@i_mutex) by the caller of the budgeting function, because write-back does
63 61 * not touch @i_mutex.
64 62 */
65   -static int shrink_liability(struct ubifs_info *c, int nr_to_write)
  63 +static void shrink_liability(struct ubifs_info *c, int nr_to_write)
66 64 {
67   - int nr_written;
68   -
69   - nr_written = writeback_inodes_sb(c->vfs_sb);
70   - if (!nr_written) {
71   - /*
72   - * Re-try again but wait on pages/inodes which are being
73   - * written-back concurrently (e.g., by pdflush).
74   - */
75   - nr_written = sync_inodes_sb(c->vfs_sb);
76   - }
77   -
78   - dbg_budg("%d pages were written back", nr_written);
79   - return nr_written;
  65 + writeback_inodes_sb(c->vfs_sb);
80 66 }
81 67  
82 68 /**
include/linux/backing-dev.h
... ... @@ -101,7 +101,7 @@
101 101 const char *fmt, ...);
102 102 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
103 103 void bdi_unregister(struct backing_dev_info *bdi);
104   -void bdi_start_writeback(struct writeback_control *wbc);
  104 +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
105 105 int bdi_writeback_task(struct bdi_writeback *wb);
106 106 int bdi_has_dirty_io(struct backing_dev_info *bdi);
107 107  
include/linux/writeback.h
... ... @@ -68,8 +68,8 @@
68 68 */
69 69 struct bdi_writeback;
70 70 int inode_wait(void *);
71   -long writeback_inodes_sb(struct super_block *);
72   -long sync_inodes_sb(struct super_block *);
  71 +void writeback_inodes_sb(struct super_block *);
  72 +void sync_inodes_sb(struct super_block *);
73 73 void writeback_inodes_wbc(struct writeback_control *wbc);
74 74 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
75 75 void wakeup_flusher_threads(long nr_pages);
... ... @@ -582,16 +582,8 @@
582 582 if ((laptop_mode && pages_written) ||
583 583 (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
584 584 + global_page_state(NR_UNSTABLE_NFS))
585   - > background_thresh))) {
586   - struct writeback_control wbc = {
587   - .bdi = bdi,
588   - .sync_mode = WB_SYNC_NONE,
589   - .nr_to_write = nr_writeback,
590   - };
591   -
592   -
593   - bdi_start_writeback(&wbc);
594   - }
  585 + > background_thresh)))
  586 + bdi_start_writeback(bdi, nr_writeback);
595 587 }
596 588  
597 589 void set_page_dirty_balance(struct page *page, int page_mkwrite)