  /*
   * fs/fs-writeback.c
   *
   * Copyright (C) 2002, Linus Torvalds.
   *
   * Contains all the functions related to writing back and waiting
   * upon dirty inodes against superblocks, and writing back dirty
   * pages against inodes.  ie: data writeback.  Writeout of the
   * inode itself is not handled here.
   *
   * 10Apr2002	Andrew Morton
   *		Split out of fs/inode.c
   *		Additions for address_space-based writeback
   */
  
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/spinlock.h>
  #include <linux/slab.h>
  #include <linux/sched.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/buffer_head.h>
  #include <linux/tracepoint.h>
  #include "internal.h"

  /*
   * Passed into wb_writeback(), essentially a subset of writeback_control
   */
  struct wb_writeback_work {
  	long nr_pages;
  	struct super_block *sb;
  	enum writeback_sync_modes sync_mode;
  	unsigned int for_kupdate:1;
  	unsigned int range_cyclic:1;
  	unsigned int for_background:1;

  	struct list_head list;		/* pending work list */
  	struct completion *done;	/* set if the caller waits */
  };
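
  /*
   * A work item is normally allocated with GFP_ATOMIC in
   * __bdi_start_writeback() below, queued on bdi->work_list via
   * bdi_queue_work() and consumed by the flusher thread in
   * wb_do_writeback().  A caller that needs to wait sets ->done to a
   * completion which wb_do_writeback() completes once the work is done.
   */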
  /*
   * Include the creation of the trace points after defining the
   * wb_writeback_work structure so that the definition remains local to this
   * file.
   */
  #define CREATE_TRACE_POINTS
  #include <trace/events/writeback.h>
  /*
   * We don't actually have pdflush, but this one is exported though /proc...
   */
  int nr_pdflush_threads;
  /**
   * writeback_in_progress - determine whether there is writeback in progress
   * @bdi: the device's backing_dev_info structure.
   *
   * Determine whether there is writeback waiting to be handled against a
   * backing device.
   */
  int writeback_in_progress(struct backing_dev_info *bdi)
  {
  	return test_bit(BDI_writeback_running, &bdi->state);
  }
  static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
  {
  	struct super_block *sb = inode->i_sb;

  	if (strcmp(sb->s_type->name, "bdev") == 0)
  		return inode->i_mapping->backing_dev_info;
  
  	return sb->s_bdi;
  }
  static inline struct inode *wb_inode(struct list_head *head)
  {
  	return list_entry(head, struct inode, i_wb_list);
  }
  /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
  static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
  {
  	if (bdi->wb.task) {
  		wake_up_process(bdi->wb.task);
  	} else {
  		/*
  		 * The bdi thread isn't there, wake up the forker thread which
  		 * will create and run it.
  		 */
  		wake_up_process(default_backing_dev_info.wb.task);
  	}
  }
  
  static void bdi_queue_work(struct backing_dev_info *bdi,
  			   struct wb_writeback_work *work)
  {
  	trace_writeback_queue(bdi, work);
  
  	spin_lock_bh(&bdi->wb_lock);
  	list_add_tail(&work->list, &bdi->work_list);
  	if (!bdi->wb.task)
  		trace_writeback_nothread(bdi, work);
  	bdi_wakeup_flusher(bdi);
  	spin_unlock_bh(&bdi->wb_lock);
  }
  static void
  __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  		      bool range_cyclic)
  {
  	struct wb_writeback_work *work;

  	/*
  	 * This is WB_SYNC_NONE writeback, so if allocation fails just
  	 * wakeup the thread for old dirty data writeback
  	 */
  	work = kzalloc(sizeof(*work), GFP_ATOMIC);
  	if (!work) {
  		if (bdi->wb.task) {
  			trace_writeback_nowork(bdi);
  			wake_up_process(bdi->wb.task);
  		}
  		return;
  	}

  	work->sync_mode	= WB_SYNC_NONE;
  	work->nr_pages	= nr_pages;
  	work->range_cyclic = range_cyclic;

  	bdi_queue_work(bdi, work);
  }
  
  /**
   * bdi_start_writeback - start writeback
   * @bdi: the backing device to write from
   * @nr_pages: the number of pages to write
   *
   * Description:
   *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
   *   started when this function returns, we make no guarantees on
   *   completion. Caller need not hold sb s_umount semaphore.
   *
   */
  void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  {
  	__bdi_start_writeback(bdi, nr_pages, true);
  }

  /**
   * bdi_start_background_writeback - start background writeback
   * @bdi: the backing device to write from
   *
   * Description:
   *   This makes sure WB_SYNC_NONE background writeback happens. When
   *   this function returns, it is only guaranteed that for given BDI
   *   some IO is happening if we are over background dirty threshold.
   *   Caller need not hold sb s_umount semaphore.
   */
  void bdi_start_background_writeback(struct backing_dev_info *bdi)
  {
  	/*
  	 * We just wake up the flusher thread. It will perform background
  	 * writeback as soon as there is no other work to do.
  	 */
  	trace_writeback_wake_background(bdi);
  	spin_lock_bh(&bdi->wb_lock);
  	bdi_wakeup_flusher(bdi);
  	spin_unlock_bh(&bdi->wb_lock);
  }
  
  /*
   * Remove the inode from the writeback list it is on.
   */
  void inode_wb_list_del(struct inode *inode)
  {
  	spin_lock(&inode_wb_list_lock);
  	list_del_init(&inode->i_wb_list);
  	spin_unlock(&inode_wb_list_lock);
  }
  
  
  /*
6610a0bc8   Andrew Morton   writeback: fix ti...
183
184
185
186
   * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
   * furthest end of its superblock's dirty-inode list.
   *
   * Before stamping the inode's ->dirtied_when, we check to see whether it is
66f3b8e2e   Jens Axboe   writeback: move d...
187
   * already the most-recently-dirtied inode on the b_dirty list.  If that is
6610a0bc8   Andrew Morton   writeback: fix ti...
188
189
190
191
192
   * the case then the inode must have been redirtied while it was being written
   * out and we don't reset its dirtied_when.
   */
  static void redirty_tail(struct inode *inode)
  {
03ba3782e   Jens Axboe   writeback: switch...
193
  	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
6610a0bc8   Andrew Morton   writeback: fix ti...
194

a66979aba   Dave Chinner   fs: move i_wb_lis...
195
  	assert_spin_locked(&inode_wb_list_lock);
03ba3782e   Jens Axboe   writeback: switch...
196
  	if (!list_empty(&wb->b_dirty)) {
66f3b8e2e   Jens Axboe   writeback: move d...
197
  		struct inode *tail;
6610a0bc8   Andrew Morton   writeback: fix ti...
198

7ccf19a80   Nick Piggin   fs: inode split I...
199
  		tail = wb_inode(wb->b_dirty.next);
66f3b8e2e   Jens Axboe   writeback: move d...
200
  		if (time_before(inode->dirtied_when, tail->dirtied_when))
6610a0bc8   Andrew Morton   writeback: fix ti...
201
202
  			inode->dirtied_when = jiffies;
  	}
7ccf19a80   Nick Piggin   fs: inode split I...
203
  	list_move(&inode->i_wb_list, &wb->b_dirty);
6610a0bc8   Andrew Morton   writeback: fix ti...
204
205
206
  }
  
  /*
66f3b8e2e   Jens Axboe   writeback: move d...
207
   * requeue inode for re-scanning after bdi->b_io list is exhausted.
c986d1e2a   Andrew Morton   writeback: fix ti...
208
   */
0e0f4fc22   Ken Chen   writeback: fix pe...
209
  static void requeue_io(struct inode *inode)
c986d1e2a   Andrew Morton   writeback: fix ti...
210
  {
03ba3782e   Jens Axboe   writeback: switch...
211
  	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
a66979aba   Dave Chinner   fs: move i_wb_lis...
212
  	assert_spin_locked(&inode_wb_list_lock);
7ccf19a80   Nick Piggin   fs: inode split I...
213
  	list_move(&inode->i_wb_list, &wb->b_more_io);
c986d1e2a   Andrew Morton   writeback: fix ti...
214
  }
1c0eeaf56   Joern Engel   introduce I_SYNC
215
216
217
  static void inode_sync_complete(struct inode *inode)
  {
  	/*
a66979aba   Dave Chinner   fs: move i_wb_lis...
218
219
  	 * Prevent speculative execution through
  	 * spin_unlock(&inode_wb_list_lock);
1c0eeaf56   Joern Engel   introduce I_SYNC
220
  	 */
a66979aba   Dave Chinner   fs: move i_wb_lis...
221

1c0eeaf56   Joern Engel   introduce I_SYNC
222
223
224
  	smp_mb();
  	wake_up_bit(&inode->i_state, __I_SYNC);
  }
d2caa3c54   Jeff Layton   writeback: guard ...
225
226
227
228
229
230
231
232
  static bool inode_dirtied_after(struct inode *inode, unsigned long t)
  {
  	bool ret = time_after(inode->dirtied_when, t);
  #ifndef CONFIG_64BIT
  	/*
  	 * For inodes being constantly redirtied, dirtied_when can get stuck.
  	 * It _appears_ to be in the future, but is actually in distant past.
  	 * This test is necessary to prevent such wrapped-around relative times
5b0830cb9   Jens Axboe   writeback: get ri...
233
  	 * from permanently stopping the whole bdi writeback.
d2caa3c54   Jeff Layton   writeback: guard ...
234
235
236
237
238
  	 */
  	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
  #endif
  	return ret;
  }
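
  /*
   * On 32-bit, a dirtied_when value more than ~LONG_MAX jiffies in the
   * past makes the time_after() test above report the inode as dirtied
   * in the future; the extra time_before_eq(dirtied_when, jiffies) check
   * turns such a stuck inode back into an "expired" one so writeback can
   * still get to it.
   */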
c986d1e2a   Andrew Morton   writeback: fix ti...
239
  /*
2c1365791   Fengguang Wu   writeback: fix ti...
240
241
242
243
244
245
   * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
   */
  static void move_expired_inodes(struct list_head *delaying_queue,
  			       struct list_head *dispatch_queue,
  				unsigned long *older_than_this)
  {
5c03449d3   Shaohua Li   writeback: move i...
246
247
  	LIST_HEAD(tmp);
  	struct list_head *pos, *node;
cf137307c   Jens Axboe   writeback: don't ...
248
  	struct super_block *sb = NULL;
5c03449d3   Shaohua Li   writeback: move i...
249
  	struct inode *inode;
cf137307c   Jens Axboe   writeback: don't ...
250
  	int do_sb_sort = 0;
5c03449d3   Shaohua Li   writeback: move i...
251

2c1365791   Fengguang Wu   writeback: fix ti...
252
  	while (!list_empty(delaying_queue)) {
7ccf19a80   Nick Piggin   fs: inode split I...
253
  		inode = wb_inode(delaying_queue->prev);
2c1365791   Fengguang Wu   writeback: fix ti...
254
  		if (older_than_this &&
d2caa3c54   Jeff Layton   writeback: guard ...
255
  		    inode_dirtied_after(inode, *older_than_this))
2c1365791   Fengguang Wu   writeback: fix ti...
256
  			break;
cf137307c   Jens Axboe   writeback: don't ...
257
258
259
  		if (sb && sb != inode->i_sb)
  			do_sb_sort = 1;
  		sb = inode->i_sb;
7ccf19a80   Nick Piggin   fs: inode split I...
260
  		list_move(&inode->i_wb_list, &tmp);
5c03449d3   Shaohua Li   writeback: move i...
261
  	}
cf137307c   Jens Axboe   writeback: don't ...
262
263
264
265
266
  	/* just one sb in list, splice to dispatch_queue and we're done */
  	if (!do_sb_sort) {
  		list_splice(&tmp, dispatch_queue);
  		return;
  	}
5c03449d3   Shaohua Li   writeback: move i...
267
268
  	/* Move inodes from one superblock together */
  	while (!list_empty(&tmp)) {
7ccf19a80   Nick Piggin   fs: inode split I...
269
  		sb = wb_inode(tmp.prev)->i_sb;
5c03449d3   Shaohua Li   writeback: move i...
270
  		list_for_each_prev_safe(pos, node, &tmp) {
7ccf19a80   Nick Piggin   fs: inode split I...
271
  			inode = wb_inode(pos);
5c03449d3   Shaohua Li   writeback: move i...
272
  			if (inode->i_sb == sb)
7ccf19a80   Nick Piggin   fs: inode split I...
273
  				list_move(&inode->i_wb_list, dispatch_queue);
5c03449d3   Shaohua Li   writeback: move i...
274
  		}
2c1365791   Fengguang Wu   writeback: fix ti...
275
276
277
278
279
  	}
  }
  
  /*
   * Queue all expired dirty inodes for io, eldest first.
4ea879b96   Wu Fengguang   writeback: fix qu...
280
281
282
283
284
285
286
287
   * Before
   *         newly dirtied     b_dirty    b_io    b_more_io
   *         =============>    gf         edc     BA
   * After
   *         newly dirtied     b_dirty    b_io    b_more_io
   *         =============>    g          fBAedc
   *                                           |
   *                                           +--> dequeue for IO
2c1365791   Fengguang Wu   writeback: fix ti...
288
   */
03ba3782e   Jens Axboe   writeback: switch...
289
  static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
66f3b8e2e   Jens Axboe   writeback: move d...
290
  {
a66979aba   Dave Chinner   fs: move i_wb_lis...
291
  	assert_spin_locked(&inode_wb_list_lock);
4ea879b96   Wu Fengguang   writeback: fix qu...
292
  	list_splice_init(&wb->b_more_io, &wb->b_io);
03ba3782e   Jens Axboe   writeback: switch...
293
  	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
66f3b8e2e   Jens Axboe   writeback: move d...
294
  }
a9185b41a   Christoph Hellwig   pass writeback_co...
295
  static int write_inode(struct inode *inode, struct writeback_control *wbc)
08d8e9749   Fengguang Wu   writeback: fix nt...
296
  {
03ba3782e   Jens Axboe   writeback: switch...
297
  	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
a9185b41a   Christoph Hellwig   pass writeback_co...
298
  		return inode->i_sb->s_op->write_inode(inode, wbc);
03ba3782e   Jens Axboe   writeback: switch...
299
  	return 0;
08d8e9749   Fengguang Wu   writeback: fix nt...
300
  }
08d8e9749   Fengguang Wu   writeback: fix nt...
301

2c1365791   Fengguang Wu   writeback: fix ti...
302
  /*
01c031945   Christoph Hellwig   cleanup __writeba...
303
304
305
306
307
308
309
310
   * Wait for writeback on an inode to complete.
   */
  static void inode_wait_for_writeback(struct inode *inode)
  {
  	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
  	wait_queue_head_t *wqh;
  
  	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
250df6ed2   Dave Chinner   fs: protect inode...
311
312
  	while (inode->i_state & I_SYNC) {
  		spin_unlock(&inode->i_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
313
  		spin_unlock(&inode_wb_list_lock);
01c031945   Christoph Hellwig   cleanup __writeba...
314
  		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
a66979aba   Dave Chinner   fs: move i_wb_lis...
315
  		spin_lock(&inode_wb_list_lock);
250df6ed2   Dave Chinner   fs: protect inode...
316
  		spin_lock(&inode->i_lock);
58a9d3d8d   Richard Kennedy   fs-writeback: che...
317
  	}
01c031945   Christoph Hellwig   cleanup __writeba...
318
319
320
  }
  
  /*
0f1b1fd86   Dave Chinner   fs: pull inode->i...
321
322
323
   * Write out an inode's dirty pages.  Called under inode_wb_list_lock and
   * inode->i_lock.  Either the caller has an active reference on the inode or
   * the inode has I_WILL_FREE set.
01c031945   Christoph Hellwig   cleanup __writeba...
324
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
326
327
328
329
   * If `wait' is set, wait on the writeout.
   *
   * The whole writeout design is quite complex and fragile.  We want to avoid
   * starvation of particular inodes when others are being redirtied, prevent
   * livelocks, etc.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
   */
  static int
01c031945   Christoph Hellwig   cleanup __writeba...
332
  writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
333
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
  	struct address_space *mapping = inode->i_mapping;
01c031945   Christoph Hellwig   cleanup __writeba...
335
  	unsigned dirty;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
336
  	int ret;
0f1b1fd86   Dave Chinner   fs: pull inode->i...
337
338
  	assert_spin_locked(&inode_wb_list_lock);
  	assert_spin_locked(&inode->i_lock);
01c031945   Christoph Hellwig   cleanup __writeba...
339
340
341
342
343
344
345
346
  	if (!atomic_read(&inode->i_count))
  		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
  	else
  		WARN_ON(inode->i_state & I_WILL_FREE);
  
  	if (inode->i_state & I_SYNC) {
  		/*
  		 * If this inode is locked for writeback and we are not doing
66f3b8e2e   Jens Axboe   writeback: move d...
347
  		 * writeback-for-data-integrity, move it to b_more_io so that
01c031945   Christoph Hellwig   cleanup __writeba...
348
349
350
  		 * writeback can proceed with the other inodes on s_io.
  		 *
  		 * We'll have another go at writing back this inode when we
66f3b8e2e   Jens Axboe   writeback: move d...
351
  		 * completed a full scan of b_io.
01c031945   Christoph Hellwig   cleanup __writeba...
352
  		 */
a9185b41a   Christoph Hellwig   pass writeback_co...
353
  		if (wbc->sync_mode != WB_SYNC_ALL) {
01c031945   Christoph Hellwig   cleanup __writeba...
354
355
356
357
358
359
360
361
362
  			requeue_io(inode);
  			return 0;
  		}
  
  		/*
  		 * It's a data-integrity sync.  We must wait.
  		 */
  		inode_wait_for_writeback(inode);
  	}
1c0eeaf56   Joern Engel   introduce I_SYNC
363
  	BUG_ON(inode->i_state & I_SYNC);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
364

5547e8aac   Dmitry Monakhov   writeback: Update...
365
  	/* Set I_SYNC, reset I_DIRTY_PAGES */
1c0eeaf56   Joern Engel   introduce I_SYNC
366
  	inode->i_state |= I_SYNC;
5547e8aac   Dmitry Monakhov   writeback: Update...
367
  	inode->i_state &= ~I_DIRTY_PAGES;
250df6ed2   Dave Chinner   fs: protect inode...
368
  	spin_unlock(&inode->i_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
369
  	spin_unlock(&inode_wb_list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
371
  
  	ret = do_writepages(mapping, wbc);
26821ed40   Christoph Hellwig   make sure data is...
372
373
374
375
376
  	/*
  	 * Make sure to wait on the data before writing out the metadata.
  	 * This is important for filesystems that modify metadata on data
  	 * I/O completion.
  	 */
a9185b41a   Christoph Hellwig   pass writeback_co...
377
  	if (wbc->sync_mode == WB_SYNC_ALL) {
26821ed40   Christoph Hellwig   make sure data is...
378
  		int err = filemap_fdatawait(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
379
380
381
  		if (ret == 0)
  			ret = err;
  	}
5547e8aac   Dmitry Monakhov   writeback: Update...
382
383
384
385
386
  	/*
  	 * Some filesystems may redirty the inode during the writeback
  	 * due to delalloc, clear dirty metadata flags right before
  	 * write_inode()
  	 */
250df6ed2   Dave Chinner   fs: protect inode...
387
  	spin_lock(&inode->i_lock);
5547e8aac   Dmitry Monakhov   writeback: Update...
388
389
  	dirty = inode->i_state & I_DIRTY;
  	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
250df6ed2   Dave Chinner   fs: protect inode...
390
  	spin_unlock(&inode->i_lock);
26821ed40   Christoph Hellwig   make sure data is...
391
392
  	/* Don't write the inode if only I_DIRTY_PAGES was set */
  	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
a9185b41a   Christoph Hellwig   pass writeback_co...
393
  		int err = write_inode(inode, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
394
395
396
  		if (ret == 0)
  			ret = err;
  	}
a66979aba   Dave Chinner   fs: move i_wb_lis...
397
  	spin_lock(&inode_wb_list_lock);
250df6ed2   Dave Chinner   fs: protect inode...
398
  	spin_lock(&inode->i_lock);
1c0eeaf56   Joern Engel   introduce I_SYNC
399
  	inode->i_state &= ~I_SYNC;
a4ffdde6e   Al Viro   simplify checks f...
400
  	if (!(inode->i_state & I_FREEING)) {
23539afc7   Wu Fengguang   writeback: don't ...
401
  		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
402
403
  			/*
  			 * We didn't write back all the pages.  nfs_writepages()
a50aeb401   Wu Fengguang   writeback: merge ...
404
  			 * sometimes bales out without doing anything.
1b43ef91d   Andrew Morton   writeback: fix co...
405
  			 */
a50aeb401   Wu Fengguang   writeback: merge ...
406
407
  			inode->i_state |= I_DIRTY_PAGES;
  			if (wbc->nr_to_write <= 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408
  				/*
a50aeb401   Wu Fengguang   writeback: merge ...
409
  				 * slice used up: queue for next turn
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
410
  				 */
a50aeb401   Wu Fengguang   writeback: merge ...
411
  				requeue_io(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
412
413
  			} else {
  				/*
a50aeb401   Wu Fengguang   writeback: merge ...
414
415
416
417
418
  				 * Writeback blocked by something other than
  				 * congestion. Delay the inode for some time to
  				 * avoid spinning on the CPU (100% iowait)
  				 * retrying writeback of the dirty page/inode
  				 * that cannot be performed immediately.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
  				 */
1b43ef91d   Andrew Morton   writeback: fix co...
420
  				redirty_tail(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
  			}
23539afc7   Wu Fengguang   writeback: don't ...
422
423
424
425
426
427
428
429
  		} else if (inode->i_state & I_DIRTY) {
  			/*
  			 * Filesystems can dirty the inode during writeback
  			 * operations, such as delayed allocation during
  			 * submission or metadata updates after data IO
  			 * completion.
  			 */
  			redirty_tail(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
430
431
  		} else {
  			/*
9e38d86ff   Nick Piggin   fs: Implement laz...
432
433
434
  			 * The inode is clean.  At this point we either have
  			 * a reference to the inode or it's on its way out.
  			 * No need to add it back to the LRU.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
  			 */
7ccf19a80   Nick Piggin   fs: inode split I...
436
  			list_del_init(&inode->i_wb_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
437
438
  		}
  	}
1c0eeaf56   Joern Engel   introduce I_SYNC
439
  	inode_sync_complete(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440
441
  	return ret;
  }
03ba3782e   Jens Axboe   writeback: switch...
442
  /*
d19de7edf   Christoph Hellwig   writeback: fix wr...
443
   * For background writeback the caller does not have the sb pinned
03ba3782e   Jens Axboe   writeback: switch...
444
445
   * before calling writeback. So make sure that we do pin it, so it doesn't
   * go away while we are writing inodes from it.
03ba3782e   Jens Axboe   writeback: switch...
446
   */
d19de7edf   Christoph Hellwig   writeback: fix wr...
447
  static bool pin_sb_for_writeback(struct super_block *sb)
03ba3782e   Jens Axboe   writeback: switch...
448
  {
03ba3782e   Jens Axboe   writeback: switch...
449
  	spin_lock(&sb_lock);
29cb48594   Christoph Hellwig   writeback: fix pi...
450
451
452
453
  	if (list_empty(&sb->s_instances)) {
  		spin_unlock(&sb_lock);
  		return false;
  	}
03ba3782e   Jens Axboe   writeback: switch...
454
  	sb->s_count++;
29cb48594   Christoph Hellwig   writeback: fix pi...
455
  	spin_unlock(&sb_lock);
03ba3782e   Jens Axboe   writeback: switch...
456
  	if (down_read_trylock(&sb->s_umount)) {
29cb48594   Christoph Hellwig   writeback: fix pi...
457
  		if (sb->s_root)
d19de7edf   Christoph Hellwig   writeback: fix wr...
458
  			return true;
03ba3782e   Jens Axboe   writeback: switch...
459
460
  		up_read(&sb->s_umount);
  	}
29cb48594   Christoph Hellwig   writeback: fix pi...
461
462
  
  	put_super(sb);
d19de7edf   Christoph Hellwig   writeback: fix wr...
463
  	return false;
03ba3782e   Jens Axboe   writeback: switch...
464
  }
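
  /*
   * When pinning fails (e.g. the superblock is in the middle of being
   * unmounted), callers such as writeback_inodes_wb() below simply
   * requeue the inode and move on.
   */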
f11c9c5c2   Edward Shishkin   vfs: improve writ...
465
466
  /*
   * Write a portion of b_io inodes which belong to @sb.
edadfb10b   Christoph Hellwig   writeback: split ...
467
468
   *
   * If @only_this_sb is true, then find and write all such
f11c9c5c2   Edward Shishkin   vfs: improve writ...
469
470
   * inodes. Otherwise write only ones which go sequentially
   * in reverse order.
edadfb10b   Christoph Hellwig   writeback: split ...
471
   *
f11c9c5c2   Edward Shishkin   vfs: improve writ...
472
473
474
   * Return 1, if the caller writeback routine should be
   * interrupted. Otherwise return 0.
   */
edadfb10b   Christoph Hellwig   writeback: split ...
475
476
  static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
  		struct writeback_control *wbc, bool only_this_sb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477
  {
03ba3782e   Jens Axboe   writeback: switch...
478
  	while (!list_empty(&wb->b_io)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479
  		long pages_skipped;
7ccf19a80   Nick Piggin   fs: inode split I...
480
  		struct inode *inode = wb_inode(wb->b_io.prev);
edadfb10b   Christoph Hellwig   writeback: split ...
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
  
  		if (inode->i_sb != sb) {
  			if (only_this_sb) {
  				/*
  				 * We only want to write back data for this
  				 * superblock, move all inodes not belonging
  				 * to it back onto the dirty list.
  				 */
  				redirty_tail(inode);
  				continue;
  			}
  
  			/*
  			 * The inode belongs to a different superblock.
  			 * Bounce back to the caller to unpin this and
  			 * pin the next superblock.
  			 */
f11c9c5c2   Edward Shishkin   vfs: improve writ...
498
  			return 0;
edadfb10b   Christoph Hellwig   writeback: split ...
499
  		}
9843b76aa   Christoph Hellwig   fs: skip I_FREEIN...
500
501
502
503
504
  		/*
  		 * Don't bother with new inodes or inodes being freed, first
  		 * kind does not need periodic writeout yet, and for the latter
  		 * kind writeout is handled by the freer.
  		 */
250df6ed2   Dave Chinner   fs: protect inode...
505
  		spin_lock(&inode->i_lock);
9843b76aa   Christoph Hellwig   fs: skip I_FREEIN...
506
  		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
250df6ed2   Dave Chinner   fs: protect inode...
507
  			spin_unlock(&inode->i_lock);
7ef0d7377   Nick Piggin   fs: new inode i_s...
508
509
510
  			requeue_io(inode);
  			continue;
  		}
9843b76aa   Christoph Hellwig   fs: skip I_FREEIN...
511

d2caa3c54   Jeff Layton   writeback: guard ...
512
513
514
515
  		/*
  		 * Was this inode dirtied after sync_sb_inodes was called?
  		 * This keeps sync from extra jobs and livelock.
  		 */
250df6ed2   Dave Chinner   fs: protect inode...
516
517
  		if (inode_dirtied_after(inode, wbc->wb_start)) {
  			spin_unlock(&inode->i_lock);
f11c9c5c2   Edward Shishkin   vfs: improve writ...
518
  			return 1;
250df6ed2   Dave Chinner   fs: protect inode...
519
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
520

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
521
  		__iget(inode);
250df6ed2   Dave Chinner   fs: protect inode...
522

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
523
  		pages_skipped = wbc->pages_skipped;
01c031945   Christoph Hellwig   cleanup __writeba...
524
  		writeback_single_inode(inode, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
525
526
527
528
529
  		if (wbc->pages_skipped != pages_skipped) {
  			/*
  			 * writeback is not making progress due to locked
  			 * buffers.  Skip this inode for now.
  			 */
f57b9b7b4   Andrew Morton   writeback: fix ti...
530
  			redirty_tail(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
531
  		}
0f1b1fd86   Dave Chinner   fs: pull inode->i...
532
  		spin_unlock(&inode->i_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
533
  		spin_unlock(&inode_wb_list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
534
  		iput(inode);
4ffc84442   OGAWA Hirofumi   [PATCH] Move cond...
535
  		cond_resched();
a66979aba   Dave Chinner   fs: move i_wb_lis...
536
  		spin_lock(&inode_wb_list_lock);
8bc3be275   Fengguang Wu   writeback: speed ...
537
538
  		if (wbc->nr_to_write <= 0) {
  			wbc->more_io = 1;
f11c9c5c2   Edward Shishkin   vfs: improve writ...
539
  			return 1;
8bc3be275   Fengguang Wu   writeback: speed ...
540
  		}
03ba3782e   Jens Axboe   writeback: switch...
541
  		if (!list_empty(&wb->b_more_io))
8bc3be275   Fengguang Wu   writeback: speed ...
542
  			wbc->more_io = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
543
  	}
f11c9c5c2   Edward Shishkin   vfs: improve writ...
544
545
546
  	/* b_io is empty */
  	return 1;
  }
9c3a8ee8a   Christoph Hellwig   writeback: remove...
547
548
  void writeback_inodes_wb(struct bdi_writeback *wb,
  		struct writeback_control *wbc)
f11c9c5c2   Edward Shishkin   vfs: improve writ...
549
550
  {
  	int ret = 0;
7624ee72a   Jan Kara   mm: avoid resetti...
551
552
  	if (!wbc->wb_start)
  		wbc->wb_start = jiffies; /* livelock avoidance */
a66979aba   Dave Chinner   fs: move i_wb_lis...
553
  	spin_lock(&inode_wb_list_lock);
f11c9c5c2   Edward Shishkin   vfs: improve writ...
554
555
  	if (!wbc->for_kupdate || list_empty(&wb->b_io))
  		queue_io(wb, wbc->older_than_this);
38f219776   Nick Piggin   fs: sync_sb_inode...
556

f11c9c5c2   Edward Shishkin   vfs: improve writ...
557
  	while (!list_empty(&wb->b_io)) {
7ccf19a80   Nick Piggin   fs: inode split I...
558
  		struct inode *inode = wb_inode(wb->b_io.prev);
f11c9c5c2   Edward Shishkin   vfs: improve writ...
559
  		struct super_block *sb = inode->i_sb;
9ecc2738a   Jens Axboe   writeback: make t...
560

edadfb10b   Christoph Hellwig   writeback: split ...
561
562
563
  		if (!pin_sb_for_writeback(sb)) {
  			requeue_io(inode);
  			continue;
f11c9c5c2   Edward Shishkin   vfs: improve writ...
564
  		}
edadfb10b   Christoph Hellwig   writeback: split ...
565
566
  		ret = writeback_sb_inodes(sb, wb, wbc, false);
  		drop_super(sb);
f11c9c5c2   Edward Shishkin   vfs: improve writ...
567

f11c9c5c2   Edward Shishkin   vfs: improve writ...
568
569
570
  		if (ret)
  			break;
  	}
a66979aba   Dave Chinner   fs: move i_wb_lis...
571
  	spin_unlock(&inode_wb_list_lock);
66f3b8e2e   Jens Axboe   writeback: move d...
572
573
  	/* Leave any unwritten inodes on b_io */
  }
edadfb10b   Christoph Hellwig   writeback: split ...
574
575
576
577
  static void __writeback_inodes_sb(struct super_block *sb,
  		struct bdi_writeback *wb, struct writeback_control *wbc)
  {
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
a66979aba   Dave Chinner   fs: move i_wb_lis...
578
  	spin_lock(&inode_wb_list_lock);
edadfb10b   Christoph Hellwig   writeback: split ...
579
580
581
  	if (!wbc->for_kupdate || list_empty(&wb->b_io))
  		queue_io(wb, wbc->older_than_this);
  	writeback_sb_inodes(sb, wb, wbc, true);
a66979aba   Dave Chinner   fs: move i_wb_lis...
582
  	spin_unlock(&inode_wb_list_lock);
edadfb10b   Christoph Hellwig   writeback: split ...
583
  }
66f3b8e2e   Jens Axboe   writeback: move d...
584
  /*
03ba3782e   Jens Axboe   writeback: switch...
585
586
587
588
589
590
591
592
593
594
595
   * The maximum number of pages to writeout in a single bdi flush/kupdate
   * operation.  We do this so we don't hold I_SYNC against an inode for
   * enormous amounts of time, which would block a userspace task which has
   * been forced to throttle against that inode.  Also, the code reevaluates
   * the dirty state each time it has written this many pages.
   */
  #define MAX_WRITEBACK_PAGES     1024
  
  static inline bool over_bground_thresh(void)
  {
  	unsigned long background_thresh, dirty_thresh;
16c4042f0   Wu Fengguang   writeback: avoid ...
596
  	global_dirty_limits(&background_thresh, &dirty_thresh);
03ba3782e   Jens Axboe   writeback: switch...
597
598
  
  	return (global_page_state(NR_FILE_DIRTY) +
4cbec4c8b   Wu Fengguang   writeback: remove...
599
  		global_page_state(NR_UNSTABLE_NFS) > background_thresh);
03ba3782e   Jens Axboe   writeback: switch...
600
601
602
603
  }
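
  /*
   * In other words: background writeback keeps running for as long as the
   * global count of dirty plus unstable-NFS pages exceeds the background
   * threshold computed by global_dirty_limits().
   */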
  
  /*
   * Explicit flushing or periodic writeback of "old" data.
66f3b8e2e   Jens Axboe   writeback: move d...
604
   *
03ba3782e   Jens Axboe   writeback: switch...
605
606
607
608
   * Define "old": the first time one of an inode's pages is dirtied, we mark the
   * dirtying-time in the inode's address_space.  So this periodic writeback code
   * just walks the superblock inode list, writing back any inodes which are
   * older than a specific point in time.
66f3b8e2e   Jens Axboe   writeback: move d...
609
   *
03ba3782e   Jens Axboe   writeback: switch...
610
611
612
   * Try to run once per dirty_writeback_interval.  But if a writeback event
   * takes longer than a dirty_writeback_interval interval, then leave a
   * one-second gap.
66f3b8e2e   Jens Axboe   writeback: move d...
613
   *
03ba3782e   Jens Axboe   writeback: switch...
614
615
   * older_than_this takes precedence over nr_to_write.  So we'll only write back
   * all dirty pages if they are all attached to "old" mappings.
66f3b8e2e   Jens Axboe   writeback: move d...
616
   */
c4a77a6c7   Jens Axboe   writeback: make w...
617
  static long wb_writeback(struct bdi_writeback *wb,
83ba7b071   Christoph Hellwig   writeback: simpli...
618
  			 struct wb_writeback_work *work)
66f3b8e2e   Jens Axboe   writeback: move d...
619
  {
03ba3782e   Jens Axboe   writeback: switch...
620
  	struct writeback_control wbc = {
83ba7b071   Christoph Hellwig   writeback: simpli...
621
  		.sync_mode		= work->sync_mode,
03ba3782e   Jens Axboe   writeback: switch...
622
  		.older_than_this	= NULL,
83ba7b071   Christoph Hellwig   writeback: simpli...
623
624
625
  		.for_kupdate		= work->for_kupdate,
  		.for_background		= work->for_background,
  		.range_cyclic		= work->range_cyclic,
03ba3782e   Jens Axboe   writeback: switch...
626
627
628
  	};
  	unsigned long oldest_jif;
  	long wrote = 0;
b9543dac5   Jan Kara   writeback: avoid ...
629
  	long write_chunk;
a5989bdc9   Jan Kara   fs: Fix busyloop ...
630
  	struct inode *inode;
66f3b8e2e   Jens Axboe   writeback: move d...
631

03ba3782e   Jens Axboe   writeback: switch...
632
633
634
635
636
  	if (wbc.for_kupdate) {
  		wbc.older_than_this = &oldest_jif;
  		oldest_jif = jiffies -
  				msecs_to_jiffies(dirty_expire_interval * 10);
  	}
c4a77a6c7   Jens Axboe   writeback: make w...
637
638
639
640
  	if (!wbc.range_cyclic) {
  		wbc.range_start = 0;
  		wbc.range_end = LLONG_MAX;
  	}
38f219776   Nick Piggin   fs: sync_sb_inode...
641

b9543dac5   Jan Kara   writeback: avoid ...
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
  	/*
  	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
  	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
  	 * here avoids calling into writeback_inodes_wb() more than once.
  	 *
  	 * The intended call sequence for WB_SYNC_ALL writeback is:
  	 *
  	 *      wb_writeback()
  	 *          __writeback_inodes_sb()     <== called only once
  	 *              write_cache_pages()     <== called once for each inode
  	 *                   (quickly) tag currently dirty pages
  	 *                   (maybe slowly) sync all tagged pages
  	 */
  	if (wbc.sync_mode == WB_SYNC_NONE)
  		write_chunk = MAX_WRITEBACK_PAGES;
  	else
  		write_chunk = LONG_MAX;
7624ee72a   Jan Kara   mm: avoid resetti...
659
  	wbc.wb_start = jiffies; /* livelock avoidance */
03ba3782e   Jens Axboe   writeback: switch...
660
661
  	for (;;) {
  		/*
d3ddec763   Wu Fengguang   writeback: stop b...
662
  		 * Stop writeback when nr_pages has been consumed
03ba3782e   Jens Axboe   writeback: switch...
663
  		 */
83ba7b071   Christoph Hellwig   writeback: simpli...
664
  		if (work->nr_pages <= 0)
03ba3782e   Jens Axboe   writeback: switch...
665
  			break;
66f3b8e2e   Jens Axboe   writeback: move d...
666

38f219776   Nick Piggin   fs: sync_sb_inode...
667
  		/*
aa373cf55   Jan Kara   writeback: stop b...
668
669
670
671
672
673
674
675
676
677
  		 * Background writeout and kupdate-style writeback may
  		 * run forever. Stop them if there is other work to do
  		 * so that e.g. sync can proceed. They'll be restarted
  		 * after the other works are all done.
  		 */
  		if ((work->for_background || work->for_kupdate) &&
  		    !list_empty(&wb->bdi->work_list))
  			break;
  
  		/*
d3ddec763   Wu Fengguang   writeback: stop b...
678
679
  		 * For background writeout, stop when we are below the
  		 * background dirty threshold
38f219776   Nick Piggin   fs: sync_sb_inode...
680
  		 */
83ba7b071   Christoph Hellwig   writeback: simpli...
681
  		if (work->for_background && !over_bground_thresh())
03ba3782e   Jens Axboe   writeback: switch...
682
  			break;
38f219776   Nick Piggin   fs: sync_sb_inode...
683

03ba3782e   Jens Axboe   writeback: switch...
684
  		wbc.more_io = 0;
b9543dac5   Jan Kara   writeback: avoid ...
685
  		wbc.nr_to_write = write_chunk;
03ba3782e   Jens Axboe   writeback: switch...
686
  		wbc.pages_skipped = 0;
028c2dd18   Dave Chinner   writeback: Add tr...
687
688
  
  		trace_wbc_writeback_start(&wbc, wb->bdi);
83ba7b071   Christoph Hellwig   writeback: simpli...
689
690
  		if (work->sb)
  			__writeback_inodes_sb(work->sb, wb, &wbc);
edadfb10b   Christoph Hellwig   writeback: split ...
691
692
  		else
  			writeback_inodes_wb(wb, &wbc);
028c2dd18   Dave Chinner   writeback: Add tr...
693
  		trace_wbc_writeback_written(&wbc, wb->bdi);
b9543dac5   Jan Kara   writeback: avoid ...
694
695
  		work->nr_pages -= write_chunk - wbc.nr_to_write;
  		wrote += write_chunk - wbc.nr_to_write;
03ba3782e   Jens Axboe   writeback: switch...
696
697
  
  		/*
71fd05a88   Jens Axboe   writeback: improv...
698
  		 * If we consumed everything, see if we have more
03ba3782e   Jens Axboe   writeback: switch...
699
  		 */
71fd05a88   Jens Axboe   writeback: improv...
700
701
702
703
704
705
  		if (wbc.nr_to_write <= 0)
  			continue;
  		/*
  		 * Didn't write everything and we don't have more IO, bail
  		 */
  		if (!wbc.more_io)
03ba3782e   Jens Axboe   writeback: switch...
706
  			break;
71fd05a88   Jens Axboe   writeback: improv...
707
708
709
  		/*
  		 * Did we write something? Try for more
  		 */
b9543dac5   Jan Kara   writeback: avoid ...
710
  		if (wbc.nr_to_write < write_chunk)
71fd05a88   Jens Axboe   writeback: improv...
711
712
713
714
715
716
  			continue;
  		/*
  		 * Nothing written. Wait for some inode to
  		 * become available for writeback. Otherwise
  		 * we'll just busyloop.
  		 */
a66979aba   Dave Chinner   fs: move i_wb_lis...
717
  		spin_lock(&inode_wb_list_lock);
71fd05a88   Jens Axboe   writeback: improv...
718
  		if (!list_empty(&wb->b_more_io))  {
7ccf19a80   Nick Piggin   fs: inode split I...
719
  			inode = wb_inode(wb->b_more_io.prev);
028c2dd18   Dave Chinner   writeback: Add tr...
720
  			trace_wbc_writeback_wait(&wbc, wb->bdi);
250df6ed2   Dave Chinner   fs: protect inode...
721
  			spin_lock(&inode->i_lock);
71fd05a88   Jens Axboe   writeback: improv...
722
  			inode_wait_for_writeback(inode);
250df6ed2   Dave Chinner   fs: protect inode...
723
  			spin_unlock(&inode->i_lock);
03ba3782e   Jens Axboe   writeback: switch...
724
  		}
a66979aba   Dave Chinner   fs: move i_wb_lis...
725
  		spin_unlock(&inode_wb_list_lock);
03ba3782e   Jens Axboe   writeback: switch...
726
727
728
729
730
731
  	}
  
  	return wrote;
  }
  
  /*
83ba7b071   Christoph Hellwig   writeback: simpli...
732
   * Return the next wb_writeback_work struct that hasn't been processed yet.
03ba3782e   Jens Axboe   writeback: switch...
733
   */
83ba7b071   Christoph Hellwig   writeback: simpli...
734
  static struct wb_writeback_work *
08852b6d6   Minchan Kim   writeback: remove...
735
  get_next_work_item(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
736
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
737
  	struct wb_writeback_work *work = NULL;
03ba3782e   Jens Axboe   writeback: switch...
738

6467716a3   Artem Bityutskiy   writeback: optimi...
739
  	spin_lock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
740
741
742
743
  	if (!list_empty(&bdi->work_list)) {
  		work = list_entry(bdi->work_list.next,
  				  struct wb_writeback_work, list);
  		list_del_init(&work->list);
03ba3782e   Jens Axboe   writeback: switch...
744
  	}
6467716a3   Artem Bityutskiy   writeback: optimi...
745
  	spin_unlock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
746
  	return work;
03ba3782e   Jens Axboe   writeback: switch...
747
  }
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
748
749
750
751
752
753
754
755
756
757
  /*
   * Add in the number of potentially dirty inodes, because each inode
   * write can dirty pagecache in the underlying blockdev.
   */
  static unsigned long get_nr_dirty_pages(void)
  {
  	return global_page_state(NR_FILE_DIRTY) +
  		global_page_state(NR_UNSTABLE_NFS) +
  		get_nr_dirty_inodes();
  }
6585027a5   Jan Kara   writeback: integr...
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
  static long wb_check_background_flush(struct bdi_writeback *wb)
  {
  	if (over_bground_thresh()) {
  
  		struct wb_writeback_work work = {
  			.nr_pages	= LONG_MAX,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_background	= 1,
  			.range_cyclic	= 1,
  		};
  
  		return wb_writeback(wb, &work);
  	}
  
  	return 0;
  }
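
  /*
   * The kupdate-style check below is time-driven instead: it fires at most
   * once per dirty_writeback_interval rather than reacting to the dirty
   * thresholds.
   */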
03ba3782e   Jens Axboe   writeback: switch...
774
775
776
777
  static long wb_check_old_data_flush(struct bdi_writeback *wb)
  {
  	unsigned long expired;
  	long nr_pages;
69b62d01e   Jens Axboe   writeback: disabl...
778
779
780
781
782
  	/*
  	 * When set to zero, disable periodic writeback
  	 */
  	if (!dirty_writeback_interval)
  		return 0;
03ba3782e   Jens Axboe   writeback: switch...
783
784
785
786
787
788
  	expired = wb->last_old_flush +
  			msecs_to_jiffies(dirty_writeback_interval * 10);
  	if (time_before(jiffies, expired))
  		return 0;
  
  	wb->last_old_flush = jiffies;
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
789
  	nr_pages = get_nr_dirty_pages();
03ba3782e   Jens Axboe   writeback: switch...
790

c4a77a6c7   Jens Axboe   writeback: make w...
791
  	if (nr_pages) {
83ba7b071   Christoph Hellwig   writeback: simpli...
792
  		struct wb_writeback_work work = {
c4a77a6c7   Jens Axboe   writeback: make w...
793
794
795
796
797
  			.nr_pages	= nr_pages,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_kupdate	= 1,
  			.range_cyclic	= 1,
  		};
83ba7b071   Christoph Hellwig   writeback: simpli...
798
  		return wb_writeback(wb, &work);
c4a77a6c7   Jens Axboe   writeback: make w...
799
  	}
03ba3782e   Jens Axboe   writeback: switch...
800
801
802
803
804
805
806
807
808
809
  
  	return 0;
  }
  
  /*
   * Retrieve work items and do the writeback they describe
   */
  long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  {
  	struct backing_dev_info *bdi = wb->bdi;
83ba7b071   Christoph Hellwig   writeback: simpli...
810
  	struct wb_writeback_work *work;
c4a77a6c7   Jens Axboe   writeback: make w...
811
  	long wrote = 0;
03ba3782e   Jens Axboe   writeback: switch...
812

81d73a32d   Jan Kara   mm: fix writeback...
813
  	set_bit(BDI_writeback_running, &wb->bdi->state);
08852b6d6   Minchan Kim   writeback: remove...
814
  	while ((work = get_next_work_item(bdi)) != NULL) {
03ba3782e   Jens Axboe   writeback: switch...
815
816
  		/*
  		 * Override sync mode, in case we must wait for completion
83ba7b071   Christoph Hellwig   writeback: simpli...
817
  		 * because this thread is exiting now.
03ba3782e   Jens Axboe   writeback: switch...
818
819
  		 */
  		if (force_wait)
83ba7b071   Christoph Hellwig   writeback: simpli...
820
  			work->sync_mode = WB_SYNC_ALL;
03ba3782e   Jens Axboe   writeback: switch...
821

455b28646   Dave Chinner   writeback: Initia...
822
  		trace_writeback_exec(bdi, work);
83ba7b071   Christoph Hellwig   writeback: simpli...
823
  		wrote += wb_writeback(wb, work);
03ba3782e   Jens Axboe   writeback: switch...
824
825
  
  		/*
83ba7b071   Christoph Hellwig   writeback: simpli...
826
827
  		 * Notify the caller of completion if this is a synchronous
  		 * work item, otherwise just free it.
03ba3782e   Jens Axboe   writeback: switch...
828
  		 */
83ba7b071   Christoph Hellwig   writeback: simpli...
829
830
831
832
  		if (work->done)
  			complete(work->done);
  		else
  			kfree(work);
03ba3782e   Jens Axboe   writeback: switch...
833
834
835
836
837
838
  	}
  
  	/*
  	 * Check for periodic writeback, kupdated() style
  	 */
  	wrote += wb_check_old_data_flush(wb);
6585027a5   Jan Kara   writeback: integr...
839
  	wrote += wb_check_background_flush(wb);
81d73a32d   Jan Kara   mm: fix writeback...
840
  	clear_bit(BDI_writeback_running, &wb->bdi->state);
03ba3782e   Jens Axboe   writeback: switch...
841
842
843
844
845
846
847
848
  
  	return wrote;
  }
  
  /*
   * Handle writeback of dirty data for the device backed by this bdi. Also
   * wakes up periodically and does kupdated style flushing.
   */
082439004   Christoph Hellwig   writeback: merge ...
849
  int bdi_writeback_thread(void *data)
03ba3782e   Jens Axboe   writeback: switch...
850
  {
082439004   Christoph Hellwig   writeback: merge ...
851
852
  	struct bdi_writeback *wb = data;
  	struct backing_dev_info *bdi = wb->bdi;
03ba3782e   Jens Axboe   writeback: switch...
853
  	long pages_written;
766f91641   Peter Zijlstra   kernel: remove PF...
854
  	current->flags |= PF_SWAPWRITE;
082439004   Christoph Hellwig   writeback: merge ...
855
  	set_freezable();
ecd584030   Artem Bityutskiy   writeback: move l...
856
  	wb->last_active = jiffies;
082439004   Christoph Hellwig   writeback: merge ...
857
858
859
860
861
  
  	/*
  	 * Our parent may run at a different priority, just set us to normal
  	 */
  	set_user_nice(current, 0);
455b28646   Dave Chinner   writeback: Initia...
862
  	trace_writeback_thread_start(bdi);
03ba3782e   Jens Axboe   writeback: switch...
863
  	while (!kthread_should_stop()) {
6467716a3   Artem Bityutskiy   writeback: optimi...
864
865
866
867
868
  		/*
  		 * Remove own delayed wake-up timer, since we are already awake
  		 * and we'll take care of the periodic write-back.
  		 */
  		del_timer(&wb->wakeup_timer);
03ba3782e   Jens Axboe   writeback: switch...
869
  		pages_written = wb_do_writeback(wb, 0);
455b28646   Dave Chinner   writeback: Initia...
870
  		trace_writeback_pages_written(pages_written);
03ba3782e   Jens Axboe   writeback: switch...
871
  		if (pages_written)
ecd584030   Artem Bityutskiy   writeback: move l...
872
  			wb->last_active = jiffies;
03ba3782e   Jens Axboe   writeback: switch...
873

297252c81   Artem Bityutskiy   writeback: do not...
874
  		set_current_state(TASK_INTERRUPTIBLE);
b76b4014f   J. Bruce Fields   writeback: Fix lo...
875
  		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
f9eadbbd4   Jens Axboe   writeback: bdi_wr...
876
  			__set_current_state(TASK_RUNNING);
297252c81   Artem Bityutskiy   writeback: do not...
877
  			continue;
03ba3782e   Jens Axboe   writeback: switch...
878
  		}
253c34e9b   Artem Bityutskiy   writeback: preven...
879
  		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
fff5b85aa   Artem Bityutskiy   writeback: move b...
880
  			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
253c34e9b   Artem Bityutskiy   writeback: preven...
881
882
883
884
885
886
  		else {
  			/*
  			 * We have nothing to do, so can go sleep without any
  			 * timeout and save power. When a work is queued or
  			 * something is made dirty - we will be woken up.
  			 */
297252c81   Artem Bityutskiy   writeback: do not...
887
  			schedule();
f9eadbbd4   Jens Axboe   writeback: bdi_wr...
888
  		}
69b62d01e   Jens Axboe   writeback: disabl...
889

03ba3782e   Jens Axboe   writeback: switch...
890
891
  		try_to_freeze();
  	}
fff5b85aa   Artem Bityutskiy   writeback: move b...
892
  	/* Flush any work that raced with us exiting */
082439004   Christoph Hellwig   writeback: merge ...
893
894
  	if (!list_empty(&bdi->work_list))
  		wb_do_writeback(wb, 1);
455b28646   Dave Chinner   writeback: Initia...
895
896
  
  	trace_writeback_thread_stop(bdi);
03ba3782e   Jens Axboe   writeback: switch...
897
898
  	return 0;
  }
082439004   Christoph Hellwig   writeback: merge ...
899

03ba3782e   Jens Axboe   writeback: switch...
900
  /*
b8c2f3474   Christoph Hellwig   writeback: simpli...
901
902
   * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
   * the whole world.
03ba3782e   Jens Axboe   writeback: switch...
903
   */
b8c2f3474   Christoph Hellwig   writeback: simpli...
904
  void wakeup_flusher_threads(long nr_pages)
03ba3782e   Jens Axboe   writeback: switch...
905
  {
b8c2f3474   Christoph Hellwig   writeback: simpli...
906
  	struct backing_dev_info *bdi;
03ba3782e   Jens Axboe   writeback: switch...
907

83ba7b071   Christoph Hellwig   writeback: simpli...
908
909
  	if (!nr_pages) {
  		nr_pages = global_page_state(NR_FILE_DIRTY) +
b8c2f3474   Christoph Hellwig   writeback: simpli...
910
911
  				global_page_state(NR_UNSTABLE_NFS);
  	}
03ba3782e   Jens Axboe   writeback: switch...
912

b8c2f3474   Christoph Hellwig   writeback: simpli...
913
  	rcu_read_lock();
cfc4ba536   Jens Axboe   writeback: use RC...
914
  	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
03ba3782e   Jens Axboe   writeback: switch...
915
916
  		if (!bdi_has_dirty_io(bdi))
  			continue;
6585027a5   Jan Kara   writeback: integr...
917
  		__bdi_start_writeback(bdi, nr_pages, false);
03ba3782e   Jens Axboe   writeback: switch...
918
  	}
cfc4ba536   Jens Axboe   writeback: use RC...
919
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
920
  }
03ba3782e   Jens Axboe   writeback: switch...
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
  static noinline void block_dump___mark_inode_dirty(struct inode *inode)
  {
  	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
  		struct dentry *dentry;
  		const char *name = "?";
  
  		dentry = d_find_alias(inode);
  		if (dentry) {
  			spin_lock(&dentry->d_lock);
  			name = (const char *) dentry->d_name.name;
  		}
  		printk(KERN_DEBUG
  		       "%s(%d): dirtied inode %lu (%s) on %s
  ",
  		       current->comm, task_pid_nr(current), inode->i_ino,
  		       name, inode->i_sb->s_id);
  		if (dentry) {
  			spin_unlock(&dentry->d_lock);
  			dput(dentry);
  		}
  	}
  }
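
  /*
   * The helper above is only reached from __mark_inode_dirty() below when
   * the block_dump sysctl is enabled, so this printk() noise stays off by
   * default.
   */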
  
  /**
   *	__mark_inode_dirty -	internal function
   *	@inode: inode to mark
   *	@flags: what kind of dirty (i.e. I_DIRTY_SYNC)
   *	Mark an inode as dirty. Callers should use mark_inode_dirty or
   *  	mark_inode_dirty_sync.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
950
   *
03ba3782e   Jens Axboe   writeback: switch...
951
952
953
954
955
956
957
958
959
   * Put the inode on the super block's dirty list.
   *
   * CAREFUL! We mark it dirty unconditionally, but move it onto the
   * dirty list only if it is hashed or if it refers to a blockdev.
   * If it was not hashed, it will never be added to the dirty list
   * even if it is later hashed, as it will have been marked dirty already.
   *
   * In short, make sure you hash any inodes _before_ you start marking
   * them dirty.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
960
   *
03ba3782e   Jens Axboe   writeback: switch...
961
962
   * This function *must* be atomic for the I_DIRTY_PAGES case -
   * set_page_dirty() is called under spinlock in several places.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
963
   *
03ba3782e   Jens Axboe   writeback: switch...
964
965
966
967
968
969
   * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
   * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
   * the kernel-internal blockdev inode represents the dirtying time of the
   * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
   * page->mapping->host, so the page-dirtying time is recorded in the internal
   * blockdev inode.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
970
   */
03ba3782e   Jens Axboe   writeback: switch...
971
  void __mark_inode_dirty(struct inode *inode, int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
972
  {
03ba3782e   Jens Axboe   writeback: switch...
973
  	struct super_block *sb = inode->i_sb;
253c34e9b   Artem Bityutskiy   writeback: preven...
974
  	struct backing_dev_info *bdi = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975

03ba3782e   Jens Axboe   writeback: switch...
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
  	/*
  	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
  	 * dirty the inode itself
  	 */
  	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
  		if (sb->s_op->dirty_inode)
  			sb->s_op->dirty_inode(inode);
  	}
  
  	/*
  	 * make sure that changes are seen by all cpus before we test i_state
  	 * -- mikulas
  	 */
  	smp_mb();
  
  	/* avoid the locking if we can */
  	if ((inode->i_state & flags) == flags)
  		return;
  
  	if (unlikely(block_dump))
  		block_dump___mark_inode_dirty(inode);
	spin_lock(&inode->i_lock);
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		inode->i_state |= flags;

		/*
		 * If the inode is being synced, just update its dirty state.
		 * The unlocker will place the inode on the appropriate
		 * superblock list, based upon its state.
		 */
		if (inode->i_state & I_SYNC)
			goto out_unlock_inode;

		/*
		 * Only add valid (hashed) inodes to the superblock's
		 * dirty list.  Add blockdev inodes as well.
		 */
		if (!S_ISBLK(inode->i_mode)) {
			if (inode_unhashed(inode))
				goto out_unlock_inode;
		}
		if (inode->i_state & I_FREEING)
			goto out_unlock_inode;
  
  		/*
  		 * If the inode was already on b_dirty/b_io/b_more_io, don't
  		 * reposition it (that would break b_dirty time-ordering).
  		 */
  		if (!was_dirty) {
			bool wakeup_bdi = false;
			bdi = inode_to_bdi(inode);

			if (bdi_cap_writeback_dirty(bdi)) {
				WARN(!test_bit(BDI_registered, &bdi->state),
				     "bdi-%s not registered\n", bdi->name);

				/*
				 * If this is the first dirty inode for this
				 * bdi, we have to wake-up the corresponding
				 * bdi thread to make sure background
				 * write-back happens later.
				 */
				if (!wb_has_dirty_io(&bdi->wb))
					wakeup_bdi = true;
			}

			spin_unlock(&inode->i_lock);
			spin_lock(&inode_wb_list_lock);
			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			spin_unlock(&inode_wb_list_lock);

			if (wakeup_bdi)
				bdi_wakeup_thread_delayed(bdi);
			return;
		}
	}
out_unlock_inode:
	spin_unlock(&inode->i_lock);

  }
  EXPORT_SYMBOL(__mark_inode_dirty);
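
/*
 * Illustrative sketch (not part of this file): most callers reach
 * __mark_inode_dirty() through the mark_inode_dirty()/mark_inode_dirty_sync()
 * wrappers in <linux/fs.h>.  A filesystem that has just changed in-core
 * inode fields might do something like the hypothetical helper below;
 * example_touch_inode() is made up for illustration only.
 */
#if 0	/* example only, not compiled */
static void example_touch_inode(struct inode *inode)
{
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	/* only timestamps changed in core: I_DIRTY_SYNC is enough */
	mark_inode_dirty_sync(inode);
}
#endif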
  
  /*
   * Write out a superblock's list of dirty inodes.  A wait will be performed
   * upon no inodes, all inodes or the final one, depending upon sync_mode.
   *
   * If older_than_this is non-NULL, then only write out inodes which
   * had their first dirtying at a time earlier than *older_than_this.
   *
   * If `bdi' is non-zero then we're being asked to writeback a specific queue.
   * This function assumes that the blockdev superblock's inodes are backed by
   * a variety of queues, so all inodes are searched.  For other superblocks,
   * assume that all inodes are backed by the same queue.
   *
   * The inodes to be written are parked on bdi->b_io.  They are moved back onto
   * bdi->b_dirty as they are selected for writing.  This way, none can be missed
   * on the writer throttling path, and we get decent balancing between many
   * throttled threads: we don't want them all piling up on inode_sync_wait.
   */
static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
	 * because there may have been pages dirtied before our sync
	 * call, but which had writeout started before we write it out.
	 * In which case, the inode may not be on the dirty list, but
	 * we still have to wait for that writeout.
	 */
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		filemap_fdatawait(mapping);

		cond_resched();
		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
  }
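
/*
 * Illustrative sketch (not part of this file): wait_sb_inodes() is the
 * "wait" half of a data integrity sync for a whole superblock.  For a
 * single mapping the same write-then-wait pairing looks roughly like the
 * hypothetical helper below; example_sync_mapping() is made up for
 * illustration only.
 */
#if 0	/* example only, not compiled */
static int example_sync_mapping(struct address_space *mapping)
{
	int err = filemap_fdatawrite(mapping);	/* start writeout */
	int err2 = filemap_fdatawait(mapping);	/* wait for it to finish */

	return err ? err : err2;
}
#endif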
/**
 * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_NONE,
		.done		= &done,
		.nr_pages	= nr,
	};

	WARN_ON(!rwsem_is_locked(&sb->s_umount));
	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);
}
  EXPORT_SYMBOL(writeback_inodes_sb_nr);
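
/*
 * Illustrative sketch (not part of this file): writeback_inodes_sb_nr()
 * expects the caller to hold sb->s_umount, as the WARN_ON above insists.
 * A hypothetical caller that wants roughly 1024 pages written might do the
 * following; example_kick_writeback() is made up for illustration only.
 */
#if 0	/* example only, not compiled */
static void example_kick_writeback(struct super_block *sb)
{
	down_read(&sb->s_umount);
	/* waits for the flusher to pick up the request, not for the IO */
	writeback_inodes_sb_nr(sb, 1024);
	up_read(&sb->s_umount);
}
#endif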
  
  /**
   * writeback_inodes_sb	-	writeback dirty inodes from given super_block
   * @sb: the superblock
   *
   * Start writeback on some inodes on this super_block. No guarantees are made
   * on how many (if any) will be written, and this function does not wait
   * for IO completion of submitted IO.
   */
  void writeback_inodes_sb(struct super_block *sb)
  {
	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
}
EXPORT_SYMBOL(writeback_inodes_sb);

/**
   * writeback_inodes_sb_if_idle	-	start writeback if none underway
   * @sb: the superblock
   *
   * Invoke writeback_inodes_sb if no writeback is currently underway.
   * Returns 1 if writeback was started, 0 if not.
   */
  int writeback_inodes_sb_if_idle(struct super_block *sb)
  {
  	if (!writeback_in_progress(sb->s_bdi)) {
		down_read(&sb->s_umount);
		writeback_inodes_sb(sb);
		up_read(&sb->s_umount);
  		return 1;
  	} else
  		return 0;
  }
  EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
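
/*
 * Illustrative sketch (not part of this file): filesystems typically call
 * writeback_inodes_sb_if_idle() opportunistically, e.g. when space reserved
 * for delayed allocation runs low, to nudge the flusher without stacking up
 * redundant requests.  Hypothetical example:
 */
#if 0	/* example only, not compiled */
static void example_low_space_nudge(struct super_block *sb)
{
	/* harmless no-op if the flusher is already busy on this bdi */
	if (!writeback_inodes_sb_if_idle(sb))
		pr_debug("writeback already in progress\n");
}
#endif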
  
  /**
 * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
 * @sb: the superblock
 * @nr: the number of pages to write
 *
 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
   * Returns 1 if writeback was started, 0 if not.
   */
  int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
  				   unsigned long nr)
  {
  	if (!writeback_in_progress(sb->s_bdi)) {
  		down_read(&sb->s_umount);
  		writeback_inodes_sb_nr(sb, nr);
  		up_read(&sb->s_umount);
  		return 1;
  	} else
  		return 0;
  }
  EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
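
/*
 * Illustrative sketch (not part of this file): the _nr_ variant lets the
 * caller bound the amount of work, for instance to a fraction of the
 * globally dirty pages.  Hypothetical example; the divisor is arbitrary.
 */
#if 0	/* example only, not compiled */
static void example_bounded_nudge(struct super_block *sb)
{
	unsigned long nr = get_nr_dirty_pages() / 2;	/* arbitrary bound */

	writeback_inodes_sb_nr_if_idle(sb, nr);
}
#endif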
  
  /**
   * sync_inodes_sb	-	sync sb inode pages
   * @sb: the superblock
   *
   * This function writes and waits on any dirty inode belonging to this
 * super_block.
 */
void sync_inodes_sb(struct super_block *sb)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_ALL,
		.nr_pages	= LONG_MAX,
		.range_cyclic	= 0,
		.done		= &done,
	};

	WARN_ON(!rwsem_is_locked(&sb->s_umount));
	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);

	wait_sb_inodes(sb);
}
  EXPORT_SYMBOL(sync_inodes_sb);
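
/*
 * Illustrative sketch (not part of this file): sync_inodes_sb() is the data
 * integrity counterpart of writeback_inodes_sb(); sync(2)-style callers take
 * s_umount and then both write and wait.  Hypothetical example only; error
 * handling and the ->sync_fs step are omitted.
 */
#if 0	/* example only, not compiled */
static void example_sync_one_sb(struct super_block *sb)
{
	down_read(&sb->s_umount);
	if (sb->s_root)
		sync_inodes_sb(sb);	/* write everything and wait on it */
	up_read(&sb->s_umount);
}
#endif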

  /**
   * write_inode_now	-	write an inode to disk
   * @inode: inode to write to disk
   * @sync: whether the write should be synchronous or not
   *
   * This function commits an inode to disk immediately if it is dirty. This is
   * primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
 */
  int write_inode_now(struct inode *inode, int sync)
  {
  	int ret;
  	struct writeback_control wbc = {
  		.nr_to_write = LONG_MAX,
		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	if (!mapping_cap_writeback_dirty(inode->i_mapping))
		wbc.nr_to_write = 0;

	might_sleep();
	spin_lock(&inode_wb_list_lock);
	spin_lock(&inode->i_lock);
	ret = writeback_single_inode(inode, &wbc);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_wb_list_lock);
	if (sync)
		inode_sync_wait(inode);
  	return ret;
  }
  EXPORT_SYMBOL(write_inode_now);
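
/*
 * Illustrative sketch (not part of this file): a caller such as knfsd that
 * must have the inode on disk before replying can flush it synchronously.
 * Hypothetical example:
 */
#if 0	/* example only, not compiled */
static int example_commit_inode(struct inode *inode)
{
	/* sync == 1: write the inode and then wait via inode_sync_wait() */
	return write_inode_now(inode, 1);
}
#endif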
  
  /**
   * sync_inode - write an inode and its pages to disk.
   * @inode: the inode to sync
   * @wbc: controls the writeback mode
   *
   * sync_inode() will write an inode and its pages to disk.  It will also
   * correctly update the inode on its superblock's dirty inode lists and will
   * update inode->i_state.
   *
   * The caller must have a ref on the inode.
   */
  int sync_inode(struct inode *inode, struct writeback_control *wbc)
  {
  	int ret;
	spin_lock(&inode_wb_list_lock);
	spin_lock(&inode->i_lock);
	ret = writeback_single_inode(inode, wbc);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_wb_list_lock);
  	return ret;
  }
  EXPORT_SYMBOL(sync_inode);
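
/*
 * Illustrative sketch (not part of this file): sync_inode() takes a
 * caller-built writeback_control, so the caller can restrict the writeout,
 * for example to a byte range of the inode's data.  Hypothetical example;
 * example_sync_range() is made up for illustration only.
 */
#if 0	/* example only, not compiled */
static int example_sync_range(struct inode *inode, loff_t start, loff_t end)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,
		.nr_to_write	= LONG_MAX,
		.range_start	= start,
		.range_end	= end,
	};

	return sync_inode(inode, &wbc);
}
#endif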
  
  /**
 * sync_inode_metadata - write an inode to disk
   * @inode: the inode to sync
   * @wait: wait for I/O to complete.
   *
 * Write an inode to disk and adjust its dirty state after completion.
   *
   * Note: only writes the actual inode, no associated data or other metadata.
   */
  int sync_inode_metadata(struct inode *inode, int wait)
  {
  	struct writeback_control wbc = {
  		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
  		.nr_to_write = 0, /* metadata-only */
  	};
  
  	return sync_inode(inode, &wbc);
  }
  EXPORT_SYMBOL(sync_inode_metadata);
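
/*
 * Illustrative sketch (not part of this file): sync_inode_metadata() is the
 * natural tail of a simple fsync implementation: flush the data pages first,
 * then push the inode itself.  Hypothetical example, roughly the shape of
 * what generic per-file fsync helpers do; example_simple_fsync() is made up
 * for illustration only.
 */
#if 0	/* example only, not compiled */
static int example_simple_fsync(struct file *file)
{
	struct inode *inode = file->f_mapping->host;
	int err;

	/* flush and wait on the data pages first */
	err = filemap_write_and_wait(file->f_mapping);
	if (err)
		return err;

	/* then write the inode itself and wait for it */
	return sync_inode_metadata(inode, 1);
}
#endif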