Blame view

fs/fs-writeback.c 39.6 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   * fs/fs-writeback.c
   *
   * Copyright (C) 2002, Linus Torvalds.
   *
   * Contains all the functions related to writing back and waiting
   * upon dirty inodes against superblocks, and writing back dirty
   * pages against inodes.  ie: data writeback.  Writeout of the
   * inode itself is not handled here.
   *
e1f8e8744   Francois Cami   Remove Andrew Mor...
11
   * 10Apr2002	Andrew Morton
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
   *		Split out of fs/inode.c
   *		Additions for address_space-based writeback
   */
  
  #include <linux/kernel.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
17
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/spinlock.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
19
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
  #include <linux/sched.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
bc31b86a5   Wu Fengguang   writeback: move M...
23
  #include <linux/pagemap.h>
03ba3782e   Jens Axboe   writeback: switch...
24
  #include <linux/kthread.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
455b28646   Dave Chinner   writeback: Initia...
28
  #include <linux/tracepoint.h>
719ea2fbb   Al Viro   new helpers: lock...
29
  #include <linux/device.h>
07f3f05c1   David Howells   [PATCH] BLOCK: Mo...
30
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31

d0bceac74   Jens Axboe   writeback: get ri...
32
/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;			/* budget: at most this many pages */
	struct super_block *sb;		/* if set, write only this sb's inodes */
	unsigned long *older_than_this;	/* if set, only inodes dirtied before
					 * this (jiffies) are queued for IO */
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1; /* livelock-free sync via page tagging */
	unsigned int for_kupdate:1;	/* copied into wbc->for_kupdate */
	unsigned int range_cyclic:1;	/* copied into wbc->range_cyclic */
	unsigned int for_background:1;	/* copied into wbc->for_background */
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct completion *done;	/* set if the caller waits */
};
f11b00f3b   Adrian Bunk   fs/fs-writeback.c...
55
56
57
58
/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
 *
 * Determine whether there is writeback waiting to be handled against a
 * backing device.
 *
 * Returns non-zero iff BDI_writeback_running is set in @bdi->state.
 */
int writeback_in_progress(struct backing_dev_info *bdi)
{
	return test_bit(BDI_writeback_running, &bdi->state);
}
EXPORT_SYMBOL(writeback_in_progress);
f11b00f3b   Adrian Bunk   fs/fs-writeback.c...
67

692ebd17c   Jan Kara   bdi: Fix warnings...
68
69
70
  static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
  {
  	struct super_block *sb = inode->i_sb;
692ebd17c   Jan Kara   bdi: Fix warnings...
71

a8855990e   Jan Kara   writeback: Do not...
72
  	if (sb_is_blkdev_sb(sb))
aaead25b9   Christoph Hellwig   writeback: always...
73
74
75
  		return inode->i_mapping->backing_dev_info;
  
  	return sb->s_bdi;
692ebd17c   Jan Kara   bdi: Fix warnings...
76
  }
7ccf19a80   Nick Piggin   fs: inode split I...
77
78
79
80
/* Map an entry of a writeback list (b_dirty/b_io/b_more_io) to its inode */
static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_wb_list);
}
15eb77a07   Wu Fengguang   writeback: fix NU...
81
82
83
84
85
86
87
  /*
   * Include the creation of the trace points after defining the
   * wb_writeback_work structure and inline functions so that the definition
   * remains local to this file.
   */
  #define CREATE_TRACE_POINTS
  #include <trace/events/writeback.h>
774016b2d   Steven Whitehouse   GFS2: journal dat...
88
  EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);
5acda9d12   Jan Kara   bdi: avoid oops o...
89
90
91
92
93
94
95
/*
 * Kick the flusher work for @bdi to run immediately.  Checking
 * BDI_registered under wb_lock avoids rearming bdi->wb.dwork for a bdi
 * that is being torn down (same pattern as bdi_queue_work()).
 */
static void bdi_wakeup_thread(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi->wb_lock);
	if (test_bit(BDI_registered, &bdi->state))
		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
	spin_unlock_bh(&bdi->wb_lock);
}
6585027a5   Jan Kara   writeback: integr...
96
97
98
99
100
101
/*
 * Queue a writeback work item on @bdi->work_list and kick the flusher.
 * If the bdi is no longer registered the work is dropped; a waiting
 * caller (work->done set) is completed immediately so it cannot block
 * forever on a dying bdi.
 */
static void bdi_queue_work(struct backing_dev_info *bdi,
			   struct wb_writeback_work *work)
{
	trace_writeback_queue(bdi, work);

	spin_lock_bh(&bdi->wb_lock);
	if (!test_bit(BDI_registered, &bdi->state)) {
		if (work->done)
			complete(work->done);
		goto out_unlock;
	}
	list_add_tail(&work->list, &bdi->work_list);
	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
out_unlock:
	spin_unlock_bh(&bdi->wb_lock);
}
83ba7b071   Christoph Hellwig   writeback: simpli...
112
113
  static void
  __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
114
  		      bool range_cyclic, enum wb_reason reason)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
116
  	struct wb_writeback_work *work;
03ba3782e   Jens Axboe   writeback: switch...
117

bcddc3f01   Jens Axboe   writeback: inline...
118
119
120
121
  	/*
  	 * This is WB_SYNC_NONE writeback, so if allocation fails just
  	 * wakeup the thread for old dirty data writeback
  	 */
83ba7b071   Christoph Hellwig   writeback: simpli...
122
123
  	work = kzalloc(sizeof(*work), GFP_ATOMIC);
  	if (!work) {
839a8e866   Tejun Heo   writeback: replac...
124
  		trace_writeback_nowork(bdi);
5acda9d12   Jan Kara   bdi: avoid oops o...
125
  		bdi_wakeup_thread(bdi);
83ba7b071   Christoph Hellwig   writeback: simpli...
126
  		return;
bcddc3f01   Jens Axboe   writeback: inline...
127
  	}
03ba3782e   Jens Axboe   writeback: switch...
128

83ba7b071   Christoph Hellwig   writeback: simpli...
129
130
131
  	work->sync_mode	= WB_SYNC_NONE;
  	work->nr_pages	= nr_pages;
  	work->range_cyclic = range_cyclic;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
132
  	work->reason	= reason;
03ba3782e   Jens Axboe   writeback: switch...
133

83ba7b071   Christoph Hellwig   writeback: simpli...
134
  	bdi_queue_work(bdi, work);
b6e51316d   Jens Axboe   writeback: separa...
135
136
137
138
139
140
  }
  
/**
 * bdi_start_writeback - start writeback
 * @bdi: the backing device to write from
 * @nr_pages: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Description:
 *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
 *   started when this function returns, we make no guarantees on
 *   completion. Caller need not hold sb s_umount semaphore.
 */
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
			enum wb_reason reason)
{
	/* range_cyclic=true: cycle through the whole dirty range */
	__bdi_start_writeback(bdi, nr_pages, true, reason);
}
d3ddec763   Wu Fengguang   writeback: stop b...
154

c5444198c   Christoph Hellwig   writeback: simpli...
155
156
157
158
159
/**
 * bdi_start_background_writeback - start background writeback
 * @bdi: the backing device to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens. When
 *   this function returns, it is only guaranteed that for given BDI
 *   some IO is happening if we are over background dirty threshold.
 *   Caller need not hold sb s_umount semaphore.
 */
void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
	/*
	 * We just wake up the flusher thread. It will perform background
	 * writeback as soon as there is no other work to do.
	 */
	trace_writeback_wake_background(bdi);
	bdi_wakeup_thread(bdi);
}
  
  /*
a66979aba   Dave Chinner   fs: move i_wb_lis...
176
177
178
179
   * Remove the inode from the writeback list it is on.
   */
  void inode_wb_list_del(struct inode *inode)
  {
f758eeabe   Christoph Hellwig   writeback: split ...
180
181
182
  	struct backing_dev_info *bdi = inode_to_bdi(inode);
  
  	spin_lock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
183
  	list_del_init(&inode->i_wb_list);
f758eeabe   Christoph Hellwig   writeback: split ...
184
  	spin_unlock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
185
  }
a66979aba   Dave Chinner   fs: move i_wb_lis...
186
  /*
6610a0bc8   Andrew Morton   writeback: fix ti...
187
188
189
190
   * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
   * furthest end of its superblock's dirty-inode list.
   *
   * Before stamping the inode's ->dirtied_when, we check to see whether it is
66f3b8e2e   Jens Axboe   writeback: move d...
191
   * already the most-recently-dirtied inode on the b_dirty list.  If that is
6610a0bc8   Andrew Morton   writeback: fix ti...
192
193
194
   * the case then the inode must have been redirtied while it was being written
   * out and we don't reset its dirtied_when.
   */
f758eeabe   Christoph Hellwig   writeback: split ...
195
  static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
6610a0bc8   Andrew Morton   writeback: fix ti...
196
  {
f758eeabe   Christoph Hellwig   writeback: split ...
197
  	assert_spin_locked(&wb->list_lock);
03ba3782e   Jens Axboe   writeback: switch...
198
  	if (!list_empty(&wb->b_dirty)) {
66f3b8e2e   Jens Axboe   writeback: move d...
199
  		struct inode *tail;
6610a0bc8   Andrew Morton   writeback: fix ti...
200

7ccf19a80   Nick Piggin   fs: inode split I...
201
  		tail = wb_inode(wb->b_dirty.next);
66f3b8e2e   Jens Axboe   writeback: move d...
202
  		if (time_before(inode->dirtied_when, tail->dirtied_when))
6610a0bc8   Andrew Morton   writeback: fix ti...
203
204
  			inode->dirtied_when = jiffies;
  	}
7ccf19a80   Nick Piggin   fs: inode split I...
205
  	list_move(&inode->i_wb_list, &wb->b_dirty);
6610a0bc8   Andrew Morton   writeback: fix ti...
206
207
208
  }
  
/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	list_move(&inode->i_wb_list, &wb->b_more_io);
}
1c0eeaf56   Joern Engel   introduce I_SYNC
216
217
/*
 * Clear I_SYNC and wake anybody waiting on __I_SYNC.  Called with
 * i_lock held (see writeback_single_inode()).
 */
static void inode_sync_complete(struct inode *inode)
{
	inode->i_state &= ~I_SYNC;
	/* If inode is clean and unused, put it into LRU now... */
	inode_add_lru(inode);
	/* Waiters must see I_SYNC cleared before being woken up */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}
d2caa3c54   Jeff Layton   writeback: guard ...
225
226
227
228
229
230
231
232
  static bool inode_dirtied_after(struct inode *inode, unsigned long t)
  {
  	bool ret = time_after(inode->dirtied_when, t);
  #ifndef CONFIG_64BIT
  	/*
  	 * For inodes being constantly redirtied, dirtied_when can get stuck.
  	 * It _appears_ to be in the future, but is actually in distant past.
  	 * This test is necessary to prevent such wrapped-around relative times
5b0830cb9   Jens Axboe   writeback: get ri...
233
  	 * from permanently stopping the whole bdi writeback.
d2caa3c54   Jeff Layton   writeback: guard ...
234
235
236
237
238
  	 */
  	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
  #endif
  	return ret;
  }
c986d1e2a   Andrew Morton   writeback: fix ti...
239
/*
 * Move expired (dirtied before work->older_than_this) dirty inodes from
 * @delaying_queue to @dispatch_queue.
 *
 * Inodes from different superblocks are grouped together on
 * @dispatch_queue (blockdev inodes are exempt from the grouping).
 * Returns the number of inodes moved.  Caller holds wb->list_lock
 * (see queue_io()).
 */
static int move_expired_inodes(struct list_head *delaying_queue,
			       struct list_head *dispatch_queue,
			       struct wb_writeback_work *work)
{
	LIST_HEAD(tmp);
	struct list_head *pos, *node;
	struct super_block *sb = NULL;
	struct inode *inode;
	int do_sb_sort = 0;
	int moved = 0;

	/* Pull expired inodes (oldest at the list tail) onto a local list */
	while (!list_empty(delaying_queue)) {
		inode = wb_inode(delaying_queue->prev);
		if (work->older_than_this &&
		    inode_dirtied_after(inode, *work->older_than_this))
			break;
		list_move(&inode->i_wb_list, &tmp);
		moved++;
		/* blockdev inodes don't participate in per-sb grouping */
		if (sb_is_blkdev_sb(inode->i_sb))
			continue;
		if (sb && sb != inode->i_sb)
			do_sb_sort = 1;
		sb = inode->i_sb;
	}

	/* just one sb in list, splice to dispatch_queue and we're done */
	if (!do_sb_sort) {
		list_splice(&tmp, dispatch_queue);
		goto out;
	}

	/* Move inodes from one superblock together */
	while (!list_empty(&tmp)) {
		sb = wb_inode(tmp.prev)->i_sb;
		list_for_each_prev_safe(pos, node, &tmp) {
			inode = wb_inode(pos);
			if (inode->i_sb == sb)
				list_move(&inode->i_wb_list, dispatch_queue);
		}
	}
out:
	return moved;
}
  
/*
 * Queue all expired dirty inodes for io, eldest first.
 * Before
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    gf         edc     BA
 * After
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    g          fBAedc
 *                                           |
 *                                           +--> dequeue for IO
 */
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
	int moved;

	assert_spin_locked(&wb->list_lock);
	/* splice b_more_io in first, then append the expired b_dirty inodes */
	list_splice_init(&wb->b_more_io, &wb->b_io);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
	trace_writeback_queue_io(wb, work, moved);
}
a9185b41a   Christoph Hellwig   pass writeback_co...
304
  static int write_inode(struct inode *inode, struct writeback_control *wbc)
08d8e9749   Fengguang Wu   writeback: fix nt...
305
  {
9fb0a7da0   Tejun Heo   writeback: add mo...
306
307
308
309
310
311
312
313
  	int ret;
  
  	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
  		trace_writeback_write_inode_start(inode, wbc);
  		ret = inode->i_sb->s_op->write_inode(inode, wbc);
  		trace_writeback_write_inode(inode, wbc);
  		return ret;
  	}
03ba3782e   Jens Axboe   writeback: switch...
314
  	return 0;
08d8e9749   Fengguang Wu   writeback: fix nt...
315
  }
08d8e9749   Fengguang Wu   writeback: fix nt...
316

2c1365791   Fengguang Wu   writeback: fix ti...
317
/*
 * Wait for writeback on an inode to complete. Called with i_lock held.
 * Caller must make sure inode cannot go away when we drop i_lock.
 */
static void __inode_wait_for_writeback(struct inode *inode)
	__releases(inode->i_lock)
	__acquires(inode->i_lock)
{
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wqh;

	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	/*
	 * Re-check I_SYNC after each wakeup: i_lock is dropped while we
	 * sleep, so the inode may have been grabbed for sync again.
	 */
	while (inode->i_state & I_SYNC) {
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
	}
}
  
/*
 * Wait for writeback on an inode to complete. Caller must have inode pinned.
 */
void inode_wait_for_writeback(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	__inode_wait_for_writeback(inode);
	spin_unlock(&inode->i_lock);
}
  
/*
 * Sleep until I_SYNC is cleared. This function must be called with i_lock
 * held and drops it. It is aimed for callers not holding any inode reference
 * so once i_lock is dropped, inode can go away.
 */
static void inode_sleep_on_writeback(struct inode *inode)
	__releases(inode->i_lock)
{
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	int sleep;

	/*
	 * prepare_to_wait() before sampling I_SYNC under i_lock, so a
	 * wakeup between the unlock and schedule() is not lost.
	 */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	sleep = inode->i_state & I_SYNC;
	spin_unlock(&inode->i_lock);
	if (sleep)
		schedule();
	finish_wait(wqh, &wait);
}
  
/*
 * Find proper writeback list for the inode depending on its current state and
 * possibly also change of its state while we were doing writeback.  Here we
 * handle things such as livelock prevention or fairness of writeback among
 * inodes. This function can be called only by flusher thread - no one else
 * processes all inodes in writeback lists and requeueing inodes behind flusher
 * thread's back can have unexpected consequences.
 */
static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
			  struct writeback_control *wbc)
{
	if (inode->i_state & I_FREEING)
		return;

	/*
	 * Sync livelock prevention. Each inode is tagged and synced in one
	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
	 * the dirty time to prevent enqueue and sync it again.
	 */
	if ((inode->i_state & I_DIRTY) &&
	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
		inode->dirtied_when = jiffies;

	if (wbc->pages_skipped) {
		/*
		 * writeback is not making progress due to locked
		 * buffers. Skip this inode for now.
		 */
		redirty_tail(inode, wb);
		return;
	}

	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
		/*
		 * We didn't write back all the pages.  nfs_writepages()
		 * sometimes bails out without doing anything.
		 */
		if (wbc->nr_to_write <= 0) {
			/* Slice used up. Queue for next turn. */
			requeue_io(inode, wb);
		} else {
			/*
			 * Writeback blocked by something other than
			 * congestion. Delay the inode for some time to
			 * avoid spinning on the CPU (100% iowait)
			 * retrying writeback of the dirty page/inode
			 * that cannot be performed immediately.
			 */
			redirty_tail(inode, wb);
		}
	} else if (inode->i_state & I_DIRTY) {
		/*
		 * Filesystems can dirty the inode during writeback operations,
		 * such as delayed allocation during submission or metadata
		 * updates after data IO completion.
		 */
		redirty_tail(inode, wb);
	} else {
		/* The inode is clean. Remove from writeback lists. */
		list_del_init(&inode->i_wb_list);
	}
}
  
/*
 * Write out an inode and its dirty pages. Do not update the writeback list
 * linkage. That is left to the caller. The caller is also responsible for
 * setting I_SYNC flag and calling inode_sync_complete() to clear it.
 */
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	long nr_to_write = wbc->nr_to_write;	/* saved for tracing only */
	unsigned dirty;
	int ret;

	WARN_ON(!(inode->i_state & I_SYNC));

	trace_writeback_single_inode_start(inode, wbc, nr_to_write);

	ret = do_writepages(mapping, wbc);

	/*
	 * Make sure to wait on the data before writing out the metadata.
	 * This is important for filesystems that modify metadata on data
	 * I/O completion. We don't do it for sync(2) writeback because it has a
	 * separate, external IO completion path and ->sync_fs for guaranteeing
	 * inode metadata is written back correctly.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
		int err = filemap_fdatawait(mapping);
		if (ret == 0)
			ret = err;
	}

	/*
	 * Some filesystems may redirty the inode during the writeback
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode->i_lock);
	/* Clear I_DIRTY_PAGES if we've written out all dirty pages */
	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		inode->i_state &= ~I_DIRTY_PAGES;
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	spin_unlock(&inode->i_lock);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		int err = write_inode(inode, wbc);
		if (ret == 0)
			ret = err;
	}
	trace_writeback_single_inode(inode, wbc, nr_to_write);
	return ret;
}
  
/*
 * Write out an inode's dirty pages. Either the caller has an active reference
 * on the inode or the inode has I_WILL_FREE set.
 *
 * This function is designed to be called for writing back one inode which
 * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
 * and does more profound writeback list handling in writeback_sb_inodes().
 */
static int
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
		       struct writeback_control *wbc)
{
	int ret = 0;

	spin_lock(&inode->i_lock);
	/* Sanity-check the pinning contract described above */
	if (!atomic_read(&inode->i_count))
		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
	else
		WARN_ON(inode->i_state & I_WILL_FREE);

	if (inode->i_state & I_SYNC) {
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out;
		/*
		 * It's a data-integrity sync. We must wait. Since callers hold
		 * inode reference or inode has I_WILL_FREE set, it cannot go
		 * away under us.
		 */
		__inode_wait_for_writeback(inode);
	}
	WARN_ON(inode->i_state & I_SYNC);
	/*
	 * Skip inode if it is clean and we have no outstanding writeback in
	 * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
	 * function since flusher thread may be doing for example sync in
	 * parallel and if we move the inode, it could get skipped. So here we
	 * make sure inode is on some writeback list and leave it there unless
	 * we have completely cleaned the inode.
	 */
	if (!(inode->i_state & I_DIRTY) &&
	    (wbc->sync_mode != WB_SYNC_ALL ||
	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
		goto out;
	inode->i_state |= I_SYNC;
	spin_unlock(&inode->i_lock);

	ret = __writeback_single_inode(inode, wbc);

	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	/*
	 * If inode is clean, remove it from writeback lists. Otherwise don't
	 * touch it. See comment above for explanation.
	 */
	if (!(inode->i_state & I_DIRTY))
		list_del_init(&inode->i_wb_list);
	spin_unlock(&wb->list_lock);
	inode_sync_complete(inode);
out:
	spin_unlock(&inode->i_lock);
	return ret;
}
1a12d8bd7   Wu Fengguang   writeback: scale ...
538
539
  static long writeback_chunk_size(struct backing_dev_info *bdi,
  				 struct wb_writeback_work *work)
d46db3d58   Wu Fengguang   writeback: make w...
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
  {
  	long pages;
  
  	/*
  	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
  	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
  	 * here avoids calling into writeback_inodes_wb() more than once.
  	 *
  	 * The intended call sequence for WB_SYNC_ALL writeback is:
  	 *
  	 *      wb_writeback()
  	 *          writeback_sb_inodes()       <== called only once
  	 *              write_cache_pages()     <== called once for each inode
  	 *                   (quickly) tag currently dirty pages
  	 *                   (maybe slowly) sync all tagged pages
  	 */
  	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
  		pages = LONG_MAX;
1a12d8bd7   Wu Fengguang   writeback: scale ...
558
559
560
561
562
563
564
  	else {
  		pages = min(bdi->avg_write_bandwidth / 2,
  			    global_dirty_limit / DIRTY_SCOPE);
  		pages = min(pages, work->nr_pages);
  		pages = round_down(pages + MIN_WRITEBACK_PAGES,
  				   MIN_WRITEBACK_PAGES);
  	}
d46db3d58   Wu Fengguang   writeback: make w...
565
566
567
  
  	return pages;
  }
03ba3782e   Jens Axboe   writeback: switch...
568
/*
 * Write a portion of b_io inodes which belong to @sb.
 *
 * Return the number of pages and/or inodes written.
 *
 * NOTE! Called with wb->list_lock held; it is dropped and reacquired
 * around the actual IO on each inode (see the unlock before
 * __writeback_single_inode() below), so the b_io list can change
 * underneath us between iterations.
 */
static long writeback_sb_inodes(struct super_block *sb,
				struct bdi_writeback *wb,
				struct wb_writeback_work *work)
{
	/* Per-inode control block, seeded from the work item's flags. */
	struct writeback_control wbc = {
		.sync_mode		= work->sync_mode,
		.tagged_writepages	= work->tagged_writepages,
		.for_kupdate		= work->for_kupdate,
		.for_background		= work->for_background,
		.for_sync		= work->for_sync,
		.range_cyclic		= work->range_cyclic,
		.range_start		= 0,
		.range_end		= LLONG_MAX,
	};
	unsigned long start_time = jiffies;
	long write_chunk;
	long wrote = 0;  /* count both pages and inodes */

	while (!list_empty(&wb->b_io)) {
		/* Oldest entry is at the tail of b_io. */
		struct inode *inode = wb_inode(wb->b_io.prev);

		if (inode->i_sb != sb) {
			if (work->sb) {
				/*
				 * We only want to write back data for this
				 * superblock, move all inodes not belonging
				 * to it back onto the dirty list.
				 */
				redirty_tail(inode, wb);
				continue;
			}

			/*
			 * The inode belongs to a different superblock.
			 * Bounce back to the caller to unpin this and
			 * pin the next superblock.
			 */
			break;
		}

		/*
		 * Don't bother with new inodes or inodes being freed, first
		 * kind does not need periodic writeout yet, and for the latter
		 * kind writeout is handled by the freer.
		 */
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			redirty_tail(inode, wb);
			continue;
		}
		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
			/*
			 * If this inode is locked for writeback and we are not
			 * doing writeback-for-data-integrity, move it to
			 * b_more_io so that writeback can proceed with the
			 * other inodes on s_io.
			 *
			 * We'll have another go at writing back this inode
			 * when we completed a full scan of b_io.
			 */
			spin_unlock(&inode->i_lock);
			requeue_io(inode, wb);
			trace_writeback_sb_inodes_requeue(inode);
			continue;
		}
		/* From here on we hold only inode->i_lock, not list_lock. */
		spin_unlock(&wb->list_lock);

		/*
		 * We already requeued the inode if it had I_SYNC set and we
		 * are doing WB_SYNC_NONE writeback. So this catches only the
		 * WB_SYNC_ALL case.
		 */
		if (inode->i_state & I_SYNC) {
			/* Wait for I_SYNC. This function drops i_lock... */
			inode_sleep_on_writeback(inode);
			/* Inode may be gone, start again */
			spin_lock(&wb->list_lock);
			continue;
		}
		inode->i_state |= I_SYNC;
		spin_unlock(&inode->i_lock);

		write_chunk = writeback_chunk_size(wb->bdi, work);
		wbc.nr_to_write = write_chunk;
		wbc.pages_skipped = 0;

		/*
		 * We use I_SYNC to pin the inode in memory. While it is set
		 * evict_inode() will wait so the inode cannot be freed.
		 */
		__writeback_single_inode(inode, &wbc);

		/* Pages written this round = chunk minus remaining budget. */
		work->nr_pages -= write_chunk - wbc.nr_to_write;
		wrote += write_chunk - wbc.nr_to_write;
		spin_lock(&wb->list_lock);
		spin_lock(&inode->i_lock);
		/* A now-clean inode counts as progress too. */
		if (!(inode->i_state & I_DIRTY))
			wrote++;
		requeue_inode(inode, wb, &wbc);
		inode_sync_complete(inode);
		spin_unlock(&inode->i_lock);
		cond_resched_lock(&wb->list_lock);
		/*
		 * bail out to wb_writeback() often enough to check
		 * background threshold and other termination conditions.
		 */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	return wrote;
}
d46db3d58   Wu Fengguang   writeback: make w...
686
687
/*
 * Write back a portion of @wb's b_io inodes, crossing superblock
 * boundaries: each inode's superblock is pinned with
 * grab_super_passive() for the duration of writeback_sb_inodes().
 *
 * Called with wb->list_lock held.  Returns the number of pages and/or
 * inodes written.
 */
static long __writeback_inodes_wb(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	unsigned long start_time = jiffies;
	long wrote = 0;

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);
		struct super_block *sb = inode->i_sb;

		if (!grab_super_passive(sb)) {
			/*
			 * grab_super_passive() may fail consistently due to
			 * s_umount being grabbed by someone else. Don't use
			 * requeue_io() to avoid busy retrying the inode/sb.
			 */
			redirty_tail(inode, wb);
			continue;
		}
		wrote += writeback_sb_inodes(sb, wb, work);
		drop_super(sb);

		/* refer to the same tests at the end of writeback_sb_inodes */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	/* Leave any unwritten inodes on b_io */
	return wrote;
}
7d9f073b8   Wanpeng Li   mm/writeback: mak...
719
  static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
720
  				enum wb_reason reason)
edadfb10b   Christoph Hellwig   writeback: split ...
721
  {
d46db3d58   Wu Fengguang   writeback: make w...
722
723
724
725
  	struct wb_writeback_work work = {
  		.nr_pages	= nr_pages,
  		.sync_mode	= WB_SYNC_NONE,
  		.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
726
  		.reason		= reason,
d46db3d58   Wu Fengguang   writeback: make w...
727
  	};
edadfb10b   Christoph Hellwig   writeback: split ...
728

f758eeabe   Christoph Hellwig   writeback: split ...
729
  	spin_lock(&wb->list_lock);
424b351fe   Wu Fengguang   writeback: refill...
730
  	if (list_empty(&wb->b_io))
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
731
  		queue_io(wb, &work);
d46db3d58   Wu Fengguang   writeback: make w...
732
  	__writeback_inodes_wb(wb, &work);
f758eeabe   Christoph Hellwig   writeback: split ...
733
  	spin_unlock(&wb->list_lock);
edadfb10b   Christoph Hellwig   writeback: split ...
734

d46db3d58   Wu Fengguang   writeback: make w...
735
736
  	return nr_pages - work.nr_pages;
  }
03ba3782e   Jens Axboe   writeback: switch...
737

b00949aa2   Wu Fengguang   writeback: per-bd...
738
  static bool over_bground_thresh(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
739
740
  {
  	unsigned long background_thresh, dirty_thresh;
16c4042f0   Wu Fengguang   writeback: avoid ...
741
  	global_dirty_limits(&background_thresh, &dirty_thresh);
03ba3782e   Jens Axboe   writeback: switch...
742

b00949aa2   Wu Fengguang   writeback: per-bd...
743
744
745
746
747
748
749
750
751
  	if (global_page_state(NR_FILE_DIRTY) +
  	    global_page_state(NR_UNSTABLE_NFS) > background_thresh)
  		return true;
  
  	if (bdi_stat(bdi, BDI_RECLAIMABLE) >
  				bdi_dirty_limit(bdi, background_thresh))
  		return true;
  
  	return false;
03ba3782e   Jens Axboe   writeback: switch...
752
753
754
  }
  
  /*
e98be2d59   Wu Fengguang   writeback: bdi wr...
755
756
757
758
759
760
   * Called under wb->list_lock. If there are multiple wb per bdi,
   * only the flusher working on the first wb should do it.
   */
static void wb_update_bandwidth(struct bdi_writeback *wb,
				unsigned long start_time)
{
	/*
	 * All the intermediate thresh/dirty arguments are passed as zero;
	 * presumably this caller only wants the write-bandwidth estimate
	 * refreshed, not the dirty-limit state updated -- confirm against
	 * __bdi_update_bandwidth().
	 */
	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
}
  
  /*
03ba3782e   Jens Axboe   writeback: switch...
765
   * Explicit flushing or periodic writeback of "old" data.
66f3b8e2e   Jens Axboe   writeback: move d...
766
   *
03ba3782e   Jens Axboe   writeback: switch...
767
768
769
770
   * Define "old": the first time one of an inode's pages is dirtied, we mark the
   * dirtying-time in the inode's address_space.  So this periodic writeback code
   * just walks the superblock inode list, writing back any inodes which are
   * older than a specific point in time.
66f3b8e2e   Jens Axboe   writeback: move d...
771
   *
03ba3782e   Jens Axboe   writeback: switch...
772
773
774
   * Try to run once per dirty_writeback_interval.  But if a writeback event
   * takes longer than a dirty_writeback_interval interval, then leave a
   * one-second gap.
66f3b8e2e   Jens Axboe   writeback: move d...
775
   *
03ba3782e   Jens Axboe   writeback: switch...
776
777
   * older_than_this takes precedence over nr_to_write.  So we'll only write back
   * all dirty pages if they are all attached to "old" mappings.
66f3b8e2e   Jens Axboe   writeback: move d...
778
   */
c4a77a6c7   Jens Axboe   writeback: make w...
779
static long wb_writeback(struct bdi_writeback *wb,
			 struct wb_writeback_work *work)
{
	unsigned long wb_start = jiffies;
	long nr_pages = work->nr_pages;	/* remember the original budget */
	unsigned long oldest_jif;
	struct inode *inode;
	long progress;

	/*
	 * older_than_this points at a stack variable: it is only valid
	 * for the duration of this call, and is refreshed each loop
	 * iteration for kupdate/background work below.
	 */
	oldest_jif = jiffies;
	work->older_than_this = &oldest_jif;

	spin_lock(&wb->list_lock);
	for (;;) {
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
		if (work->nr_pages <= 0)
			break;

		/*
		 * Background writeout and kupdate-style writeback may
		 * run forever. Stop them if there is other work to do
		 * so that e.g. sync can proceed. They'll be restarted
		 * after the other works are all done.
		 */
		if ((work->for_background || work->for_kupdate) &&
		    !list_empty(&wb->bdi->work_list))
			break;

		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
		if (work->for_background && !over_bground_thresh(wb->bdi))
			break;

		/*
		 * Kupdate and background works are special and we want to
		 * include all inodes that need writing. Livelock avoidance is
		 * handled by these works yielding to any other work so we are
		 * safe.
		 */
		if (work->for_kupdate) {
			oldest_jif = jiffies -
				msecs_to_jiffies(dirty_expire_interval * 10);
		} else if (work->for_background)
			oldest_jif = jiffies;

		trace_writeback_start(wb->bdi, work);
		if (list_empty(&wb->b_io))
			queue_io(wb, work);
		/* work->sb set means "this superblock only". */
		if (work->sb)
			progress = writeback_sb_inodes(work->sb, wb, work);
		else
			progress = __writeback_inodes_wb(wb, work);
		trace_writeback_written(wb->bdi, work);

		wb_update_bandwidth(wb, wb_start);

		/*
		 * Did we write something? Try for more
		 *
		 * Dirty inodes are moved to b_io for writeback in batches.
		 * The completion of the current batch does not necessarily
		 * mean the overall work is done. So we keep looping as long
		 * as made some progress on cleaning pages or inodes.
		 */
		if (progress)
			continue;
		/*
		 * No more inodes for IO, bail
		 */
		if (list_empty(&wb->b_more_io))
			break;
		/*
		 * Nothing written. Wait for some inode to
		 * become available for writeback. Otherwise
		 * we'll just busyloop.
		 */
		/* (This re-check is always true here: the break above
		 * already handled the empty case.) */
		if (!list_empty(&wb->b_more_io))  {
			trace_writeback_wait(wb->bdi, work);
			inode = wb_inode(wb->b_more_io.prev);
			/* Take i_lock before dropping list_lock so the
			 * inode cannot be synced away under us. */
			spin_lock(&inode->i_lock);
			spin_unlock(&wb->list_lock);
			/* This function drops i_lock... */
			inode_sleep_on_writeback(inode);
			spin_lock(&wb->list_lock);
		}
	}
	spin_unlock(&wb->list_lock);

	/* Budget consumed == pages/inodes written on behalf of this work. */
	return nr_pages - work->nr_pages;
}
  
  /*
83ba7b071   Christoph Hellwig   writeback: simpli...
875
   * Return the next wb_writeback_work struct that hasn't been processed yet.
03ba3782e   Jens Axboe   writeback: switch...
876
   */
83ba7b071   Christoph Hellwig   writeback: simpli...
877
  static struct wb_writeback_work *
08852b6d6   Minchan Kim   writeback: remove...
878
  get_next_work_item(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
879
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
880
  	struct wb_writeback_work *work = NULL;
03ba3782e   Jens Axboe   writeback: switch...
881

6467716a3   Artem Bityutskiy   writeback: optimi...
882
  	spin_lock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
883
884
885
886
  	if (!list_empty(&bdi->work_list)) {
  		work = list_entry(bdi->work_list.next,
  				  struct wb_writeback_work, list);
  		list_del_init(&work->list);
03ba3782e   Jens Axboe   writeback: switch...
887
  	}
6467716a3   Artem Bityutskiy   writeback: optimi...
888
  	spin_unlock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
889
  	return work;
03ba3782e   Jens Axboe   writeback: switch...
890
  }
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
891
892
893
894
895
896
897
898
899
900
  /*
   * Add in the number of potentially dirty inodes, because each inode
   * write can dirty pagecache in the underlying blockdev.
   */
  static unsigned long get_nr_dirty_pages(void)
  {
  	return global_page_state(NR_FILE_DIRTY) +
  		global_page_state(NR_UNSTABLE_NFS) +
  		get_nr_dirty_inodes();
  }
6585027a5   Jan Kara   writeback: integr...
901
902
  static long wb_check_background_flush(struct bdi_writeback *wb)
  {
b00949aa2   Wu Fengguang   writeback: per-bd...
903
  	if (over_bground_thresh(wb->bdi)) {
6585027a5   Jan Kara   writeback: integr...
904
905
906
907
908
909
  
  		struct wb_writeback_work work = {
  			.nr_pages	= LONG_MAX,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_background	= 1,
  			.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
910
  			.reason		= WB_REASON_BACKGROUND,
6585027a5   Jan Kara   writeback: integr...
911
912
913
914
915
916
917
  		};
  
  		return wb_writeback(wb, &work);
  	}
  
  	return 0;
  }
03ba3782e   Jens Axboe   writeback: switch...
918
919
920
921
  static long wb_check_old_data_flush(struct bdi_writeback *wb)
  {
  	unsigned long expired;
  	long nr_pages;
69b62d01e   Jens Axboe   writeback: disabl...
922
923
924
925
926
  	/*
  	 * When set to zero, disable periodic writeback
  	 */
  	if (!dirty_writeback_interval)
  		return 0;
03ba3782e   Jens Axboe   writeback: switch...
927
928
929
930
931
932
  	expired = wb->last_old_flush +
  			msecs_to_jiffies(dirty_writeback_interval * 10);
  	if (time_before(jiffies, expired))
  		return 0;
  
  	wb->last_old_flush = jiffies;
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
933
  	nr_pages = get_nr_dirty_pages();
03ba3782e   Jens Axboe   writeback: switch...
934

c4a77a6c7   Jens Axboe   writeback: make w...
935
  	if (nr_pages) {
83ba7b071   Christoph Hellwig   writeback: simpli...
936
  		struct wb_writeback_work work = {
c4a77a6c7   Jens Axboe   writeback: make w...
937
938
939
940
  			.nr_pages	= nr_pages,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_kupdate	= 1,
  			.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
941
  			.reason		= WB_REASON_PERIODIC,
c4a77a6c7   Jens Axboe   writeback: make w...
942
  		};
83ba7b071   Christoph Hellwig   writeback: simpli...
943
  		return wb_writeback(wb, &work);
c4a77a6c7   Jens Axboe   writeback: make w...
944
  	}
03ba3782e   Jens Axboe   writeback: switch...
945
946
947
948
949
950
951
  
  	return 0;
  }
  
  /*
   * Retrieve work items and do the writeback they describe
   */
25d130ba2   Wanpeng Li   mm/writeback: don...
952
/*
 * Drain @wb's queued work items, then run the periodic (kupdate) and
 * background checks.  Returns the total number of pages/inodes written.
 */
static long wb_do_writeback(struct bdi_writeback *wb)
{
	struct backing_dev_info *bdi = wb->bdi;
	struct wb_writeback_work *work;
	long wrote = 0;

	/* Advertise that a flusher is active on this bdi. */
	set_bit(BDI_writeback_running, &wb->bdi->state);
	while ((work = get_next_work_item(bdi)) != NULL) {

		trace_writeback_exec(bdi, work);
		wrote += wb_writeback(wb, work);

		/*
		 * Notify the caller of completion if this is a synchronous
		 * work item, otherwise just free it.
		 */
		if (work->done)
			complete(work->done);
		else
			kfree(work);
	}

	/*
	 * Check for periodic writeback, kupdated() style
	 */
	wrote += wb_check_old_data_flush(wb);
	wrote += wb_check_background_flush(wb);
	clear_bit(BDI_writeback_running, &wb->bdi->state);

	return wrote;
}
  
  /*
   * Handle writeback of dirty data for the device backed by this bdi. Also
839a8e866   Tejun Heo   writeback: replac...
986
   * reschedules periodically and does kupdated style flushing.
03ba3782e   Jens Axboe   writeback: switch...
987
   */
839a8e866   Tejun Heo   writeback: replac...
988
void bdi_writeback_workfn(struct work_struct *work)
{
	struct bdi_writeback *wb = container_of(to_delayed_work(work),
						struct bdi_writeback, dwork);
	struct backing_dev_info *bdi = wb->bdi;
	long pages_written;

	set_worker_desc("flush-%s", dev_name(bdi->dev));
	current->flags |= PF_SWAPWRITE;

	if (likely(!current_is_workqueue_rescuer() ||
		   !test_bit(BDI_registered, &bdi->state))) {
		/*
		 * The normal path.  Keep writing back @bdi until its
		 * work_list is empty.  Note that this path is also taken
		 * if @bdi is shutting down even when we're running off the
		 * rescuer as work_list needs to be drained.
		 */
		do {
			pages_written = wb_do_writeback(wb);
			trace_writeback_pages_written(pages_written);
		} while (!list_empty(&bdi->work_list));
	} else {
		/*
		 * bdi_wq can't get enough workers and we're running off
		 * the emergency worker.  Don't hog it.  Hopefully, 1024 is
		 * enough for efficient IO.
		 */
		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
						    WB_REASON_FORKER_THREAD);
		trace_writeback_pages_written(pages_written);
	}

	/*
	 * Reschedule: immediately if more work was queued while we ran,
	 * or after a delay if dirty IO remains and periodic writeback is
	 * enabled.
	 */
	if (!list_empty(&bdi->work_list))
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
		bdi_wakeup_thread_delayed(bdi);

	current->flags &= ~PF_SWAPWRITE;
}
  
  /*
b8c2f3474   Christoph Hellwig   writeback: simpli...
1028
1029
   * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
   * the whole world.
03ba3782e   Jens Axboe   writeback: switch...
1030
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1031
  void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
03ba3782e   Jens Axboe   writeback: switch...
1032
  {
b8c2f3474   Christoph Hellwig   writeback: simpli...
1033
  	struct backing_dev_info *bdi;
03ba3782e   Jens Axboe   writeback: switch...
1034

47df3dded   Jan Kara   writeback: fix oc...
1035
1036
  	if (!nr_pages)
  		nr_pages = get_nr_dirty_pages();
03ba3782e   Jens Axboe   writeback: switch...
1037

b8c2f3474   Christoph Hellwig   writeback: simpli...
1038
  	rcu_read_lock();
cfc4ba536   Jens Axboe   writeback: use RC...
1039
  	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
03ba3782e   Jens Axboe   writeback: switch...
1040
1041
  		if (!bdi_has_dirty_io(bdi))
  			continue;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1042
  		__bdi_start_writeback(bdi, nr_pages, false, reason);
03ba3782e   Jens Axboe   writeback: switch...
1043
  	}
cfc4ba536   Jens Axboe   writeback: use RC...
1044
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1045
  }
03ba3782e   Jens Axboe   writeback: switch...
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
  static noinline void block_dump___mark_inode_dirty(struct inode *inode)
  {
  	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
  		struct dentry *dentry;
  		const char *name = "?";
  
  		dentry = d_find_alias(inode);
  		if (dentry) {
  			spin_lock(&dentry->d_lock);
  			name = (const char *) dentry->d_name.name;
  		}
  		printk(KERN_DEBUG
  		       "%s(%d): dirtied inode %lu (%s) on %s
  ",
  		       current->comm, task_pid_nr(current), inode->i_ino,
  		       name, inode->i_sb->s_id);
  		if (dentry) {
  			spin_unlock(&dentry->d_lock);
  			dput(dentry);
  		}
  	}
  }
  
  /**
   *	__mark_inode_dirty -	internal function
   *	@inode: inode to mark
   *	@flags: what kind of dirty (i.e. I_DIRTY_SYNC)
   *	Mark an inode as dirty. Callers should use mark_inode_dirty or
   *  	mark_inode_dirty_sync.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1075
   *
03ba3782e   Jens Axboe   writeback: switch...
1076
1077
1078
1079
1080
1081
1082
1083
1084
   * Put the inode on the super block's dirty list.
   *
   * CAREFUL! We mark it dirty unconditionally, but move it onto the
   * dirty list only if it is hashed or if it refers to a blockdev.
   * If it was not hashed, it will never be added to the dirty list
   * even if it is later hashed, as it will have been marked dirty already.
   *
   * In short, make sure you hash any inodes _before_ you start marking
   * them dirty.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1085
   *
03ba3782e   Jens Axboe   writeback: switch...
1086
1087
1088
1089
1090
1091
   * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
   * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
   * the kernel-internal blockdev inode represents the dirtying time of the
   * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
   * page->mapping->host, so the page-dirtying time is recorded in the internal
   * blockdev inode.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1092
   */
03ba3782e   Jens Axboe   writeback: switch...
1093
  void __mark_inode_dirty(struct inode *inode, int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
  {
03ba3782e   Jens Axboe   writeback: switch...
1095
  	struct super_block *sb = inode->i_sb;
253c34e9b   Artem Bityutskiy   writeback: preven...
1096
  	struct backing_dev_info *bdi = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097

03ba3782e   Jens Axboe   writeback: switch...
1098
1099
1100
1101
1102
  	/*
  	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
  	 * dirty the inode itself
  	 */
  	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
9fb0a7da0   Tejun Heo   writeback: add mo...
1103
  		trace_writeback_dirty_inode_start(inode, flags);
03ba3782e   Jens Axboe   writeback: switch...
1104
  		if (sb->s_op->dirty_inode)
aa3857295   Christoph Hellwig   fs: pass exact ty...
1105
  			sb->s_op->dirty_inode(inode, flags);
9fb0a7da0   Tejun Heo   writeback: add mo...
1106
1107
  
  		trace_writeback_dirty_inode(inode, flags);
03ba3782e   Jens Axboe   writeback: switch...
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
  	}
  
  	/*
  	 * make sure that changes are seen by all cpus before we test i_state
  	 * -- mikulas
  	 */
  	smp_mb();
  
  	/* avoid the locking if we can */
  	if ((inode->i_state & flags) == flags)
  		return;
  
  	if (unlikely(block_dump))
  		block_dump___mark_inode_dirty(inode);
250df6ed2   Dave Chinner   fs: protect inode...
1122
  	spin_lock(&inode->i_lock);
03ba3782e   Jens Axboe   writeback: switch...
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
  	if ((inode->i_state & flags) != flags) {
  		const int was_dirty = inode->i_state & I_DIRTY;
  
  		inode->i_state |= flags;
  
  		/*
  		 * If the inode is being synced, just update its dirty state.
  		 * The unlocker will place the inode on the appropriate
  		 * superblock list, based upon its state.
  		 */
  		if (inode->i_state & I_SYNC)
250df6ed2   Dave Chinner   fs: protect inode...
1134
  			goto out_unlock_inode;
03ba3782e   Jens Axboe   writeback: switch...
1135
1136
1137
1138
1139
1140
  
  		/*
  		 * Only add valid (hashed) inodes to the superblock's
  		 * dirty list.  Add blockdev inodes as well.
  		 */
  		if (!S_ISBLK(inode->i_mode)) {
1d3382cbf   Al Viro   new helper: inode...
1141
  			if (inode_unhashed(inode))
250df6ed2   Dave Chinner   fs: protect inode...
1142
  				goto out_unlock_inode;
03ba3782e   Jens Axboe   writeback: switch...
1143
  		}
a4ffdde6e   Al Viro   simplify checks f...
1144
  		if (inode->i_state & I_FREEING)
250df6ed2   Dave Chinner   fs: protect inode...
1145
  			goto out_unlock_inode;
03ba3782e   Jens Axboe   writeback: switch...
1146
1147
1148
1149
1150
1151
  
  		/*
  		 * If the inode was already on b_dirty/b_io/b_more_io, don't
  		 * reposition it (that would break b_dirty time-ordering).
  		 */
  		if (!was_dirty) {
a66979aba   Dave Chinner   fs: move i_wb_lis...
1152
  			bool wakeup_bdi = false;
253c34e9b   Artem Bityutskiy   writeback: preven...
1153
  			bdi = inode_to_bdi(inode);
146d7009b   Junxiao Bi   writeback: fix ra...
1154
1155
  			spin_unlock(&inode->i_lock);
  			spin_lock(&bdi->wb.list_lock);
253c34e9b   Artem Bityutskiy   writeback: preven...
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
  			if (bdi_cap_writeback_dirty(bdi)) {
  				WARN(!test_bit(BDI_registered, &bdi->state),
  				     "bdi-%s not registered
  ", bdi->name);
  
  				/*
  				 * If this is the first dirty inode for this
  				 * bdi, we have to wake-up the corresponding
  				 * bdi thread to make sure background
  				 * write-back happens later.
  				 */
  				if (!wb_has_dirty_io(&bdi->wb))
  					wakeup_bdi = true;
500b067c5   Jens Axboe   writeback: check ...
1169
  			}
03ba3782e   Jens Axboe   writeback: switch...
1170
1171
  
  			inode->dirtied_when = jiffies;
7ccf19a80   Nick Piggin   fs: inode split I...
1172
  			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
f758eeabe   Christoph Hellwig   writeback: split ...
1173
  			spin_unlock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
1174
1175
1176
1177
  
  			if (wakeup_bdi)
  				bdi_wakeup_thread_delayed(bdi);
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1178
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1179
  	}
250df6ed2   Dave Chinner   fs: protect inode...
1180
1181
  out_unlock_inode:
  	spin_unlock(&inode->i_lock);
253c34e9b   Artem Bityutskiy   writeback: preven...
1182

03ba3782e   Jens Axboe   writeback: switch...
1183
1184
  }
  EXPORT_SYMBOL(__mark_inode_dirty);
b6e51316d   Jens Axboe   writeback: separa...
1185
/*
 * wait_sb_inodes - wait on in-flight writeback for every inode of @sb
 * @sb: superblock whose inodes we wait on
 *
 * Data-integrity helper for sync_inodes_sb(): walks sb->s_inodes and
 * calls filemap_fdatawait() on each inode that has pagecache, so that
 * writeout which was already in flight before our own sync pass is
 * guaranteed to have completed before we return.
 *
 * Caller must hold sb->s_umount (read or write) so the filesystem
 * cannot change r/o <-> r/w underneath us.
 */
static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
	 * because there may have been pages dirtied before our sync
	 * call, but which had writeout started before we write it out.
	 * In which case, the inode may not be on the dirty list, but
	 * we still have to wait for that writeout.
	 */
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		/* Skip inodes being freed/created and those with no pages */
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* Pin the inode so we may drop the list lock and sleep */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		filemap_fdatawait(mapping);

		cond_resched();

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	/* Drop the reference deferred from the final loop iteration */
	iput(old_inode);
}
d8a8559cd   Jens Axboe   writeback: get ri...
1235
/**
 * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
void writeback_inodes_sb_nr(struct super_block *sb,
			    unsigned long nr,
			    enum wb_reason reason)
{
	/* Signalled by the flusher thread when it has processed 'work' */
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb			= sb,
		.sync_mode		= WB_SYNC_NONE,
		.tagged_writepages	= 1,
		.done			= &done,
		.nr_pages		= nr,
		.reason			= reason,
	};

	/* Filesystems without a real bdi have nothing to write back */
	if (sb->s_bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));
	bdi_queue_work(sb->s_bdi, &work);
	/*
	 * 'work' lives on our stack, so we must not return until the
	 * flusher thread is done with it.
	 */
	wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb_nr);
  
  /**
   * writeback_inodes_sb	-	writeback dirty inodes from given super_block
   * @sb: the superblock
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1270
   * @reason: reason why some writeback work was initiated
3259f8bed   Chris Mason   Add new functions...
1271
1272
1273
1274
1275
   *
   * Start writeback on some inodes on this super_block. No guarantees are made
   * on how many (if any) will be written, and this function does not wait
   * for IO completion of submitted IO.
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1276
  void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
3259f8bed   Chris Mason   Add new functions...
1277
  {
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1278
  	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
3259f8bed   Chris Mason   Add new functions...
1279
  }
0e3c9a228   Jens Axboe   Revert "writeback...
1280
  EXPORT_SYMBOL(writeback_inodes_sb);
e913fc825   Jens Axboe   writeback: fix WB...
1281
1282
  
  /**
10ee27a06   Miao Xie   vfs: re-implement...
1283
   * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
17bd55d03   Eric Sandeen   fs-writeback: Add...
1284
   * @sb: the superblock
10ee27a06   Miao Xie   vfs: re-implement...
1285
1286
   * @nr: the number of pages to write
   * @reason: the reason of writeback
17bd55d03   Eric Sandeen   fs-writeback: Add...
1287
   *
10ee27a06   Miao Xie   vfs: re-implement...
1288
   * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
17bd55d03   Eric Sandeen   fs-writeback: Add...
1289
1290
   * Returns 1 if writeback was started, 0 if not.
   */
10ee27a06   Miao Xie   vfs: re-implement...
1291
1292
1293
  int try_to_writeback_inodes_sb_nr(struct super_block *sb,
  				  unsigned long nr,
  				  enum wb_reason reason)
17bd55d03   Eric Sandeen   fs-writeback: Add...
1294
  {
10ee27a06   Miao Xie   vfs: re-implement...
1295
  	if (writeback_in_progress(sb->s_bdi))
17bd55d03   Eric Sandeen   fs-writeback: Add...
1296
  		return 1;
10ee27a06   Miao Xie   vfs: re-implement...
1297
1298
  
  	if (!down_read_trylock(&sb->s_umount))
17bd55d03   Eric Sandeen   fs-writeback: Add...
1299
  		return 0;
10ee27a06   Miao Xie   vfs: re-implement...
1300
1301
1302
1303
  
  	writeback_inodes_sb_nr(sb, nr, reason);
  	up_read(&sb->s_umount);
  	return 1;
17bd55d03   Eric Sandeen   fs-writeback: Add...
1304
  }
10ee27a06   Miao Xie   vfs: re-implement...
1305
  EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);
17bd55d03   Eric Sandeen   fs-writeback: Add...
1306
1307
  
  /**
10ee27a06   Miao Xie   vfs: re-implement...
1308
   * try_to_writeback_inodes_sb - try to start writeback if none underway
3259f8bed   Chris Mason   Add new functions...
1309
   * @sb: the superblock
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1310
   * @reason: reason why some writeback work was initiated
3259f8bed   Chris Mason   Add new functions...
1311
   *
10ee27a06   Miao Xie   vfs: re-implement...
1312
   * Implement by try_to_writeback_inodes_sb_nr()
3259f8bed   Chris Mason   Add new functions...
1313
1314
   * Returns 1 if writeback was started, 0 if not.
   */
10ee27a06   Miao Xie   vfs: re-implement...
1315
  int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
3259f8bed   Chris Mason   Add new functions...
1316
  {
10ee27a06   Miao Xie   vfs: re-implement...
1317
  	return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
3259f8bed   Chris Mason   Add new functions...
1318
  }
10ee27a06   Miao Xie   vfs: re-implement...
1319
  EXPORT_SYMBOL(try_to_writeback_inodes_sb);
3259f8bed   Chris Mason   Add new functions...
1320
1321
  
/**
 * sync_inodes_sb	-	sync sb inode pages
 * @sb: the superblock
 *
 * This function writes and waits on any dirty inode belonging to this
 * super_block.
 */
void sync_inodes_sb(struct super_block *sb)
{
	/* Signalled by the flusher thread when it has processed 'work' */
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_ALL,
		.nr_pages	= LONG_MAX,
		.range_cyclic	= 0,
		.done		= &done,
		.reason		= WB_REASON_SYNC,
		.for_sync	= 1,
	};

	/* Nothing to do? */
	if (sb->s_bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	bdi_queue_work(sb->s_bdi, &work);
	/* 'work' is on our stack - must wait for the flusher to finish */
	wait_for_completion(&done);

	/*
	 * Data integrity: also wait on writeback that was already in
	 * flight before we queued our own work item.
	 */
	wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1349

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1350
/**
 * write_inode_now	-	write an inode to disk
 * @inode: inode to write to disk
 * @sync: whether the write should be synchronous or not
 *
 * This function commits an inode to disk immediately if it is dirty. This is
 * primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
 */
int write_inode_now(struct inode *inode, int sync)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
	struct writeback_control wbc = {
		.nr_to_write = LONG_MAX,
		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	/* Backing device can't take dirty data: write the inode only */
	if (!mapping_cap_writeback_dirty(inode->i_mapping))
		wbc.nr_to_write = 0;

	might_sleep();
	return writeback_single_inode(inode, wb, &wbc);
}
EXPORT_SYMBOL(write_inode_now);
  
  /**
   * sync_inode - write an inode and its pages to disk.
   * @inode: the inode to sync
   * @wbc: controls the writeback mode
   *
   * sync_inode() will write an inode and its pages to disk.  It will also
   * correctly update the inode on its superblock's dirty inode lists and will
   * update inode->i_state.
   *
   * The caller must have a ref on the inode.
   */
  int sync_inode(struct inode *inode, struct writeback_control *wbc)
  {
4f8ad655d   Jan Kara   writeback: Refact...
1391
  	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1392
1393
  }
  EXPORT_SYMBOL(sync_inode);
c37650161   Christoph Hellwig   fs: add sync_inod...
1394
1395
  
  /**
c691b9d98   Andrew Morton   sync_inode_metada...
1396
   * sync_inode_metadata - write an inode to disk
c37650161   Christoph Hellwig   fs: add sync_inod...
1397
1398
1399
   * @inode: the inode to sync
   * @wait: wait for I/O to complete.
   *
c691b9d98   Andrew Morton   sync_inode_metada...
1400
   * Write an inode to disk and adjust its dirty state after completion.
c37650161   Christoph Hellwig   fs: add sync_inod...
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
   *
   * Note: only writes the actual inode, no associated data or other metadata.
   */
  int sync_inode_metadata(struct inode *inode, int wait)
  {
  	struct writeback_control wbc = {
  		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
  		.nr_to_write = 0, /* metadata-only */
  	};
  
  	return sync_inode(inode, &wbc);
  }
  EXPORT_SYMBOL(sync_inode_metadata);