Blame view

fs/fs-writeback.c 37.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   * fs/fs-writeback.c
   *
   * Copyright (C) 2002, Linus Torvalds.
   *
   * Contains all the functions related to writing back and waiting
   * upon dirty inodes against superblocks, and writing back dirty
   * pages against inodes.  ie: data writeback.  Writeout of the
   * inode itself is not handled here.
   *
e1f8e8744   Francois Cami   Remove Andrew Mor...
11
   * 10Apr2002	Andrew Morton
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
   *		Split out of fs/inode.c
   *		Additions for address_space-based writeback
   */
  
  #include <linux/kernel.h>
f5ff8422b   Jens Axboe   Fix warnings with...
17
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/spinlock.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
19
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
  #include <linux/sched.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
bc31b86a5   Wu Fengguang   writeback: move M...
23
  #include <linux/pagemap.h>
03ba3782e   Jens Axboe   writeback: switch...
24
25
  #include <linux/kthread.h>
  #include <linux/freezer.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
455b28646   Dave Chinner   writeback: Initia...
29
  #include <linux/tracepoint.h>
07f3f05c1   David Howells   [PATCH] BLOCK: Mo...
30
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31

d0bceac74   Jens Axboe   writeback: get ri...
32
  /*
bc31b86a5   Wu Fengguang   writeback: move M...
33
34
35
36
37
   * 4MB minimal write chunk size
   */
  #define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))
  
/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;			/* number of pages left to write back */
	struct super_block *sb;		/* if set, write back only this sb's inodes */
	unsigned long *older_than_this;	/* if set, skip inodes dirtied after this */
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct completion *done;	/* set if the caller waits */
};
455b28646   Dave Chinner   writeback: Initia...
54
55
56
57
58
59
60
  /*
   * Include the creation of the trace points after defining the
   * wb_writeback_work structure so that the definition remains local to this
   * file.
   */
  #define CREATE_TRACE_POINTS
  #include <trace/events/writeback.h>
455b28646   Dave Chinner   writeback: Initia...
61
62
63
64
  /*
   * We don't actually have pdflush, but this one is exported though /proc...
   */
  int nr_pdflush_threads;
f11b00f3b   Adrian Bunk   fs/fs-writeback.c...
65
66
67
68
/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
 *
 * Determine whether there is writeback waiting to be handled against a
 * backing device.
 *
 * Returns non-zero while the BDI_writeback_running bit is set.
 */
int writeback_in_progress(struct backing_dev_info *bdi)
{
	return test_bit(BDI_writeback_running, &bdi->state);
}
692ebd17c   Jan Kara   bdi: Fix warnings...
76
77
78
  static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
  {
  	struct super_block *sb = inode->i_sb;
692ebd17c   Jan Kara   bdi: Fix warnings...
79

aaead25b9   Christoph Hellwig   writeback: always...
80
81
82
83
  	if (strcmp(sb->s_type->name, "bdev") == 0)
  		return inode->i_mapping->backing_dev_info;
  
  	return sb->s_bdi;
692ebd17c   Jan Kara   bdi: Fix warnings...
84
  }
7ccf19a80   Nick Piggin   fs: inode split I...
85
86
87
88
  static inline struct inode *wb_inode(struct list_head *head)
  {
  	return list_entry(head, struct inode, i_wb_list);
  }
6585027a5   Jan Kara   writeback: integr...
89
90
  /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
  static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
91
  {
fff5b85aa   Artem Bityutskiy   writeback: move b...
92
93
94
95
96
97
98
  	if (bdi->wb.task) {
  		wake_up_process(bdi->wb.task);
  	} else {
  		/*
  		 * The bdi thread isn't there, wake up the forker thread which
  		 * will create and run it.
  		 */
03ba3782e   Jens Axboe   writeback: switch...
99
  		wake_up_process(default_backing_dev_info.wb.task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
100
  	}
6585027a5   Jan Kara   writeback: integr...
101
102
103
104
105
106
107
108
109
110
111
112
  }
  
/* Queue a writeback work item on @bdi and make sure a flusher will see it. */
static void bdi_queue_work(struct backing_dev_info *bdi,
			   struct wb_writeback_work *work)
{
	trace_writeback_queue(bdi, work);

	/* work list and wakeup must happen under wb_lock */
	spin_lock_bh(&bdi->wb_lock);
	list_add_tail(&work->list, &bdi->work_list);
	if (!bdi->wb.task)
		trace_writeback_nothread(bdi, work);
	bdi_wakeup_flusher(bdi);
	spin_unlock_bh(&bdi->wb_lock);
}
83ba7b071   Christoph Hellwig   writeback: simpli...
115
116
  static void
  __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
117
  		      bool range_cyclic, enum wb_reason reason)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
119
  	struct wb_writeback_work *work;
03ba3782e   Jens Axboe   writeback: switch...
120

bcddc3f01   Jens Axboe   writeback: inline...
121
122
123
124
  	/*
  	 * This is WB_SYNC_NONE writeback, so if allocation fails just
  	 * wakeup the thread for old dirty data writeback
  	 */
83ba7b071   Christoph Hellwig   writeback: simpli...
125
126
  	work = kzalloc(sizeof(*work), GFP_ATOMIC);
  	if (!work) {
455b28646   Dave Chinner   writeback: Initia...
127
128
  		if (bdi->wb.task) {
  			trace_writeback_nowork(bdi);
83ba7b071   Christoph Hellwig   writeback: simpli...
129
  			wake_up_process(bdi->wb.task);
455b28646   Dave Chinner   writeback: Initia...
130
  		}
83ba7b071   Christoph Hellwig   writeback: simpli...
131
  		return;
bcddc3f01   Jens Axboe   writeback: inline...
132
  	}
03ba3782e   Jens Axboe   writeback: switch...
133

83ba7b071   Christoph Hellwig   writeback: simpli...
134
135
136
  	work->sync_mode	= WB_SYNC_NONE;
  	work->nr_pages	= nr_pages;
  	work->range_cyclic = range_cyclic;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
137
  	work->reason	= reason;
03ba3782e   Jens Axboe   writeback: switch...
138

83ba7b071   Christoph Hellwig   writeback: simpli...
139
  	bdi_queue_work(bdi, work);
b6e51316d   Jens Axboe   writeback: separa...
140
141
142
143
144
145
  }
  
/**
 * bdi_start_writeback - start writeback
 * @bdi: the backing device to write from
 * @nr_pages: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Description:
 *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
 *   started when this function returns, we make no guarantees on
 *   completion. Caller need not hold sb s_umount semaphore.
 *
 */
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
			enum wb_reason reason)
{
	__bdi_start_writeback(bdi, nr_pages, true, reason);
}
d3ddec763   Wu Fengguang   writeback: stop b...
159

c5444198c   Christoph Hellwig   writeback: simpli...
160
161
162
163
164
/**
 * bdi_start_background_writeback - start background writeback
 * @bdi: the backing device to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens. When
 *   this function returns, it is only guaranteed that for given BDI
 *   some IO is happening if we are over background dirty threshold.
 *   Caller need not hold sb s_umount semaphore.
 */
void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
	/*
	 * We just wake up the flusher thread. It will perform background
	 * writeback as soon as there is no other work to do.
	 */
	trace_writeback_wake_background(bdi);
	spin_lock_bh(&bdi->wb_lock);
	bdi_wakeup_flusher(bdi);
	spin_unlock_bh(&bdi->wb_lock);
}
  
  /*
a66979aba   Dave Chinner   fs: move i_wb_lis...
183
184
185
186
   * Remove the inode from the writeback list it is on.
   */
  void inode_wb_list_del(struct inode *inode)
  {
f758eeabe   Christoph Hellwig   writeback: split ...
187
188
189
  	struct backing_dev_info *bdi = inode_to_bdi(inode);
  
  	spin_lock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
190
  	list_del_init(&inode->i_wb_list);
f758eeabe   Christoph Hellwig   writeback: split ...
191
  	spin_unlock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
192
  }
a66979aba   Dave Chinner   fs: move i_wb_lis...
193
  /*
6610a0bc8   Andrew Morton   writeback: fix ti...
194
195
196
197
   * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
   * furthest end of its superblock's dirty-inode list.
   *
   * Before stamping the inode's ->dirtied_when, we check to see whether it is
66f3b8e2e   Jens Axboe   writeback: move d...
198
   * already the most-recently-dirtied inode on the b_dirty list.  If that is
6610a0bc8   Andrew Morton   writeback: fix ti...
199
200
201
   * the case then the inode must have been redirtied while it was being written
   * out and we don't reset its dirtied_when.
   */
f758eeabe   Christoph Hellwig   writeback: split ...
202
  static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
6610a0bc8   Andrew Morton   writeback: fix ti...
203
  {
f758eeabe   Christoph Hellwig   writeback: split ...
204
  	assert_spin_locked(&wb->list_lock);
03ba3782e   Jens Axboe   writeback: switch...
205
  	if (!list_empty(&wb->b_dirty)) {
66f3b8e2e   Jens Axboe   writeback: move d...
206
  		struct inode *tail;
6610a0bc8   Andrew Morton   writeback: fix ti...
207

7ccf19a80   Nick Piggin   fs: inode split I...
208
  		tail = wb_inode(wb->b_dirty.next);
66f3b8e2e   Jens Axboe   writeback: move d...
209
  		if (time_before(inode->dirtied_when, tail->dirtied_when))
6610a0bc8   Andrew Morton   writeback: fix ti...
210
211
  			inode->dirtied_when = jiffies;
  	}
7ccf19a80   Nick Piggin   fs: inode split I...
212
  	list_move(&inode->i_wb_list, &wb->b_dirty);
6610a0bc8   Andrew Morton   writeback: fix ti...
213
214
215
  }
  
/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	list_move(&inode->i_wb_list, &wb->b_more_io);
}
1c0eeaf56   Joern Engel   introduce I_SYNC
223
224
225
/* Wake up anyone waiting on __I_SYNC now that writeback of @inode is done. */
static void inode_sync_complete(struct inode *inode)
{
	/*
	 * Prevent speculative execution through
	 * spin_unlock(&wb->list_lock);
	 *
	 * NOTE(review): the barrier presumably pairs with the waiters in
	 * inode_wait_for_writeback() re-checking i_state — confirm.
	 */

	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}
d2caa3c54   Jeff Layton   writeback: guard ...
233
234
235
236
237
238
239
240
/*
 * Return true if @inode was dirtied after time @t, with wrap-around
 * protection for 32-bit jiffies comparisons.
 */
static bool inode_dirtied_after(struct inode *inode, unsigned long t)
{
	bool ret = time_after(inode->dirtied_when, t);
#ifndef CONFIG_64BIT
	/*
	 * For inodes being constantly redirtied, dirtied_when can get stuck.
	 * It _appears_ to be in the future, but is actually in distant past.
	 * This test is necessary to prevent such wrapped-around relative times
	 * from permanently stopping the whole bdi writeback.
	 */
	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
#endif
	return ret;
}
c986d1e2a   Andrew Morton   writeback: fix ti...
247
/*
 * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
 * Returns the number of inodes moved.
 */
static int move_expired_inodes(struct list_head *delaying_queue,
			       struct list_head *dispatch_queue,
			       struct wb_writeback_work *work)
{
	LIST_HEAD(tmp);
	struct list_head *pos, *node;
	struct super_block *sb = NULL;
	struct inode *inode;
	int do_sb_sort = 0;
	int moved = 0;

	/*
	 * Pull expired inodes off the tail; stop at the first inode that
	 * was dirtied after the work's older_than_this cutoff.
	 */
	while (!list_empty(delaying_queue)) {
		inode = wb_inode(delaying_queue->prev);
		if (work->older_than_this &&
		    inode_dirtied_after(inode, *work->older_than_this))
			break;
		if (sb && sb != inode->i_sb)
			do_sb_sort = 1;
		sb = inode->i_sb;
		list_move(&inode->i_wb_list, &tmp);
		moved++;
	}

	/* just one sb in list, splice to dispatch_queue and we're done */
	if (!do_sb_sort) {
		list_splice(&tmp, dispatch_queue);
		goto out;
	}

	/* Move inodes from one superblock together */
	while (!list_empty(&tmp)) {
		sb = wb_inode(tmp.prev)->i_sb;
		list_for_each_prev_safe(pos, node, &tmp) {
			inode = wb_inode(pos);
			if (inode->i_sb == sb)
				list_move(&inode->i_wb_list, dispatch_queue);
		}
	}
out:
	return moved;
}
  
/*
 * Queue all expired dirty inodes for io, eldest first.
 * Before
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    gf         edc     BA
 * After
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    g          fBAedc
 *                                           |
 *                                           +--> dequeue for IO
 */
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
	int moved;
	assert_spin_locked(&wb->list_lock);
	/* requeued inodes go first, then freshly expired dirty inodes */
	list_splice_init(&wb->b_more_io, &wb->b_io);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
	trace_writeback_queue_io(wb, work, moved);
}
a9185b41a   Christoph Hellwig   pass writeback_co...
309
  static int write_inode(struct inode *inode, struct writeback_control *wbc)
08d8e9749   Fengguang Wu   writeback: fix nt...
310
  {
03ba3782e   Jens Axboe   writeback: switch...
311
  	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
a9185b41a   Christoph Hellwig   pass writeback_co...
312
  		return inode->i_sb->s_op->write_inode(inode, wbc);
03ba3782e   Jens Axboe   writeback: switch...
313
  	return 0;
08d8e9749   Fengguang Wu   writeback: fix nt...
314
  }
08d8e9749   Fengguang Wu   writeback: fix nt...
315

2c1365791   Fengguang Wu   writeback: fix ti...
316
/*
 * Wait for writeback on an inode to complete.
 *
 * Called with wb->list_lock and inode->i_lock held; both are dropped
 * while sleeping and retaken before returning, so i_state must be
 * rechecked on every loop iteration.
 */
static void inode_wait_for_writeback(struct inode *inode,
				     struct bdi_writeback *wb)
{
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wqh;

	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	while (inode->i_state & I_SYNC) {
		spin_unlock(&inode->i_lock);
		spin_unlock(&wb->list_lock);
		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&wb->list_lock);
		spin_lock(&inode->i_lock);
	}
}
  
/*
 * Write out an inode's dirty pages.  Called under wb->list_lock and
 * inode->i_lock.  Either the caller has an active reference on the inode or
 * the inode has I_WILL_FREE set.
 *
 * If the sync mode is WB_SYNC_ALL, wait on the writeout.
 *
 * The whole writeout design is quite complex and fragile.  We want to avoid
 * starvation of particular inodes when others are being redirtied, prevent
 * livelocks, etc.
 */
static int
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
		       struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	long nr_to_write = wbc->nr_to_write;
	unsigned dirty;
	int ret;

	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	if (!atomic_read(&inode->i_count))
		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
	else
		WARN_ON(inode->i_state & I_WILL_FREE);

	if (inode->i_state & I_SYNC) {
		/*
		 * If this inode is locked for writeback and we are not doing
		 * writeback-for-data-integrity, move it to b_more_io so that
		 * writeback can proceed with the other inodes on s_io.
		 *
		 * We'll have another go at writing back this inode when we
		 * completed a full scan of b_io.
		 */
		if (wbc->sync_mode != WB_SYNC_ALL) {
			requeue_io(inode, wb);
			trace_writeback_single_inode_requeue(inode, wbc,
							     nr_to_write);
			return 0;
		}

		/*
		 * It's a data-integrity sync.  We must wait.
		 */
		inode_wait_for_writeback(inode, wb);
	}

	BUG_ON(inode->i_state & I_SYNC);

	/* Set I_SYNC, reset I_DIRTY_PAGES */
	inode->i_state |= I_SYNC;
	inode->i_state &= ~I_DIRTY_PAGES;
	/* drop both locks across the actual page I/O */
	spin_unlock(&inode->i_lock);
	spin_unlock(&wb->list_lock);

	ret = do_writepages(mapping, wbc);

	/*
	 * Make sure to wait on the data before writing out the metadata.
	 * This is important for filesystems that modify metadata on data
	 * I/O completion.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL) {
		int err = filemap_fdatawait(mapping);
		if (ret == 0)
			ret = err;
	}

	/*
	 * Some filesystems may redirty the inode during the writeback
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode->i_lock);
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	spin_unlock(&inode->i_lock);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		int err = write_inode(inode, wbc);
		if (ret == 0)
			ret = err;
	}

	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	inode->i_state &= ~I_SYNC;
	if (!(inode->i_state & I_FREEING)) {
		/*
		 * Sync livelock prevention. Each inode is tagged and synced in
		 * one shot. If still dirty, it will be redirty_tail()'ed below.
		 * Update the dirty time to prevent enqueue and sync it again.
		 */
		if ((inode->i_state & I_DIRTY) &&
		    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
			inode->dirtied_when = jiffies;

		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
			/*
			 * We didn't write back all the pages.  nfs_writepages()
			 * sometimes bales out without doing anything.
			 */
			inode->i_state |= I_DIRTY_PAGES;
			if (wbc->nr_to_write <= 0) {
				/*
				 * slice used up: queue for next turn
				 */
				requeue_io(inode, wb);
			} else {
				/*
				 * Writeback blocked by something other than
				 * congestion. Delay the inode for some time to
				 * avoid spinning on the CPU (100% iowait)
				 * retrying writeback of the dirty page/inode
				 * that cannot be performed immediately.
				 */
				redirty_tail(inode, wb);
			}
		} else if (inode->i_state & I_DIRTY) {
			/*
			 * Filesystems can dirty the inode during writeback
			 * operations, such as delayed allocation during
			 * submission or metadata updates after data IO
			 * completion.
			 */
			redirty_tail(inode, wb);
		} else {
			/*
			 * The inode is clean.  At this point we either have
			 * a reference to the inode or it's on its way out.
			 * No need to add it back to the LRU.
			 */
			list_del_init(&inode->i_wb_list);
		}
	}
	inode_sync_complete(inode);
	trace_writeback_single_inode(inode, wbc, nr_to_write);
	return ret;
}
1a12d8bd7   Wu Fengguang   writeback: scale ...
470
471
  static long writeback_chunk_size(struct backing_dev_info *bdi,
  				 struct wb_writeback_work *work)
d46db3d58   Wu Fengguang   writeback: make w...
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
  {
  	long pages;
  
  	/*
  	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
  	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
  	 * here avoids calling into writeback_inodes_wb() more than once.
  	 *
  	 * The intended call sequence for WB_SYNC_ALL writeback is:
  	 *
  	 *      wb_writeback()
  	 *          writeback_sb_inodes()       <== called only once
  	 *              write_cache_pages()     <== called once for each inode
  	 *                   (quickly) tag currently dirty pages
  	 *                   (maybe slowly) sync all tagged pages
  	 */
  	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
  		pages = LONG_MAX;
1a12d8bd7   Wu Fengguang   writeback: scale ...
490
491
492
493
494
495
496
  	else {
  		pages = min(bdi->avg_write_bandwidth / 2,
  			    global_dirty_limit / DIRTY_SCOPE);
  		pages = min(pages, work->nr_pages);
  		pages = round_down(pages + MIN_WRITEBACK_PAGES,
  				   MIN_WRITEBACK_PAGES);
  	}
d46db3d58   Wu Fengguang   writeback: make w...
497
498
499
  
  	return pages;
  }
03ba3782e   Jens Axboe   writeback: switch...
500
  /*
f11c9c5c2   Edward Shishkin   vfs: improve writ...
501
   * Write a portion of b_io inodes which belong to @sb.
edadfb10b   Christoph Hellwig   writeback: split ...
502
503
   *
   * If @only_this_sb is true, then find and write all such
f11c9c5c2   Edward Shishkin   vfs: improve writ...
504
505
   * inodes. Otherwise write only ones which go sequentially
   * in reverse order.
edadfb10b   Christoph Hellwig   writeback: split ...
506
   *
d46db3d58   Wu Fengguang   writeback: make w...
507
   * Return the number of pages and/or inodes written.
f11c9c5c2   Edward Shishkin   vfs: improve writ...
508
   */
d46db3d58   Wu Fengguang   writeback: make w...
509
510
511
  static long writeback_sb_inodes(struct super_block *sb,
  				struct bdi_writeback *wb,
  				struct wb_writeback_work *work)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
512
  {
d46db3d58   Wu Fengguang   writeback: make w...
513
514
515
516
517
518
519
520
521
522
523
524
  	struct writeback_control wbc = {
  		.sync_mode		= work->sync_mode,
  		.tagged_writepages	= work->tagged_writepages,
  		.for_kupdate		= work->for_kupdate,
  		.for_background		= work->for_background,
  		.range_cyclic		= work->range_cyclic,
  		.range_start		= 0,
  		.range_end		= LLONG_MAX,
  	};
  	unsigned long start_time = jiffies;
  	long write_chunk;
  	long wrote = 0;  /* count both pages and inodes */
03ba3782e   Jens Axboe   writeback: switch...
525
  	while (!list_empty(&wb->b_io)) {
7ccf19a80   Nick Piggin   fs: inode split I...
526
  		struct inode *inode = wb_inode(wb->b_io.prev);
edadfb10b   Christoph Hellwig   writeback: split ...
527
528
  
  		if (inode->i_sb != sb) {
d46db3d58   Wu Fengguang   writeback: make w...
529
  			if (work->sb) {
edadfb10b   Christoph Hellwig   writeback: split ...
530
531
532
533
534
  				/*
  				 * We only want to write back data for this
  				 * superblock, move all inodes not belonging
  				 * to it back onto the dirty list.
  				 */
f758eeabe   Christoph Hellwig   writeback: split ...
535
  				redirty_tail(inode, wb);
edadfb10b   Christoph Hellwig   writeback: split ...
536
537
538
539
540
541
542
543
  				continue;
  			}
  
  			/*
  			 * The inode belongs to a different superblock.
  			 * Bounce back to the caller to unpin this and
  			 * pin the next superblock.
  			 */
d46db3d58   Wu Fengguang   writeback: make w...
544
  			break;
edadfb10b   Christoph Hellwig   writeback: split ...
545
  		}
9843b76aa   Christoph Hellwig   fs: skip I_FREEIN...
546
547
548
549
550
  		/*
  		 * Don't bother with new inodes or inodes beeing freed, first
  		 * kind does not need peridic writeout yet, and for the latter
  		 * kind writeout is handled by the freer.
  		 */
250df6ed2   Dave Chinner   fs: protect inode...
551
  		spin_lock(&inode->i_lock);
9843b76aa   Christoph Hellwig   fs: skip I_FREEIN...
552
  		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
250df6ed2   Dave Chinner   fs: protect inode...
553
  			spin_unlock(&inode->i_lock);
fcc5c2221   Wu Fengguang   writeback: don't ...
554
  			redirty_tail(inode, wb);
7ef0d7377   Nick Piggin   fs: new inode i_s...
555
556
  			continue;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
  		__iget(inode);
1a12d8bd7   Wu Fengguang   writeback: scale ...
558
  		write_chunk = writeback_chunk_size(wb->bdi, work);
d46db3d58   Wu Fengguang   writeback: make w...
559
560
  		wbc.nr_to_write = write_chunk;
  		wbc.pages_skipped = 0;
250df6ed2   Dave Chinner   fs: protect inode...
561

d46db3d58   Wu Fengguang   writeback: make w...
562
  		writeback_single_inode(inode, wb, &wbc);
250df6ed2   Dave Chinner   fs: protect inode...
563

d46db3d58   Wu Fengguang   writeback: make w...
564
565
566
567
568
  		work->nr_pages -= write_chunk - wbc.nr_to_write;
  		wrote += write_chunk - wbc.nr_to_write;
  		if (!(inode->i_state & I_DIRTY))
  			wrote++;
  		if (wbc.pages_skipped) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
569
570
571
572
  			/*
  			 * writeback is not making progress due to locked
  			 * buffers.  Skip this inode for now.
  			 */
f758eeabe   Christoph Hellwig   writeback: split ...
573
  			redirty_tail(inode, wb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
574
  		}
0f1b1fd86   Dave Chinner   fs: pull inode->i...
575
  		spin_unlock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
576
  		spin_unlock(&wb->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577
  		iput(inode);
4ffc84442   OGAWA Hirofumi   [PATCH] Move cond...
578
  		cond_resched();
f758eeabe   Christoph Hellwig   writeback: split ...
579
  		spin_lock(&wb->list_lock);
d46db3d58   Wu Fengguang   writeback: make w...
580
581
582
583
584
585
586
587
588
  		/*
  		 * bail out to wb_writeback() often enough to check
  		 * background threshold and other termination conditions.
  		 */
  		if (wrote) {
  			if (time_is_before_jiffies(start_time + HZ / 10UL))
  				break;
  			if (work->nr_pages <= 0)
  				break;
8bc3be275   Fengguang Wu   writeback: speed ...
589
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
  	}
d46db3d58   Wu Fengguang   writeback: make w...
591
  	return wrote;
f11c9c5c2   Edward Shishkin   vfs: improve writ...
592
  }
d46db3d58   Wu Fengguang   writeback: make w...
593
594
  static long __writeback_inodes_wb(struct bdi_writeback *wb,
  				  struct wb_writeback_work *work)
f11c9c5c2   Edward Shishkin   vfs: improve writ...
595
  {
d46db3d58   Wu Fengguang   writeback: make w...
596
597
  	unsigned long start_time = jiffies;
  	long wrote = 0;
38f219776   Nick Piggin   fs: sync_sb_inode...
598

f11c9c5c2   Edward Shishkin   vfs: improve writ...
599
  	while (!list_empty(&wb->b_io)) {
7ccf19a80   Nick Piggin   fs: inode split I...
600
  		struct inode *inode = wb_inode(wb->b_io.prev);
f11c9c5c2   Edward Shishkin   vfs: improve writ...
601
  		struct super_block *sb = inode->i_sb;
9ecc2738a   Jens Axboe   writeback: make t...
602

12ad3ab66   Dave Chinner   superblock: move ...
603
  		if (!grab_super_passive(sb)) {
0e995816f   Wu Fengguang   don't busy retry ...
604
605
606
607
608
609
  			/*
  			 * grab_super_passive() may fail consistently due to
  			 * s_umount being grabbed by someone else. Don't use
  			 * requeue_io() to avoid busy retrying the inode/sb.
  			 */
  			redirty_tail(inode, wb);
edadfb10b   Christoph Hellwig   writeback: split ...
610
  			continue;
f11c9c5c2   Edward Shishkin   vfs: improve writ...
611
  		}
d46db3d58   Wu Fengguang   writeback: make w...
612
  		wrote += writeback_sb_inodes(sb, wb, work);
edadfb10b   Christoph Hellwig   writeback: split ...
613
  		drop_super(sb);
f11c9c5c2   Edward Shishkin   vfs: improve writ...
614

d46db3d58   Wu Fengguang   writeback: make w...
615
616
617
618
619
620
621
  		/* refer to the same tests at the end of writeback_sb_inodes */
  		if (wrote) {
  			if (time_is_before_jiffies(start_time + HZ / 10UL))
  				break;
  			if (work->nr_pages <= 0)
  				break;
  		}
f11c9c5c2   Edward Shishkin   vfs: improve writ...
622
  	}
66f3b8e2e   Jens Axboe   writeback: move d...
623
  	/* Leave any unwritten inodes on b_io */
d46db3d58   Wu Fengguang   writeback: make w...
624
  	return wrote;
66f3b8e2e   Jens Axboe   writeback: move d...
625
  }
0e175a183   Curt Wohlgemuth   writeback: Add a ...
626
627
  long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
  				enum wb_reason reason)
edadfb10b   Christoph Hellwig   writeback: split ...
628
  {
d46db3d58   Wu Fengguang   writeback: make w...
629
630
631
632
  	struct wb_writeback_work work = {
  		.nr_pages	= nr_pages,
  		.sync_mode	= WB_SYNC_NONE,
  		.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
633
  		.reason		= reason,
d46db3d58   Wu Fengguang   writeback: make w...
634
  	};
edadfb10b   Christoph Hellwig   writeback: split ...
635

f758eeabe   Christoph Hellwig   writeback: split ...
636
  	spin_lock(&wb->list_lock);
424b351fe   Wu Fengguang   writeback: refill...
637
  	if (list_empty(&wb->b_io))
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
638
  		queue_io(wb, &work);
d46db3d58   Wu Fengguang   writeback: make w...
639
  	__writeback_inodes_wb(wb, &work);
f758eeabe   Christoph Hellwig   writeback: split ...
640
  	spin_unlock(&wb->list_lock);
edadfb10b   Christoph Hellwig   writeback: split ...
641

d46db3d58   Wu Fengguang   writeback: make w...
642
643
  	return nr_pages - work.nr_pages;
  }
03ba3782e   Jens Axboe   writeback: switch...
644

b00949aa2   Wu Fengguang   writeback: per-bd...
645
  static bool over_bground_thresh(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
646
647
  {
  	unsigned long background_thresh, dirty_thresh;
16c4042f0   Wu Fengguang   writeback: avoid ...
648
  	global_dirty_limits(&background_thresh, &dirty_thresh);
03ba3782e   Jens Axboe   writeback: switch...
649

b00949aa2   Wu Fengguang   writeback: per-bd...
650
651
652
653
654
655
656
657
658
  	if (global_page_state(NR_FILE_DIRTY) +
  	    global_page_state(NR_UNSTABLE_NFS) > background_thresh)
  		return true;
  
  	if (bdi_stat(bdi, BDI_RECLAIMABLE) >
  				bdi_dirty_limit(bdi, background_thresh))
  		return true;
  
  	return false;
03ba3782e   Jens Axboe   writeback: switch...
659
660
661
  }
  
  /*
e98be2d59   Wu Fengguang   writeback: bdi wr...
662
663
664
665
666
667
   * Called under wb->list_lock. If there are multiple wb per bdi,
   * only the flusher working on the first wb should do it.
   */
  static void wb_update_bandwidth(struct bdi_writeback *wb,
  				unsigned long start_time)
  {
af6a31138   Wu Fengguang   writeback: add bg...
668
  	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
e98be2d59   Wu Fengguang   writeback: bdi wr...
669
670
671
  }
  
  /*
03ba3782e   Jens Axboe   writeback: switch...
672
   * Explicit flushing or periodic writeback of "old" data.
66f3b8e2e   Jens Axboe   writeback: move d...
673
   *
03ba3782e   Jens Axboe   writeback: switch...
674
675
676
677
   * Define "old": the first time one of an inode's pages is dirtied, we mark the
   * dirtying-time in the inode's address_space.  So this periodic writeback code
   * just walks the superblock inode list, writing back any inodes which are
   * older than a specific point in time.
66f3b8e2e   Jens Axboe   writeback: move d...
678
   *
03ba3782e   Jens Axboe   writeback: switch...
679
680
681
   * Try to run once per dirty_writeback_interval.  But if a writeback event
   * takes longer than a dirty_writeback_interval interval, then leave a
   * one-second gap.
66f3b8e2e   Jens Axboe   writeback: move d...
682
   *
03ba3782e   Jens Axboe   writeback: switch...
683
684
   * older_than_this takes precedence over nr_to_write.  So we'll only write back
   * all dirty pages if they are all attached to "old" mappings.
66f3b8e2e   Jens Axboe   writeback: move d...
685
   */
c4a77a6c7   Jens Axboe   writeback: make w...
686
  static long wb_writeback(struct bdi_writeback *wb,
83ba7b071   Christoph Hellwig   writeback: simpli...
687
  			 struct wb_writeback_work *work)
66f3b8e2e   Jens Axboe   writeback: move d...
688
  {
e98be2d59   Wu Fengguang   writeback: bdi wr...
689
  	unsigned long wb_start = jiffies;
d46db3d58   Wu Fengguang   writeback: make w...
690
  	long nr_pages = work->nr_pages;
03ba3782e   Jens Axboe   writeback: switch...
691
  	unsigned long oldest_jif;
a5989bdc9   Jan Kara   fs: Fix busyloop ...
692
  	struct inode *inode;
d46db3d58   Wu Fengguang   writeback: make w...
693
  	long progress;
66f3b8e2e   Jens Axboe   writeback: move d...
694

e185dda89   Wu Fengguang   writeback: avoid ...
695
  	oldest_jif = jiffies;
d46db3d58   Wu Fengguang   writeback: make w...
696
  	work->older_than_this = &oldest_jif;
38f219776   Nick Piggin   fs: sync_sb_inode...
697

e8dfc3058   Wu Fengguang   writeback: elevat...
698
  	spin_lock(&wb->list_lock);
03ba3782e   Jens Axboe   writeback: switch...
699
700
  	for (;;) {
  		/*
d3ddec763   Wu Fengguang   writeback: stop b...
701
  		 * Stop writeback when nr_pages has been consumed
03ba3782e   Jens Axboe   writeback: switch...
702
  		 */
83ba7b071   Christoph Hellwig   writeback: simpli...
703
  		if (work->nr_pages <= 0)
03ba3782e   Jens Axboe   writeback: switch...
704
  			break;
66f3b8e2e   Jens Axboe   writeback: move d...
705

38f219776   Nick Piggin   fs: sync_sb_inode...
706
  		/*
aa373cf55   Jan Kara   writeback: stop b...
707
708
709
710
711
712
713
714
715
716
  		 * Background writeout and kupdate-style writeback may
  		 * run forever. Stop them if there is other work to do
  		 * so that e.g. sync can proceed. They'll be restarted
  		 * after the other works are all done.
  		 */
  		if ((work->for_background || work->for_kupdate) &&
  		    !list_empty(&wb->bdi->work_list))
  			break;
  
  		/*
d3ddec763   Wu Fengguang   writeback: stop b...
717
718
  		 * For background writeout, stop when we are below the
  		 * background dirty threshold
38f219776   Nick Piggin   fs: sync_sb_inode...
719
  		 */
b00949aa2   Wu Fengguang   writeback: per-bd...
720
  		if (work->for_background && !over_bground_thresh(wb->bdi))
03ba3782e   Jens Axboe   writeback: switch...
721
  			break;
38f219776   Nick Piggin   fs: sync_sb_inode...
722

1bc36b642   Jan Kara   writeback: Includ...
723
724
725
726
727
728
  		/*
  		 * Kupdate and background works are special and we want to
  		 * include all inodes that need writing. Livelock avoidance is
  		 * handled by these works yielding to any other work so we are
  		 * safe.
  		 */
ba9aa8399   Wu Fengguang   writeback: the ku...
729
730
731
  		if (work->for_kupdate) {
  			oldest_jif = jiffies -
  				msecs_to_jiffies(dirty_expire_interval * 10);
1bc36b642   Jan Kara   writeback: Includ...
732
733
  		} else if (work->for_background)
  			oldest_jif = jiffies;
028c2dd18   Dave Chinner   writeback: Add tr...
734

d46db3d58   Wu Fengguang   writeback: make w...
735
  		trace_writeback_start(wb->bdi, work);
e8dfc3058   Wu Fengguang   writeback: elevat...
736
  		if (list_empty(&wb->b_io))
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
737
  			queue_io(wb, work);
83ba7b071   Christoph Hellwig   writeback: simpli...
738
  		if (work->sb)
d46db3d58   Wu Fengguang   writeback: make w...
739
  			progress = writeback_sb_inodes(work->sb, wb, work);
edadfb10b   Christoph Hellwig   writeback: split ...
740
  		else
d46db3d58   Wu Fengguang   writeback: make w...
741
742
  			progress = __writeback_inodes_wb(wb, work);
  		trace_writeback_written(wb->bdi, work);
028c2dd18   Dave Chinner   writeback: Add tr...
743

e98be2d59   Wu Fengguang   writeback: bdi wr...
744
  		wb_update_bandwidth(wb, wb_start);
03ba3782e   Jens Axboe   writeback: switch...
745
746
  
  		/*
e6fb6da2e   Wu Fengguang   writeback: try mo...
747
748
749
750
751
752
  		 * Did we write something? Try for more
  		 *
  		 * Dirty inodes are moved to b_io for writeback in batches.
  		 * The completion of the current batch does not necessarily
  		 * mean the overall work is done. So we keep looping as long
  		 * as made some progress on cleaning pages or inodes.
03ba3782e   Jens Axboe   writeback: switch...
753
  		 */
d46db3d58   Wu Fengguang   writeback: make w...
754
  		if (progress)
71fd05a88   Jens Axboe   writeback: improv...
755
756
  			continue;
  		/*
e6fb6da2e   Wu Fengguang   writeback: try mo...
757
  		 * No more inodes for IO, bail
71fd05a88   Jens Axboe   writeback: improv...
758
  		 */
b7a2441f9   Wu Fengguang   writeback: remove...
759
  		if (list_empty(&wb->b_more_io))
03ba3782e   Jens Axboe   writeback: switch...
760
  			break;
71fd05a88   Jens Axboe   writeback: improv...
761
  		/*
71fd05a88   Jens Axboe   writeback: improv...
762
763
764
765
  		 * Nothing written. Wait for some inode to
  		 * become available for writeback. Otherwise
  		 * we'll just busyloop.
  		 */
71fd05a88   Jens Axboe   writeback: improv...
766
  		if (!list_empty(&wb->b_more_io))  {
d46db3d58   Wu Fengguang   writeback: make w...
767
  			trace_writeback_wait(wb->bdi, work);
7ccf19a80   Nick Piggin   fs: inode split I...
768
  			inode = wb_inode(wb->b_more_io.prev);
250df6ed2   Dave Chinner   fs: protect inode...
769
  			spin_lock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
770
  			inode_wait_for_writeback(inode, wb);
250df6ed2   Dave Chinner   fs: protect inode...
771
  			spin_unlock(&inode->i_lock);
03ba3782e   Jens Axboe   writeback: switch...
772
773
  		}
  	}
e8dfc3058   Wu Fengguang   writeback: elevat...
774
  	spin_unlock(&wb->list_lock);
03ba3782e   Jens Axboe   writeback: switch...
775

d46db3d58   Wu Fengguang   writeback: make w...
776
  	return nr_pages - work->nr_pages;
03ba3782e   Jens Axboe   writeback: switch...
777
778
779
  }
  
  /*
83ba7b071   Christoph Hellwig   writeback: simpli...
780
   * Return the next wb_writeback_work struct that hasn't been processed yet.
03ba3782e   Jens Axboe   writeback: switch...
781
   */
83ba7b071   Christoph Hellwig   writeback: simpli...
782
  static struct wb_writeback_work *
08852b6d6   Minchan Kim   writeback: remove...
783
  get_next_work_item(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
784
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
785
  	struct wb_writeback_work *work = NULL;
03ba3782e   Jens Axboe   writeback: switch...
786

6467716a3   Artem Bityutskiy   writeback: optimi...
787
  	spin_lock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
788
789
790
791
  	if (!list_empty(&bdi->work_list)) {
  		work = list_entry(bdi->work_list.next,
  				  struct wb_writeback_work, list);
  		list_del_init(&work->list);
03ba3782e   Jens Axboe   writeback: switch...
792
  	}
6467716a3   Artem Bityutskiy   writeback: optimi...
793
  	spin_unlock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
794
  	return work;
03ba3782e   Jens Axboe   writeback: switch...
795
  }
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
796
797
798
799
800
801
802
803
804
805
  /*
   * Add in the number of potentially dirty inodes, because each inode
   * write can dirty pagecache in the underlying blockdev.
   */
  static unsigned long get_nr_dirty_pages(void)
  {
  	return global_page_state(NR_FILE_DIRTY) +
  		global_page_state(NR_UNSTABLE_NFS) +
  		get_nr_dirty_inodes();
  }
6585027a5   Jan Kara   writeback: integr...
806
807
  static long wb_check_background_flush(struct bdi_writeback *wb)
  {
b00949aa2   Wu Fengguang   writeback: per-bd...
808
  	if (over_bground_thresh(wb->bdi)) {
6585027a5   Jan Kara   writeback: integr...
809
810
811
812
813
814
  
  		struct wb_writeback_work work = {
  			.nr_pages	= LONG_MAX,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_background	= 1,
  			.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
815
  			.reason		= WB_REASON_BACKGROUND,
6585027a5   Jan Kara   writeback: integr...
816
817
818
819
820
821
822
  		};
  
  		return wb_writeback(wb, &work);
  	}
  
  	return 0;
  }
03ba3782e   Jens Axboe   writeback: switch...
823
824
825
826
  static long wb_check_old_data_flush(struct bdi_writeback *wb)
  {
  	unsigned long expired;
  	long nr_pages;
69b62d01e   Jens Axboe   writeback: disabl...
827
828
829
830
831
  	/*
  	 * When set to zero, disable periodic writeback
  	 */
  	if (!dirty_writeback_interval)
  		return 0;
03ba3782e   Jens Axboe   writeback: switch...
832
833
834
835
836
837
  	expired = wb->last_old_flush +
  			msecs_to_jiffies(dirty_writeback_interval * 10);
  	if (time_before(jiffies, expired))
  		return 0;
  
  	wb->last_old_flush = jiffies;
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
838
  	nr_pages = get_nr_dirty_pages();
03ba3782e   Jens Axboe   writeback: switch...
839

c4a77a6c7   Jens Axboe   writeback: make w...
840
  	if (nr_pages) {
83ba7b071   Christoph Hellwig   writeback: simpli...
841
  		struct wb_writeback_work work = {
c4a77a6c7   Jens Axboe   writeback: make w...
842
843
844
845
  			.nr_pages	= nr_pages,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_kupdate	= 1,
  			.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
846
  			.reason		= WB_REASON_PERIODIC,
c4a77a6c7   Jens Axboe   writeback: make w...
847
  		};
83ba7b071   Christoph Hellwig   writeback: simpli...
848
  		return wb_writeback(wb, &work);
c4a77a6c7   Jens Axboe   writeback: make w...
849
  	}
03ba3782e   Jens Axboe   writeback: switch...
850
851
852
853
854
855
856
857
858
859
  
  	return 0;
  }
  
  /*
   * Retrieve work items and do the writeback they describe
   */
  long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  {
  	struct backing_dev_info *bdi = wb->bdi;
83ba7b071   Christoph Hellwig   writeback: simpli...
860
  	struct wb_writeback_work *work;
c4a77a6c7   Jens Axboe   writeback: make w...
861
  	long wrote = 0;
03ba3782e   Jens Axboe   writeback: switch...
862

81d73a32d   Jan Kara   mm: fix writeback...
863
  	set_bit(BDI_writeback_running, &wb->bdi->state);
08852b6d6   Minchan Kim   writeback: remove...
864
  	while ((work = get_next_work_item(bdi)) != NULL) {
03ba3782e   Jens Axboe   writeback: switch...
865
866
  		/*
  		 * Override sync mode, in case we must wait for completion
83ba7b071   Christoph Hellwig   writeback: simpli...
867
  		 * because this thread is exiting now.
03ba3782e   Jens Axboe   writeback: switch...
868
869
  		 */
  		if (force_wait)
83ba7b071   Christoph Hellwig   writeback: simpli...
870
  			work->sync_mode = WB_SYNC_ALL;
03ba3782e   Jens Axboe   writeback: switch...
871

455b28646   Dave Chinner   writeback: Initia...
872
  		trace_writeback_exec(bdi, work);
83ba7b071   Christoph Hellwig   writeback: simpli...
873
  		wrote += wb_writeback(wb, work);
03ba3782e   Jens Axboe   writeback: switch...
874
875
  
  		/*
83ba7b071   Christoph Hellwig   writeback: simpli...
876
877
  		 * Notify the caller of completion if this is a synchronous
  		 * work item, otherwise just free it.
03ba3782e   Jens Axboe   writeback: switch...
878
  		 */
83ba7b071   Christoph Hellwig   writeback: simpli...
879
880
881
882
  		if (work->done)
  			complete(work->done);
  		else
  			kfree(work);
03ba3782e   Jens Axboe   writeback: switch...
883
884
885
886
887
888
  	}
  
  	/*
  	 * Check for periodic writeback, kupdated() style
  	 */
  	wrote += wb_check_old_data_flush(wb);
6585027a5   Jan Kara   writeback: integr...
889
  	wrote += wb_check_background_flush(wb);
81d73a32d   Jan Kara   mm: fix writeback...
890
  	clear_bit(BDI_writeback_running, &wb->bdi->state);
03ba3782e   Jens Axboe   writeback: switch...
891
892
893
894
895
896
897
898
  
  	return wrote;
  }
  
  /*
   * Handle writeback of dirty data for the device backed by this bdi. Also
   * wakes up periodically and does kupdated style flushing.
   */
082439004   Christoph Hellwig   writeback: merge ...
899
  int bdi_writeback_thread(void *data)
03ba3782e   Jens Axboe   writeback: switch...
900
  {
082439004   Christoph Hellwig   writeback: merge ...
901
902
  	struct bdi_writeback *wb = data;
  	struct backing_dev_info *bdi = wb->bdi;
03ba3782e   Jens Axboe   writeback: switch...
903
  	long pages_written;
766f91641   Peter Zijlstra   kernel: remove PF...
904
  	current->flags |= PF_SWAPWRITE;
082439004   Christoph Hellwig   writeback: merge ...
905
  	set_freezable();
ecd584030   Artem Bityutskiy   writeback: move l...
906
  	wb->last_active = jiffies;
082439004   Christoph Hellwig   writeback: merge ...
907
908
909
910
911
  
  	/*
  	 * Our parent may run at a different priority, just set us to normal
  	 */
  	set_user_nice(current, 0);
455b28646   Dave Chinner   writeback: Initia...
912
  	trace_writeback_thread_start(bdi);
8a32c441c   Tejun Heo   freezer: implemen...
913
  	while (!kthread_freezable_should_stop(NULL)) {
6467716a3   Artem Bityutskiy   writeback: optimi...
914
915
916
917
918
  		/*
  		 * Remove own delayed wake-up timer, since we are already awake
  		 * and we'll take care of the preriodic write-back.
  		 */
  		del_timer(&wb->wakeup_timer);
03ba3782e   Jens Axboe   writeback: switch...
919
  		pages_written = wb_do_writeback(wb, 0);
455b28646   Dave Chinner   writeback: Initia...
920
  		trace_writeback_pages_written(pages_written);
03ba3782e   Jens Axboe   writeback: switch...
921
  		if (pages_written)
ecd584030   Artem Bityutskiy   writeback: move l...
922
  			wb->last_active = jiffies;
03ba3782e   Jens Axboe   writeback: switch...
923

297252c81   Artem Bityutskiy   writeback: do not...
924
  		set_current_state(TASK_INTERRUPTIBLE);
b76b4014f   J. Bruce Fields   writeback: Fix lo...
925
  		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
f9eadbbd4   Jens Axboe   writeback: bdi_wr...
926
  			__set_current_state(TASK_RUNNING);
297252c81   Artem Bityutskiy   writeback: do not...
927
  			continue;
03ba3782e   Jens Axboe   writeback: switch...
928
  		}
253c34e9b   Artem Bityutskiy   writeback: preven...
929
  		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
fff5b85aa   Artem Bityutskiy   writeback: move b...
930
  			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
253c34e9b   Artem Bityutskiy   writeback: preven...
931
932
933
934
935
936
  		else {
  			/*
  			 * We have nothing to do, so can go sleep without any
  			 * timeout and save power. When a work is queued or
  			 * something is made dirty - we will be woken up.
  			 */
297252c81   Artem Bityutskiy   writeback: do not...
937
  			schedule();
f9eadbbd4   Jens Axboe   writeback: bdi_wr...
938
  		}
03ba3782e   Jens Axboe   writeback: switch...
939
  	}
fff5b85aa   Artem Bityutskiy   writeback: move b...
940
  	/* Flush any work that raced with us exiting */
082439004   Christoph Hellwig   writeback: merge ...
941
942
  	if (!list_empty(&bdi->work_list))
  		wb_do_writeback(wb, 1);
455b28646   Dave Chinner   writeback: Initia...
943
944
  
  	trace_writeback_thread_stop(bdi);
03ba3782e   Jens Axboe   writeback: switch...
945
946
  	return 0;
  }
082439004   Christoph Hellwig   writeback: merge ...
947

03ba3782e   Jens Axboe   writeback: switch...
948
  /*
b8c2f3474   Christoph Hellwig   writeback: simpli...
949
950
   * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
   * the whole world.
03ba3782e   Jens Axboe   writeback: switch...
951
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
952
  void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
03ba3782e   Jens Axboe   writeback: switch...
953
  {
b8c2f3474   Christoph Hellwig   writeback: simpli...
954
  	struct backing_dev_info *bdi;
03ba3782e   Jens Axboe   writeback: switch...
955

83ba7b071   Christoph Hellwig   writeback: simpli...
956
957
  	if (!nr_pages) {
  		nr_pages = global_page_state(NR_FILE_DIRTY) +
b8c2f3474   Christoph Hellwig   writeback: simpli...
958
959
  				global_page_state(NR_UNSTABLE_NFS);
  	}
03ba3782e   Jens Axboe   writeback: switch...
960

b8c2f3474   Christoph Hellwig   writeback: simpli...
961
  	rcu_read_lock();
cfc4ba536   Jens Axboe   writeback: use RC...
962
  	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
03ba3782e   Jens Axboe   writeback: switch...
963
964
  		if (!bdi_has_dirty_io(bdi))
  			continue;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
965
  		__bdi_start_writeback(bdi, nr_pages, false, reason);
03ba3782e   Jens Axboe   writeback: switch...
966
  	}
cfc4ba536   Jens Axboe   writeback: use RC...
967
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
968
  }
03ba3782e   Jens Axboe   writeback: switch...
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
  static noinline void block_dump___mark_inode_dirty(struct inode *inode)
  {
  	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
  		struct dentry *dentry;
  		const char *name = "?";
  
  		dentry = d_find_alias(inode);
  		if (dentry) {
  			spin_lock(&dentry->d_lock);
  			name = (const char *) dentry->d_name.name;
  		}
  		printk(KERN_DEBUG
  		       "%s(%d): dirtied inode %lu (%s) on %s
  ",
  		       current->comm, task_pid_nr(current), inode->i_ino,
  		       name, inode->i_sb->s_id);
  		if (dentry) {
  			spin_unlock(&dentry->d_lock);
  			dput(dentry);
  		}
  	}
  }
  
  /**
   *	__mark_inode_dirty -	internal function
   *	@inode: inode to mark
   *	@flags: what kind of dirty (i.e. I_DIRTY_SYNC)
   *	Mark an inode as dirty. Callers should use mark_inode_dirty or
   *  	mark_inode_dirty_sync.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
   *
03ba3782e   Jens Axboe   writeback: switch...
999
1000
1001
1002
1003
1004
1005
1006
1007
   * Put the inode on the super block's dirty list.
   *
   * CAREFUL! We mark it dirty unconditionally, but move it onto the
   * dirty list only if it is hashed or if it refers to a blockdev.
   * If it was not hashed, it will never be added to the dirty list
   * even if it is later hashed, as it will have been marked dirty already.
   *
   * In short, make sure you hash any inodes _before_ you start marking
   * them dirty.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1008
   *
03ba3782e   Jens Axboe   writeback: switch...
1009
1010
1011
1012
1013
1014
   * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
   * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
   * the kernel-internal blockdev inode represents the dirtying time of the
   * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
   * page->mapping->host, so the page-dirtying time is recorded in the internal
   * blockdev inode.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1015
   */
03ba3782e   Jens Axboe   writeback: switch...
1016
  void __mark_inode_dirty(struct inode *inode, int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1017
  {
03ba3782e   Jens Axboe   writeback: switch...
1018
  	struct super_block *sb = inode->i_sb;
253c34e9b   Artem Bityutskiy   writeback: preven...
1019
  	struct backing_dev_info *bdi = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1020

03ba3782e   Jens Axboe   writeback: switch...
1021
1022
1023
1024
1025
1026
  	/*
  	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
  	 * dirty the inode itself
  	 */
  	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
  		if (sb->s_op->dirty_inode)
aa3857295   Christoph Hellwig   fs: pass exact ty...
1027
  			sb->s_op->dirty_inode(inode, flags);
03ba3782e   Jens Axboe   writeback: switch...
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
  	}
  
  	/*
  	 * make sure that changes are seen by all cpus before we test i_state
  	 * -- mikulas
  	 */
  	smp_mb();
  
  	/* avoid the locking if we can */
  	if ((inode->i_state & flags) == flags)
  		return;
  
  	if (unlikely(block_dump))
  		block_dump___mark_inode_dirty(inode);
250df6ed2   Dave Chinner   fs: protect inode...
1042
  	spin_lock(&inode->i_lock);
03ba3782e   Jens Axboe   writeback: switch...
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
  	if ((inode->i_state & flags) != flags) {
  		const int was_dirty = inode->i_state & I_DIRTY;
  
  		inode->i_state |= flags;
  
  		/*
  		 * If the inode is being synced, just update its dirty state.
  		 * The unlocker will place the inode on the appropriate
  		 * superblock list, based upon its state.
  		 */
  		if (inode->i_state & I_SYNC)
250df6ed2   Dave Chinner   fs: protect inode...
1054
  			goto out_unlock_inode;
03ba3782e   Jens Axboe   writeback: switch...
1055
1056
1057
1058
1059
1060
  
  		/*
  		 * Only add valid (hashed) inodes to the superblock's
  		 * dirty list.  Add blockdev inodes as well.
  		 */
  		if (!S_ISBLK(inode->i_mode)) {
1d3382cbf   Al Viro   new helper: inode...
1061
  			if (inode_unhashed(inode))
250df6ed2   Dave Chinner   fs: protect inode...
1062
  				goto out_unlock_inode;
03ba3782e   Jens Axboe   writeback: switch...
1063
  		}
a4ffdde6e   Al Viro   simplify checks f...
1064
  		if (inode->i_state & I_FREEING)
250df6ed2   Dave Chinner   fs: protect inode...
1065
  			goto out_unlock_inode;
03ba3782e   Jens Axboe   writeback: switch...
1066
1067
1068
1069
1070
1071
  
  		/*
  		 * If the inode was already on b_dirty/b_io/b_more_io, don't
  		 * reposition it (that would break b_dirty time-ordering).
  		 */
  		if (!was_dirty) {
a66979aba   Dave Chinner   fs: move i_wb_lis...
1072
  			bool wakeup_bdi = false;
253c34e9b   Artem Bityutskiy   writeback: preven...
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
  			bdi = inode_to_bdi(inode);
  
  			if (bdi_cap_writeback_dirty(bdi)) {
  				WARN(!test_bit(BDI_registered, &bdi->state),
  				     "bdi-%s not registered
  ", bdi->name);
  
  				/*
  				 * If this is the first dirty inode for this
  				 * bdi, we have to wake-up the corresponding
  				 * bdi thread to make sure background
  				 * write-back happens later.
  				 */
  				if (!wb_has_dirty_io(&bdi->wb))
  					wakeup_bdi = true;
500b067c5   Jens Axboe   writeback: check ...
1088
  			}
03ba3782e   Jens Axboe   writeback: switch...
1089

a66979aba   Dave Chinner   fs: move i_wb_lis...
1090
  			spin_unlock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
1091
  			spin_lock(&bdi->wb.list_lock);
03ba3782e   Jens Axboe   writeback: switch...
1092
  			inode->dirtied_when = jiffies;
7ccf19a80   Nick Piggin   fs: inode split I...
1093
  			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
f758eeabe   Christoph Hellwig   writeback: split ...
1094
  			spin_unlock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
1095
1096
1097
1098
  
  			if (wakeup_bdi)
  				bdi_wakeup_thread_delayed(bdi);
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1099
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1100
  	}
250df6ed2   Dave Chinner   fs: protect inode...
1101
1102
  out_unlock_inode:
  	spin_unlock(&inode->i_lock);
253c34e9b   Artem Bityutskiy   writeback: preven...
1103

03ba3782e   Jens Axboe   writeback: switch...
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
  }
  EXPORT_SYMBOL(__mark_inode_dirty);
  
/*
 * wait_sb_inodes - wait for page writeback on all inodes of a superblock
 * @sb: the superblock
 *
 * Data-integrity helper used by sync_inodes_sb(): walk sb->s_inodes and
 * filemap_fdatawait() every inode that has pages in its mapping.  This
 * catches pages that were dirtied before our sync call but whose writeout
 * was started before we wrote them out — such inodes may not be on the
 * dirty lists, yet we still have to wait for their writeback to finish.
 */
static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
	 * because there may have been pages dirtied before our sync
	 * call, but which had writeout started before we write it out.
	 * In which case, the inode may not be on the dirty list, but
	 * we still have to wait for that writeout.
	 */
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		/*
		 * Skip inodes being freed or not yet set up, and inodes
		 * whose mapping has no pages to wait on.
		 */
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* Pin the inode before dropping the list lock. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		filemap_fdatawait(mapping);

		cond_resched();

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}
d8a8559cd   Jens Axboe   writeback: get ri...
1174
  /**
3259f8bed   Chris Mason   Add new functions...
1175
   * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
d8a8559cd   Jens Axboe   writeback: get ri...
1176
   * @sb: the superblock
3259f8bed   Chris Mason   Add new functions...
1177
   * @nr: the number of pages to write
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1178
   * @reason: reason why some writeback work initiated
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1179
   *
d8a8559cd   Jens Axboe   writeback: get ri...
1180
1181
   * Start writeback on some inodes on this super_block. No guarantees are made
   * on how many (if any) will be written, and this function does not wait
3259f8bed   Chris Mason   Add new functions...
1182
   * for IO completion of submitted IO.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1183
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1184
1185
1186
  void writeback_inodes_sb_nr(struct super_block *sb,
  			    unsigned long nr,
  			    enum wb_reason reason)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1187
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
1188
1189
  	DECLARE_COMPLETION_ONSTACK(done);
  	struct wb_writeback_work work = {
6e6938b6d   Wu Fengguang   writeback: introd...
1190
1191
1192
1193
1194
  		.sb			= sb,
  		.sync_mode		= WB_SYNC_NONE,
  		.tagged_writepages	= 1,
  		.done			= &done,
  		.nr_pages		= nr,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1195
  		.reason			= reason,
3c4d71653   Christoph Hellwig   writeback: queue ...
1196
  	};
d8a8559cd   Jens Axboe   writeback: get ri...
1197

cf37e9724   Christoph Hellwig   writeback: enforc...
1198
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
83ba7b071   Christoph Hellwig   writeback: simpli...
1199
1200
  	bdi_queue_work(sb->s_bdi, &work);
  	wait_for_completion(&done);
e913fc825   Jens Axboe   writeback: fix WB...
1201
  }
3259f8bed   Chris Mason   Add new functions...
1202
1203
1204
1205
1206
  EXPORT_SYMBOL(writeback_inodes_sb_nr);
  
  /**
   * writeback_inodes_sb	-	writeback dirty inodes from given super_block
   * @sb: the superblock
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1207
   * @reason: reason why some writeback work was initiated
3259f8bed   Chris Mason   Add new functions...
1208
1209
1210
1211
1212
   *
   * Start writeback on some inodes on this super_block. No guarantees are made
   * on how many (if any) will be written, and this function does not wait
   * for IO completion of submitted IO.
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1213
  void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
3259f8bed   Chris Mason   Add new functions...
1214
  {
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1215
  	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
3259f8bed   Chris Mason   Add new functions...
1216
  }
0e3c9a228   Jens Axboe   Revert "writeback...
1217
  EXPORT_SYMBOL(writeback_inodes_sb);
e913fc825   Jens Axboe   writeback: fix WB...
1218
1219
  
  /**
17bd55d03   Eric Sandeen   fs-writeback: Add...
1220
1221
   * writeback_inodes_sb_if_idle	-	start writeback if none underway
   * @sb: the superblock
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1222
   * @reason: reason why some writeback work was initiated
17bd55d03   Eric Sandeen   fs-writeback: Add...
1223
1224
1225
1226
   *
   * Invoke writeback_inodes_sb if no writeback is currently underway.
   * Returns 1 if writeback was started, 0 if not.
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1227
  int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
17bd55d03   Eric Sandeen   fs-writeback: Add...
1228
1229
  {
  	if (!writeback_in_progress(sb->s_bdi)) {
cf37e9724   Christoph Hellwig   writeback: enforc...
1230
  		down_read(&sb->s_umount);
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1231
  		writeback_inodes_sb(sb, reason);
cf37e9724   Christoph Hellwig   writeback: enforc...
1232
  		up_read(&sb->s_umount);
17bd55d03   Eric Sandeen   fs-writeback: Add...
1233
1234
1235
1236
1237
1238
1239
  		return 1;
  	} else
  		return 0;
  }
  EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  
/**
 * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
 * Returns 1 if writeback was started, 0 if not.
 */
int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
				   unsigned long nr,
				   enum wb_reason reason)
{
	if (!writeback_in_progress(sb->s_bdi)) {
		down_read(&sb->s_umount);
		writeback_inodes_sb_nr(sb, nr, reason);
		up_read(&sb->s_umount);
		return 1;
	} else
		return 0;
}
EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
  
  /**
d8a8559cd   Jens Axboe   writeback: get ri...
1263
1264
1265
1266
   * sync_inodes_sb	-	sync sb inode pages
   * @sb: the superblock
   *
   * This function writes and waits on any dirty inode belonging to this
cb9ef8d5e   Stefan Hajnoczi   fs/fs-writeback.c...
1267
   * super_block.
d8a8559cd   Jens Axboe   writeback: get ri...
1268
   */
b6e51316d   Jens Axboe   writeback: separa...
1269
  void sync_inodes_sb(struct super_block *sb)
d8a8559cd   Jens Axboe   writeback: get ri...
1270
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
1271
1272
  	DECLARE_COMPLETION_ONSTACK(done);
  	struct wb_writeback_work work = {
3c4d71653   Christoph Hellwig   writeback: queue ...
1273
1274
1275
1276
  		.sb		= sb,
  		.sync_mode	= WB_SYNC_ALL,
  		.nr_pages	= LONG_MAX,
  		.range_cyclic	= 0,
83ba7b071   Christoph Hellwig   writeback: simpli...
1277
  		.done		= &done,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1278
  		.reason		= WB_REASON_SYNC,
3c4d71653   Christoph Hellwig   writeback: queue ...
1279
  	};
cf37e9724   Christoph Hellwig   writeback: enforc...
1280
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
83ba7b071   Christoph Hellwig   writeback: simpli...
1281
1282
  	bdi_queue_work(sb->s_bdi, &work);
  	wait_for_completion(&done);
b6e51316d   Jens Axboe   writeback: separa...
1283
  	wait_sb_inodes(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284
  }
d8a8559cd   Jens Axboe   writeback: get ri...
1285
  EXPORT_SYMBOL(sync_inodes_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1286

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1287
/**
 * write_inode_now	-	write an inode to disk
 * @inode: inode to write to disk
 * @sync: whether the write should be synchronous or not
 *
 * This function commits an inode to disk immediately if it is dirty. This is
 * primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
 *
 * Returns the result of writeback_single_inode().
 */
int write_inode_now(struct inode *inode, int sync)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
	int ret;
	struct writeback_control wbc = {
		.nr_to_write = LONG_MAX,
		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	/*
	 * Mapping can't do dirty-page writeback: write no data pages
	 * (presumably leaving only the inode itself — confirm against
	 * writeback_single_inode()).
	 */
	if (!mapping_cap_writeback_dirty(inode->i_mapping))
		wbc.nr_to_write = 0;

	might_sleep();
	/* Lock order: wb->list_lock is taken outside inode->i_lock. */
	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	ret = writeback_single_inode(inode, wb, &wbc);
	spin_unlock(&inode->i_lock);
	spin_unlock(&wb->list_lock);
	if (sync)
		inode_sync_wait(inode);
	return ret;
}
EXPORT_SYMBOL(write_inode_now);
  
/**
 * sync_inode - write an inode and its pages to disk.
 * @inode: the inode to sync
 * @wbc: controls the writeback mode
 *
 * sync_inode() will write an inode and its pages to disk.  It will also
 * correctly update the inode on its superblock's dirty inode lists and will
 * update inode->i_state.
 *
 * The caller must have a ref on the inode.
 *
 * Returns the result of writeback_single_inode().
 */
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
	int ret;

	/* Same lock order as write_inode_now(): list_lock, then i_lock. */
	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	ret = writeback_single_inode(inode, wb, wbc);
	spin_unlock(&inode->i_lock);
	spin_unlock(&wb->list_lock);
	return ret;
}
EXPORT_SYMBOL(sync_inode);
c37650161   Christoph Hellwig   fs: add sync_inod...
1346
1347
  
  /**
c691b9d98   Andrew Morton   sync_inode_metada...
1348
   * sync_inode_metadata - write an inode to disk
c37650161   Christoph Hellwig   fs: add sync_inod...
1349
1350
1351
   * @inode: the inode to sync
   * @wait: wait for I/O to complete.
   *
c691b9d98   Andrew Morton   sync_inode_metada...
1352
   * Write an inode to disk and adjust its dirty state after completion.
c37650161   Christoph Hellwig   fs: add sync_inod...
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
   *
   * Note: only writes the actual inode, no associated data or other metadata.
   */
  int sync_inode_metadata(struct inode *inode, int wait)
  {
  	struct writeback_control wbc = {
  		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
  		.nr_to_write = 0, /* metadata-only */
  	};
  
  	return sync_inode(inode, &wbc);
  }
  EXPORT_SYMBOL(sync_inode_metadata);