Blame view

fs/fs-writeback.c 37.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   * fs/fs-writeback.c
   *
   * Copyright (C) 2002, Linus Torvalds.
   *
   * Contains all the functions related to writing back and waiting
   * upon dirty inodes against superblocks, and writing back dirty
   * pages against inodes.  ie: data writeback.  Writeout of the
   * inode itself is not handled here.
   *
e1f8e8744   Francois Cami   Remove Andrew Mor...
11
   * 10Apr2002	Andrew Morton
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
   *		Split out of fs/inode.c
   *		Additions for address_space-based writeback
   */
  
  #include <linux/kernel.h>
f5ff8422b   Jens Axboe   Fix warnings with...
17
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/spinlock.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
19
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
  #include <linux/sched.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
03ba3782e   Jens Axboe   writeback: switch...
23
24
  #include <linux/kthread.h>
  #include <linux/freezer.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
28
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/buffer_head.h>
455b28646   Dave Chinner   writeback: Initia...
29
  #include <linux/tracepoint.h>
07f3f05c1   David Howells   [PATCH] BLOCK: Mo...
30
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31

d0bceac74   Jens Axboe   writeback: get ri...
32
  /*
c4a77a6c7   Jens Axboe   writeback: make w...
33
34
   * Passed into wb_writeback(), essentially a subset of writeback_control
   */
83ba7b071   Christoph Hellwig   writeback: simpli...
35
struct wb_writeback_work {
	long nr_pages;			/* budget: pages left to write for this work item */
	struct super_block *sb;		/* if set, only write inodes of this superblock */
	unsigned long *older_than_this;	/* if set, skip inodes dirtied after this time */
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;	/* tag-and-sync in one pass (livelock avoidance) */
	unsigned int for_kupdate:1;	/* periodic kupdate-style writeback */
	unsigned int range_cyclic:1;	/* cyclic writeback over the whole range */
	unsigned int for_background:1;	/* background (over-threshold) writeback */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct completion *done;	/* set if the caller waits */
};
0e175a183   Curt Wohlgemuth   writeback: Add a ...
49
50
51
52
53
54
55
56
57
58
/*
 * Human-readable names for the writeback initiation reasons, indexed by
 * enum wb_reason (used by the writeback tracepoints).
 * NOTE(review): must be kept in sync with enum wb_reason — confirm order and
 * completeness against its definition in <linux/writeback.h>.
 */
const char *wb_reason_name[] = {
	[WB_REASON_BACKGROUND]		= "background",
	[WB_REASON_TRY_TO_FREE_PAGES]	= "try_to_free_pages",
	[WB_REASON_SYNC]		= "sync",
	[WB_REASON_PERIODIC]		= "periodic",
	[WB_REASON_LAPTOP_TIMER]	= "laptop_timer",
	[WB_REASON_FREE_MORE_MEM]	= "free_more_memory",
	[WB_REASON_FS_FREE_SPACE]	= "fs_free_space",
	[WB_REASON_FORKER_THREAD]	= "forker_thread"
};
455b28646   Dave Chinner   writeback: Initia...
59
60
61
62
63
64
65
  /*
   * Include the creation of the trace points after defining the
   * wb_writeback_work structure so that the definition remains local to this
   * file.
   */
  #define CREATE_TRACE_POINTS
  #include <trace/events/writeback.h>
455b28646   Dave Chinner   writeback: Initia...
66
67
68
69
  /*
   * We don't actually have pdflush, but this one is exported though /proc...
   */
  int nr_pdflush_threads;
f11b00f3b   Adrian Bunk   fs/fs-writeback.c...
70
71
72
73
  /**
   * writeback_in_progress - determine whether there is writeback in progress
   * @bdi: the device's backing_dev_info structure.
   *
03ba3782e   Jens Axboe   writeback: switch...
74
75
   * Determine whether there is writeback waiting to be handled against a
   * backing device.
f11b00f3b   Adrian Bunk   fs/fs-writeback.c...
76
77
78
   */
  int writeback_in_progress(struct backing_dev_info *bdi)
  {
81d73a32d   Jan Kara   mm: fix writeback...
79
  	return test_bit(BDI_writeback_running, &bdi->state);
f11b00f3b   Adrian Bunk   fs/fs-writeback.c...
80
  }
692ebd17c   Jan Kara   bdi: Fix warnings...
81
82
83
  static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
  {
  	struct super_block *sb = inode->i_sb;
692ebd17c   Jan Kara   bdi: Fix warnings...
84

aaead25b9   Christoph Hellwig   writeback: always...
85
86
87
88
  	if (strcmp(sb->s_type->name, "bdev") == 0)
  		return inode->i_mapping->backing_dev_info;
  
  	return sb->s_bdi;
692ebd17c   Jan Kara   bdi: Fix warnings...
89
  }
7ccf19a80   Nick Piggin   fs: inode split I...
90
91
92
93
  static inline struct inode *wb_inode(struct list_head *head)
  {
  	return list_entry(head, struct inode, i_wb_list);
  }
6585027a5   Jan Kara   writeback: integr...
94
95
  /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
  static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
96
  {
fff5b85aa   Artem Bityutskiy   writeback: move b...
97
98
99
100
101
102
103
  	if (bdi->wb.task) {
  		wake_up_process(bdi->wb.task);
  	} else {
  		/*
  		 * The bdi thread isn't there, wake up the forker thread which
  		 * will create and run it.
  		 */
03ba3782e   Jens Axboe   writeback: switch...
104
  		wake_up_process(default_backing_dev_info.wb.task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
105
  	}
6585027a5   Jan Kara   writeback: integr...
106
107
108
109
110
111
112
113
114
115
116
117
  }
  
  static void bdi_queue_work(struct backing_dev_info *bdi,
  			   struct wb_writeback_work *work)
  {
  	trace_writeback_queue(bdi, work);
  
  	spin_lock_bh(&bdi->wb_lock);
  	list_add_tail(&work->list, &bdi->work_list);
  	if (!bdi->wb.task)
  		trace_writeback_nothread(bdi, work);
  	bdi_wakeup_flusher(bdi);
6467716a3   Artem Bityutskiy   writeback: optimi...
118
  	spin_unlock_bh(&bdi->wb_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119
  }
83ba7b071   Christoph Hellwig   writeback: simpli...
120
121
  static void
  __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
122
  		      bool range_cyclic, enum wb_reason reason)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
123
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
124
  	struct wb_writeback_work *work;
03ba3782e   Jens Axboe   writeback: switch...
125

bcddc3f01   Jens Axboe   writeback: inline...
126
127
128
129
  	/*
  	 * This is WB_SYNC_NONE writeback, so if allocation fails just
  	 * wakeup the thread for old dirty data writeback
  	 */
83ba7b071   Christoph Hellwig   writeback: simpli...
130
131
  	work = kzalloc(sizeof(*work), GFP_ATOMIC);
  	if (!work) {
455b28646   Dave Chinner   writeback: Initia...
132
133
  		if (bdi->wb.task) {
  			trace_writeback_nowork(bdi);
83ba7b071   Christoph Hellwig   writeback: simpli...
134
  			wake_up_process(bdi->wb.task);
455b28646   Dave Chinner   writeback: Initia...
135
  		}
83ba7b071   Christoph Hellwig   writeback: simpli...
136
  		return;
bcddc3f01   Jens Axboe   writeback: inline...
137
  	}
03ba3782e   Jens Axboe   writeback: switch...
138

83ba7b071   Christoph Hellwig   writeback: simpli...
139
140
141
  	work->sync_mode	= WB_SYNC_NONE;
  	work->nr_pages	= nr_pages;
  	work->range_cyclic = range_cyclic;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
142
  	work->reason	= reason;
03ba3782e   Jens Axboe   writeback: switch...
143

83ba7b071   Christoph Hellwig   writeback: simpli...
144
  	bdi_queue_work(bdi, work);
b6e51316d   Jens Axboe   writeback: separa...
145
146
147
148
149
150
  }
  
  /**
   * bdi_start_writeback - start writeback
   * @bdi: the backing device to write from
   * @nr_pages: the number of pages to write
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
151
   * @reason: reason why some writeback work was initiated
b6e51316d   Jens Axboe   writeback: separa...
152
153
154
   *
   * Description:
   *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
25985edce   Lucas De Marchi   Fix common misspe...
155
   *   started when this function returns, we make no guarantees on
0e3c9a228   Jens Axboe   Revert "writeback...
156
   *   completion. Caller need not hold sb s_umount semaphore.
b6e51316d   Jens Axboe   writeback: separa...
157
158
   *
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
159
160
  void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  			enum wb_reason reason)
b6e51316d   Jens Axboe   writeback: separa...
161
  {
0e175a183   Curt Wohlgemuth   writeback: Add a ...
162
  	__bdi_start_writeback(bdi, nr_pages, true, reason);
c5444198c   Christoph Hellwig   writeback: simpli...
163
  }
d3ddec763   Wu Fengguang   writeback: stop b...
164

c5444198c   Christoph Hellwig   writeback: simpli...
165
166
167
168
169
  /**
   * bdi_start_background_writeback - start background writeback
   * @bdi: the backing device to write from
   *
   * Description:
6585027a5   Jan Kara   writeback: integr...
170
171
172
173
   *   This makes sure WB_SYNC_NONE background writeback happens. When
   *   this function returns, it is only guaranteed that for given BDI
   *   some IO is happening if we are over background dirty threshold.
   *   Caller need not hold sb s_umount semaphore.
c5444198c   Christoph Hellwig   writeback: simpli...
174
175
176
   */
  void bdi_start_background_writeback(struct backing_dev_info *bdi)
  {
6585027a5   Jan Kara   writeback: integr...
177
178
179
180
  	/*
  	 * We just wake up the flusher thread. It will perform background
  	 * writeback as soon as there is no other work to do.
  	 */
71927e84e   Wu Fengguang   writeback: trace ...
181
  	trace_writeback_wake_background(bdi);
6585027a5   Jan Kara   writeback: integr...
182
183
184
  	spin_lock_bh(&bdi->wb_lock);
  	bdi_wakeup_flusher(bdi);
  	spin_unlock_bh(&bdi->wb_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
186
187
  }
  
  /*
a66979aba   Dave Chinner   fs: move i_wb_lis...
188
189
190
191
   * Remove the inode from the writeback list it is on.
   */
  void inode_wb_list_del(struct inode *inode)
  {
f758eeabe   Christoph Hellwig   writeback: split ...
192
193
194
  	struct backing_dev_info *bdi = inode_to_bdi(inode);
  
  	spin_lock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
195
  	list_del_init(&inode->i_wb_list);
f758eeabe   Christoph Hellwig   writeback: split ...
196
  	spin_unlock(&bdi->wb.list_lock);
a66979aba   Dave Chinner   fs: move i_wb_lis...
197
  }
a66979aba   Dave Chinner   fs: move i_wb_lis...
198
  /*
6610a0bc8   Andrew Morton   writeback: fix ti...
199
200
201
202
   * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
   * furthest end of its superblock's dirty-inode list.
   *
   * Before stamping the inode's ->dirtied_when, we check to see whether it is
66f3b8e2e   Jens Axboe   writeback: move d...
203
   * already the most-recently-dirtied inode on the b_dirty list.  If that is
6610a0bc8   Andrew Morton   writeback: fix ti...
204
205
206
   * the case then the inode must have been redirtied while it was being written
   * out and we don't reset its dirtied_when.
   */
f758eeabe   Christoph Hellwig   writeback: split ...
207
  static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
6610a0bc8   Andrew Morton   writeback: fix ti...
208
  {
f758eeabe   Christoph Hellwig   writeback: split ...
209
  	assert_spin_locked(&wb->list_lock);
03ba3782e   Jens Axboe   writeback: switch...
210
  	if (!list_empty(&wb->b_dirty)) {
66f3b8e2e   Jens Axboe   writeback: move d...
211
  		struct inode *tail;
6610a0bc8   Andrew Morton   writeback: fix ti...
212

7ccf19a80   Nick Piggin   fs: inode split I...
213
  		tail = wb_inode(wb->b_dirty.next);
66f3b8e2e   Jens Axboe   writeback: move d...
214
  		if (time_before(inode->dirtied_when, tail->dirtied_when))
6610a0bc8   Andrew Morton   writeback: fix ti...
215
216
  			inode->dirtied_when = jiffies;
  	}
7ccf19a80   Nick Piggin   fs: inode split I...
217
  	list_move(&inode->i_wb_list, &wb->b_dirty);
6610a0bc8   Andrew Morton   writeback: fix ti...
218
219
220
  }
  
  /*
66f3b8e2e   Jens Axboe   writeback: move d...
221
   * requeue inode for re-scanning after bdi->b_io list is exhausted.
c986d1e2a   Andrew Morton   writeback: fix ti...
222
   */
f758eeabe   Christoph Hellwig   writeback: split ...
223
  static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
c986d1e2a   Andrew Morton   writeback: fix ti...
224
  {
f758eeabe   Christoph Hellwig   writeback: split ...
225
  	assert_spin_locked(&wb->list_lock);
7ccf19a80   Nick Piggin   fs: inode split I...
226
  	list_move(&inode->i_wb_list, &wb->b_more_io);
c986d1e2a   Andrew Morton   writeback: fix ti...
227
  }
1c0eeaf56   Joern Engel   introduce I_SYNC
228
229
230
  static void inode_sync_complete(struct inode *inode)
  {
  	/*
a66979aba   Dave Chinner   fs: move i_wb_lis...
231
  	 * Prevent speculative execution through
f758eeabe   Christoph Hellwig   writeback: split ...
232
  	 * spin_unlock(&wb->list_lock);
1c0eeaf56   Joern Engel   introduce I_SYNC
233
  	 */
a66979aba   Dave Chinner   fs: move i_wb_lis...
234

1c0eeaf56   Joern Engel   introduce I_SYNC
235
236
237
  	smp_mb();
  	wake_up_bit(&inode->i_state, __I_SYNC);
  }
d2caa3c54   Jeff Layton   writeback: guard ...
238
239
240
241
242
243
244
245
  static bool inode_dirtied_after(struct inode *inode, unsigned long t)
  {
  	bool ret = time_after(inode->dirtied_when, t);
  #ifndef CONFIG_64BIT
  	/*
  	 * For inodes being constantly redirtied, dirtied_when can get stuck.
  	 * It _appears_ to be in the future, but is actually in distant past.
  	 * This test is necessary to prevent such wrapped-around relative times
5b0830cb9   Jens Axboe   writeback: get ri...
246
  	 * from permanently stopping the whole bdi writeback.
d2caa3c54   Jeff Layton   writeback: guard ...
247
248
249
250
251
  	 */
  	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
  #endif
  	return ret;
  }
c986d1e2a   Andrew Morton   writeback: fix ti...
252
  /*
2c1365791   Fengguang Wu   writeback: fix ti...
253
254
   * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
   */
e84d0a4f8   Wu Fengguang   writeback: trace ...
255
  static int move_expired_inodes(struct list_head *delaying_queue,
2c1365791   Fengguang Wu   writeback: fix ti...
256
  			       struct list_head *dispatch_queue,
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
257
  			       struct wb_writeback_work *work)
2c1365791   Fengguang Wu   writeback: fix ti...
258
  {
5c03449d3   Shaohua Li   writeback: move i...
259
260
  	LIST_HEAD(tmp);
  	struct list_head *pos, *node;
cf137307c   Jens Axboe   writeback: don't ...
261
  	struct super_block *sb = NULL;
5c03449d3   Shaohua Li   writeback: move i...
262
  	struct inode *inode;
cf137307c   Jens Axboe   writeback: don't ...
263
  	int do_sb_sort = 0;
e84d0a4f8   Wu Fengguang   writeback: trace ...
264
  	int moved = 0;
5c03449d3   Shaohua Li   writeback: move i...
265

2c1365791   Fengguang Wu   writeback: fix ti...
266
  	while (!list_empty(delaying_queue)) {
7ccf19a80   Nick Piggin   fs: inode split I...
267
  		inode = wb_inode(delaying_queue->prev);
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
268
269
  		if (work->older_than_this &&
  		    inode_dirtied_after(inode, *work->older_than_this))
2c1365791   Fengguang Wu   writeback: fix ti...
270
  			break;
cf137307c   Jens Axboe   writeback: don't ...
271
272
273
  		if (sb && sb != inode->i_sb)
  			do_sb_sort = 1;
  		sb = inode->i_sb;
7ccf19a80   Nick Piggin   fs: inode split I...
274
  		list_move(&inode->i_wb_list, &tmp);
e84d0a4f8   Wu Fengguang   writeback: trace ...
275
  		moved++;
5c03449d3   Shaohua Li   writeback: move i...
276
  	}
cf137307c   Jens Axboe   writeback: don't ...
277
278
279
  	/* just one sb in list, splice to dispatch_queue and we're done */
  	if (!do_sb_sort) {
  		list_splice(&tmp, dispatch_queue);
e84d0a4f8   Wu Fengguang   writeback: trace ...
280
  		goto out;
cf137307c   Jens Axboe   writeback: don't ...
281
  	}
5c03449d3   Shaohua Li   writeback: move i...
282
283
  	/* Move inodes from one superblock together */
  	while (!list_empty(&tmp)) {
7ccf19a80   Nick Piggin   fs: inode split I...
284
  		sb = wb_inode(tmp.prev)->i_sb;
5c03449d3   Shaohua Li   writeback: move i...
285
  		list_for_each_prev_safe(pos, node, &tmp) {
7ccf19a80   Nick Piggin   fs: inode split I...
286
  			inode = wb_inode(pos);
5c03449d3   Shaohua Li   writeback: move i...
287
  			if (inode->i_sb == sb)
7ccf19a80   Nick Piggin   fs: inode split I...
288
  				list_move(&inode->i_wb_list, dispatch_queue);
5c03449d3   Shaohua Li   writeback: move i...
289
  		}
2c1365791   Fengguang Wu   writeback: fix ti...
290
  	}
e84d0a4f8   Wu Fengguang   writeback: trace ...
291
292
  out:
  	return moved;
2c1365791   Fengguang Wu   writeback: fix ti...
293
294
295
296
  }
  
  /*
   * Queue all expired dirty inodes for io, eldest first.
4ea879b96   Wu Fengguang   writeback: fix qu...
297
298
299
300
301
302
303
304
   * Before
   *         newly dirtied     b_dirty    b_io    b_more_io
   *         =============>    gf         edc     BA
   * After
   *         newly dirtied     b_dirty    b_io    b_more_io
   *         =============>    g          fBAedc
   *                                           |
   *                                           +--> dequeue for IO
2c1365791   Fengguang Wu   writeback: fix ti...
305
   */
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
306
  static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
66f3b8e2e   Jens Axboe   writeback: move d...
307
  {
e84d0a4f8   Wu Fengguang   writeback: trace ...
308
  	int moved;
f758eeabe   Christoph Hellwig   writeback: split ...
309
  	assert_spin_locked(&wb->list_lock);
4ea879b96   Wu Fengguang   writeback: fix qu...
310
  	list_splice_init(&wb->b_more_io, &wb->b_io);
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
311
312
  	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
  	trace_writeback_queue_io(wb, work, moved);
66f3b8e2e   Jens Axboe   writeback: move d...
313
  }
a9185b41a   Christoph Hellwig   pass writeback_co...
314
  static int write_inode(struct inode *inode, struct writeback_control *wbc)
08d8e9749   Fengguang Wu   writeback: fix nt...
315
  {
03ba3782e   Jens Axboe   writeback: switch...
316
  	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
a9185b41a   Christoph Hellwig   pass writeback_co...
317
  		return inode->i_sb->s_op->write_inode(inode, wbc);
03ba3782e   Jens Axboe   writeback: switch...
318
  	return 0;
08d8e9749   Fengguang Wu   writeback: fix nt...
319
  }
08d8e9749   Fengguang Wu   writeback: fix nt...
320

2c1365791   Fengguang Wu   writeback: fix ti...
321
  /*
01c031945   Christoph Hellwig   cleanup __writeba...
322
323
   * Wait for writeback on an inode to complete.
   */
f758eeabe   Christoph Hellwig   writeback: split ...
324
325
  static void inode_wait_for_writeback(struct inode *inode,
  				     struct bdi_writeback *wb)
01c031945   Christoph Hellwig   cleanup __writeba...
326
327
328
329
330
  {
  	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
  	wait_queue_head_t *wqh;
  
  	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
250df6ed2   Dave Chinner   fs: protect inode...
331
332
  	while (inode->i_state & I_SYNC) {
  		spin_unlock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
333
  		spin_unlock(&wb->list_lock);
01c031945   Christoph Hellwig   cleanup __writeba...
334
  		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
f758eeabe   Christoph Hellwig   writeback: split ...
335
  		spin_lock(&wb->list_lock);
250df6ed2   Dave Chinner   fs: protect inode...
336
  		spin_lock(&inode->i_lock);
58a9d3d8d   Richard Kennedy   fs-writeback: che...
337
  	}
01c031945   Christoph Hellwig   cleanup __writeba...
338
339
340
  }
  
  /*
f758eeabe   Christoph Hellwig   writeback: split ...
341
   * Write out an inode's dirty pages.  Called under wb->list_lock and
0f1b1fd86   Dave Chinner   fs: pull inode->i...
342
343
   * inode->i_lock.  Either the caller has an active reference on the inode or
   * the inode has I_WILL_FREE set.
01c031945   Christoph Hellwig   cleanup __writeba...
344
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
345
346
347
348
349
   * If `wait' is set, wait on the writeout.
   *
   * The whole writeout design is quite complex and fragile.  We want to avoid
   * starvation of particular inodes when others are being redirtied, prevent
   * livelocks, etc.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
350
351
   */
static int
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
		       struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	long nr_to_write = wbc->nr_to_write;	/* saved for tracing only */
	unsigned dirty;
	int ret;

	/* caller holds both locks; they are dropped and retaken below */
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	/* sanity: caller must hold a ref unless the inode is being freed */
	if (!atomic_read(&inode->i_count))
		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
	else
		WARN_ON(inode->i_state & I_WILL_FREE);

	if (inode->i_state & I_SYNC) {
		/*
		 * If this inode is locked for writeback and we are not doing
		 * writeback-for-data-integrity, move it to b_more_io so that
		 * writeback can proceed with the other inodes on s_io.
		 *
		 * We'll have another go at writing back this inode when we
		 * completed a full scan of b_io.
		 */
		if (wbc->sync_mode != WB_SYNC_ALL) {
			requeue_io(inode, wb);
			trace_writeback_single_inode_requeue(inode, wbc,
							     nr_to_write);
			return 0;
		}

		/*
		 * It's a data-integrity sync.  We must wait.
		 */
		inode_wait_for_writeback(inode, wb);
	}

	BUG_ON(inode->i_state & I_SYNC);

	/* Set I_SYNC, reset I_DIRTY_PAGES */
	inode->i_state |= I_SYNC;
	inode->i_state &= ~I_DIRTY_PAGES;
	/* drop both locks for the (possibly long) data writeout */
	spin_unlock(&inode->i_lock);
	spin_unlock(&wb->list_lock);

	ret = do_writepages(mapping, wbc);

	/*
	 * Make sure to wait on the data before writing out the metadata.
	 * This is important for filesystems that modify metadata on data
	 * I/O completion.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL) {
		int err = filemap_fdatawait(mapping);
		if (ret == 0)
			ret = err;
	}

	/*
	 * Some filesystems may redirty the inode during the writeback
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode->i_lock);
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	spin_unlock(&inode->i_lock);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		int err = write_inode(inode, wbc);
		if (ret == 0)
			ret = err;
	}

	/* retake locks in the same order as the caller held them */
	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	inode->i_state &= ~I_SYNC;
	if (!(inode->i_state & I_FREEING)) {
		/*
		 * Sync livelock prevention. Each inode is tagged and synced in
		 * one shot. If still dirty, it will be redirty_tail()'ed below.
		 * Update the dirty time to prevent enqueue and sync it again.
		 */
		if ((inode->i_state & I_DIRTY) &&
		    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
			inode->dirtied_when = jiffies;

		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
			/*
			 * We didn't write back all the pages.  nfs_writepages()
			 * sometimes bales out without doing anything.
			 */
			inode->i_state |= I_DIRTY_PAGES;
			if (wbc->nr_to_write <= 0) {
				/*
				 * slice used up: queue for next turn
				 */
				requeue_io(inode, wb);
			} else {
				/*
				 * Writeback blocked by something other than
				 * congestion. Delay the inode for some time to
				 * avoid spinning on the CPU (100% iowait)
				 * retrying writeback of the dirty page/inode
				 * that cannot be performed immediately.
				 */
				redirty_tail(inode, wb);
			}
		} else if (inode->i_state & I_DIRTY) {
			/*
			 * Filesystems can dirty the inode during writeback
			 * operations, such as delayed allocation during
			 * submission or metadata updates after data IO
			 * completion.
			 */
			redirty_tail(inode, wb);
		} else {
			/*
			 * The inode is clean.  At this point we either have
			 * a reference to the inode or it's on it's way out.
			 * No need to add it back to the LRU.
			 */
			list_del_init(&inode->i_wb_list);
		}
	}
	inode_sync_complete(inode);
	trace_writeback_single_inode(inode, wbc, nr_to_write);
	return ret;
}
1a12d8bd7   Wu Fengguang   writeback: scale ...
475
476
  static long writeback_chunk_size(struct backing_dev_info *bdi,
  				 struct wb_writeback_work *work)
d46db3d58   Wu Fengguang   writeback: make w...
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
  {
  	long pages;
  
  	/*
  	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
  	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
  	 * here avoids calling into writeback_inodes_wb() more than once.
  	 *
  	 * The intended call sequence for WB_SYNC_ALL writeback is:
  	 *
  	 *      wb_writeback()
  	 *          writeback_sb_inodes()       <== called only once
  	 *              write_cache_pages()     <== called once for each inode
  	 *                   (quickly) tag currently dirty pages
  	 *                   (maybe slowly) sync all tagged pages
  	 */
  	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
  		pages = LONG_MAX;
1a12d8bd7   Wu Fengguang   writeback: scale ...
495
496
497
498
499
500
501
  	else {
  		pages = min(bdi->avg_write_bandwidth / 2,
  			    global_dirty_limit / DIRTY_SCOPE);
  		pages = min(pages, work->nr_pages);
  		pages = round_down(pages + MIN_WRITEBACK_PAGES,
  				   MIN_WRITEBACK_PAGES);
  	}
d46db3d58   Wu Fengguang   writeback: make w...
502
503
504
  
  	return pages;
  }
03ba3782e   Jens Axboe   writeback: switch...
505
/*
 * Write back a portion of the b_io inodes which belong to @sb.
 *
 * Inodes on b_io belonging to some other superblock are either moved
 * back onto the dirty list (when @work->sb is set, i.e. a single-sb
 * writeback was requested) or left in place for the caller to unpin
 * this sb and pin the next one.
 *
 * Must be called with wb->list_lock held; the lock (and the inode's
 * i_lock) is dropped and re-acquired around iput()/cond_resched().
 *
 * Return the number of pages and/or inodes written.
 */
static long writeback_sb_inodes(struct super_block *sb,
				struct bdi_writeback *wb,
				struct wb_writeback_work *work)
{
	struct writeback_control wbc = {
		.sync_mode		= work->sync_mode,
		.tagged_writepages	= work->tagged_writepages,
		.for_kupdate		= work->for_kupdate,
		.for_background		= work->for_background,
		.range_cyclic		= work->range_cyclic,
		.range_start		= 0,
		.range_end		= LLONG_MAX,
	};
	unsigned long start_time = jiffies;
	long write_chunk;
	long wrote = 0;  /* count both pages and inodes */

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);

		if (inode->i_sb != sb) {
			if (work->sb) {
				/*
				 * We only want to write back data for this
				 * superblock, move all inodes not belonging
				 * to it back onto the dirty list.
				 */
				redirty_tail(inode, wb);
				continue;
			}

			/*
			 * The inode belongs to a different superblock.
			 * Bounce back to the caller to unpin this and
			 * pin the next superblock.
			 */
			break;
		}
		/*
		 * Don't bother with new inodes or inodes being freed, the
		 * first kind does not need periodic writeout yet, and for the
		 * latter kind writeout is handled by the freer.
		 */
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			redirty_tail(inode, wb);
			continue;
		}
		/* Pin the inode so it stays valid while we write it out. */
		__iget(inode);
		write_chunk = writeback_chunk_size(wb->bdi, work);
		wbc.nr_to_write = write_chunk;
		wbc.pages_skipped = 0;

		writeback_single_inode(inode, wb, &wbc);

		/*
		 * nr_to_write counts down as pages are written, so the
		 * difference is the number of pages consumed this pass.
		 */
		work->nr_pages -= write_chunk - wbc.nr_to_write;
		wrote += write_chunk - wbc.nr_to_write;
		/* A fully cleaned inode counts as progress, too. */
		if (!(inode->i_state & I_DIRTY))
			wrote++;
		if (wbc.pages_skipped) {
			/*
			 * writeback is not making progress due to locked
			 * buffers.  Skip this inode for now.
			 */
			redirty_tail(inode, wb);
		}
		/*
		 * Drop both locks across iput()/cond_resched(), then retake
		 * list_lock for the next iteration.
		 */
		spin_unlock(&inode->i_lock);
		spin_unlock(&wb->list_lock);
		iput(inode);
		cond_resched();
		spin_lock(&wb->list_lock);
		/*
		 * bail out to wb_writeback() often enough to check
		 * background threshold and other termination conditions.
		 */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	return wrote;
}
d46db3d58   Wu Fengguang   writeback: make w...
598
599
/*
 * Walk wb->b_io from the tail, writing back inodes one superblock at a
 * time: pin the inode's sb with grab_super_passive(), hand the batch to
 * writeback_sb_inodes(), then drop the sb reference.
 *
 * Called with wb->list_lock held (writeback_sb_inodes() may drop and
 * retake it).  Unwritten inodes are left on b_io.  Returns the number
 * of pages and/or inodes written.
 */
static long __writeback_inodes_wb(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	unsigned long start_time = jiffies;
	long wrote = 0;

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);
		struct super_block *sb = inode->i_sb;

		if (!grab_super_passive(sb)) {
			/*
			 * grab_super_passive() may fail consistently due to
			 * s_umount being grabbed by someone else. Don't use
			 * requeue_io() to avoid busy retrying the inode/sb.
			 */
			redirty_tail(inode, wb);
			continue;
		}
		wrote += writeback_sb_inodes(sb, wb, work);
		drop_super(sb);

		/* refer to the same tests at the end of writeback_sb_inodes */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	/* Leave any unwritten inodes on b_io */
	return wrote;
}
0e175a183   Curt Wohlgemuth   writeback: Add a ...
631
632
  long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
  				enum wb_reason reason)
edadfb10b   Christoph Hellwig   writeback: split ...
633
  {
d46db3d58   Wu Fengguang   writeback: make w...
634
635
636
637
  	struct wb_writeback_work work = {
  		.nr_pages	= nr_pages,
  		.sync_mode	= WB_SYNC_NONE,
  		.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
638
  		.reason		= reason,
d46db3d58   Wu Fengguang   writeback: make w...
639
  	};
edadfb10b   Christoph Hellwig   writeback: split ...
640

f758eeabe   Christoph Hellwig   writeback: split ...
641
  	spin_lock(&wb->list_lock);
424b351fe   Wu Fengguang   writeback: refill...
642
  	if (list_empty(&wb->b_io))
ad4e38dd6   Curt Wohlgemuth   writeback: send w...
643
  		queue_io(wb, &work);
d46db3d58   Wu Fengguang   writeback: make w...
644
  	__writeback_inodes_wb(wb, &work);
f758eeabe   Christoph Hellwig   writeback: split ...
645
  	spin_unlock(&wb->list_lock);
edadfb10b   Christoph Hellwig   writeback: split ...
646

d46db3d58   Wu Fengguang   writeback: make w...
647
648
  	return nr_pages - work.nr_pages;
  }
03ba3782e   Jens Axboe   writeback: switch...
649

b00949aa2   Wu Fengguang   writeback: per-bd...
650
  static bool over_bground_thresh(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
651
652
  {
  	unsigned long background_thresh, dirty_thresh;
16c4042f0   Wu Fengguang   writeback: avoid ...
653
  	global_dirty_limits(&background_thresh, &dirty_thresh);
03ba3782e   Jens Axboe   writeback: switch...
654

b00949aa2   Wu Fengguang   writeback: per-bd...
655
656
657
658
659
660
661
662
663
  	if (global_page_state(NR_FILE_DIRTY) +
  	    global_page_state(NR_UNSTABLE_NFS) > background_thresh)
  		return true;
  
  	if (bdi_stat(bdi, BDI_RECLAIMABLE) >
  				bdi_dirty_limit(bdi, background_thresh))
  		return true;
  
  	return false;
03ba3782e   Jens Axboe   writeback: switch...
664
665
666
  }
  
/*
 * Called under wb->list_lock. If there are multiple wb per bdi,
 * only the flusher working on the first wb should do it.
 */
static void wb_update_bandwidth(struct bdi_writeback *wb,
				unsigned long start_time)
{
	/*
	 * NOTE(review): all rate/threshold inputs are passed as zero —
	 * presumably only the elapsed-time based bandwidth estimate is
	 * wanted here; confirm against __bdi_update_bandwidth().
	 */
	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
}
  
/*
 * Explicit flushing or periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark the
 * dirtying-time in the inode's address_space.  So this periodic writeback code
 * just walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than a dirty_writeback_interval interval, then leave a
 * one-second gap.
 *
 * older_than_this takes precedence over nr_to_write.  So we'll only write back
 * all dirty pages if they are all attached to "old" mappings.
 *
 * Returns the number of pages consumed from work->nr_pages.
 */
static long wb_writeback(struct bdi_writeback *wb,
			 struct wb_writeback_work *work)
{
	unsigned long wb_start = jiffies;
	long nr_pages = work->nr_pages;	/* remember the original budget */
	unsigned long oldest_jif;
	struct inode *inode;
	long progress;

	/*
	 * work->older_than_this points at stack storage, so it is only
	 * valid for the duration of this call.
	 */
	oldest_jif = jiffies;
	work->older_than_this = &oldest_jif;

	spin_lock(&wb->list_lock);
	for (;;) {
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
		if (work->nr_pages <= 0)
			break;

		/*
		 * Background writeout and kupdate-style writeback may
		 * run forever. Stop them if there is other work to do
		 * so that e.g. sync can proceed. They'll be restarted
		 * after the other works are all done.
		 */
		if ((work->for_background || work->for_kupdate) &&
		    !list_empty(&wb->bdi->work_list))
			break;

		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
		if (work->for_background && !over_bground_thresh(wb->bdi))
			break;

		/*
		 * Kupdate-style writeback only touches inodes dirtied before
		 * the expire cutoff; refresh the cutoff every pass.
		 */
		if (work->for_kupdate) {
			oldest_jif = jiffies -
				msecs_to_jiffies(dirty_expire_interval * 10);
			work->older_than_this = &oldest_jif;
		}

		trace_writeback_start(wb->bdi, work);
		if (list_empty(&wb->b_io))
			queue_io(wb, work);
		if (work->sb)
			progress = writeback_sb_inodes(work->sb, wb, work);
		else
			progress = __writeback_inodes_wb(wb, work);
		trace_writeback_written(wb->bdi, work);

		wb_update_bandwidth(wb, wb_start);

		/*
		 * Did we write something? Try for more
		 *
		 * Dirty inodes are moved to b_io for writeback in batches.
		 * The completion of the current batch does not necessarily
		 * mean the overall work is done. So we keep looping as long
		 * as made some progress on cleaning pages or inodes.
		 */
		if (progress)
			continue;
		/*
		 * No more inodes for IO, bail
		 */
		if (list_empty(&wb->b_more_io))
			break;
		/*
		 * Nothing written. Wait for some inode to
		 * become available for writeback. Otherwise
		 * we'll just busyloop.
		 */
		if (!list_empty(&wb->b_more_io))  {
			trace_writeback_wait(wb->bdi, work);
			inode = wb_inode(wb->b_more_io.prev);
			spin_lock(&inode->i_lock);
			inode_wait_for_writeback(inode, wb);
			spin_unlock(&inode->i_lock);
		}
	}
	spin_unlock(&wb->list_lock);

	return nr_pages - work->nr_pages;
}
  
  /*
83ba7b071   Christoph Hellwig   writeback: simpli...
779
   * Return the next wb_writeback_work struct that hasn't been processed yet.
03ba3782e   Jens Axboe   writeback: switch...
780
   */
83ba7b071   Christoph Hellwig   writeback: simpli...
781
  static struct wb_writeback_work *
08852b6d6   Minchan Kim   writeback: remove...
782
  get_next_work_item(struct backing_dev_info *bdi)
03ba3782e   Jens Axboe   writeback: switch...
783
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
784
  	struct wb_writeback_work *work = NULL;
03ba3782e   Jens Axboe   writeback: switch...
785

6467716a3   Artem Bityutskiy   writeback: optimi...
786
  	spin_lock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
787
788
789
790
  	if (!list_empty(&bdi->work_list)) {
  		work = list_entry(bdi->work_list.next,
  				  struct wb_writeback_work, list);
  		list_del_init(&work->list);
03ba3782e   Jens Axboe   writeback: switch...
791
  	}
6467716a3   Artem Bityutskiy   writeback: optimi...
792
  	spin_unlock_bh(&bdi->wb_lock);
83ba7b071   Christoph Hellwig   writeback: simpli...
793
  	return work;
03ba3782e   Jens Axboe   writeback: switch...
794
  }
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
795
796
797
798
799
800
801
802
803
804
  /*
   * Add in the number of potentially dirty inodes, because each inode
   * write can dirty pagecache in the underlying blockdev.
   */
  static unsigned long get_nr_dirty_pages(void)
  {
  	return global_page_state(NR_FILE_DIRTY) +
  		global_page_state(NR_UNSTABLE_NFS) +
  		get_nr_dirty_inodes();
  }
6585027a5   Jan Kara   writeback: integr...
805
806
  static long wb_check_background_flush(struct bdi_writeback *wb)
  {
b00949aa2   Wu Fengguang   writeback: per-bd...
807
  	if (over_bground_thresh(wb->bdi)) {
6585027a5   Jan Kara   writeback: integr...
808
809
810
811
812
813
  
  		struct wb_writeback_work work = {
  			.nr_pages	= LONG_MAX,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_background	= 1,
  			.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
814
  			.reason		= WB_REASON_BACKGROUND,
6585027a5   Jan Kara   writeback: integr...
815
816
817
818
819
820
821
  		};
  
  		return wb_writeback(wb, &work);
  	}
  
  	return 0;
  }
03ba3782e   Jens Axboe   writeback: switch...
822
823
824
825
  static long wb_check_old_data_flush(struct bdi_writeback *wb)
  {
  	unsigned long expired;
  	long nr_pages;
69b62d01e   Jens Axboe   writeback: disabl...
826
827
828
829
830
  	/*
  	 * When set to zero, disable periodic writeback
  	 */
  	if (!dirty_writeback_interval)
  		return 0;
03ba3782e   Jens Axboe   writeback: switch...
831
832
833
834
835
836
  	expired = wb->last_old_flush +
  			msecs_to_jiffies(dirty_writeback_interval * 10);
  	if (time_before(jiffies, expired))
  		return 0;
  
  	wb->last_old_flush = jiffies;
cdf01dd54   Linus Torvalds   fs-writeback.c: u...
837
  	nr_pages = get_nr_dirty_pages();
03ba3782e   Jens Axboe   writeback: switch...
838

c4a77a6c7   Jens Axboe   writeback: make w...
839
  	if (nr_pages) {
83ba7b071   Christoph Hellwig   writeback: simpli...
840
  		struct wb_writeback_work work = {
c4a77a6c7   Jens Axboe   writeback: make w...
841
842
843
844
  			.nr_pages	= nr_pages,
  			.sync_mode	= WB_SYNC_NONE,
  			.for_kupdate	= 1,
  			.range_cyclic	= 1,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
845
  			.reason		= WB_REASON_PERIODIC,
c4a77a6c7   Jens Axboe   writeback: make w...
846
  		};
83ba7b071   Christoph Hellwig   writeback: simpli...
847
  		return wb_writeback(wb, &work);
c4a77a6c7   Jens Axboe   writeback: make w...
848
  	}
03ba3782e   Jens Axboe   writeback: switch...
849
850
851
852
853
854
855
856
857
858
  
  	return 0;
  }
  
/*
 * Retrieve work items and do the writeback they describe.
 *
 * @force_wait: upgrade every queued work item to WB_SYNC_ALL; used when
 * the flusher thread is exiting and must not leave work unfinished.
 *
 * Sets BDI_writeback_running for the duration of the run, and finishes
 * with the periodic (kupdate-style) and background-threshold checks.
 * Returns the number of pages/inodes written.
 */
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
	struct backing_dev_info *bdi = wb->bdi;
	struct wb_writeback_work *work;
	long wrote = 0;

	set_bit(BDI_writeback_running, &wb->bdi->state);
	while ((work = get_next_work_item(bdi)) != NULL) {
		/*
		 * Override sync mode, in case we must wait for completion
		 * because this thread is exiting now.
		 */
		if (force_wait)
			work->sync_mode = WB_SYNC_ALL;

		trace_writeback_exec(bdi, work);
		wrote += wb_writeback(wb, work);

		/*
		 * Notify the caller of completion if this is a synchronous
		 * work item, otherwise just free it.  (A set ->done means
		 * the submitter owns the work and is waiting on it.)
		 */
		if (work->done)
			complete(work->done);
		else
			kfree(work);
	}

	/*
	 * Check for periodic writeback, kupdated() style
	 */
	wrote += wb_check_old_data_flush(wb);
	wrote += wb_check_background_flush(wb);
	clear_bit(BDI_writeback_running, &wb->bdi->state);

	return wrote;
}
  
/*
 * Handle writeback of dirty data for the device backed by this bdi. Also
 * wakes up periodically and does kupdated style flushing.
 */
int bdi_writeback_thread(void *data)
{
	struct bdi_writeback *wb = data;
	struct backing_dev_info *bdi = wb->bdi;
	long pages_written;

	current->flags |= PF_SWAPWRITE;
	set_freezable();
	wb->last_active = jiffies;

	/*
	 * Our parent may run at a different priority, just set us to normal
	 */
	set_user_nice(current, 0);

	trace_writeback_thread_start(bdi);

	while (!kthread_should_stop()) {
		/*
		 * Remove own delayed wake-up timer, since we are already awake
		 * and we'll take care of the periodic write-back.
		 */
		del_timer(&wb->wakeup_timer);

		pages_written = wb_do_writeback(wb, 0);

		trace_writeback_pages_written(pages_written);

		if (pages_written)
			wb->last_active = jiffies;

		/*
		 * Set the task state before re-checking the work list so
		 * that a wake-up arriving between the check and schedule()
		 * is not lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			continue;
		}

		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
		else {
			/*
			 * We have nothing to do, so can go sleep without any
			 * timeout and save power. When a work is queued or
			 * something is made dirty - we will be woken up.
			 */
			schedule();
		}

		try_to_freeze();
	}

	/* Flush any work that raced with us exiting */
	if (!list_empty(&bdi->work_list))
		wb_do_writeback(wb, 1);

	trace_writeback_thread_stop(bdi);

	return 0;
}
082439004   Christoph Hellwig   writeback: merge ...
948

03ba3782e   Jens Axboe   writeback: switch...
949
  /*
b8c2f3474   Christoph Hellwig   writeback: simpli...
950
951
   * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
   * the whole world.
03ba3782e   Jens Axboe   writeback: switch...
952
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
953
  void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
03ba3782e   Jens Axboe   writeback: switch...
954
  {
b8c2f3474   Christoph Hellwig   writeback: simpli...
955
  	struct backing_dev_info *bdi;
03ba3782e   Jens Axboe   writeback: switch...
956

83ba7b071   Christoph Hellwig   writeback: simpli...
957
958
  	if (!nr_pages) {
  		nr_pages = global_page_state(NR_FILE_DIRTY) +
b8c2f3474   Christoph Hellwig   writeback: simpli...
959
960
  				global_page_state(NR_UNSTABLE_NFS);
  	}
03ba3782e   Jens Axboe   writeback: switch...
961

b8c2f3474   Christoph Hellwig   writeback: simpli...
962
  	rcu_read_lock();
cfc4ba536   Jens Axboe   writeback: use RC...
963
  	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
03ba3782e   Jens Axboe   writeback: switch...
964
965
  		if (!bdi_has_dirty_io(bdi))
  			continue;
0e175a183   Curt Wohlgemuth   writeback: Add a ...
966
  		__bdi_start_writeback(bdi, nr_pages, false, reason);
03ba3782e   Jens Axboe   writeback: switch...
967
  	}
cfc4ba536   Jens Axboe   writeback: use RC...
968
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
  }
03ba3782e   Jens Axboe   writeback: switch...
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
/*
 * Log (at KERN_DEBUG) which task dirtied which inode, for the
 * block_dump sysctl.  Inode 0 of the internal "bdev" superblock is
 * skipped.  d_lock is held across the printk so the dentry name cannot
 * change underneath us.
 */
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
		struct dentry *dentry;
		const char *name = "?";

		dentry = d_find_alias(inode);
		if (dentry) {
			spin_lock(&dentry->d_lock);
			name = (const char *) dentry->d_name.name;
		}
		printk(KERN_DEBUG
		       "%s(%d): dirtied inode %lu (%s) on %s\n",
		       current->comm, task_pid_nr(current), inode->i_ino,
		       name, inode->i_sb->s_id);
		if (dentry) {
			spin_unlock(&dentry->d_lock);
			dput(dentry);
		}
	}
}
  
/**
 *	__mark_inode_dirty -	internal function
 *	@inode: inode to mark
 *	@flags: what kind of dirty (i.e. I_DIRTY_SYNC)
 *	Mark an inode as dirty. Callers should use mark_inode_dirty or
 *  	mark_inode_dirty_sync.
 *
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but move it onto the
 * dirty list only if it is hashed or if it refers to a blockdev.
 * If it was not hashed, it will never be added to the dirty list
 * even if it is later hashed, as it will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_ you start marking
 * them dirty.
 *
 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
 * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
 * the kernel-internal blockdev inode represents the dirtying time of the
 * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
 * page->mapping->host, so the page-dirtying time is recorded in the internal
 * blockdev inode.
 */
void __mark_inode_dirty(struct inode *inode, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct backing_dev_info *bdi = NULL;

	/*
	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
	 * dirty the inode itself
	 */
	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		if (sb->s_op->dirty_inode)
			sb->s_op->dirty_inode(inode, flags);
	}

	/*
	 * make sure that changes are seen by all cpus before we test i_state
	 * -- mikulas
	 */
	smp_mb();

	/* avoid the locking if we can */
	if ((inode->i_state & flags) == flags)
		return;

	if (unlikely(block_dump))
		block_dump___mark_inode_dirty(inode);

	spin_lock(&inode->i_lock);
	/* Re-check under i_lock: the lockless test above may have raced. */
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		inode->i_state |= flags;

		/*
		 * If the inode is being synced, just update its dirty state.
		 * The unlocker will place the inode on the appropriate
		 * superblock list, based upon its state.
		 */
		if (inode->i_state & I_SYNC)
			goto out_unlock_inode;

		/*
		 * Only add valid (hashed) inodes to the superblock's
		 * dirty list.  Add blockdev inodes as well.
		 */
		if (!S_ISBLK(inode->i_mode)) {
			if (inode_unhashed(inode))
				goto out_unlock_inode;
		}
		if (inode->i_state & I_FREEING)
			goto out_unlock_inode;

		/*
		 * If the inode was already on b_dirty/b_io/b_more_io, don't
		 * reposition it (that would break b_dirty time-ordering).
		 */
		if (!was_dirty) {
			bool wakeup_bdi = false;
			bdi = inode_to_bdi(inode);

			if (bdi_cap_writeback_dirty(bdi)) {
				WARN(!test_bit(BDI_registered, &bdi->state),
				     "bdi-%s not registered\n", bdi->name);

				/*
				 * If this is the first dirty inode for this
				 * bdi, we have to wake-up the corresponding
				 * bdi thread to make sure background
				 * write-back happens later.
				 */
				if (!wb_has_dirty_io(&bdi->wb))
					wakeup_bdi = true;
			}

			/*
			 * i_lock is dropped before taking wb.list_lock; the
			 * wakeup decision above was made while i_lock was
			 * still held.
			 */
			spin_unlock(&inode->i_lock);
			spin_lock(&bdi->wb.list_lock);
			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			spin_unlock(&bdi->wb.list_lock);

			if (wakeup_bdi)
				bdi_wakeup_thread_delayed(bdi);
			return;
		}
	}
out_unlock_inode:
	spin_unlock(&inode->i_lock);

}
EXPORT_SYMBOL(__mark_inode_dirty);
  
/*
 * Wait for in-flight writeback on every inode of @sb.
 *
 * Used by sync_inodes_sb() for the data-integrity pass: even after the
 * writeback work item has completed, pages whose writeout was started
 * earlier may still be under I/O, and such inodes need not sit on any
 * dirty list — so we walk the full s_inodes list and wait on each one.
 *
 * NOTE(review): the comment that previously lived here described the old
 * bdi/b_io queueing scheme and no longer matched this function.
 */
static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
	 * because there may have been pages dirtied before our sync
	 * call, but which had writeout started before we write it out.
	 * In which case, the inode may not be on the dirty list, but
	 * we still have to wait for that writeout.
	 */
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		/* Skip inodes being torn down or with no pagecache pages. */
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		/* Wait for all pages of this inode that are under writeback. */
		filemap_fdatawait(mapping);

		cond_resched();

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	/* Drop the reference kept from the final loop iteration. */
	iput(old_inode);
}
d8a8559cd   Jens Axboe   writeback: get ri...
1175
  /**
3259f8bed   Chris Mason   Add new functions...
1176
   * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
d8a8559cd   Jens Axboe   writeback: get ri...
1177
   * @sb: the superblock
3259f8bed   Chris Mason   Add new functions...
1178
   * @nr: the number of pages to write
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1179
   * @reason: reason why some writeback work initiated
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1180
   *
d8a8559cd   Jens Axboe   writeback: get ri...
1181
1182
   * Start writeback on some inodes on this super_block. No guarantees are made
   * on how many (if any) will be written, and this function does not wait
3259f8bed   Chris Mason   Add new functions...
1183
   * for IO completion of submitted IO.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1184
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1185
1186
1187
  void writeback_inodes_sb_nr(struct super_block *sb,
  			    unsigned long nr,
  			    enum wb_reason reason)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1188
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
1189
1190
  	DECLARE_COMPLETION_ONSTACK(done);
  	struct wb_writeback_work work = {
6e6938b6d   Wu Fengguang   writeback: introd...
1191
1192
1193
1194
1195
  		.sb			= sb,
  		.sync_mode		= WB_SYNC_NONE,
  		.tagged_writepages	= 1,
  		.done			= &done,
  		.nr_pages		= nr,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1196
  		.reason			= reason,
3c4d71653   Christoph Hellwig   writeback: queue ...
1197
  	};
d8a8559cd   Jens Axboe   writeback: get ri...
1198

cf37e9724   Christoph Hellwig   writeback: enforc...
1199
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
83ba7b071   Christoph Hellwig   writeback: simpli...
1200
1201
  	bdi_queue_work(sb->s_bdi, &work);
  	wait_for_completion(&done);
e913fc825   Jens Axboe   writeback: fix WB...
1202
  }
3259f8bed   Chris Mason   Add new functions...
1203
1204
1205
1206
1207
  EXPORT_SYMBOL(writeback_inodes_sb_nr);
  
  /**
   * writeback_inodes_sb	-	writeback dirty inodes from given super_block
   * @sb: the superblock
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1208
   * @reason: reason why some writeback work was initiated
3259f8bed   Chris Mason   Add new functions...
1209
1210
1211
1212
1213
   *
   * Start writeback on some inodes on this super_block. No guarantees are made
   * on how many (if any) will be written, and this function does not wait
   * for IO completion of submitted IO.
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1214
  void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
3259f8bed   Chris Mason   Add new functions...
1215
  {
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1216
  	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
3259f8bed   Chris Mason   Add new functions...
1217
  }
0e3c9a228   Jens Axboe   Revert "writeback...
1218
  EXPORT_SYMBOL(writeback_inodes_sb);
e913fc825   Jens Axboe   writeback: fix WB...
1219
1220
  
  /**
17bd55d03   Eric Sandeen   fs-writeback: Add...
1221
1222
   * writeback_inodes_sb_if_idle	-	start writeback if none underway
   * @sb: the superblock
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1223
   * @reason: reason why some writeback work was initiated
17bd55d03   Eric Sandeen   fs-writeback: Add...
1224
1225
1226
1227
   *
   * Invoke writeback_inodes_sb if no writeback is currently underway.
   * Returns 1 if writeback was started, 0 if not.
   */
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1228
  int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
17bd55d03   Eric Sandeen   fs-writeback: Add...
1229
1230
  {
  	if (!writeback_in_progress(sb->s_bdi)) {
cf37e9724   Christoph Hellwig   writeback: enforc...
1231
  		down_read(&sb->s_umount);
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1232
  		writeback_inodes_sb(sb, reason);
cf37e9724   Christoph Hellwig   writeback: enforc...
1233
  		up_read(&sb->s_umount);
17bd55d03   Eric Sandeen   fs-writeback: Add...
1234
1235
1236
1237
1238
1239
1240
  		return 1;
  	} else
  		return 0;
  }
  EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  
  /**
3259f8bed   Chris Mason   Add new functions...
1241
1242
1243
   * writeback_inodes_sb_if_idle	-	start writeback if none underway
   * @sb: the superblock
   * @nr: the number of pages to write
786228ab3   Marcos Paulo de Souza   writeback: Fix is...
1244
   * @reason: reason why some writeback work was initiated
3259f8bed   Chris Mason   Add new functions...
1245
1246
1247
1248
1249
   *
   * Invoke writeback_inodes_sb if no writeback is currently underway.
   * Returns 1 if writeback was started, 0 if not.
   */
  int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1250
1251
  				   unsigned long nr,
  				   enum wb_reason reason)
3259f8bed   Chris Mason   Add new functions...
1252
1253
1254
  {
  	if (!writeback_in_progress(sb->s_bdi)) {
  		down_read(&sb->s_umount);
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1255
  		writeback_inodes_sb_nr(sb, nr, reason);
3259f8bed   Chris Mason   Add new functions...
1256
1257
1258
1259
1260
1261
1262
1263
  		up_read(&sb->s_umount);
  		return 1;
  	} else
  		return 0;
  }
  EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
  
  /**
d8a8559cd   Jens Axboe   writeback: get ri...
1264
1265
1266
1267
   * sync_inodes_sb	-	sync sb inode pages
   * @sb: the superblock
   *
   * This function writes and waits on any dirty inode belonging to this
cb9ef8d5e   Stefan Hajnoczi   fs/fs-writeback.c...
1268
   * super_block.
d8a8559cd   Jens Axboe   writeback: get ri...
1269
   */
b6e51316d   Jens Axboe   writeback: separa...
1270
  void sync_inodes_sb(struct super_block *sb)
d8a8559cd   Jens Axboe   writeback: get ri...
1271
  {
83ba7b071   Christoph Hellwig   writeback: simpli...
1272
1273
  	DECLARE_COMPLETION_ONSTACK(done);
  	struct wb_writeback_work work = {
3c4d71653   Christoph Hellwig   writeback: queue ...
1274
1275
1276
1277
  		.sb		= sb,
  		.sync_mode	= WB_SYNC_ALL,
  		.nr_pages	= LONG_MAX,
  		.range_cyclic	= 0,
83ba7b071   Christoph Hellwig   writeback: simpli...
1278
  		.done		= &done,
0e175a183   Curt Wohlgemuth   writeback: Add a ...
1279
  		.reason		= WB_REASON_SYNC,
3c4d71653   Christoph Hellwig   writeback: queue ...
1280
  	};
cf37e9724   Christoph Hellwig   writeback: enforc...
1281
  	WARN_ON(!rwsem_is_locked(&sb->s_umount));
83ba7b071   Christoph Hellwig   writeback: simpli...
1282
1283
  	bdi_queue_work(sb->s_bdi, &work);
  	wait_for_completion(&done);
b6e51316d   Jens Axboe   writeback: separa...
1284
  	wait_sb_inodes(sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1285
  }
d8a8559cd   Jens Axboe   writeback: get ri...
1286
  EXPORT_SYMBOL(sync_inodes_sb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1287

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1288
  /**
7f04c26d7   Andrea Arcangeli   [PATCH] fix nr_un...
1289
1290
1291
1292
1293
1294
   * write_inode_now	-	write an inode to disk
   * @inode: inode to write to disk
   * @sync: whether the write should be synchronous or not
   *
   * This function commits an inode to disk immediately if it is dirty. This is
   * primarily needed by knfsd.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1295
   *
7f04c26d7   Andrea Arcangeli   [PATCH] fix nr_un...
1296
   * The caller must either have a ref on the inode or must have set I_WILL_FREE.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1297
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1298
1299
  int write_inode_now(struct inode *inode, int sync)
  {
f758eeabe   Christoph Hellwig   writeback: split ...
1300
  	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
1302
1303
  	int ret;
  	struct writeback_control wbc = {
  		.nr_to_write = LONG_MAX,
18914b188   Mike Galbraith   write_inode_now()...
1304
  		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
111ebb6e6   OGAWA Hirofumi   [PATCH] writeback...
1305
1306
  		.range_start = 0,
  		.range_end = LLONG_MAX,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1307
1308
1309
  	};
  
  	if (!mapping_cap_writeback_dirty(inode->i_mapping))
49364ce25   Andrew Morton   [PATCH] write_ino...
1310
  		wbc.nr_to_write = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1311
1312
  
  	might_sleep();
f758eeabe   Christoph Hellwig   writeback: split ...
1313
  	spin_lock(&wb->list_lock);
0f1b1fd86   Dave Chinner   fs: pull inode->i...
1314
  	spin_lock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
1315
  	ret = writeback_single_inode(inode, wb, &wbc);
0f1b1fd86   Dave Chinner   fs: pull inode->i...
1316
  	spin_unlock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
1317
  	spin_unlock(&wb->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1318
  	if (sync)
1c0eeaf56   Joern Engel   introduce I_SYNC
1319
  		inode_sync_wait(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
  	return ret;
  }
  EXPORT_SYMBOL(write_inode_now);
  
  /**
   * sync_inode - write an inode and its pages to disk.
   * @inode: the inode to sync
   * @wbc: controls the writeback mode
   *
   * sync_inode() will write an inode and its pages to disk.  It will also
   * correctly update the inode on its superblock's dirty inode lists and will
   * update inode->i_state.
   *
   * The caller must have a ref on the inode.
   */
  int sync_inode(struct inode *inode, struct writeback_control *wbc)
  {
f758eeabe   Christoph Hellwig   writeback: split ...
1337
  	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1338
  	int ret;
f758eeabe   Christoph Hellwig   writeback: split ...
1339
  	spin_lock(&wb->list_lock);
0f1b1fd86   Dave Chinner   fs: pull inode->i...
1340
  	spin_lock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
1341
  	ret = writeback_single_inode(inode, wb, wbc);
0f1b1fd86   Dave Chinner   fs: pull inode->i...
1342
  	spin_unlock(&inode->i_lock);
f758eeabe   Christoph Hellwig   writeback: split ...
1343
  	spin_unlock(&wb->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1344
1345
1346
  	return ret;
  }
  EXPORT_SYMBOL(sync_inode);
c37650161   Christoph Hellwig   fs: add sync_inod...
1347
1348
  
  /**
c691b9d98   Andrew Morton   sync_inode_metada...
1349
   * sync_inode_metadata - write an inode to disk
c37650161   Christoph Hellwig   fs: add sync_inod...
1350
1351
1352
   * @inode: the inode to sync
   * @wait: wait for I/O to complete.
   *
c691b9d98   Andrew Morton   sync_inode_metada...
1353
   * Write an inode to disk and adjust its dirty state after completion.
c37650161   Christoph Hellwig   fs: add sync_inod...
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
   *
   * Note: only writes the actual inode, no associated data or other metadata.
   */
  int sync_inode_metadata(struct inode *inode, int wait)
  {
  	struct writeback_control wbc = {
  		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
  		.nr_to_write = 0, /* metadata-only */
  	};
  
  	return sync_inode(inode, &wbc);
  }
  EXPORT_SYMBOL(sync_inode_metadata);