mm/backing-dev.c

#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>

static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

struct backing_dev_info default_backing_dev_info = {
	.name		= "default",
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);

struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;

/*
 * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
 * reader side protection for bdi_pending_list. bdi_list has RCU reader side
 * locking.
 */
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);

static struct task_struct *sync_supers_tsk;
static struct timer_list sync_supers_timer;

static int bdi_sync_supers(void *);
static void sync_supers_timer_fn(unsigned long);

void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
{
	if (wb1 < wb2) {
		spin_lock(&wb1->list_lock);
		spin_lock_nested(&wb2->list_lock, 1);
	} else {
		spin_lock(&wb2->list_lock);
		spin_lock_nested(&wb1->list_lock, 1);
	}
}
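
/*
 * Illustration (not part of the original file): bdi_lock_two() avoids AB-BA
 * deadlock by always taking the lock with the lower address first, so two
 * tasks locking the same pair in opposite argument order still acquire the
 * locks in one global order:
 *
 *	bdi_lock_two(wb_a, wb_b);	// task 1
 *	bdi_lock_two(wb_b, wb_a);	// task 2: same acquisition order
 *
 * The spin_lock_nested(..., 1) annotation only tells lockdep that taking two
 * locks of the same class here is intentional.
 */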

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *bdi_debug_root;

static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}

static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
	struct backing_dev_info *bdi = m->private;
	struct bdi_writeback *wb = &bdi->wb;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long nr_dirty, nr_io, nr_more_io;
	struct inode *inode;

	nr_dirty = nr_io = nr_more_io = 0;
	spin_lock(&wb->list_lock);
	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
		nr_dirty++;
	list_for_each_entry(inode, &wb->b_io, i_wb_list)
		nr_io++;
	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
		nr_more_io++;
	spin_unlock(&wb->list_lock);

	global_dirty_limits(&background_thresh, &dirty_thresh);
	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);

#define K(x) ((x) << (PAGE_SHIFT - 10))
	seq_printf(m,
		   "BdiWriteback:       %10lu kB\n"
		   "BdiReclaimable:     %10lu kB\n"
		   "BdiDirtyThresh:     %10lu kB\n"
		   "DirtyThresh:        %10lu kB\n"
		   "BackgroundThresh:   %10lu kB\n"
		   "BdiDirtied:         %10lu kB\n"
		   "BdiWritten:         %10lu kB\n"
		   "BdiWriteBandwidth:  %10lu kBps\n"
		   "b_dirty:            %10lu\n"
		   "b_io:               %10lu\n"
		   "b_more_io:          %10lu\n"
		   "bdi_list:           %10u\n"
		   "state:              %10lx\n",
		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
		   K(bdi_thresh),
		   K(dirty_thresh),
		   K(background_thresh),
		   (unsigned long) K(bdi_stat(bdi, BDI_DIRTIED)),
		   (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
		   (unsigned long) K(bdi->write_bandwidth),
		   nr_dirty,
		   nr_io,
		   nr_more_io,
		   !list_empty(&bdi->bdi_list), bdi->state);
#undef K

	return 0;
}

static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, bdi_debug_stats_show, inode->i_private);
}

static const struct file_operations bdi_debug_stats_fops = {
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
					       bdi, &bdi_debug_stats_fops);
}

static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
#else
static inline void bdi_debug_init(void)
{
}
static inline void bdi_debug_register(struct backing_dev_info *bdi,
				      const char *name)
{
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif
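
/*
 * Illustration (assuming debugfs is mounted at the conventional
 * /sys/kernel/debug): bdi_debug_register() above exposes the counters as
 *
 *	/sys/kernel/debug/bdi/<name>/stats
 *
 * so e.g. "cat /sys/kernel/debug/bdi/8:0/stats" dumps the writeback state of
 * the first SCSI disk.
 */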

static ssize_t read_ahead_kb_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned long read_ahead_kb;
	ssize_t ret = -EINVAL;

	read_ahead_kb = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
		ret = count;
	}
	return ret;
}

#define K(pages) ((pages) << (PAGE_SHIFT - 10))

#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
}

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
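
/*
 * For reference, the BDI_SHOW() invocation above expands (modulo whitespace)
 * to:
 *
 *	static ssize_t read_ahead_kb_show(struct device *dev,
 *					  struct device_attribute *attr,
 *					  char *page)
 *	{
 *		struct backing_dev_info *bdi = dev_get_drvdata(dev);
 *
 *		return snprintf(page, PAGE_SIZE-1, "%lld\n",
 *				(long long)K(bdi->ra_pages));
 *	}
 */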

static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned int ratio;
	ssize_t ret = -EINVAL;

	ratio = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		ret = bdi_set_min_ratio(bdi, ratio);
		if (!ret)
			ret = count;
	}
	return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned int ratio;
	ssize_t ret = -EINVAL;

	ratio = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		ret = bdi_set_max_ratio(bdi, ratio);
		if (!ret)
			ret = count;
	}
	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RW(read_ahead_kb),
	__ATTR_RW(min_ratio),
	__ATTR_RW(max_ratio),
	__ATTR_NULL,
};
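
/*
 * These attributes appear under the "bdi" class registered below, i.e.
 * (assuming the usual sysfs mount):
 *
 *	/sys/class/bdi/<name>/read_ahead_kb
 *	/sys/class/bdi/<name>/min_ratio
 *	/sys/class/bdi/<name>/max_ratio
 *
 * For example, "echo 512 > /sys/class/bdi/8:0/read_ahead_kb" sets a 512 kB
 * readahead window for that device.
 */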
  
static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);
	bdi_class->dev_attrs = bdi_dev_attrs;
	bdi_debug_init();
	return 0;
}
postcore_initcall(bdi_class_init);

static int __init default_bdi_init(void)
{
	int err;

	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
	BUG_ON(IS_ERR(sync_supers_tsk));
	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
	bdi_arm_supers_timer();

	err = bdi_init(&default_backing_dev_info);
	if (!err)
		bdi_register(&default_backing_dev_info, NULL, "default");
	err = bdi_init(&noop_backing_dev_info);

	return err;
}
subsys_initcall(default_bdi_init);

int bdi_has_dirty_io(struct backing_dev_info *bdi)
{
	return wb_has_dirty_io(&bdi->wb);
}

/*
 * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
 * or we risk deadlocking on ->s_umount. The longer term solution would be
 * to implement sync_supers_bdi() or similar and simply do it from the
 * bdi writeback thread individually.
 */
static int bdi_sync_supers(void *unused)
{
	set_user_nice(current, 0);

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();

		/*
		 * Do this periodically, like kupdated() did before.
		 */
		sync_supers();
	}

	return 0;
}

void bdi_arm_supers_timer(void)
{
	unsigned long next;

	if (!dirty_writeback_interval)
		return;

	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
	mod_timer(&sync_supers_timer, round_jiffies_up(next));
}

static void sync_supers_timer_fn(unsigned long unused)
{
	wake_up_process(sync_supers_tsk);
	bdi_arm_supers_timer();
}
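
/*
 * Note on units: dirty_writeback_interval (the vm.dirty_writeback_centisecs
 * sysctl) is in centiseconds, hence the "* 10" conversions to milliseconds
 * above and below. With the default of 500 centisecs the supers timer fires
 * every 5 seconds.
 */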

static void wakeup_timer_fn(unsigned long data)
{
	struct backing_dev_info *bdi = (struct backing_dev_info *)data;

	spin_lock_bh(&bdi->wb_lock);
	if (bdi->wb.task) {
		trace_writeback_wake_thread(bdi);
		wake_up_process(bdi->wb.task);
	} else {
		/*
		 * When bdi tasks are inactive for a long time, they are
		 * killed. In this case we have to wake up the forker thread
		 * which should create and run the bdi thread.
		 */
		trace_writeback_wake_forker_thread(bdi);
		wake_up_process(default_backing_dev_info.wb.task);
	}
	spin_unlock_bh(&bdi->wb_lock);
}

/*
 * This function is used when the first inode for this bdi is marked dirty. It
 * wakes up the corresponding bdi thread which should then take care of the
 * periodic background write-out of dirty inodes. Since the write-out would
 * start only 'dirty_writeback_interval' centisecs from now anyway, we just
 * set up a timer which wakes the bdi thread up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast-path (used by '__mark_inode_dirty()'), so we save a few context
 * switches by delaying the wake-up.
 */
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
{
	unsigned long timeout;

	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
}

/*
 * Calculate the longest interval (jiffies) bdi threads are allowed to be
 * inactive.
 */
static unsigned long bdi_longest_inactive(void)
{
	unsigned long interval;

	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
	return max(5UL * 60 * HZ, interval);
}
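
/*
 * Worked example: with the default dirty_writeback_interval of 500
 * centisecs, interval is msecs_to_jiffies(5000), i.e. 5 seconds, so the
 * max() above yields 5 minutes: an idle bdi thread survives that long
 * before the forker kills it.
 */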

/*
 * Clear pending bit and wakeup anybody waiting for flusher thread creation or
 * shutdown
 */
static void bdi_clear_pending(struct backing_dev_info *bdi)
{
	clear_bit(BDI_pending, &bdi->state);
	smp_mb__after_clear_bit();
	wake_up_bit(&bdi->state, BDI_pending);
}

static int bdi_forker_thread(void *ptr)
{
	struct bdi_writeback *me = ptr;

	current->flags |= PF_SWAPWRITE;
	set_freezable();

	/*
	 * Our parent may run at a different priority, just set us to normal
	 */
	set_user_nice(current, 0);

	for (;;) {
		struct task_struct *task = NULL;
		struct backing_dev_info *bdi;
		enum {
			NO_ACTION,   /* Nothing to do */
			FORK_THREAD, /* Fork bdi thread */
			KILL_THREAD, /* Kill inactive bdi thread */
		} action = NO_ACTION;

		/*
		 * Temporary measure, we want to make sure we don't see
		 * dirty data on the default backing_dev_info
		 */
		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
			del_timer(&me->wakeup_timer);
			wb_do_writeback(me, 0);
		}

		spin_lock_bh(&bdi_lock);
		/*
		 * In the following loop we are going to check whether we have
		 * some work to do without any synchronization with tasks
		 * waking us up to do work for them. Set the task state here
		 * so that we don't miss wakeups after verifying conditions.
		 */
		set_current_state(TASK_INTERRUPTIBLE);

		list_for_each_entry(bdi, &bdi_list, bdi_list) {
			bool have_dirty_io;

			if (!bdi_cap_writeback_dirty(bdi) ||
			     bdi_cap_flush_forker(bdi))
				continue;

			WARN(!test_bit(BDI_registered, &bdi->state),
			     "bdi %p/%s is not registered!\n", bdi, bdi->name);

			have_dirty_io = !list_empty(&bdi->work_list) ||
					wb_has_dirty_io(&bdi->wb);

			/*
			 * If the bdi has work to do, but the thread does not
			 * exist - create it.
			 */
			if (!bdi->wb.task && have_dirty_io) {
				/*
				 * Set the pending bit - if someone will try to
				 * unregister this bdi - it'll wait on this bit.
				 */
				set_bit(BDI_pending, &bdi->state);
				action = FORK_THREAD;
				break;
			}

			spin_lock(&bdi->wb_lock);

			/*
			 * If there is no work to do and the bdi thread was
			 * inactive long enough - kill it. The wb_lock is taken
			 * to make sure no-one adds more work to this bdi and
			 * wakes the bdi thread up.
			 */
			if (bdi->wb.task && !have_dirty_io &&
			    time_after(jiffies, bdi->wb.last_active +
						bdi_longest_inactive())) {
				task = bdi->wb.task;
				bdi->wb.task = NULL;
				spin_unlock(&bdi->wb_lock);
				set_bit(BDI_pending, &bdi->state);
				action = KILL_THREAD;
				break;
			}
			spin_unlock(&bdi->wb_lock);
		}
		spin_unlock_bh(&bdi_lock);

		/* Keep working if default bdi still has things to do */
		if (!list_empty(&me->bdi->work_list))
			__set_current_state(TASK_RUNNING);

		switch (action) {
		case FORK_THREAD:
			__set_current_state(TASK_RUNNING);
			task = kthread_create(bdi_writeback_thread, &bdi->wb,
					      "flush-%s", dev_name(bdi->dev));
			if (IS_ERR(task)) {
				/*
				 * If thread creation fails, force writeout of
				 * the bdi from the thread. Hopefully 1024 is
				 * large enough for efficient IO.
				 */
				writeback_inodes_wb(&bdi->wb, 1024,
						    WB_REASON_FORKER_THREAD);
			} else {
				/*
				 * The spinlock makes sure we do not lose
				 * wake-ups when racing with 'bdi_queue_work()'.
				 * And as soon as the bdi thread is visible, we
				 * can start it.
				 */
				spin_lock_bh(&bdi->wb_lock);
				bdi->wb.task = task;
				spin_unlock_bh(&bdi->wb_lock);
				wake_up_process(task);
			}
			bdi_clear_pending(bdi);
			break;

		case KILL_THREAD:
			__set_current_state(TASK_RUNNING);
			kthread_stop(task);
			bdi_clear_pending(bdi);
			break;

		case NO_ACTION:
			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
				/*
				 * There is no dirty data. The only thing we
				 * should now care about is checking for
				 * inactive bdi threads and killing them. Thus,
				 * let's sleep for a longer time, save energy
				 * and be friendly to battery-powered devices.
				 */
				schedule_timeout(bdi_longest_inactive());
			else
				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
			try_to_freeze();
			break;
		}
	}

	return 0;
}

/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	synchronize_rcu_expedited();
}
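
/*
 * Once the expedited grace period above has elapsed, no reader that walks
 * bdi_list under rcu_read_lock() can still see this bdi, so the caller may
 * safely tear it down.
 */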

int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		const char *fmt, ...)
{
	va_list args;
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	va_start(args, fmt);
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
	va_end(args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	bdi->dev = dev;

	/*
	 * Just start the forker thread for our default backing_dev_info,
	 * and add other bdi's to the list. They will get a thread created
	 * on-demand when they need it.
	 */
	if (bdi_cap_flush_forker(bdi)) {
		struct bdi_writeback *wb = &bdi->wb;

		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
						dev_name(dev));
		if (IS_ERR(wb->task))
			return PTR_ERR(wb->task);
	}

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(BDI_registered, &bdi->state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register);

int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);
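
/*
 * Hypothetical caller (for illustration): a block driver registers the bdi
 * of its request queue under the disk's major:minor, which is where names
 * like "8:0" in /sys/class/bdi come from:
 *
 *	err = bdi_register_dev(&q->backing_dev_info, disk_devt(disk));
 *	if (err)
 *		goto out;	(hypothetical error path)
 */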

/*
 * Remove bdi from the global list and shut down any threads we have running
 */
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
{
	if (!bdi_cap_writeback_dirty(bdi))
		return;

	/*
	 * Make sure nobody finds us on the bdi_list anymore
	 */
	bdi_remove_from_list(bdi);

	/*
	 * If setup is pending, wait for that to complete first
	 */
	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
			TASK_UNINTERRUPTIBLE);

	/*
	 * Finally, kill the kernel thread. We don't need to be RCU
	 * safe anymore, since the bdi is gone from visibility.
	 */
	if (bdi->wb.task)
		kthread_stop(bdi->wb.task);
}

/*
 * This bdi is going away now, make sure that no super_blocks point to it
 */
static void bdi_prune_sb(struct backing_dev_info *bdi)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_bdi == bdi)
			sb->s_bdi = &default_backing_dev_info;
	}
	spin_unlock(&sb_lock);
}

void bdi_unregister(struct backing_dev_info *bdi)
{
	if (bdi->dev) {
		bdi_set_min_ratio(bdi, 0);
		trace_writeback_bdi_unregister(bdi);
		bdi_prune_sb(bdi);
		del_timer_sync(&bdi->wb.wakeup_timer);

		if (!bdi_cap_flush_forker(bdi))
			bdi_wb_shutdown(bdi);
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}
}
EXPORT_SYMBOL(bdi_unregister);

static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
{
	memset(wb, 0, sizeof(*wb));

	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	spin_lock_init(&wb->list_lock);
	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}

/*
 * Initial write bandwidth: 100 MB/s
 */
#define INIT_BW		(100 << (20 - PAGE_SHIFT))
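
/*
 * Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12): INIT_BW is
 * 100 << (20 - 12) = 25600 pages/s, and 25600 pages * 4 KiB = 100 MiB/s,
 * matching the comment above.
 */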

int bdi_init(struct backing_dev_info *bdi)
{
	int i, err;

	bdi->dev = NULL;

	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = PROP_FRAC_BASE;
	spin_lock_init(&bdi->wb_lock);
	INIT_LIST_HEAD(&bdi->bdi_list);
	INIT_LIST_HEAD(&bdi->work_list);

	bdi_wb_init(&bdi->wb, bdi);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
		if (err)
			goto err;
	}

	bdi->dirty_exceeded = 0;

	bdi->bw_time_stamp = jiffies;
	bdi->written_stamp = 0;

	bdi->balanced_dirty_ratelimit = INIT_BW;
	bdi->dirty_ratelimit = INIT_BW;
	bdi->write_bandwidth = INIT_BW;
	bdi->avg_write_bandwidth = INIT_BW;

	err = prop_local_init_percpu(&bdi->completions);

	if (err) {
err:
		while (i--)
			percpu_counter_destroy(&bdi->bdi_stat[i]);
	}

	return err;
}
EXPORT_SYMBOL(bdi_init);
  
void bdi_destroy(struct backing_dev_info *bdi)
{
	int i;

	/*
	 * Splice our entries to the default_backing_dev_info, if this
	 * bdi disappears
	 */
	if (bdi_has_dirty_io(bdi)) {
		struct bdi_writeback *dst = &default_backing_dev_info.wb;

		bdi_lock_two(&bdi->wb, dst);
		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
		list_splice(&bdi->wb.b_io, &dst->b_io);
		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
		spin_unlock(&bdi->wb.list_lock);
		spin_unlock(&dst->list_lock);
	}

	bdi_unregister(bdi);

	/*
	 * If bdi_unregister() had already been called earlier, the
	 * wakeup_timer could still be armed because bdi_prune_sb()
	 * can race with the bdi_wakeup_thread_delayed() calls from
	 * __mark_inode_dirty().
	 */
	del_timer_sync(&bdi->wb.wakeup_timer);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
		percpu_counter_destroy(&bdi->bdi_stat[i]);

	prop_local_destroy_percpu(&bdi->completions);
}
EXPORT_SYMBOL(bdi_destroy);

/*
 * For use from filesystems to quickly init and register a bdi associated
 * with dirty writeback
 */
int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
			   unsigned int cap)
{
	char tmp[32];
	int err;

	bdi->name = name;
	bdi->capabilities = cap;
	err = bdi_init(bdi);
	if (err)
		return err;

	sprintf(tmp, "%.28s%s", name, "-%d");
	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
	if (err) {
		bdi_destroy(bdi);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(bdi_setup_and_register);
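
/*
 * Sketch of a typical caller (hypothetical, though FUSE does roughly this at
 * mount time): a filesystem with no backing block device sets up its own bdi
 * so that its dirty pages are accounted and written back:
 *
 *	err = bdi_setup_and_register(&fc->bdi, "fuse", BDI_CAP_MAP_COPY);
 *	if (err)
 *		return err;
 *	sb->s_bdi = &fc->bdi;
 *
 * The "-%d" suffix built above appends a unique bdi_seq value, yielding
 * names like "fuse-1".
 */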

static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};
static atomic_t nr_bdi_congested[2];

void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	if (test_and_clear_bit(bit, &bdi->state))
		atomic_dec(&nr_bdi_congested[sync]);
	smp_mb__after_clear_bit();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_bdi_congested);

void set_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	if (!test_and_set_bit(bit, &bdi->state))
		atomic_inc(&nr_bdi_congested[sync]);
}
EXPORT_SYMBOL(set_bdi_congested);
  
/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(congestion_wait);
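
/*
 * Typical usage, as seen in reclaim paths: back off for up to 100ms while
 * writeback catches up, e.g.
 *
 *	congestion_wait(BLK_RW_ASYNC, HZ/10);
 */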

/**
 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
 * @zone: A zone to check if it is heavily congested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * If a backing_dev is congested (any backing_dev) and the given @zone has
 * experienced recent congestion, this waits for up to @timeout jiffies for
 * either a BDI to exit congestion of the given @sync queue or a write to
 * complete.
 *
 * In the absence of zone congestion, cond_resched() is called to yield
 * the processor if necessary but otherwise does not sleep.
 *
 * The return value is 0 if the sleep is for the full timeout. Otherwise,
 * it is the number of jiffies that were still remaining when the function
 * returned. return_value == timeout implies the function did not sleep.
 */
long wait_iff_congested(struct zone *zone, int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * If there is no congestion, or heavy congestion is not being
	 * encountered in the current zone, yield if necessary instead
	 * of sleeping on the congestion queue
	 */
	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
			!zone_is_reclaim_congested(zone)) {
		cond_resched();

		/* In case we scheduled, work out time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;

		goto out;
	}

	/* Sleep until uncongested or a write happens */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

out:
	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(wait_iff_congested);