Blame view

mm/backing-dev.c 21.8 KB
3fcfab16c   Andrew Morton   [PATCH] separate ...
1
2
3
  
  #include <linux/wait.h>
  #include <linux/backing-dev.h>
03ba3782e   Jens Axboe   writeback: switch...
4
5
  #include <linux/kthread.h>
  #include <linux/freezer.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
6
  #include <linux/fs.h>
26160158d   Jens Axboe   Move the default_...
7
  #include <linux/pagemap.h>
03ba3782e   Jens Axboe   writeback: switch...
8
  #include <linux/mm.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
9
10
  #include <linux/sched.h>
  #include <linux/module.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
11
12
  #include <linux/writeback.h>
  #include <linux/device.h>
455b28646   Dave Chinner   writeback: Initia...
13
  #include <trace/events/writeback.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
14

c3c532061   Jens Axboe   bdi: add helper f...
15
  static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
26160158d   Jens Axboe   Move the default_...
16
  struct backing_dev_info default_backing_dev_info = {
d993831fa   Jens Axboe   writeback: add na...
17
  	.name		= "default",
26160158d   Jens Axboe   Move the default_...
18
19
20
  	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
  	.state		= 0,
  	.capabilities	= BDI_CAP_MAP_COPY,
26160158d   Jens Axboe   Move the default_...
21
22
  };
  EXPORT_SYMBOL_GPL(default_backing_dev_info);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
23

5129a469a   Jörn Engel   Catch filesystems...
24
25
  struct backing_dev_info noop_backing_dev_info = {
  	.name		= "noop",
976e48f8a   Jan Kara   bdi: Initialize n...
26
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
5129a469a   Jörn Engel   Catch filesystems...
27
28
  };
  EXPORT_SYMBOL_GPL(noop_backing_dev_info);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
29
  static struct class *bdi_class;
cfc4ba536   Jens Axboe   writeback: use RC...
30
31
32
33
34
35
  
  /*
   * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
   * reader side protection for bdi_pending_list. bdi_list has RCU reader side
   * locking.
   */
03ba3782e   Jens Axboe   writeback: switch...
36
  DEFINE_SPINLOCK(bdi_lock);
66f3b8e2e   Jens Axboe   writeback: move d...
37
  LIST_HEAD(bdi_list);
03ba3782e   Jens Axboe   writeback: switch...
38
39
40
41
42
43
44
  LIST_HEAD(bdi_pending_list);
  
  static struct task_struct *sync_supers_tsk;
  static struct timer_list sync_supers_timer;
  
  static int bdi_sync_supers(void *);
  static void sync_supers_timer_fn(unsigned long);
03ba3782e   Jens Axboe   writeback: switch...
45

f758eeabe   Christoph Hellwig   writeback: split ...
46
47
48
49
50
51
52
53
54
55
  /*
   * Take the list_lock of both writeback structures. The structure at the
   * lower address is always locked first; the other one is then taken with
   * spin_lock_nested() using subclass 1.
   */
  void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
  {
  	struct bdi_writeback *first = wb1;
  	struct bdi_writeback *second = wb2;

  	if (!(wb1 < wb2)) {
  		first = wb2;
  		second = wb1;
  	}

  	spin_lock(&first->list_lock);
  	spin_lock_nested(&second->list_lock, 1);
  }
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
56
57
58
59
60
61
62
63
64
65
66
67
68
69
  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
  /* Create the "bdi" debugfs directory (NULL parent) and remember its dentry. */
  static void bdi_debug_init(void)
  {
  	struct dentry *root = debugfs_create_dir("bdi", NULL);

  	bdi_debug_root = root;
  }
  
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
c1955ce32   Christoph Hellwig   writeback: remove...
70
  	struct bdi_writeback *wb = &bdi->wb;
364aeb284   David Rientjes   mm: change dirty ...
71
72
73
  	unsigned long background_thresh;
  	unsigned long dirty_thresh;
  	unsigned long bdi_thresh;
345227d70   Gustavo F. Padovan   backing-dev: Kill...
74
  	unsigned long nr_dirty, nr_io, nr_more_io;
f09b00d3e   Jens Axboe   writeback: add so...
75
  	struct inode *inode;
345227d70   Gustavo F. Padovan   backing-dev: Kill...
76
  	nr_dirty = nr_io = nr_more_io = 0;
f758eeabe   Christoph Hellwig   writeback: split ...
77
  	spin_lock(&wb->list_lock);
7ccf19a80   Nick Piggin   fs: inode split I...
78
  	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
c1955ce32   Christoph Hellwig   writeback: remove...
79
  		nr_dirty++;
7ccf19a80   Nick Piggin   fs: inode split I...
80
  	list_for_each_entry(inode, &wb->b_io, i_wb_list)
c1955ce32   Christoph Hellwig   writeback: remove...
81
  		nr_io++;
7ccf19a80   Nick Piggin   fs: inode split I...
82
  	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
c1955ce32   Christoph Hellwig   writeback: remove...
83
  		nr_more_io++;
f758eeabe   Christoph Hellwig   writeback: split ...
84
  	spin_unlock(&wb->list_lock);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
85

16c4042f0   Wu Fengguang   writeback: avoid ...
86
87
  	global_dirty_limits(&background_thresh, &dirty_thresh);
  	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
88
89
90
  
  #define K(x) ((x) << (PAGE_SHIFT - 10))
  	seq_printf(m,
00821b002   Wu Fengguang   writeback: show b...
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
  		   "BdiWriteback:       %10lu kB
  "
  		   "BdiReclaimable:     %10lu kB
  "
  		   "BdiDirtyThresh:     %10lu kB
  "
  		   "DirtyThresh:        %10lu kB
  "
  		   "BackgroundThresh:   %10lu kB
  "
  		   "BdiWritten:         %10lu kB
  "
  		   "BdiWriteBandwidth:  %10lu kBps
  "
  		   "b_dirty:            %10lu
  "
  		   "b_io:               %10lu
  "
  		   "b_more_io:          %10lu
  "
  		   "bdi_list:           %10u
  "
  		   "state:              %10lx
  ",
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
115
116
  		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
  		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
f7d2b1ecd   Jan Kara   writeback: accoun...
117
118
119
120
  		   K(bdi_thresh),
  		   K(dirty_thresh),
  		   K(background_thresh),
  		   (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
00821b002   Wu Fengguang   writeback: show b...
121
  		   (unsigned long) K(bdi->write_bandwidth),
f7d2b1ecd   Jan Kara   writeback: accoun...
122
123
124
  		   nr_dirty,
  		   nr_io,
  		   nr_more_io,
c1955ce32   Christoph Hellwig   writeback: remove...
125
  		   !list_empty(&bdi->bdi_list), bdi->state);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
  #undef K
  
  	return 0;
  }
  
  /*
   * open() handler for the "stats" file: hands inode->i_private to
   * single_open() as the private data for bdi_debug_stats_show().
   */
  static int bdi_debug_stats_open(struct inode *inode, struct file *file)
  {
  	void *private = inode->i_private;

  	return single_open(file, bdi_debug_stats_show, private);
  }
  
  /*
   * File operations for the per-bdi "stats" debugfs file created in
   * bdi_debug_register(). Only open is local; read, llseek and release use
   * the generic seq_file / single_open helpers.
   */
  static const struct file_operations bdi_debug_stats_fops = {
  	.open		= bdi_debug_stats_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= single_release,
  };
  
  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
  {
  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
  	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
  					       bdi, &bdi_debug_stats_fops);
  }
  
  /*
   * Remove the debugfs entries created by bdi_debug_register(): the "stats"
   * file first, then the directory that contained it.
   */
  static void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  	debugfs_remove(bdi->debug_stats);
  	debugfs_remove(bdi->debug_dir);
  }
  #else
  /* !CONFIG_DEBUG_FS: nothing to set up. */
  static inline void bdi_debug_init(void) { }
  /* !CONFIG_DEBUG_FS: registering debug entries is a no-op. */
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
  				      const char *name) { }
  /* !CONFIG_DEBUG_FS: there is nothing to remove. */
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi) { }
  #endif
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
  /*
   * sysfs store handler for "read_ahead_kb".
   *
   * Parses @buf as a decimal number of kilobytes, optionally followed by a
   * single trailing newline, and stores it in bdi->ra_pages after shifting
   * right by (PAGE_SHIFT - 10).
   *
   * Returns @count on success, or -EINVAL if @buf holds no digits or has
   * trailing garbage. A buffer holding only "\n" is rejected rather than
   * being treated as 0.
   */
  static ssize_t read_ahead_kb_store(struct device *dev,
  				  struct device_attribute *attr,
  				  const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	char *end;
  	unsigned long read_ahead_kb;

  	read_ahead_kb = simple_strtoul(buf, &end, 10);
  	if (end == buf)			/* no digits consumed */
  		return -EINVAL;
  	if (*end == '\n')		/* tolerate one trailing newline */
  		end++;
  	if (*end != '\0')
  		return -EINVAL;

  	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
  	return count;
  }
  
  #define K(pages) ((pages) << (PAGE_SHIFT - 10))
  
  /*
   * Generate a sysfs show handler called <name>_show that prints @expr,
   * evaluated with a local "bdi" taken from the device's drvdata, as a
   * signed decimal followed by a newline. @expr is parenthesized so the
   * cast applies to the whole expression.
   */
  #define BDI_SHOW(name, expr)						\
  static ssize_t name##_show(struct device *dev,				\
  			   struct device_attribute *attr, char *page)	\
  {									\
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
  									\
  	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)(expr));	\
  }
  
  BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
  /*
   * sysfs store handler for "min_ratio".
   *
   * Parses @buf as a decimal number, optionally followed by a single
   * trailing newline, and passes it to bdi_set_min_ratio().
   *
   * Returns @count on success, -EINVAL if @buf holds no digits, has trailing
   * garbage or does not fit in an unsigned int, or the error returned by
   * bdi_set_min_ratio().
   */
  static ssize_t min_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	char *end;
  	unsigned long ratio;
  	ssize_t ret;

  	ratio = simple_strtoul(buf, &end, 10);
  	if (end == buf)			/* no digits consumed */
  		return -EINVAL;
  	if (*end == '\n')		/* tolerate one trailing newline */
  		end++;
  	if (*end != '\0')
  		return -EINVAL;
  	/* Do not let a huge value wrap into a small, valid-looking ratio. */
  	if (ratio != (unsigned int)ratio)
  		return -EINVAL;

  	ret = bdi_set_min_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;
  	return ret;
  }
  BDI_SHOW(min_ratio, bdi->min_ratio)
a42dde041   Peter Zijlstra   mm: bdi: allow se...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  /*
   * sysfs store handler for "max_ratio".
   *
   * Parses @buf as a decimal number, optionally followed by a single
   * trailing newline, and passes it to bdi_set_max_ratio().
   *
   * Returns @count on success, -EINVAL if @buf holds no digits, has trailing
   * garbage or does not fit in an unsigned int, or the error returned by
   * bdi_set_max_ratio().
   */
  static ssize_t max_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	char *end;
  	unsigned long ratio;
  	ssize_t ret;

  	ratio = simple_strtoul(buf, &end, 10);
  	if (end == buf)			/* no digits consumed */
  		return -EINVAL;
  	if (*end == '\n')		/* tolerate one trailing newline */
  		end++;
  	if (*end != '\0')
  		return -EINVAL;
  	/* Do not let a huge value wrap into a small, valid-looking ratio. */
  	if (ratio != (unsigned int)ratio)
  		return -EINVAL;

  	ret = bdi_set_max_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;
  	return ret;
  }
  BDI_SHOW(max_ratio, bdi->max_ratio)
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
234
235
236
237
  #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
  
  static struct device_attribute bdi_dev_attrs[] = {
  	__ATTR_RW(read_ahead_kb),
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
238
  	__ATTR_RW(min_ratio),
a42dde041   Peter Zijlstra   mm: bdi: allow se...
239
  	__ATTR_RW(max_ratio),
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
240
241
242
243
244
245
  	__ATTR_NULL,
  };
  
  static __init int bdi_class_init(void)
  {
  	bdi_class = class_create(THIS_MODULE, "bdi");
144214537   Anton Blanchard   backing-dev: Hand...
246
247
  	if (IS_ERR(bdi_class))
  		return PTR_ERR(bdi_class);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
248
  	bdi_class->dev_attrs = bdi_dev_attrs;
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
249
  	bdi_debug_init();
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
250
251
  	return 0;
  }
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
252
  postcore_initcall(bdi_class_init);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
253

26160158d   Jens Axboe   Move the default_...
254
255
256
  /*
   * Start the sync_supers thread and its timer, then initialize the default
   * and noop backing devices and register the default one.
   *
   * Both backing devices are always initialized. The return value is the
   * first error hit for the default bdi (init or register), otherwise the
   * result of initializing the noop bdi; an earlier failure is no longer
   * overwritten by a later success.
   */
  static int __init default_bdi_init(void)
  {
  	int err;
  	int noop_err;

  	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
  	BUG_ON(IS_ERR(sync_supers_tsk));

  	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
  	bdi_arm_supers_timer();

  	err = bdi_init(&default_backing_dev_info);
  	if (!err)
  		err = bdi_register(&default_backing_dev_info, NULL, "default");

  	noop_err = bdi_init(&noop_backing_dev_info);

  	return err ? err : noop_err;
  }
  subsys_initcall(default_bdi_init);
03ba3782e   Jens Axboe   writeback: switch...
270
271
272
273
  /* Report whether the writeback structure embedded in @bdi has dirty IO. */
  int bdi_has_dirty_io(struct backing_dev_info *bdi)
  {
  	struct bdi_writeback *wb = &bdi->wb;

  	return wb_has_dirty_io(wb);
  }
03ba3782e   Jens Axboe   writeback: switch...
274
  /*
6f904ff0e   Artem Bityutskiy   writeback: harmon...
275
   * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
03ba3782e   Jens Axboe   writeback: switch...
276
277
   * or we risk deadlocking on ->s_umount. The longer term solution would be
   * to implement sync_supers_bdi() or similar and simply do it from the
6f904ff0e   Artem Bityutskiy   writeback: harmon...
278
   * bdi writeback thread individually.
03ba3782e   Jens Axboe   writeback: switch...
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
   */
  static int bdi_sync_supers(void *unused)
  {
  	set_user_nice(current, 0);

  	for (;;) {
  		if (kthread_should_stop())
  			break;

  		/* Sleep until somebody wakes this thread up. */
  		set_current_state(TASK_INTERRUPTIBLE);
  		schedule();

  		/* Periodic superblock sync, as kupdated() used to do. */
  		sync_supers();
  	}

  	return 0;
  }
6423104b6   Jens Axboe   writeback: fixups...
296
  void bdi_arm_supers_timer(void)
03ba3782e   Jens Axboe   writeback: switch...
297
298
  {
  	unsigned long next;
6423104b6   Jens Axboe   writeback: fixups...
299
300
  	if (!dirty_writeback_interval)
  		return;
03ba3782e   Jens Axboe   writeback: switch...
301
302
303
304
305
306
307
  	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
  	mod_timer(&sync_supers_timer, round_jiffies_up(next));
  }
  
  /* Timer callback: wake the sync_supers thread and re-arm the timer. */
  static void sync_supers_timer_fn(unsigned long unused)
  {
  	wake_up_process(sync_supers_tsk);
  	bdi_arm_supers_timer();
  }
6467716a3   Artem Bityutskiy   writeback: optimi...
310
311
312
313
314
315
  static void wakeup_timer_fn(unsigned long data)
  {
  	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
  
  	spin_lock_bh(&bdi->wb_lock);
  	if (bdi->wb.task) {
603320239   Artem Bityutskiy   writeback: add ne...
316
  		trace_writeback_wake_thread(bdi);
6467716a3   Artem Bityutskiy   writeback: optimi...
317
318
319
320
321
322
323
  		wake_up_process(bdi->wb.task);
  	} else {
  		/*
  		 * When bdi tasks are inactive for long time, they are killed.
  		 * In this case we have to wake-up the forker thread which
  		 * should create and run the bdi thread.
  		 */
603320239   Artem Bityutskiy   writeback: add ne...
324
  		trace_writeback_wake_forker_thread(bdi);
6467716a3   Artem Bityutskiy   writeback: optimi...
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
  		wake_up_process(default_backing_dev_info.wb.task);
  	}
  	spin_unlock_bh(&bdi->wb_lock);
  }
  
  /*
   * This function is used when the first inode for this bdi is marked dirty. It
   * wakes-up the corresponding bdi thread which should then take care of the
   * periodic background write-out of dirty inodes. Since the write-out would
   * start only 'dirty_writeback_interval' centisecs from now anyway, we just
   * set up a timer which wakes the bdi thread up later.
   *
   * Note, we wouldn't bother setting up the timer, but this function is on the
   * fast-path (used by '__mark_inode_dirty()'), so we save a few context
   * switches by delaying the wake-up.
   */
  void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
  {
  	/* dirty_writeback_interval * 10 is handed to msecs_to_jiffies(). */
  	unsigned long delay = msecs_to_jiffies(dirty_writeback_interval * 10);

  	mod_timer(&bdi->wb.wakeup_timer, jiffies + delay);
  }
fff5b85aa   Artem Bityutskiy   writeback: move b...
348
349
350
351
352
353
354
355
356
357
358
  /*
   * Calculate the longest interval (jiffies) bdi threads are allowed to be
   * inactive.
   */
  static unsigned long bdi_longest_inactive(void)
  {
  	unsigned long five_min = 5UL * 60 * HZ;
  	unsigned long wb_period = msecs_to_jiffies(dirty_writeback_interval * 10);

  	/* Never shorter than 5 * 60 * HZ jiffies. */
  	return max(five_min, wb_period);
  }
5a042aa4b   Jan Kara   mm: Cleanup clear...
359
360
361
362
363
364
365
366
367
368
  /*
   * Clear pending bit and wakeup anybody waiting for flusher thread creation or
   * shutdown
   */
  static void bdi_clear_pending(struct backing_dev_info *bdi)
  {
  	clear_bit(BDI_pending, &bdi->state);
  	/* Barrier between clearing the bit and waking the waiters on it. */
  	smp_mb__after_clear_bit();
  	wake_up_bit(&bdi->state, BDI_pending);
  }
6f904ff0e   Artem Bityutskiy   writeback: harmon...
369
  static int bdi_forker_thread(void *ptr)
03ba3782e   Jens Axboe   writeback: switch...
370
371
  {
  	struct bdi_writeback *me = ptr;
766f91641   Peter Zijlstra   kernel: remove PF...
372
  	current->flags |= PF_SWAPWRITE;
c1955ce32   Christoph Hellwig   writeback: remove...
373
374
375
376
377
378
  	set_freezable();
  
  	/*
  	 * Our parent may run at a different priority, just set us to normal
  	 */
  	set_user_nice(current, 0);
03ba3782e   Jens Axboe   writeback: switch...
379
380
  
  	for (;;) {
fff5b85aa   Artem Bityutskiy   writeback: move b...
381
  		struct task_struct *task = NULL;
78c40cb65   Artem Bityutskiy   writeback: do not...
382
  		struct backing_dev_info *bdi;
adf392407   Artem Bityutskiy   writeback: restru...
383
384
385
  		enum {
  			NO_ACTION,   /* Nothing to do */
  			FORK_THREAD, /* Fork bdi thread */
fff5b85aa   Artem Bityutskiy   writeback: move b...
386
  			KILL_THREAD, /* Kill inactive bdi thread */
adf392407   Artem Bityutskiy   writeback: restru...
387
  		} action = NO_ACTION;
03ba3782e   Jens Axboe   writeback: switch...
388
389
390
391
392
  
  		/*
  		 * Temporary measure, we want to make sure we don't see
  		 * dirty data on the default backing_dev_info
  		 */
6467716a3   Artem Bityutskiy   writeback: optimi...
393
394
  		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
  			del_timer(&me->wakeup_timer);
03ba3782e   Jens Axboe   writeback: switch...
395
  			wb_do_writeback(me, 0);
6467716a3   Artem Bityutskiy   writeback: optimi...
396
  		}
03ba3782e   Jens Axboe   writeback: switch...
397

cfc4ba536   Jens Axboe   writeback: use RC...
398
  		spin_lock_bh(&bdi_lock);
09f40f98b   Jan Kara   mm: Add comment e...
399
400
401
402
403
404
405
  		/*
  		 * In the following loop we are going to check whether we have
  		 * some work to do without any synchronization with tasks
  		 * waking us up to do work for them. So we have to set task
  		 * state already here so that we don't miss wakeups coming
  		 * after we verify some condition.
  		 */
c5f7ad233   Artem Bityutskiy   writeback: do not...
406
  		set_current_state(TASK_INTERRUPTIBLE);
03ba3782e   Jens Axboe   writeback: switch...
407

78c40cb65   Artem Bityutskiy   writeback: do not...
408
  		list_for_each_entry(bdi, &bdi_list, bdi_list) {
adf392407   Artem Bityutskiy   writeback: restru...
409
410
411
412
  			bool have_dirty_io;
  
  			if (!bdi_cap_writeback_dirty(bdi) ||
  			     bdi_cap_flush_forker(bdi))
03ba3782e   Jens Axboe   writeback: switch...
413
  				continue;
080dcec41   Artem Bityutskiy   writeback: simpli...
414
415
416
  			WARN(!test_bit(BDI_registered, &bdi->state),
  			     "bdi %p/%s is not registered!
  ", bdi, bdi->name);
adf392407   Artem Bityutskiy   writeback: restru...
417
418
  			have_dirty_io = !list_empty(&bdi->work_list) ||
  					wb_has_dirty_io(&bdi->wb);
78c40cb65   Artem Bityutskiy   writeback: do not...
419
420
  
  			/*
adf392407   Artem Bityutskiy   writeback: restru...
421
422
  			 * If the bdi has work to do, but the thread does not
  			 * exist - create it.
78c40cb65   Artem Bityutskiy   writeback: do not...
423
  			 */
adf392407   Artem Bityutskiy   writeback: restru...
424
425
426
427
428
429
430
431
432
  			if (!bdi->wb.task && have_dirty_io) {
  				/*
  				 * Set the pending bit - if someone will try to
  				 * unregister this bdi - it'll wait on this bit.
  				 */
  				set_bit(BDI_pending, &bdi->state);
  				action = FORK_THREAD;
  				break;
  			}
fff5b85aa   Artem Bityutskiy   writeback: move b...
433

6bf05d03e   Jens Axboe   writeback: fix ba...
434
  			spin_lock(&bdi->wb_lock);
fff5b85aa   Artem Bityutskiy   writeback: move b...
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
  			/*
  			 * If there is no work to do and the bdi thread was
  			 * inactive long enough - kill it. The wb_lock is taken
  			 * to make sure no-one adds more work to this bdi and
  			 * wakes the bdi thread up.
  			 */
  			if (bdi->wb.task && !have_dirty_io &&
  			    time_after(jiffies, bdi->wb.last_active +
  						bdi_longest_inactive())) {
  				task = bdi->wb.task;
  				bdi->wb.task = NULL;
  				spin_unlock(&bdi->wb_lock);
  				set_bit(BDI_pending, &bdi->state);
  				action = KILL_THREAD;
  				break;
  			}
6bf05d03e   Jens Axboe   writeback: fix ba...
451
  			spin_unlock(&bdi->wb_lock);
03ba3782e   Jens Axboe   writeback: switch...
452
  		}
080dcec41   Artem Bityutskiy   writeback: simpli...
453
  		spin_unlock_bh(&bdi_lock);
03ba3782e   Jens Axboe   writeback: switch...
454

c4ec7908c   Artem Bityutskiy   writeback: do not...
455
456
457
  		/* Keep working if default bdi still has things to do */
  		if (!list_empty(&me->bdi->work_list))
  			__set_current_state(TASK_RUNNING);
adf392407   Artem Bityutskiy   writeback: restru...
458
459
460
  		switch (action) {
  		case FORK_THREAD:
  			__set_current_state(TASK_RUNNING);
6628bc74f   Artem Bityutskiy   writeback: do not...
461
462
  			task = kthread_create(bdi_writeback_thread, &bdi->wb,
  					      "flush-%s", dev_name(bdi->dev));
adf392407   Artem Bityutskiy   writeback: restru...
463
464
465
  			if (IS_ERR(task)) {
  				/*
  				 * If thread creation fails, force writeout of
d46db3d58   Wu Fengguang   writeback: make w...
466
467
  				 * the bdi from the thread. Hopefully 1024 is
  				 * large enough for efficient IO.
adf392407   Artem Bityutskiy   writeback: restru...
468
  				 */
d46db3d58   Wu Fengguang   writeback: make w...
469
  				writeback_inodes_wb(&bdi->wb, 1024);
fff5b85aa   Artem Bityutskiy   writeback: move b...
470
471
472
473
  			} else {
  				/*
  				 * The spinlock makes sure we do not lose
  				 * wake-ups when racing with 'bdi_queue_work()'.
6628bc74f   Artem Bityutskiy   writeback: do not...
474
475
  				 * And as soon as the bdi thread is visible, we
  				 * can start it.
fff5b85aa   Artem Bityutskiy   writeback: move b...
476
  				 */
6467716a3   Artem Bityutskiy   writeback: optimi...
477
  				spin_lock_bh(&bdi->wb_lock);
adf392407   Artem Bityutskiy   writeback: restru...
478
  				bdi->wb.task = task;
6467716a3   Artem Bityutskiy   writeback: optimi...
479
  				spin_unlock_bh(&bdi->wb_lock);
6628bc74f   Artem Bityutskiy   writeback: do not...
480
  				wake_up_process(task);
fff5b85aa   Artem Bityutskiy   writeback: move b...
481
  			}
5a042aa4b   Jan Kara   mm: Cleanup clear...
482
  			bdi_clear_pending(bdi);
fff5b85aa   Artem Bityutskiy   writeback: move b...
483
484
485
486
487
  			break;
  
  		case KILL_THREAD:
  			__set_current_state(TASK_RUNNING);
  			kthread_stop(task);
5a042aa4b   Jan Kara   mm: Cleanup clear...
488
  			bdi_clear_pending(bdi);
adf392407   Artem Bityutskiy   writeback: restru...
489
  			break;
03ba3782e   Jens Axboe   writeback: switch...
490

adf392407   Artem Bityutskiy   writeback: restru...
491
  		case NO_ACTION:
253c34e9b   Artem Bityutskiy   writeback: preven...
492
493
494
495
496
497
498
499
500
  			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
  				/*
  				 * There are no dirty data. The only thing we
  				 * should now care about is checking for
  				 * inactive bdi threads and killing them. Thus,
  				 * let's sleep for longer time, save energy and
  				 * be friendly for battery-driven devices.
  				 */
  				schedule_timeout(bdi_longest_inactive());
6423104b6   Jens Axboe   writeback: fixups...
501
  			else
253c34e9b   Artem Bityutskiy   writeback: preven...
502
  				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
03ba3782e   Jens Axboe   writeback: switch...
503
  			try_to_freeze();
5a042aa4b   Jan Kara   mm: Cleanup clear...
504
  			break;
03ba3782e   Jens Axboe   writeback: switch...
505
  		}
03ba3782e   Jens Axboe   writeback: switch...
506
507
508
509
  	}
  
  	return 0;
  }
cfc4ba536   Jens Axboe   writeback: use RC...
510
511
512
513
514
515
516
517
  /*
   * Remove bdi from bdi_list, and ensure that it is no longer visible
   */
  static void bdi_remove_from_list(struct backing_dev_info *bdi)
  {
  	/* Unlink under bdi_lock, which protects updates to bdi_list. */
  	spin_lock_bh(&bdi_lock);
  	list_del_rcu(&bdi->bdi_list);
  	spin_unlock_bh(&bdi_lock);

  	/* bdi_list has RCU readers: wait for them before returning. */
  	synchronize_rcu_expedited();
  }
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
520
521
522
  int bdi_register(struct backing_dev_info *bdi, struct device *parent,
  		const char *fmt, ...)
  {
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
523
  	va_list args;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
524
  	struct device *dev;
69fc208be   Andrew Morton   mm/backing-dev.c:...
525
  	if (bdi->dev)	/* The driver needs to use separate queues per device */
c284de61d   Artem Bityutskiy   writeback: cleanu...
526
  		return 0;
f1d0b063d   Kay Sievers   bdi: register sys...
527

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
528
  	va_start(args, fmt);
19051c503   Greg Kroah-Hartman   mm: bdi: fix race...
529
  	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
530
  	va_end(args);
c284de61d   Artem Bityutskiy   writeback: cleanu...
531
532
  	if (IS_ERR(dev))
  		return PTR_ERR(dev);
66f3b8e2e   Jens Axboe   writeback: move d...
533

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
534
  	bdi->dev = dev;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
535

03ba3782e   Jens Axboe   writeback: switch...
536
537
538
539
540
541
542
  	/*
  	 * Just start the forker thread for our default backing_dev_info,
  	 * and add other bdi's to the list. They will get a thread created
  	 * on-demand when they need it.
  	 */
  	if (bdi_cap_flush_forker(bdi)) {
  		struct bdi_writeback *wb = &bdi->wb;
6f904ff0e   Artem Bityutskiy   writeback: harmon...
543
  		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
03ba3782e   Jens Axboe   writeback: switch...
544
  						dev_name(dev));
c284de61d   Artem Bityutskiy   writeback: cleanu...
545
546
  		if (IS_ERR(wb->task))
  			return PTR_ERR(wb->task);
03ba3782e   Jens Axboe   writeback: switch...
547
548
549
  	}
  
  	bdi_debug_register(bdi, dev_name(dev));
500b067c5   Jens Axboe   writeback: check ...
550
  	set_bit(BDI_registered, &bdi->state);
c284de61d   Artem Bityutskiy   writeback: cleanu...
551
552
553
554
  
  	spin_lock_bh(&bdi_lock);
  	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
  	spin_unlock_bh(&bdi_lock);
455b28646   Dave Chinner   writeback: Initia...
555
  	trace_writeback_bdi_register(bdi);
c284de61d   Artem Bityutskiy   writeback: cleanu...
556
  	return 0;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
557
558
559
560
561
562
563
564
  }
  EXPORT_SYMBOL(bdi_register);
  
  /* Register @bdi under the name "<major>:<minor>" of @dev, with no parent. */
  int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
  {
  	unsigned int maj = MAJOR(dev);
  	unsigned int min_nr = MINOR(dev);

  	return bdi_register(bdi, NULL, "%u:%u", maj, min_nr);
  }
  EXPORT_SYMBOL(bdi_register_dev);
03ba3782e   Jens Axboe   writeback: switch...
565
566
567
568
  /*
   * Remove bdi from the global list and shutdown any threads we have running
   */
  static void bdi_wb_shutdown(struct backing_dev_info *bdi)
66f3b8e2e   Jens Axboe   writeback: move d...
569
  {
03ba3782e   Jens Axboe   writeback: switch...
570
571
572
573
  	if (!bdi_cap_writeback_dirty(bdi))
  		return;
  
  	/*
fff5b85aa   Artem Bityutskiy   writeback: move b...
574
  	 * Make sure nobody finds us on the bdi_list anymore
03ba3782e   Jens Axboe   writeback: switch...
575
  	 */
fff5b85aa   Artem Bityutskiy   writeback: move b...
576
  	bdi_remove_from_list(bdi);
03ba3782e   Jens Axboe   writeback: switch...
577
578
  
  	/*
fff5b85aa   Artem Bityutskiy   writeback: move b...
579
  	 * If setup is pending, wait for that to complete first
03ba3782e   Jens Axboe   writeback: switch...
580
  	 */
fff5b85aa   Artem Bityutskiy   writeback: move b...
581
582
  	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
  			TASK_UNINTERRUPTIBLE);
03ba3782e   Jens Axboe   writeback: switch...
583
584
  
  	/*
c1955ce32   Christoph Hellwig   writeback: remove...
585
  	 * Finally, kill the kernel thread. We don't need to be RCU
c62b17a58   Romit Dasgupta   Thaw refrigerated...
586
587
588
  	 * safe anymore, since the bdi is gone from visibility. Force
  	 * unfreeze of the thread before calling kthread_stop(), otherwise
  	 * it would never exet if it is currently stuck in the refrigerator.
03ba3782e   Jens Axboe   writeback: switch...
589
  	 */
c1955ce32   Christoph Hellwig   writeback: remove...
590
591
592
  	if (bdi->wb.task) {
  		thaw_process(bdi->wb.task);
  		kthread_stop(bdi->wb.task);
c62b17a58   Romit Dasgupta   Thaw refrigerated...
593
  	}
66f3b8e2e   Jens Axboe   writeback: move d...
594
  }
592b09a42   Jens Axboe   backing-dev: ensu...
595
596
597
598
599
600
601
602
603
604
  /*
   * This bdi is going away now, make sure that no super_blocks point to it
   */
  static void bdi_prune_sb(struct backing_dev_info *bdi)
  {
  	struct super_block *sb;

  	/* Walk super_blocks under sb_lock; repoint users of @bdi. */
  	spin_lock(&sb_lock);
  	list_for_each_entry(sb, &super_blocks, s_list) {
  		if (sb->s_bdi == bdi)
  			sb->s_bdi = &default_backing_dev_info;
  	}
  	spin_unlock(&sb_lock);
  }
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
609
610
611
  /*
   * Undo bdi_register(). Does nothing if @bdi has no device. Otherwise it
   * resets the min ratio, detaches super blocks, stops the wakeup timer,
   * shuts down the writeback thread (skipped for a bdi with
   * bdi_cap_flush_forker()), removes the debugfs entries and unregisters the
   * device.
   */
  void bdi_unregister(struct backing_dev_info *bdi)
  {
  	if (!bdi->dev)
  		return;

  	bdi_set_min_ratio(bdi, 0);
  	trace_writeback_bdi_unregister(bdi);
  	bdi_prune_sb(bdi);
  	del_timer_sync(&bdi->wb.wakeup_timer);

  	if (!bdi_cap_flush_forker(bdi))
  		bdi_wb_shutdown(bdi);
  	bdi_debug_unregister(bdi);
  	device_unregister(bdi->dev);
  	bdi->dev = NULL;
  }
  EXPORT_SYMBOL(bdi_unregister);
3fcfab16c   Andrew Morton   [PATCH] separate ...
625

6467716a3   Artem Bityutskiy   writeback: optimi...
626
627
628
629
630
631
632
633
634
  static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
  {
  	memset(wb, 0, sizeof(*wb));
  
  	wb->bdi = bdi;
  	wb->last_old_flush = jiffies;
  	INIT_LIST_HEAD(&wb->b_dirty);
  	INIT_LIST_HEAD(&wb->b_io);
  	INIT_LIST_HEAD(&wb->b_more_io);
f758eeabe   Christoph Hellwig   writeback: split ...
635
  	spin_lock_init(&wb->list_lock);
6467716a3   Artem Bityutskiy   writeback: optimi...
636
637
  	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
  }
e98be2d59   Wu Fengguang   writeback: bdi wr...
638
639
640
641
  /*
   * Initial write bandwidth: 100 MB/s
   */
  #define INIT_BW		(100 << (20 - PAGE_SHIFT))
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
642
643
  int bdi_init(struct backing_dev_info *bdi)
  {
03ba3782e   Jens Axboe   writeback: switch...
644
  	int i, err;
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
645

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
646
  	bdi->dev = NULL;
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
647
  	bdi->min_ratio = 0;
a42dde041   Peter Zijlstra   mm: bdi: allow se...
648
649
  	bdi->max_ratio = 100;
  	bdi->max_prop_frac = PROP_FRAC_BASE;
03ba3782e   Jens Axboe   writeback: switch...
650
  	spin_lock_init(&bdi->wb_lock);
66f3b8e2e   Jens Axboe   writeback: move d...
651
  	INIT_LIST_HEAD(&bdi->bdi_list);
03ba3782e   Jens Axboe   writeback: switch...
652
653
654
  	INIT_LIST_HEAD(&bdi->work_list);
  
  	bdi_wb_init(&bdi->wb, bdi);
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
655
  	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
ea319518b   Peter Zijlstra   locking, percpu c...
656
  		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
04fbfdc14   Peter Zijlstra   mm: per device di...
657
658
659
660
661
  		if (err)
  			goto err;
  	}
  
  	bdi->dirty_exceeded = 0;
e98be2d59   Wu Fengguang   writeback: bdi wr...
662
663
664
665
666
667
  
  	bdi->bw_time_stamp = jiffies;
  	bdi->written_stamp = 0;
  
  	bdi->write_bandwidth = INIT_BW;
  	bdi->avg_write_bandwidth = INIT_BW;
04fbfdc14   Peter Zijlstra   mm: per device di...
668
669
670
671
  	err = prop_local_init_percpu(&bdi->completions);
  
  	if (err) {
  err:
4b01a0b16   Denis Cheng   mm/backing-dev.c:...
672
  		while (i--)
04fbfdc14   Peter Zijlstra   mm: per device di...
673
  			percpu_counter_destroy(&bdi->bdi_stat[i]);
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
674
675
676
677
678
679
680
681
682
  	}
  
  	return err;
  }
  EXPORT_SYMBOL(bdi_init);
  
  void bdi_destroy(struct backing_dev_info *bdi)
  {
  	int i;
ce5f8e779   Jens Axboe   writeback: splice...
683
684
685
686
687
688
  	/*
  	 * Splice our entries to the default_backing_dev_info, if this
  	 * bdi disappears
  	 */
  	if (bdi_has_dirty_io(bdi)) {
  		struct bdi_writeback *dst = &default_backing_dev_info.wb;
f758eeabe   Christoph Hellwig   writeback: split ...
689
  		bdi_lock_two(&bdi->wb, dst);
ce5f8e779   Jens Axboe   writeback: splice...
690
691
692
  		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
  		list_splice(&bdi->wb.b_io, &dst->b_io);
  		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
f758eeabe   Christoph Hellwig   writeback: split ...
693
694
  		spin_unlock(&bdi->wb.list_lock);
  		spin_unlock(&dst->list_lock);
ce5f8e779   Jens Axboe   writeback: splice...
695
  	}
66f3b8e2e   Jens Axboe   writeback: move d...
696

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
697
  	bdi_unregister(bdi);
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
698
699
  	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
  		percpu_counter_destroy(&bdi->bdi_stat[i]);
04fbfdc14   Peter Zijlstra   mm: per device di...
700
701
  
  	prop_local_destroy_percpu(&bdi->completions);
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
702
703
  }
  EXPORT_SYMBOL(bdi_destroy);
c3c532061   Jens Axboe   bdi: add helper f...
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
  /*
   * For use from filesystems to quickly init and register a bdi associated
   * with dirty writeback.
   *
   * @bdi:  backing_dev_info to initialise and register
   * @name: stored in bdi->name and used (truncated to 28 characters) as the
   *        prefix of the registered name, followed by "-<seq>" where <seq>
   *        comes from the global bdi_seq counter
   * @cap:  value stored in bdi->capabilities
   *
   * Returns 0 on success or the error from bdi_init()/bdi_register().  If
   * registration fails the bdi is destroyed again before returning.
   */
  int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
  			   unsigned int cap)
  {
  	int err;
  
  	bdi->name = name;
  	bdi->capabilities = cap;
  	err = bdi_init(bdi);
  	if (err)
  		return err;
  
  	/*
  	 * Pass a constant format to bdi_register() instead of building one
  	 * from @name: a '%' in the caller's string must not be interpreted
  	 * as a conversion, and the sequence number is a long, so it needs
  	 * %ld rather than %d.
  	 */
  	err = bdi_register(bdi, NULL, "%.28s-%ld", name,
  			   atomic_long_inc_return(&bdi_seq));
  	if (err) {
  		bdi_destroy(bdi);
  		return err;
  	}
  
  	return 0;
  }
  EXPORT_SYMBOL(bdi_setup_and_register);
3fcfab16c   Andrew Morton   [PATCH] separate ...
730
731
732
733
  /*
   * Wait queues for tasks sleeping in congestion_wait() and
   * wait_iff_congested().  Indexed by the callers' @sync argument; the
   * congestion helpers map a zero index to BDI_async_congested and a
   * non-zero one to BDI_sync_congested.  clear_bdi_congested() wakes the
   * matching queue.
   */
  static wait_queue_head_t congestion_wqh[2] = {
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
  	};
0e093d997   Mel Gorman   writeback: do not...
734
  /*
   * Number of bdis whose congested bit is currently set, indexed by the
   * same @sync value as congestion_wqh.  Incremented by set_bdi_congested()
   * when the bit goes from clear to set, decremented by
   * clear_bdi_congested() on the opposite transition, and read by
   * wait_iff_congested().
   */
  static atomic_t nr_bdi_congested[2];
3fcfab16c   Andrew Morton   [PATCH] separate ...
735

1faa16d22   Jens Axboe   block: change the...
736
  void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
3fcfab16c   Andrew Morton   [PATCH] separate ...
737
738
  {
  	enum bdi_state bit;
1faa16d22   Jens Axboe   block: change the...
739
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
3fcfab16c   Andrew Morton   [PATCH] separate ...
740

1faa16d22   Jens Axboe   block: change the...
741
  	bit = sync ? BDI_sync_congested : BDI_async_congested;
0e093d997   Mel Gorman   writeback: do not...
742
743
  	if (test_and_clear_bit(bit, &bdi->state))
  		atomic_dec(&nr_bdi_congested[sync]);
3fcfab16c   Andrew Morton   [PATCH] separate ...
744
745
746
747
748
  	smp_mb__after_clear_bit();
  	if (waitqueue_active(wqh))
  		wake_up(wqh);
  }
  EXPORT_SYMBOL(clear_bdi_congested);
1faa16d22   Jens Axboe   block: change the...
749
  void set_bdi_congested(struct backing_dev_info *bdi, int sync)
3fcfab16c   Andrew Morton   [PATCH] separate ...
750
751
  {
  	enum bdi_state bit;
1faa16d22   Jens Axboe   block: change the...
752
  	bit = sync ? BDI_sync_congested : BDI_async_congested;
0e093d997   Mel Gorman   writeback: do not...
753
754
  	if (!test_and_set_bit(bit, &bdi->state))
  		atomic_inc(&nr_bdi_congested[sync]);
3fcfab16c   Andrew Morton   [PATCH] separate ...
755
756
757
758
759
  }
  EXPORT_SYMBOL(set_bdi_congested);
  
  /**
   * congestion_wait - wait for a backing_dev to become uncongested
8aa7e847d   Jens Axboe   Fix congestion_wa...
760
   * @sync: SYNC or ASYNC IO
3fcfab16c   Andrew Morton   [PATCH] separate ...
761
762
763
764
765
766
   * @timeout: timeout in jiffies
   *
   * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
   * write congestion.  If no backing_devs are congested then just wait for the
   * next write to be completed.
   */
8aa7e847d   Jens Axboe   Fix congestion_wa...
767
  long congestion_wait(int sync, long timeout)
3fcfab16c   Andrew Morton   [PATCH] separate ...
768
769
  {
  	long ret;
52bb91986   Mel Gorman   writeback: accoun...
770
  	unsigned long start = jiffies;
3fcfab16c   Andrew Morton   [PATCH] separate ...
771
  	DEFINE_WAIT(wait);
8aa7e847d   Jens Axboe   Fix congestion_wa...
772
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
3fcfab16c   Andrew Morton   [PATCH] separate ...
773
774
775
776
  
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
52bb91986   Mel Gorman   writeback: accoun...
777
778
779
  
  	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
3fcfab16c   Andrew Morton   [PATCH] separate ...
780
781
782
  	return ret;
  }
  EXPORT_SYMBOL(congestion_wait);
04fbfdc14   Peter Zijlstra   mm: per device di...
783

0e093d997   Mel Gorman   writeback: do not...
784
785
786
787
788
789
790
791
792
793
794
  /**
   * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
   * @zone: A zone to check if it is heavily congested
   * @sync: SYNC or ASYNC IO
   * @timeout: timeout in jiffies
   *
   * In the event of a congested backing_dev (any backing_dev) and the given
   * @zone has experienced recent congestion, this waits for up to @timeout
   * jiffies for either a BDI to exit congestion of the given @sync queue
   * or a write to complete.
   *
25985edce   Lucas De Marchi   Fix common misspe...
795
   * In the absence of zone congestion, cond_resched() is called to yield
0e093d997   Mel Gorman   writeback: do not...
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
   * the processor if necessary but otherwise does not sleep.
   *
   * The return value is 0 if the sleep is for the full timeout. Otherwise,
   * it is the number of jiffies that were still remaining when the function
   * returned. return_value == timeout implies the function did not sleep.
   */
  long wait_iff_congested(struct zone *zone, int sync, long timeout)
  {
  	long ret;
  	unsigned long start = jiffies;
  	DEFINE_WAIT(wait);
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
  
  	/*
  	 * If there is no congestion, or heavy congestion is not being
  	 * encountered in the current zone, yield if necessary instead
  	 * of sleeping on the congestion queue
  	 */
  	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
  			!zone_is_reclaim_congested(zone)) {
  		cond_resched();
  
  		/* In case we scheduled, work out time remaining */
  		ret = timeout - (jiffies - start);
  		if (ret < 0)
  			ret = 0;
  
  		goto out;
  	}
  
  	/* Sleep until uncongested or a write happens */
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
  
  out:
  	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
  
  	return ret;
  }
  EXPORT_SYMBOL(wait_iff_congested);