Blame view

mm/backing-dev.c 18.9 KB
3fcfab16c   Andrew Morton   [PATCH] separate ...
1
2
3
  
  #include <linux/wait.h>
  #include <linux/backing-dev.h>
03ba3782e   Jens Axboe   writeback: switch...
4
5
  #include <linux/kthread.h>
  #include <linux/freezer.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
6
  #include <linux/fs.h>
26160158d   Jens Axboe   Move the default_...
7
  #include <linux/pagemap.h>
03ba3782e   Jens Axboe   writeback: switch...
8
  #include <linux/mm.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
9
10
  #include <linux/sched.h>
  #include <linux/module.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
11
12
  #include <linux/writeback.h>
  #include <linux/device.h>
455b28646   Dave Chinner   writeback: Initia...
13
  #include <trace/events/writeback.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
14

c3c532061   Jens Axboe   bdi: add helper f...
15
  static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
26160158d   Jens Axboe   Move the default_...
16
17
18
19
20
21
  void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
  {
  }
  EXPORT_SYMBOL(default_unplug_io_fn);
  
/*
 * Fallback backing_dev_info used when a device does not provide its own.
 * Its writeback thread doubles as the "forker" that spawns per-bdi threads
 * on demand (see bdi_forker_thread()).
 */
struct backing_dev_info default_backing_dev_info = {
	.name		= "default",
	/* default readahead window, expressed in page-cache pages */
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
	.unplug_io_fn	= default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
29

5129a469a   Jörn Engel   Catch filesystems...
30
31
/*
 * A bdi that neither accounts nor does writeback; for filesystems that
 * have no meaningful backing device.
 */
struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
35
  static struct class *bdi_class;
cfc4ba536   Jens Axboe   writeback: use RC...
36
37
38
39
40
41
  
  /*
   * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
   * reader side protection for bdi_pending_list. bdi_list has RCU reader side
   * locking.
   */
03ba3782e   Jens Axboe   writeback: switch...
42
  DEFINE_SPINLOCK(bdi_lock);
66f3b8e2e   Jens Axboe   writeback: move d...
43
  LIST_HEAD(bdi_list);
03ba3782e   Jens Axboe   writeback: switch...
44
45
46
47
48
49
50
  LIST_HEAD(bdi_pending_list);
  
  static struct task_struct *sync_supers_tsk;
  static struct timer_list sync_supers_timer;
  
  static int bdi_sync_supers(void *);
  static void sync_supers_timer_fn(unsigned long);
03ba3782e   Jens Axboe   writeback: switch...
51

76f1418b4   Miklos Szeredi   mm: bdi: move sta...
52
53
54
55
56
57
58
59
60
61
62
63
64
65
  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
/* Create the top-level "bdi" debugfs directory; per-bdi dirs hang off it. */
static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}
  
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
c1955ce32   Christoph Hellwig   writeback: remove...
66
  	struct bdi_writeback *wb = &bdi->wb;
364aeb284   David Rientjes   mm: change dirty ...
67
68
69
  	unsigned long background_thresh;
  	unsigned long dirty_thresh;
  	unsigned long bdi_thresh;
f09b00d3e   Jens Axboe   writeback: add so...
70
71
  	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
  	struct inode *inode;
f09b00d3e   Jens Axboe   writeback: add so...
72
73
  	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
  	spin_lock(&inode_lock);
c1955ce32   Christoph Hellwig   writeback: remove...
74
75
76
77
78
79
  	list_for_each_entry(inode, &wb->b_dirty, i_list)
  		nr_dirty++;
  	list_for_each_entry(inode, &wb->b_io, i_list)
  		nr_io++;
  	list_for_each_entry(inode, &wb->b_more_io, i_list)
  		nr_more_io++;
f09b00d3e   Jens Axboe   writeback: add so...
80
  	spin_unlock(&inode_lock);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
81

16c4042f0   Wu Fengguang   writeback: avoid ...
82
83
  	global_dirty_limits(&background_thresh, &dirty_thresh);
  	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
84
85
86
87
88
89
90
91
92
93
94
  
  #define K(x) ((x) << (PAGE_SHIFT - 10))
  	seq_printf(m,
  		   "BdiWriteback:     %8lu kB
  "
  		   "BdiReclaimable:   %8lu kB
  "
  		   "BdiDirtyThresh:   %8lu kB
  "
  		   "DirtyThresh:      %8lu kB
  "
f09b00d3e   Jens Axboe   writeback: add so...
95
96
  		   "BackgroundThresh: %8lu kB
  "
f09b00d3e   Jens Axboe   writeback: add so...
97
98
99
100
101
102
103
104
  		   "b_dirty:          %8lu
  "
  		   "b_io:             %8lu
  "
  		   "b_more_io:        %8lu
  "
  		   "bdi_list:         %8u
  "
c1955ce32   Christoph Hellwig   writeback: remove...
105
106
  		   "state:            %8lx
  ",
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
107
108
  		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
  		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
f09b00d3e   Jens Axboe   writeback: add so...
109
  		   K(bdi_thresh), K(dirty_thresh),
c1955ce32   Christoph Hellwig   writeback: remove...
110
111
  		   K(background_thresh), nr_dirty, nr_io, nr_more_io,
  		   !list_empty(&bdi->bdi_list), bdi->state);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
  #undef K
  
  	return 0;
  }
  
/* debugfs open: bind the seq_file to the bdi stashed in i_private. */
static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, bdi_debug_stats_show, inode->i_private);
}
  
/* File operations for the per-bdi debugfs "stats" file. */
static const struct file_operations bdi_debug_stats_fops = {
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
  
/* Create <root>/bdi/<name>/stats (read-only) for this bdi. */
static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
					       bdi, &bdi_debug_stats_fops);
}
  
/* Tear down the per-bdi debugfs entries (file first, then directory). */
static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
  #else
  static inline void bdi_debug_init(void)
  {
  }
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
  				      const char *name)
  {
  }
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  }
  #endif
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
  static ssize_t read_ahead_kb_store(struct device *dev,
  				  struct device_attribute *attr,
  				  const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	char *end;
  	unsigned long read_ahead_kb;
  	ssize_t ret = -EINVAL;
  
  	read_ahead_kb = simple_strtoul(buf, &end, 10);
  	if (*buf && (end[0] == '\0' || (end[0] == '
  ' && end[1] == '\0'))) {
  		bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
  		ret = count;
  	}
  	return ret;
  }
  
  #define K(pages) ((pages) << (PAGE_SHIFT - 10))
  
  #define BDI_SHOW(name, expr)						\
  static ssize_t name##_show(struct device *dev,				\
  			   struct device_attribute *attr, char *page)	\
  {									\
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
  									\
  	return snprintf(page, PAGE_SIZE-1, "%lld
  ", (long long)expr);	\
  }
  
  BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  static ssize_t min_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	char *end;
  	unsigned int ratio;
  	ssize_t ret = -EINVAL;
  
  	ratio = simple_strtoul(buf, &end, 10);
  	if (*buf && (end[0] == '\0' || (end[0] == '
  ' && end[1] == '\0'))) {
  		ret = bdi_set_min_ratio(bdi, ratio);
  		if (!ret)
  			ret = count;
  	}
  	return ret;
  }
  BDI_SHOW(min_ratio, bdi->min_ratio)
a42dde041   Peter Zijlstra   mm: bdi: allow se...
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  static ssize_t max_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	char *end;
  	unsigned int ratio;
  	ssize_t ret = -EINVAL;
  
  	ratio = simple_strtoul(buf, &end, 10);
  	if (*buf && (end[0] == '\0' || (end[0] == '
  ' && end[1] == '\0'))) {
  		ret = bdi_set_max_ratio(bdi, ratio);
  		if (!ret)
  			ret = count;
  	}
  	return ret;
  }
  BDI_SHOW(max_ratio, bdi->max_ratio)
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
220
221
222
223
/* Shorthand: a 0644 attribute wired to name_show/name_store. */
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

/* Default sysfs attributes installed on every bdi device. */
static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RW(read_ahead_kb),
	__ATTR_RW(min_ratio),
	__ATTR_RW(max_ratio),
	__ATTR_NULL,
};
  
/*
 * Create the "bdi" device class, attach the default attributes and set up
 * the debugfs root. Runs at postcore_initcall time so it precedes driver
 * bdi registration.
 */
static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_attrs = bdi_dev_attrs;
	bdi_debug_init();
	return 0;
}
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
238
  postcore_initcall(bdi_class_init);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
239

26160158d   Jens Axboe   Move the default_...
240
241
242
  static int __init default_bdi_init(void)
  {
  	int err;
03ba3782e   Jens Axboe   writeback: switch...
243
244
  	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
  	BUG_ON(IS_ERR(sync_supers_tsk));
03ba3782e   Jens Axboe   writeback: switch...
245
  	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
6423104b6   Jens Axboe   writeback: fixups...
246
  	bdi_arm_supers_timer();
03ba3782e   Jens Axboe   writeback: switch...
247

26160158d   Jens Axboe   Move the default_...
248
249
250
  	err = bdi_init(&default_backing_dev_info);
  	if (!err)
  		bdi_register(&default_backing_dev_info, NULL, "default");
976e48f8a   Jan Kara   bdi: Initialize n...
251
  	err = bdi_init(&noop_backing_dev_info);
26160158d   Jens Axboe   Move the default_...
252
253
254
255
  
  	return err;
  }
  subsys_initcall(default_bdi_init);
03ba3782e   Jens Axboe   writeback: switch...
256
257
258
259
260
261
262
263
/* Returns non-zero if any of the bdi's writeback lists are non-empty. */
int bdi_has_dirty_io(struct backing_dev_info *bdi)
{
	return wb_has_dirty_io(&bdi->wb);
}
  
/*
 * Last-resort synchronous flush used when a writeback thread could not be
 * created: push up to 1024 pages of this bdi's dirty inodes directly.
 */
static void bdi_flush_io(struct backing_dev_info *bdi)
{
	struct writeback_control wbc = {
		.sync_mode		= WB_SYNC_NONE,
		.older_than_this	= NULL,
		.range_cyclic		= 1,
		.nr_to_write		= 1024,
	};

	writeback_inodes_wb(&bdi->wb, &wbc);
}
  
/*
 * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
 * or we risk deadlocking on ->s_umount. The longer term solution would be
 * to implement sync_supers_bdi() or similar and simply do it from the
 * bdi writeback thread individually.
 *
 * Kthread main loop: sleeps until woken by sync_supers_timer_fn() and
 * then writes back dirty super blocks.
 */
static int bdi_sync_supers(void *unused)
{
	set_user_nice(current, 0);

	while (!kthread_should_stop()) {
		/* Sleep until the periodic timer wakes us */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();

		/*
		 * Do this periodically, like kupdated() did before.
		 */
		sync_supers();
	}

	return 0;
}
6423104b6   Jens Axboe   writeback: fixups...
294
/*
 * (Re)arm the sync_supers timer for one dirty_writeback_interval from now
 * (the sysctl is in centisecs, hence * 10 to get msecs). A zero interval
 * disables periodic super-block sync entirely.
 */
void bdi_arm_supers_timer(void)
{
	unsigned long next;

	if (!dirty_writeback_interval)
		return;

	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
	/* round up to the next whole jiffy boundary to batch wakeups */
	mod_timer(&sync_supers_timer, round_jiffies_up(next));
}
  
/* Timer callback: kick the sync_supers kthread and re-arm ourselves. */
static void sync_supers_timer_fn(unsigned long unused)
{
	wake_up_process(sync_supers_tsk);
	bdi_arm_supers_timer();
}
6467716a3   Artem Bityutskiy   writeback: optimi...
308
309
310
311
312
313
/*
 * Per-bdi wakeup timer (armed by bdi_wakeup_thread_delayed()): wake this
 * bdi's writeback thread, or — if it was reaped for inactivity — wake the
 * forker thread so it can re-create one. wb_lock guards wb.task.
 */
static void wakeup_timer_fn(unsigned long data)
{
	struct backing_dev_info *bdi = (struct backing_dev_info *)data;

	spin_lock_bh(&bdi->wb_lock);
	if (bdi->wb.task) {
		trace_writeback_wake_thread(bdi);
		wake_up_process(bdi->wb.task);
	} else {
		/*
		 * When bdi tasks are inactive for long time, they are killed.
		 * In this case we have to wake-up the forker thread which
		 * should create and run the bdi thread.
		 */
		trace_writeback_wake_forker_thread(bdi);
		wake_up_process(default_backing_dev_info.wb.task);
	}
	spin_unlock_bh(&bdi->wb_lock);
}
  
/*
 * This function is used when the first inode for this bdi is marked dirty. It
 * wakes-up the corresponding bdi thread which should then take care of the
 * periodic background write-out of dirty inodes. Since the write-out would
 * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
 * set up a timer which wakes the bdi thread up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
 * by delaying the wake-up.
 */
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
{
	unsigned long timeout;

	/* dirty_writeback_interval is in centisecs; convert to jiffies */
	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
}
fff5b85aa   Artem Bityutskiy   writeback: move b...
346
347
348
349
350
351
352
353
354
355
356
  /*
   * Calculate the longest interval (jiffies) bdi threads are allowed to be
   * inactive.
   */
  static unsigned long bdi_longest_inactive(void)
  {
  	unsigned long interval;
  
  	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
  	return max(5UL * 60 * HZ, interval);
  }
6f904ff0e   Artem Bityutskiy   writeback: harmon...
357
  static int bdi_forker_thread(void *ptr)
03ba3782e   Jens Axboe   writeback: switch...
358
359
  {
  	struct bdi_writeback *me = ptr;
c1955ce32   Christoph Hellwig   writeback: remove...
360
361
362
363
364
365
366
  	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
  	set_freezable();
  
  	/*
  	 * Our parent may run at a different priority, just set us to normal
  	 */
  	set_user_nice(current, 0);
03ba3782e   Jens Axboe   writeback: switch...
367
368
  
  	for (;;) {
fff5b85aa   Artem Bityutskiy   writeback: move b...
369
  		struct task_struct *task = NULL;
78c40cb65   Artem Bityutskiy   writeback: do not...
370
  		struct backing_dev_info *bdi;
adf392407   Artem Bityutskiy   writeback: restru...
371
372
373
  		enum {
  			NO_ACTION,   /* Nothing to do */
  			FORK_THREAD, /* Fork bdi thread */
fff5b85aa   Artem Bityutskiy   writeback: move b...
374
  			KILL_THREAD, /* Kill inactive bdi thread */
adf392407   Artem Bityutskiy   writeback: restru...
375
  		} action = NO_ACTION;
03ba3782e   Jens Axboe   writeback: switch...
376
377
378
379
380
  
  		/*
  		 * Temporary measure, we want to make sure we don't see
  		 * dirty data on the default backing_dev_info
  		 */
6467716a3   Artem Bityutskiy   writeback: optimi...
381
382
  		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
  			del_timer(&me->wakeup_timer);
03ba3782e   Jens Axboe   writeback: switch...
383
  			wb_do_writeback(me, 0);
6467716a3   Artem Bityutskiy   writeback: optimi...
384
  		}
03ba3782e   Jens Axboe   writeback: switch...
385

cfc4ba536   Jens Axboe   writeback: use RC...
386
  		spin_lock_bh(&bdi_lock);
c5f7ad233   Artem Bityutskiy   writeback: do not...
387
  		set_current_state(TASK_INTERRUPTIBLE);
03ba3782e   Jens Axboe   writeback: switch...
388

78c40cb65   Artem Bityutskiy   writeback: do not...
389
  		list_for_each_entry(bdi, &bdi_list, bdi_list) {
adf392407   Artem Bityutskiy   writeback: restru...
390
391
392
393
  			bool have_dirty_io;
  
  			if (!bdi_cap_writeback_dirty(bdi) ||
  			     bdi_cap_flush_forker(bdi))
03ba3782e   Jens Axboe   writeback: switch...
394
  				continue;
080dcec41   Artem Bityutskiy   writeback: simpli...
395
396
397
  			WARN(!test_bit(BDI_registered, &bdi->state),
  			     "bdi %p/%s is not registered!
  ", bdi, bdi->name);
adf392407   Artem Bityutskiy   writeback: restru...
398
399
  			have_dirty_io = !list_empty(&bdi->work_list) ||
  					wb_has_dirty_io(&bdi->wb);
78c40cb65   Artem Bityutskiy   writeback: do not...
400
401
  
  			/*
adf392407   Artem Bityutskiy   writeback: restru...
402
403
  			 * If the bdi has work to do, but the thread does not
  			 * exist - create it.
78c40cb65   Artem Bityutskiy   writeback: do not...
404
  			 */
adf392407   Artem Bityutskiy   writeback: restru...
405
406
407
408
409
410
411
412
413
  			if (!bdi->wb.task && have_dirty_io) {
  				/*
  				 * Set the pending bit - if someone will try to
  				 * unregister this bdi - it'll wait on this bit.
  				 */
  				set_bit(BDI_pending, &bdi->state);
  				action = FORK_THREAD;
  				break;
  			}
fff5b85aa   Artem Bityutskiy   writeback: move b...
414

6bf05d03e   Jens Axboe   writeback: fix ba...
415
  			spin_lock(&bdi->wb_lock);
fff5b85aa   Artem Bityutskiy   writeback: move b...
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
  			/*
  			 * If there is no work to do and the bdi thread was
  			 * inactive long enough - kill it. The wb_lock is taken
  			 * to make sure no-one adds more work to this bdi and
  			 * wakes the bdi thread up.
  			 */
  			if (bdi->wb.task && !have_dirty_io &&
  			    time_after(jiffies, bdi->wb.last_active +
  						bdi_longest_inactive())) {
  				task = bdi->wb.task;
  				bdi->wb.task = NULL;
  				spin_unlock(&bdi->wb_lock);
  				set_bit(BDI_pending, &bdi->state);
  				action = KILL_THREAD;
  				break;
  			}
6bf05d03e   Jens Axboe   writeback: fix ba...
432
  			spin_unlock(&bdi->wb_lock);
03ba3782e   Jens Axboe   writeback: switch...
433
  		}
080dcec41   Artem Bityutskiy   writeback: simpli...
434
  		spin_unlock_bh(&bdi_lock);
03ba3782e   Jens Axboe   writeback: switch...
435

c4ec7908c   Artem Bityutskiy   writeback: do not...
436
437
438
  		/* Keep working if default bdi still has things to do */
  		if (!list_empty(&me->bdi->work_list))
  			__set_current_state(TASK_RUNNING);
adf392407   Artem Bityutskiy   writeback: restru...
439
440
441
  		switch (action) {
  		case FORK_THREAD:
  			__set_current_state(TASK_RUNNING);
6628bc74f   Artem Bityutskiy   writeback: do not...
442
443
  			task = kthread_create(bdi_writeback_thread, &bdi->wb,
  					      "flush-%s", dev_name(bdi->dev));
adf392407   Artem Bityutskiy   writeback: restru...
444
445
446
447
448
449
  			if (IS_ERR(task)) {
  				/*
  				 * If thread creation fails, force writeout of
  				 * the bdi from the thread.
  				 */
  				bdi_flush_io(bdi);
fff5b85aa   Artem Bityutskiy   writeback: move b...
450
451
452
453
  			} else {
  				/*
  				 * The spinlock makes sure we do not lose
  				 * wake-ups when racing with 'bdi_queue_work()'.
6628bc74f   Artem Bityutskiy   writeback: do not...
454
455
  				 * And as soon as the bdi thread is visible, we
  				 * can start it.
fff5b85aa   Artem Bityutskiy   writeback: move b...
456
  				 */
6467716a3   Artem Bityutskiy   writeback: optimi...
457
  				spin_lock_bh(&bdi->wb_lock);
adf392407   Artem Bityutskiy   writeback: restru...
458
  				bdi->wb.task = task;
6467716a3   Artem Bityutskiy   writeback: optimi...
459
  				spin_unlock_bh(&bdi->wb_lock);
6628bc74f   Artem Bityutskiy   writeback: do not...
460
  				wake_up_process(task);
fff5b85aa   Artem Bityutskiy   writeback: move b...
461
462
463
464
465
466
  			}
  			break;
  
  		case KILL_THREAD:
  			__set_current_state(TASK_RUNNING);
  			kthread_stop(task);
adf392407   Artem Bityutskiy   writeback: restru...
467
  			break;
03ba3782e   Jens Axboe   writeback: switch...
468

adf392407   Artem Bityutskiy   writeback: restru...
469
  		case NO_ACTION:
253c34e9b   Artem Bityutskiy   writeback: preven...
470
471
472
473
474
475
476
477
478
  			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
  				/*
  				 * There are no dirty data. The only thing we
  				 * should now care about is checking for
  				 * inactive bdi threads and killing them. Thus,
  				 * let's sleep for longer time, save energy and
  				 * be friendly for battery-driven devices.
  				 */
  				schedule_timeout(bdi_longest_inactive());
6423104b6   Jens Axboe   writeback: fixups...
479
  			else
253c34e9b   Artem Bityutskiy   writeback: preven...
480
  				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
03ba3782e   Jens Axboe   writeback: switch...
481
  			try_to_freeze();
adf392407   Artem Bityutskiy   writeback: restru...
482
  			/* Back to the main loop */
03ba3782e   Jens Axboe   writeback: switch...
483
484
  			continue;
  		}
fff5b85aa   Artem Bityutskiy   writeback: move b...
485
486
487
488
489
490
491
  
  		/*
  		 * Clear pending bit and wakeup anybody waiting to tear us down.
  		 */
  		clear_bit(BDI_pending, &bdi->state);
  		smp_mb__after_clear_bit();
  		wake_up_bit(&bdi->state, BDI_pending);
03ba3782e   Jens Axboe   writeback: switch...
492
493
494
495
  	}
  
  	return 0;
  }
cfc4ba536   Jens Axboe   writeback: use RC...
496
497
498
499
500
501
502
503
504
505
506
/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	/* wait for RCU readers of bdi_list to drop their references */
	synchronize_rcu();
}
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
507
508
509
/*
 * bdi_register - create the device node for a bdi and make it visible.
 * @bdi:    the backing_dev_info to register
 * @parent: parent device (may be NULL)
 * @fmt:    printf-style device name
 *
 * Idempotent: returns 0 immediately if the bdi already has a device.
 * For the default (flush-forker capable) bdi the forker thread is started
 * here; all other bdis get a flusher thread on demand from the forker.
 * Returns 0 or a negative errno.
 */
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		const char *fmt, ...)
{
	va_list args;
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	va_start(args, fmt);
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
	va_end(args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	bdi->dev = dev;

	/*
	 * Just start the forker thread for our default backing_dev_info,
	 * and add other bdi's to the list. They will get a thread created
	 * on-demand when they need it.
	 */
	if (bdi_cap_flush_forker(bdi)) {
		struct bdi_writeback *wb = &bdi->wb;

		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
						dev_name(dev));
		if (IS_ERR(wb->task))
			return PTR_ERR(wb->task);
	}

	bdi_debug_register(bdi, dev_name(dev));
	/* mark registered before publishing on bdi_list */
	set_bit(BDI_registered, &bdi->state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register);
  
/* Convenience wrapper: register a bdi named after its major:minor. */
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);
03ba3782e   Jens Axboe   writeback: switch...
552
553
554
555
/*
 * Remove bdi from the global list and shutdown any threads we have running
 */
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
{
	if (!bdi_cap_writeback_dirty(bdi))
		return;

	/*
	 * Make sure nobody finds us on the bdi_list anymore
	 */
	bdi_remove_from_list(bdi);

	/*
	 * If setup is pending, wait for that to complete first
	 */
	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
			TASK_UNINTERRUPTIBLE);

	/*
	 * Finally, kill the kernel thread. We don't need to be RCU
	 * safe anymore, since the bdi is gone from visibility. Force
	 * unfreeze of the thread before calling kthread_stop(), otherwise
	 * it would never exit if it is currently stuck in the refrigerator.
	 */
	if (bdi->wb.task) {
		thaw_process(bdi->wb.task);
		kthread_stop(bdi->wb.task);
	}
}
592b09a42   Jens Axboe   backing-dev: ensu...
582
583
584
585
586
587
588
589
590
591
592
593
594
595
/*
 * This bdi is going away now, make sure that no super_blocks point to it
 */
static void bdi_prune_sb(struct backing_dev_info *bdi)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_bdi == bdi)
			sb->s_bdi = NULL;
	}
	spin_unlock(&sb_lock);
}
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
596
597
598
/*
 * bdi_unregister - undo bdi_register: detach super_blocks, stop the
 * wakeup timer and flusher thread, remove debugfs entries and destroy
 * the device. Safe to call on an unregistered bdi (bdi->dev == NULL).
 */
void bdi_unregister(struct backing_dev_info *bdi)
{
	if (bdi->dev) {
		trace_writeback_bdi_unregister(bdi);
		bdi_prune_sb(bdi);
		/* stop delayed wakeups before tearing the thread down */
		del_timer_sync(&bdi->wb.wakeup_timer);

		/* the default bdi's forker thread is never shut down here */
		if (!bdi_cap_flush_forker(bdi))
			bdi_wb_shutdown(bdi);
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}
}
EXPORT_SYMBOL(bdi_unregister);
3fcfab16c   Andrew Morton   [PATCH] separate ...
611

6467716a3   Artem Bityutskiy   writeback: optimi...
612
613
614
615
616
617
618
619
620
621
622
/*
 * Initialize the embedded bdi_writeback: zero it, link it back to its
 * bdi, reset the flush timestamp, and set up the dirty lists and the
 * delayed-wakeup timer.
 */
static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
{
	memset(wb, 0, sizeof(*wb));

	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
623
624
/*
 * bdi_init - initialize a backing_dev_info's fields, per-cpu stat
 * counters and dirty-limit proportion state.
 *
 * Returns 0 or a negative errno. On failure, any counters that were
 * already initialized are destroyed again (the goto lands inside the
 * error branch so the same while(i--) unwinds both failure paths).
 */
int bdi_init(struct backing_dev_info *bdi)
{
	int i, err;

	bdi->dev = NULL;

	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = PROP_FRAC_BASE;
	spin_lock_init(&bdi->wb_lock);
	INIT_LIST_HEAD(&bdi->bdi_list);
	INIT_LIST_HEAD(&bdi->work_list);

	bdi_wb_init(&bdi->wb, bdi);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
		if (err)
			goto err;
	}

	bdi->dirty_exceeded = 0;
	err = prop_local_init_percpu(&bdi->completions);

	if (err) {
err:
		/* unwind the counters initialized so far */
		while (i--)
			percpu_counter_destroy(&bdi->bdi_stat[i]);
	}

	return err;
}
EXPORT_SYMBOL(bdi_init);
  
/*
 * bdi_destroy - release everything bdi_init() set up. Any inodes still
 * on this bdi's writeback lists are handed to the default bdi so their
 * dirty data is not lost.
 */
void bdi_destroy(struct backing_dev_info *bdi)
{
	int i;

	/*
	 * Splice our entries to the default_backing_dev_info, if this
	 * bdi disappears
	 */
	if (bdi_has_dirty_io(bdi)) {
		struct bdi_writeback *dst = &default_backing_dev_info.wb;

		spin_lock(&inode_lock);
		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
		list_splice(&bdi->wb.b_io, &dst->b_io);
		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
		spin_unlock(&inode_lock);
	}

	bdi_unregister(bdi);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
		percpu_counter_destroy(&bdi->bdi_stat[i]);

	prop_local_destroy_percpu(&bdi->completions);
}
EXPORT_SYMBOL(bdi_destroy);
c3c532061   Jens Axboe   bdi: add helper f...
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
  /*
   * For use from filesystems to quickly init and register a bdi associated
   * with dirty writeback
   */
  int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
  			   unsigned int cap)
  {
  	char tmp[32];
  	int err;
  
  	bdi->name = name;
  	bdi->capabilities = cap;
  	err = bdi_init(bdi);
  	if (err)
  		return err;
  
  	sprintf(tmp, "%.28s%s", name, "-%d");
  	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
  	if (err) {
  		bdi_destroy(bdi);
  		return err;
  	}
  
  	return 0;
  }
  EXPORT_SYMBOL(bdi_setup_and_register);
3fcfab16c   Andrew Morton   [PATCH] separate ...
705
706
707
708
  static wait_queue_head_t congestion_wqh[2] = {
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
  	};
1faa16d22   Jens Axboe   block: change the...
709
/*
 * clear_bdi_congested - mark a bdi as no longer congested for the given
 * direction (@sync selects BDI_sync_congested vs BDI_async_congested) and
 * wake anyone sleeping in congestion_wait() on that queue.
 */
void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	clear_bit(bit, &bdi->state);
	/* pair the bit clear with the waitqueue check below */
	smp_mb__after_clear_bit();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_bdi_congested);
1faa16d22   Jens Axboe   block: change the...
721
/*
 * set_bdi_congested - mark a bdi as congested for the given direction
 * (@sync selects BDI_sync_congested vs BDI_async_congested).
 */
void set_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	set_bit(bit, &bdi->state);
}
EXPORT_SYMBOL(set_bdi_congested);
  
/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 *
 * Return: remaining jiffies, as returned by io_schedule_timeout().
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);
	return ret;
}
EXPORT_SYMBOL(congestion_wait);
04fbfdc14   Peter Zijlstra   mm: per device di...
750