mm/backing-dev.c
  
  #include <linux/wait.h>
  #include <linux/backing-dev.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/fs.h>
  #include <linux/pagemap.h>
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/module.h>
  #include <linux/writeback.h>
  #include <linux/device.h>
  #include <trace/events/writeback.h>

  static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
  struct backing_dev_info default_backing_dev_info = {
  	.name		= "default",
  	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
  	.state		= 0,
  	.capabilities	= BDI_CAP_MAP_COPY,
  };
  EXPORT_SYMBOL_GPL(default_backing_dev_info);

  struct backing_dev_info noop_backing_dev_info = {
  	.name		= "noop",
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
  };
  EXPORT_SYMBOL_GPL(noop_backing_dev_info);
  static struct class *bdi_class;
  
  /*
   * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
   * locking.
   */
  DEFINE_SPINLOCK(bdi_lock);
  LIST_HEAD(bdi_list);
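
/*
 * Reader side, as a sketch for reference (the actual walkers live in the
 * writeback code, not in this file): bdi_list is traversed under RCU, not
 * bdi_lock:
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 *		...
 *	}
 *	rcu_read_unlock();
 *
 * Writers, such as bdi_register() and bdi_remove_from_list() below, update
 * the list under bdi_lock with the _rcu list primitives.
 */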

  /* bdi_wq serves all asynchronous writeback tasks */
  struct workqueue_struct *bdi_wq;
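/*
 * Take two wb->list_lock's in a stable (address) order so that two racing
 * callers locking the same pair cannot deadlock; spin_lock_nested() tells
 * lockdep that taking a second lock of the same class here is intentional.
 */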
  static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
  {
  	if (wb1 < wb2) {
  		spin_lock(&wb1->list_lock);
  		spin_lock_nested(&wb2->list_lock, 1);
  	} else {
  		spin_lock(&wb2->list_lock);
  		spin_lock_nested(&wb1->list_lock, 1);
  	}
  }
  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
  static void bdi_debug_init(void)
  {
  	bdi_debug_root = debugfs_create_dir("bdi", NULL);
  }
  
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
  	struct bdi_writeback *wb = &bdi->wb;
  	unsigned long background_thresh;
  	unsigned long dirty_thresh;
  	unsigned long bdi_thresh;
  	unsigned long nr_dirty, nr_io, nr_more_io;
  	struct inode *inode;
  	nr_dirty = nr_io = nr_more_io = 0;
  	spin_lock(&wb->list_lock);
  	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
  		nr_dirty++;
  	list_for_each_entry(inode, &wb->b_io, i_wb_list)
  		nr_io++;
  	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
  		nr_more_io++;
  	spin_unlock(&wb->list_lock);

  	global_dirty_limits(&background_thresh, &dirty_thresh);
  	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
  
  #define K(x) ((x) << (PAGE_SHIFT - 10))
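	/*
	 * K() converts pages to kB; with the usual 4 KiB pages PAGE_SHIFT
	 * is 12, so K(x) == x << 2 == x * 4.
	 */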
  	seq_printf(m,
		   "BdiWriteback:       %10lu kB\n"
		   "BdiReclaimable:     %10lu kB\n"
		   "BdiDirtyThresh:     %10lu kB\n"
		   "DirtyThresh:        %10lu kB\n"
		   "BackgroundThresh:   %10lu kB\n"
		   "BdiDirtied:         %10lu kB\n"
		   "BdiWritten:         %10lu kB\n"
		   "BdiWriteBandwidth:  %10lu kBps\n"
		   "b_dirty:            %10lu\n"
		   "b_io:               %10lu\n"
		   "b_more_io:          %10lu\n"
		   "bdi_list:           %10u\n"
		   "state:              %10lx\n",
  		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
  		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
  		   K(bdi_thresh),
  		   K(dirty_thresh),
  		   K(background_thresh),
  		   (unsigned long) K(bdi_stat(bdi, BDI_DIRTIED)),
  		   (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
  		   (unsigned long) K(bdi->write_bandwidth),
  		   nr_dirty,
  		   nr_io,
  		   nr_more_io,
  		   !list_empty(&bdi->bdi_list), bdi->state);
  #undef K
  
  	return 0;
  }
  
  static int bdi_debug_stats_open(struct inode *inode, struct file *file)
  {
  	return single_open(file, bdi_debug_stats_show, inode->i_private);
  }
  
  static const struct file_operations bdi_debug_stats_fops = {
  	.open		= bdi_debug_stats_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= single_release,
  };
  
  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
  {
  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
  	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
  					       bdi, &bdi_debug_stats_fops);
  }
  
  static void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  	debugfs_remove(bdi->debug_stats);
  	debugfs_remove(bdi->debug_dir);
  }
  #else
  static inline void bdi_debug_init(void)
  {
  }
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
  				      const char *name)
  {
  }
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  }
  #endif
  static ssize_t read_ahead_kb_store(struct device *dev,
  				  struct device_attribute *attr,
  				  const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	unsigned long read_ahead_kb;
  	ssize_t ret;

  	ret = kstrtoul(buf, 10, &read_ahead_kb);
  	if (ret < 0)
  		return ret;
  
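	/* sysfs passes kB; shifting by (PAGE_SHIFT - 10) converts kB to pages */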
  	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
  
  	return count;
  }
  
  #define K(pages) ((pages) << (PAGE_SHIFT - 10))
  
  #define BDI_SHOW(name, expr)						\
  static ssize_t name##_show(struct device *dev,				\
  			   struct device_attribute *attr, char *page)	\
  {									\
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
  									\
  	return snprintf(page, PAGE_SIZE-1, "%lld
  ", (long long)expr);	\
  }									\
  static DEVICE_ATTR_RW(name);
  
  BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
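
/*
 * The line above expands, roughly, to
 *
 *	static ssize_t read_ahead_kb_show(struct device *dev,
 *			struct device_attribute *attr, char *page)
 *	{
 *		struct backing_dev_info *bdi = dev_get_drvdata(dev);
 *
 *		return snprintf(page, PAGE_SIZE-1, "%lld\n",
 *				(long long)K(bdi->ra_pages));
 *	}
 *	static DEVICE_ATTR_RW(read_ahead_kb);
 *
 * which DEVICE_ATTR_RW() pairs with read_ahead_kb_store() above to form
 * the read_ahead_kb attribute under /sys/class/bdi/<bdi>/.
 */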
  static ssize_t min_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	unsigned int ratio;
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_min_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;

  	return ret;
  }
  BDI_SHOW(min_ratio, bdi->min_ratio)
  static ssize_t max_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	unsigned int ratio;
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_max_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;

  	return ret;
  }
  BDI_SHOW(max_ratio, bdi->max_ratio)
  static ssize_t stable_pages_required_show(struct device *dev,
  					  struct device_attribute *attr,
  					  char *page)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  
  	return snprintf(page, PAGE_SIZE-1, "%d
  ",
  			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
  }
  static DEVICE_ATTR_RO(stable_pages_required);
  
  static struct attribute *bdi_dev_attrs[] = {
  	&dev_attr_read_ahead_kb.attr,
  	&dev_attr_min_ratio.attr,
  	&dev_attr_max_ratio.attr,
  	&dev_attr_stable_pages_required.attr,
  	NULL,
  };
  ATTRIBUTE_GROUPS(bdi_dev);
  
  static __init int bdi_class_init(void)
  {
  	bdi_class = class_create(THIS_MODULE, "bdi");
  	if (IS_ERR(bdi_class))
  		return PTR_ERR(bdi_class);
  	bdi_class->dev_groups = bdi_dev_groups;
  	bdi_debug_init();
  	return 0;
  }
  postcore_initcall(bdi_class_init);

  static int __init default_bdi_init(void)
  {
  	int err;
  	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
  					      WQ_UNBOUND | WQ_SYSFS, 0);
  	if (!bdi_wq)
  		return -ENOMEM;
  	err = bdi_init(&default_backing_dev_info);
  	if (!err)
  		bdi_register(&default_backing_dev_info, NULL, "default");
  	err = bdi_init(&noop_backing_dev_info);
  
  	return err;
  }
  subsys_initcall(default_bdi_init);
  int bdi_has_dirty_io(struct backing_dev_info *bdi)
  {
  	return wb_has_dirty_io(&bdi->wb);
  }
  /*
 * This function is used when the first inode for this bdi is marked dirty. It
 * wakes up the corresponding bdi thread which should then take care of the
 * periodic background write-out of dirty inodes. Since the write-out would
 * start only 'dirty_writeback_interval' centisecs from now anyway, we just
 * set up a timer which wakes the bdi thread up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast-path (used by '__mark_inode_dirty()'), so we save a few context
 * switches by delaying the wake-up.
   *
   * We have to be careful not to postpone flush work if it is scheduled for
   * earlier. Thus we use queue_delayed_work().
   */
  void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
  {
  	unsigned long timeout;
  
  	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
  	spin_lock_bh(&bdi->wb_lock);
  	if (test_bit(BDI_registered, &bdi->state))
  		queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
  	spin_unlock_bh(&bdi->wb_lock);
  }
  /*
   * Remove bdi from bdi_list, and ensure that it is no longer visible
   */
  static void bdi_remove_from_list(struct backing_dev_info *bdi)
  {
  	spin_lock_bh(&bdi_lock);
  	list_del_rcu(&bdi->bdi_list);
  	spin_unlock_bh(&bdi_lock);
  	synchronize_rcu_expedited();
  }
  int bdi_register(struct backing_dev_info *bdi, struct device *parent,
  		const char *fmt, ...)
  {
  	va_list args;
  	struct device *dev;
  	if (bdi->dev)	/* The driver needs to use separate queues per device */
  		return 0;

  	va_start(args, fmt);
  	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
  	va_end(args);
  	if (IS_ERR(dev))
  		return PTR_ERR(dev);

  	bdi->dev = dev;

  	bdi_debug_register(bdi, dev_name(dev));
  	set_bit(BDI_registered, &bdi->state);
  
  	spin_lock_bh(&bdi_lock);
  	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
  	spin_unlock_bh(&bdi_lock);
  	trace_writeback_bdi_register(bdi);
  	return 0;
  }
  EXPORT_SYMBOL(bdi_register);
  
  int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
  {
  	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
  }
  EXPORT_SYMBOL(bdi_register_dev);
  /*
   * Remove bdi from the global list and shutdown any threads we have running
   */
  static void bdi_wb_shutdown(struct backing_dev_info *bdi)
  {
  	if (!bdi_cap_writeback_dirty(bdi))
  		return;
  
  	/*
  	 * Make sure nobody finds us on the bdi_list anymore
  	 */
  	bdi_remove_from_list(bdi);

  	/* Make sure nobody queues further work */
  	spin_lock_bh(&bdi->wb_lock);
  	clear_bit(BDI_registered, &bdi->state);
  	spin_unlock_bh(&bdi->wb_lock);
  	/*
  	 * Drain work list and shutdown the delayed_work.  At this point,
  	 * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi
  	 * is dying and its work_list needs to be drained no matter what.
  	 */
  	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
  	flush_delayed_work(&bdi->wb.dwork);
  	WARN_ON(!list_empty(&bdi->work_list));
  	WARN_ON(delayed_work_pending(&bdi->wb.dwork));
  }
  /*
   * This bdi is going away now, make sure that no super_blocks point to it
   */
  static void bdi_prune_sb(struct backing_dev_info *bdi)
  {
  	struct super_block *sb;
  
  	spin_lock(&sb_lock);
  	list_for_each_entry(sb, &super_blocks, s_list) {
  		if (sb->s_bdi == bdi)
  			sb->s_bdi = &default_backing_dev_info;
  	}
  	spin_unlock(&sb_lock);
  }
  void bdi_unregister(struct backing_dev_info *bdi)
  {
  	if (bdi->dev) {
  		bdi_set_min_ratio(bdi, 0);
  		trace_writeback_bdi_unregister(bdi);
  		bdi_prune_sb(bdi);
  		bdi_wb_shutdown(bdi);
  		bdi_debug_unregister(bdi);
  		device_unregister(bdi->dev);
  		bdi->dev = NULL;
  	}
  }
  EXPORT_SYMBOL(bdi_unregister);

  static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
  {
  	memset(wb, 0, sizeof(*wb));
  
  	wb->bdi = bdi;
  	wb->last_old_flush = jiffies;
  	INIT_LIST_HEAD(&wb->b_dirty);
  	INIT_LIST_HEAD(&wb->b_io);
  	INIT_LIST_HEAD(&wb->b_more_io);
  	spin_lock_init(&wb->list_lock);
  	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
  }
  /*
   * Initial write bandwidth: 100 MB/s
   */
  #define INIT_BW		(100 << (20 - PAGE_SHIFT))
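/* with 4 KiB pages: 100 << (20 - 12) == 25600 pages/s, i.e. 100 MB/s */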
  int bdi_init(struct backing_dev_info *bdi)
  {
  	int i, err;

  	bdi->dev = NULL;
  	bdi->min_ratio = 0;
  	bdi->max_ratio = 100;
  	bdi->max_prop_frac = FPROP_FRAC_BASE;
  	spin_lock_init(&bdi->wb_lock);
  	INIT_LIST_HEAD(&bdi->bdi_list);
  	INIT_LIST_HEAD(&bdi->work_list);
  
  	bdi_wb_init(&bdi->wb, bdi);
  	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
  		err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL);
  		if (err)
  			goto err;
  	}
  
  	bdi->dirty_exceeded = 0;
  
  	bdi->bw_time_stamp = jiffies;
  	bdi->written_stamp = 0;
  	bdi->balanced_dirty_ratelimit = INIT_BW;
  	bdi->dirty_ratelimit = INIT_BW;
  	bdi->write_bandwidth = INIT_BW;
  	bdi->avg_write_bandwidth = INIT_BW;
  	err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL);
  
  	if (err) {
  err:
  		while (i--)
  			percpu_counter_destroy(&bdi->bdi_stat[i]);
  	}
  
  	return err;
  }
  EXPORT_SYMBOL(bdi_init);
  
  void bdi_destroy(struct backing_dev_info *bdi)
  {
  	int i;
  	/*
  	 * Splice our entries to the default_backing_dev_info.  This
  	 * condition shouldn't happen.  @wb must be empty at this point and
	 * dirty inodes on it might cause other issues.  This workaround was
	 * added by ce5f8e779519 ("writeback: splice dirty inode entries to
	 * default bdi on bdi_destroy()") without root-causing the issue.
  	 *
  	 * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com
  	 * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350
  	 *
  	 * We should probably add WARN_ON() to find out whether it still
  	 * happens and track it down if so.
  	 */
  	if (bdi_has_dirty_io(bdi)) {
  		struct bdi_writeback *dst = &default_backing_dev_info.wb;
  		bdi_lock_two(&bdi->wb, dst);
  		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
  		list_splice(&bdi->wb.b_io, &dst->b_io);
  		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
  		spin_unlock(&bdi->wb.list_lock);
  		spin_unlock(&dst->list_lock);
  	}

  	bdi_unregister(bdi);
  	WARN_ON(delayed_work_pending(&bdi->wb.dwork));

  	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
  		percpu_counter_destroy(&bdi->bdi_stat[i]);

  	fprop_local_destroy_percpu(&bdi->completions);
  }
  EXPORT_SYMBOL(bdi_destroy);
  /*
   * For use from filesystems to quickly init and register a bdi associated
   * with dirty writeback
   */
  int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
  			   unsigned int cap)
  {
  	int err;
  
  	bdi->name = name;
  	bdi->capabilities = cap;
  	err = bdi_init(bdi);
  	if (err)
  		return err;
  	err = bdi_register(bdi, NULL, "%.28s-%ld", name,
  			   atomic_long_inc_return(&bdi_seq));
  	if (err) {
  		bdi_destroy(bdi);
  		return err;
  	}
  
  	return 0;
  }
  EXPORT_SYMBOL(bdi_setup_and_register);
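
/*
 * Typical use, as a sketch only ("examplefs" and the surrounding structure
 * are made up): a filesystem with no backing block device sets this up on
 * mount and points its superblock at the result:
 *
 *	err = bdi_setup_and_register(&fs_info->bdi, "examplefs", BDI_CAP_MAP_COPY);
 *	if (err)
 *		return err;
 *	sb->s_bdi = &fs_info->bdi;
 *
 * with a matching bdi_destroy() on unmount and on the error paths.
 */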
  static wait_queue_head_t congestion_wqh[2] = {
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
  	};
  static atomic_t nr_bdi_congested[2];

  void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
  {
  	enum bdi_state bit;
  	wait_queue_head_t *wqh = &congestion_wqh[sync];

  	bit = sync ? BDI_sync_congested : BDI_async_congested;
  	if (test_and_clear_bit(bit, &bdi->state))
  		atomic_dec(&nr_bdi_congested[sync]);
  	smp_mb__after_atomic();
  	if (waitqueue_active(wqh))
  		wake_up(wqh);
  }
  EXPORT_SYMBOL(clear_bdi_congested);
  void set_bdi_congested(struct backing_dev_info *bdi, int sync)
  {
  	enum bdi_state bit;
  	bit = sync ? BDI_sync_congested : BDI_async_congested;
  	if (!test_and_set_bit(bit, &bdi->state))
  		atomic_inc(&nr_bdi_congested[sync]);
  }
  EXPORT_SYMBOL(set_bdi_congested);
  
  /**
   * congestion_wait - wait for a backing_dev to become uncongested
   * @sync: SYNC or ASYNC IO
   * @timeout: timeout in jiffies
   *
   * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
   * write congestion.  If no backing_devs are congested then just wait for the
   * next write to be completed.
   */
  long congestion_wait(int sync, long timeout)
  {
  	long ret;
  	unsigned long start = jiffies;
  	DEFINE_WAIT(wait);
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
  
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
  
  	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
  	return ret;
  }
  EXPORT_SYMBOL(congestion_wait);
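
/*
 * Typical caller, as a sketch: writeback and reclaim paths back off with
 * something like
 *
 *	congestion_wait(BLK_RW_ASYNC, HZ/50);
 *
 * i.e. sleep for up to 20ms unless an async bdi leaves congestion or a
 * write completes first.
 */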

  /**
   * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
   * @zone: A zone to check if it is heavily congested
   * @sync: SYNC or ASYNC IO
   * @timeout: timeout in jiffies
   *
   * In the event of a congested backing_dev (any backing_dev) and the given
   * @zone has experienced recent congestion, this waits for up to @timeout
   * jiffies for either a BDI to exit congestion of the given @sync queue
   * or a write to complete.
   *
   * In the absence of zone congestion, cond_resched() is called to yield
   * the processor if necessary but otherwise does not sleep.
   *
   * The return value is 0 if the sleep is for the full timeout. Otherwise,
   * it is the number of jiffies that were still remaining when the function
   * returned. return_value == timeout implies the function did not sleep.
   */
  long wait_iff_congested(struct zone *zone, int sync, long timeout)
  {
  	long ret;
  	unsigned long start = jiffies;
  	DEFINE_WAIT(wait);
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
  
  	/*
  	 * If there is no congestion, or heavy congestion is not being
  	 * encountered in the current zone, yield if necessary instead
  	 * of sleeping on the congestion queue
  	 */
  	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
  	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
  		cond_resched();
  
  		/* In case we scheduled, work out time remaining */
  		ret = timeout - (jiffies - start);
  		if (ret < 0)
  			ret = 0;
  
  		goto out;
  	}
  
  	/* Sleep until uncongested or a write happens */
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
  
  out:
  	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
  
  	return ret;
  }
  EXPORT_SYMBOL(wait_iff_congested);
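
/*
 * Typical caller, as a sketch: direct reclaim throttles on a congested
 * zone with something like
 *
 *	wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
 *
 * which only sleeps when some bdi is congested and the zone is flagged
 * ZONE_CONGESTED; otherwise it just cond_resched()s and returns.
 */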
  
  int pdflush_proc_obsolete(struct ctl_table *table, int write,
  			void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	char kbuf[] = "0
  ";
  	if (*ppos || *lenp < sizeof(kbuf)) {
  		*lenp = 0;
  		return 0;
  	}
  
  	if (copy_to_user(buffer, kbuf, sizeof(kbuf)))
  		return -EFAULT;
	printk_once(KERN_WARNING "%s exported in /proc is scheduled for removal\n",
			table->procname);
  
  	*lenp = 2;
  	*ppos += *lenp;
  	return 2;
  }