mm/backing-dev.c
  
  #include <linux/wait.h>
  #include <linux/backing-dev.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/fs.h>
  #include <linux/pagemap.h>
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/module.h>
  #include <linux/writeback.h>
  #include <linux/device.h>
  #include <trace/events/writeback.h>

  static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
  struct backing_dev_info noop_backing_dev_info = {
  	.name		= "noop",
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
  };
  EXPORT_SYMBOL_GPL(noop_backing_dev_info);

  static struct class *bdi_class;
  
  /*
   * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
   * locking.
   */
  DEFINE_SPINLOCK(bdi_lock);
  LIST_HEAD(bdi_list);

  /* bdi_wq serves all asynchronous writeback tasks */
  struct workqueue_struct *bdi_wq;
  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
  static void bdi_debug_init(void)
  {
  	bdi_debug_root = debugfs_create_dir("bdi", NULL);
  }
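
  /*
   * Back the per-bdi debugfs "stats" file.  With debugfs mounted in the
   * usual place, it shows up as /sys/kernel/debug/bdi/<bdi-name>/stats,
   * one counter per line in the layout of the seq_printf() below.
   */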
  
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
  	struct bdi_writeback *wb = &bdi->wb;
  	unsigned long background_thresh;
  	unsigned long dirty_thresh;
  	unsigned long wb_thresh;
  	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
  	struct inode *inode;
  	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
  	spin_lock(&wb->list_lock);
  	list_for_each_entry(inode, &wb->b_dirty, i_io_list)
  		nr_dirty++;
  	list_for_each_entry(inode, &wb->b_io, i_io_list)
  		nr_io++;
  	list_for_each_entry(inode, &wb->b_more_io, i_io_list)
  		nr_more_io++;
  	list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
  		if (inode->i_state & I_DIRTY_TIME)
  			nr_dirty_time++;
  	spin_unlock(&wb->list_lock);

  	global_dirty_limits(&background_thresh, &dirty_thresh);
  	wb_thresh = wb_calc_thresh(wb, dirty_thresh);
  
  #define K(x) ((x) << (PAGE_SHIFT - 10))
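  	/*
  	 * K() converts a page count to kilobytes: shifting left by
  	 * (PAGE_SHIFT - 10) multiplies by the page size in KiB, e.g. by 4
  	 * when PAGE_SHIFT is 12 (4K pages).
  	 */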
  	seq_printf(m,
  		   "BdiWriteback:       %10lu kB\n"
  		   "BdiReclaimable:     %10lu kB\n"
  		   "BdiDirtyThresh:     %10lu kB\n"
  		   "DirtyThresh:        %10lu kB\n"
  		   "BackgroundThresh:   %10lu kB\n"
  		   "BdiDirtied:         %10lu kB\n"
  		   "BdiWritten:         %10lu kB\n"
  		   "BdiWriteBandwidth:  %10lu kBps\n"
  		   "b_dirty:            %10lu\n"
  		   "b_io:               %10lu\n"
  		   "b_more_io:          %10lu\n"
  		   "b_dirty_time:       %10lu\n"
  		   "bdi_list:           %10u\n"
  		   "state:              %10lx\n",
  		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
  		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
  		   K(wb_thresh),
  		   K(dirty_thresh),
  		   K(background_thresh),
  		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
  		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
  		   (unsigned long) K(wb->write_bandwidth),
  		   nr_dirty,
  		   nr_io,
  		   nr_more_io,
  		   nr_dirty_time,
  		   !list_empty(&bdi->bdi_list), bdi->wb.state);
  #undef K
  
  	return 0;
  }
  
  static int bdi_debug_stats_open(struct inode *inode, struct file *file)
  {
  	return single_open(file, bdi_debug_stats_show, inode->i_private);
  }
  
  static const struct file_operations bdi_debug_stats_fops = {
  	.open		= bdi_debug_stats_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= single_release,
  };
  
  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
  {
  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
  	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
  					       bdi, &bdi_debug_stats_fops);
  }
  
  static void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  	debugfs_remove(bdi->debug_stats);
  	debugfs_remove(bdi->debug_dir);
  }
  #else
  static inline void bdi_debug_init(void)
  {
  }
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
  				      const char *name)
  {
  }
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  }
  #endif
  static ssize_t read_ahead_kb_store(struct device *dev,
  				  struct device_attribute *attr,
  				  const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	unsigned long read_ahead_kb;
  	ssize_t ret;

  	ret = kstrtoul(buf, 10, &read_ahead_kb);
  	if (ret < 0)
  		return ret;
  
  	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
  
  	return count;
  }
  
  #define K(pages) ((pages) << (PAGE_SHIFT - 10))
  
  #define BDI_SHOW(name, expr)						\
  static ssize_t name##_show(struct device *dev,				\
  			   struct device_attribute *attr, char *page)	\
  {									\
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
  									\
  	return snprintf(page, PAGE_SIZE-1, "%lld
  ", (long long)expr);	\
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
180
181
  }									\
  static DEVICE_ATTR_RW(name);
  
  BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
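  /*
   * The invocation above roughly expands to:
   *
   *	static ssize_t read_ahead_kb_show(struct device *dev,
   *			struct device_attribute *attr, char *page)
   *	{
   *		struct backing_dev_info *bdi = dev_get_drvdata(dev);
   *
   *		return snprintf(page, PAGE_SIZE-1, "%lld\n",
   *				(long long)K(bdi->ra_pages));
   *	}
   *	static DEVICE_ATTR_RW(read_ahead_kb);
   *
   * with DEVICE_ATTR_RW() pairing the show helper with the
   * read_ahead_kb_store() defined above.
   */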
  static ssize_t min_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	unsigned int ratio;
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_min_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;

  	return ret;
  }
  BDI_SHOW(min_ratio, bdi->min_ratio)
  static ssize_t max_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  	unsigned int ratio;
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_max_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;

  	return ret;
  }
  BDI_SHOW(max_ratio, bdi->max_ratio)
  static ssize_t stable_pages_required_show(struct device *dev,
  					  struct device_attribute *attr,
  					  char *page)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  
  	return snprintf(page, PAGE_SIZE-1, "%d
  ",
  			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
  }
  static DEVICE_ATTR_RO(stable_pages_required);
  
  static struct attribute *bdi_dev_attrs[] = {
  	&dev_attr_read_ahead_kb.attr,
  	&dev_attr_min_ratio.attr,
  	&dev_attr_max_ratio.attr,
  	&dev_attr_stable_pages_required.attr,
  	NULL,
  };
  ATTRIBUTE_GROUPS(bdi_dev);
  
  static __init int bdi_class_init(void)
  {
  	bdi_class = class_create(THIS_MODULE, "bdi");
  	if (IS_ERR(bdi_class))
  		return PTR_ERR(bdi_class);
  	bdi_class->dev_groups = bdi_dev_groups;
  	bdi_debug_init();
  	return 0;
  }
  postcore_initcall(bdi_class_init);

  static int __init default_bdi_init(void)
  {
  	int err;
  	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
  					      WQ_UNBOUND | WQ_SYSFS, 0);
  	if (!bdi_wq)
  		return -ENOMEM;
  	err = bdi_init(&noop_backing_dev_info);
  
  	return err;
  }
  subsys_initcall(default_bdi_init);
  /*
   * This function is used when the first inode for this wb is marked dirty. It
   * wakes up the corresponding bdi thread which should then take care of the
   * periodic background write-out of dirty inodes. Since the write-out would
   * start only 'dirty_writeback_interval' centisecs from now anyway, we just
   * set up a timer which wakes the bdi thread up later.
   *
   * Note, we wouldn't bother setting up the timer, but this function is on the
   * fast-path (used by '__mark_inode_dirty()'), so we save a few context
   * switches by delaying the wake-up.
   *
   * We have to be careful not to postpone flush work if it is scheduled for
   * earlier. Thus we use queue_delayed_work().
   */
  void wb_wakeup_delayed(struct bdi_writeback *wb)
  {
  	unsigned long timeout;
  
  	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
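  	/* dirty_writeback_interval is in centiseconds, hence the "* 10" */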
  	spin_lock_bh(&wb->work_lock);
  	if (test_bit(WB_registered, &wb->state))
  		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
  	spin_unlock_bh(&wb->work_lock);
  }
  /*
   * Initial write bandwidth: 100 MB/s
   */
  #define INIT_BW		(100 << (20 - PAGE_SHIFT))
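  /*
   * That is 100 * 2^20 bytes/s expressed in pages: with 4K pages
   * (PAGE_SHIFT == 12), 100 << 8 == 25600 pages per second.
   */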

  static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
  		   int blkcg_id, gfp_t gfp)
  {
  	int i, err;

  	memset(wb, 0, sizeof(*wb));

  	wb->bdi = bdi;
  	wb->last_old_flush = jiffies;
  	INIT_LIST_HEAD(&wb->b_dirty);
  	INIT_LIST_HEAD(&wb->b_io);
  	INIT_LIST_HEAD(&wb->b_more_io);
  	INIT_LIST_HEAD(&wb->b_dirty_time);
  	spin_lock_init(&wb->list_lock);

  	wb->bw_time_stamp = jiffies;
  	wb->balanced_dirty_ratelimit = INIT_BW;
  	wb->dirty_ratelimit = INIT_BW;
  	wb->write_bandwidth = INIT_BW;
  	wb->avg_write_bandwidth = INIT_BW;

  	spin_lock_init(&wb->work_lock);
  	INIT_LIST_HEAD(&wb->work_list);
  	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);

  	wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
  	if (!wb->congested)
  		return -ENOMEM;
  	err = fprop_local_init_percpu(&wb->completions, gfp);
  	if (err)
  		goto out_put_cong;

  	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
  		err = percpu_counter_init(&wb->stat[i], 0, gfp);
  		if (err)
  			goto out_destroy_stat;
  	}

  	return 0;
  
  out_destroy_stat:
  	while (i--)
  		percpu_counter_destroy(&wb->stat[i]);
  	fprop_local_destroy_percpu(&wb->completions);
  out_put_cong:
  	wb_congested_put(wb->congested);
  	return err;
  }

  /*
   * Remove bdi from the global list and shutdown any threads we have running
   */
  static void wb_shutdown(struct bdi_writeback *wb)
  {
  	/* Make sure nobody queues further work */
  	spin_lock_bh(&wb->work_lock);
  	if (!test_and_clear_bit(WB_registered, &wb->state)) {
  		spin_unlock_bh(&wb->work_lock);
  		return;
  	}
  	spin_unlock_bh(&wb->work_lock);
  
  	/*
  	 * Drain work list and shutdown the delayed_work.  !WB_registered
  	 * tells wb_workfn() that @wb is dying and its work_list needs to
  	 * be drained no matter what.
  	 */
  	mod_delayed_work(bdi_wq, &wb->dwork, 0);
  	flush_delayed_work(&wb->dwork);
  	WARN_ON(!list_empty(&wb->work_list));
  }
  static void wb_exit(struct bdi_writeback *wb)
  {
  	int i;
  
  	WARN_ON(delayed_work_pending(&wb->dwork));
  
  	for (i = 0; i < NR_WB_STAT_ITEMS; i++)
  		percpu_counter_destroy(&wb->stat[i]);

  	fprop_local_destroy_percpu(&wb->completions);
  	wb_congested_put(wb->congested);
  }

  #ifdef CONFIG_CGROUP_WRITEBACK
  
  #include <linux/memcontrol.h>
  
  /*
   * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
   * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
   * protected.  cgwb_release_wait is used to wait for the completion of cgwb
   * releases from bdi destruction path.
   */
  static DEFINE_SPINLOCK(cgwb_lock);
  static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);
  
  /**
   * wb_congested_get_create - get or create a wb_congested
   * @bdi: associated bdi
   * @blkcg_id: ID of the associated blkcg
   * @gfp: allocation mask
   *
   * Look up the wb_congested for @blkcg_id on @bdi.  If missing, create one.
   * The returned wb_congested has its reference count incremented.  Returns
   * NULL on failure.
   */
  struct bdi_writeback_congested *
  wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
  {
  	struct bdi_writeback_congested *new_congested = NULL, *congested;
  	struct rb_node **node, *parent;
  	unsigned long flags;
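  	/*
  	 * Allocate-outside-the-lock pattern: search the rbtree under
  	 * cgwb_lock, and on a miss drop the lock, allocate a candidate
  	 * node and retry the lookup before linking it in.
  	 */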
  retry:
  	spin_lock_irqsave(&cgwb_lock, flags);
  
  	node = &bdi->cgwb_congested_tree.rb_node;
  	parent = NULL;
  
  	while (*node != NULL) {
  		parent = *node;
  		congested = container_of(parent, struct bdi_writeback_congested,
  					 rb_node);
  		if (congested->blkcg_id < blkcg_id)
  			node = &parent->rb_left;
  		else if (congested->blkcg_id > blkcg_id)
  			node = &parent->rb_right;
  		else
  			goto found;
  	}
  
  	if (new_congested) {
  		/* !found and storage for new one already allocated, insert */
  		congested = new_congested;
  		new_congested = NULL;
  		rb_link_node(&congested->rb_node, parent, node);
  		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
  		goto found;
  	}
  
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  
  	/* allocate storage for new one and retry */
  	new_congested = kzalloc(sizeof(*new_congested), gfp);
  	if (!new_congested)
  		return NULL;
  
  	atomic_set(&new_congested->refcnt, 0);
  	new_congested->bdi = bdi;
  	new_congested->blkcg_id = blkcg_id;
  	goto retry;
  
  found:
  	atomic_inc(&congested->refcnt);
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	kfree(new_congested);
  	return congested;
  }
  
  /**
   * wb_congested_put - put a wb_congested
   * @congested: wb_congested to put
   *
   * Put @congested and destroy it if the refcnt reaches zero.
   */
  void wb_congested_put(struct bdi_writeback_congested *congested)
  {
  	unsigned long flags;
  	local_irq_save(flags);
  	if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
  		local_irq_restore(flags);
  		return;
  	}
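  	/*
  	 * Past this point the refcount reached zero: atomic_dec_and_lock()
  	 * returned with cgwb_lock held, and irqs are still disabled from
  	 * local_irq_save() above, together mimicking spin_lock_irqsave().
  	 */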
  	/* bdi might already have been destroyed leaving @congested unlinked */
  	if (congested->bdi) {
  		rb_erase(&congested->rb_node,
  			 &congested->bdi->cgwb_congested_tree);
  		congested->bdi = NULL;
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	kfree(congested);
  }
  
  static void cgwb_release_workfn(struct work_struct *work)
  {
  	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
  						release_work);
  	struct backing_dev_info *bdi = wb->bdi;
  	spin_lock_irq(&cgwb_lock);
  	list_del_rcu(&wb->bdi_node);
  	spin_unlock_irq(&cgwb_lock);
  	wb_shutdown(wb);
  
  	css_put(wb->memcg_css);
  	css_put(wb->blkcg_css);

  	fprop_local_destroy_percpu(&wb->memcg_completions);
  	percpu_ref_exit(&wb->refcnt);
  	wb_exit(wb);
  	kfree_rcu(wb, rcu);
  
  	if (atomic_dec_and_test(&bdi->usage_cnt))
  		wake_up_all(&cgwb_release_wait);
  }
  
  static void cgwb_release(struct percpu_ref *refcnt)
  {
  	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
  						refcnt);
  	schedule_work(&wb->release_work);
  }
  
  static void cgwb_kill(struct bdi_writeback *wb)
  {
  	lockdep_assert_held(&cgwb_lock);
  
  	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
  	list_del(&wb->memcg_node);
  	list_del(&wb->blkcg_node);
  	percpu_ref_kill(&wb->refcnt);
  }
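  /*
   * percpu_ref_kill() above drops the base reference; once the remaining
   * short-term references are gone, cgwb_release() fires and punts the
   * actual teardown to cgwb_release_workfn() in process context.
   */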
  
  static int cgwb_create(struct backing_dev_info *bdi,
  		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
  {
  	struct mem_cgroup *memcg;
  	struct cgroup_subsys_state *blkcg_css;
  	struct blkcg *blkcg;
  	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
  	struct bdi_writeback *wb;
  	unsigned long flags;
  	int ret = 0;
  
  	memcg = mem_cgroup_from_css(memcg_css);
  	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
  	blkcg = css_to_blkcg(blkcg_css);
  	memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
  	blkcg_cgwb_list = &blkcg->cgwb_list;
  
  	/* look up again under lock and discard on blkcg mismatch */
  	spin_lock_irqsave(&cgwb_lock, flags);
  	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
  	if (wb && wb->blkcg_css != blkcg_css) {
  		cgwb_kill(wb);
  		wb = NULL;
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	if (wb)
  		goto out_put;
  
  	/* need to create a new one */
  	wb = kmalloc(sizeof(*wb), gfp);
  	if (!wb)
  		return -ENOMEM;
  	ret = wb_init(wb, bdi, blkcg_css->id, gfp);
  	if (ret)
  		goto err_free;
  
  	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
  	if (ret)
  		goto err_wb_exit;
  	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
  	if (ret)
  		goto err_ref_exit;
  	wb->memcg_css = memcg_css;
  	wb->blkcg_css = blkcg_css;
  	INIT_WORK(&wb->release_work, cgwb_release_workfn);
  	set_bit(WB_registered, &wb->state);
  
  	/*
  	 * The root wb determines the registered state of the whole bdi and
  	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
  	 * whether they're still online.  Don't link @wb if any is dead.
  	 * See wb_memcg_offline() and wb_blkcg_offline().
  	 */
  	ret = -ENODEV;
  	spin_lock_irqsave(&cgwb_lock, flags);
  	if (test_bit(WB_registered, &bdi->wb.state) &&
  	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
  		/* we might have raced another instance of this function */
  		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
  		if (!ret) {
  			atomic_inc(&bdi->usage_cnt);
  			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
  			list_add(&wb->memcg_node, memcg_cgwb_list);
  			list_add(&wb->blkcg_node, blkcg_cgwb_list);
  			css_get(memcg_css);
  			css_get(blkcg_css);
  		}
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	if (ret) {
  		if (ret == -EEXIST)
  			ret = 0;
  		goto err_fprop_exit;
  	}
  	goto out_put;
  err_fprop_exit:
  	fprop_local_destroy_percpu(&wb->memcg_completions);
  err_ref_exit:
  	percpu_ref_exit(&wb->refcnt);
  err_wb_exit:
  	wb_exit(wb);
  err_free:
  	kfree(wb);
  out_put:
  	css_put(blkcg_css);
  	return ret;
  }
  /**
   * wb_get_create - get wb for a given memcg, create if necessary
   * @bdi: target bdi
   * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
   * @gfp: allocation mask to use
   *
   * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
   * create one.  The returned wb has its refcount incremented.
   *
   * This function uses css_get() on @memcg_css and thus expects its refcnt
   * to be positive on invocation.  IOW, rcu_read_lock() protection on
   * @memcg_css isn't enough.  try_get it before calling this function.
   *
   * A wb is keyed by its associated memcg.  As blkcg implicitly enables
   * memcg on the default hierarchy, memcg association is guaranteed to be
   * more specific (equal or descendant to the associated blkcg) and thus can
   * identify both the memcg and blkcg associations.
   *
   * Because the blkcg associated with a memcg may change as blkcg is enabled
   * and disabled closer to root in the hierarchy, each wb keeps track of
   * both the memcg and blkcg associated with it and verifies the blkcg on
   * each lookup.  On mismatch, the existing wb is discarded and a new one is
   * created.
   */
  struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
  				    struct cgroup_subsys_state *memcg_css,
  				    gfp_t gfp)
  {
  	struct bdi_writeback *wb;
  	might_sleep_if(gfpflags_allow_blocking(gfp));
  
  	if (!memcg_css->parent)
  		return &bdi->wb;
  
  	do {
  		rcu_read_lock();
  		wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
  		if (wb) {
  			struct cgroup_subsys_state *blkcg_css;
  
  			/* see whether the blkcg association has changed */
  			blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
  						     &io_cgrp_subsys);
  			if (unlikely(wb->blkcg_css != blkcg_css ||
  				     !wb_tryget(wb)))
  				wb = NULL;
  			css_put(blkcg_css);
  		}
  		rcu_read_unlock();
  	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
  
  	return wb;
  }
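  /*
   * Illustrative caller pattern (not from this file): look up the wb for
   * the memcg a page or inode is attached to, use it, drop the reference:
   *
   *	wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
   *	if (wb) {
   *		... issue writeback against wb ...
   *		wb_put(wb);
   *	}
   */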

  static int cgwb_bdi_init(struct backing_dev_info *bdi)
  {
  	int ret;
  	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
  	bdi->cgwb_congested_tree = RB_ROOT;
  	atomic_set(&bdi->usage_cnt, 1);
  
  	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
  	if (!ret) {
  		bdi->wb.memcg_css = &root_mem_cgroup->css;
  		bdi->wb.blkcg_css = blkcg_root_css;
  	}
  	return ret;
  }
  static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
  {
  	struct radix_tree_iter iter;
  	struct rb_node *rbn;
  	void **slot;
  
  	WARN_ON(test_bit(WB_registered, &bdi->wb.state));
  
  	spin_lock_irq(&cgwb_lock);

  	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
  		cgwb_kill(*slot);

  	while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
  		struct bdi_writeback_congested *congested =
  			rb_entry(rbn, struct bdi_writeback_congested, rb_node);
  
  		rb_erase(rbn, &bdi->cgwb_congested_tree);
  		congested->bdi = NULL;	/* mark @congested unlinked */
  	}
  	spin_unlock_irq(&cgwb_lock);
  
  	/*
  	 * All cgwb's and their congested states must be shutdown and
  	 * released before returning.  Drain the usage counter to wait for
  	 * all cgwb's and cgwb_congested's ever created on @bdi.
  	 */
  	atomic_dec(&bdi->usage_cnt);
  	wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
  }
  
  /**
   * wb_memcg_offline - kill all wb's associated with a memcg being offlined
   * @memcg: memcg being offlined
   *
   * Also prevents creation of any new wb's associated with @memcg.
   */
  void wb_memcg_offline(struct mem_cgroup *memcg)
  {
  	LIST_HEAD(to_destroy);
  	struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
  		cgwb_kill(wb);
  	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
  
  /**
   * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
   * @blkcg: blkcg being offlined
   *
   * Also prevents creation of any new wb's associated with @blkcg.
   */
  void wb_blkcg_offline(struct blkcg *blkcg)
  {
  	LIST_HEAD(to_destroy);
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
  		cgwb_kill(wb);
  	blkcg->cgwb_list.next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
  
  #else	/* CONFIG_CGROUP_WRITEBACK */
  static int cgwb_bdi_init(struct backing_dev_info *bdi)
  {
  	int err;
  
  	bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
  	if (!bdi->wb_congested)
  		return -ENOMEM;
  
  	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
  	if (err) {
  		kfree(bdi->wb_congested);
  		return err;
  	}
  	return 0;
  }
  static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
  
  #endif	/* CONFIG_CGROUP_WRITEBACK */

  int bdi_init(struct backing_dev_info *bdi)
  {
  	int ret;
  	bdi->dev = NULL;
  	bdi->min_ratio = 0;
  	bdi->max_ratio = 100;
  	bdi->max_prop_frac = FPROP_FRAC_BASE;
  	INIT_LIST_HEAD(&bdi->bdi_list);
  	INIT_LIST_HEAD(&bdi->wb_list);
  	init_waitqueue_head(&bdi->wb_waitq);

  	ret = cgwb_bdi_init(bdi);
  
  	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
  
  	return ret;
  }
  EXPORT_SYMBOL(bdi_init);

  int bdi_register(struct backing_dev_info *bdi, struct device *parent,
  		const char *fmt, ...)
  {
  	va_list args;
  	struct device *dev;

  	if (bdi->dev)	/* The driver needs to use separate queues per device */
  		return 0;

  	va_start(args, fmt);
  	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
  	va_end(args);
  	if (IS_ERR(dev))
  		return PTR_ERR(dev);

  	bdi->dev = dev;

  	bdi_debug_register(bdi, dev_name(dev));
  	set_bit(WB_registered, &bdi->wb.state);
  
  	spin_lock_bh(&bdi_lock);
  	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
  	spin_unlock_bh(&bdi_lock);
  
  	trace_writeback_bdi_register(bdi);
  	return 0;
  }
  EXPORT_SYMBOL(bdi_register);

  int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
  {
  	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
  }
  EXPORT_SYMBOL(bdi_register_dev);
  int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
  {
  	int rc;
  
  	rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt),
  			MINOR(owner->devt));
  	if (rc)
  		return rc;
  	bdi->owner = owner;
  	get_device(owner);
  	return 0;
  }
  EXPORT_SYMBOL(bdi_register_owner);
  /*
   * Remove bdi from bdi_list, and ensure that it is no longer visible
   */
  static void bdi_remove_from_list(struct backing_dev_info *bdi)
  {
  	spin_lock_bh(&bdi_lock);
  	list_del_rcu(&bdi->bdi_list);
  	spin_unlock_bh(&bdi_lock);

  	synchronize_rcu_expedited();
  }

  void bdi_unregister(struct backing_dev_info *bdi)
  {
  	/* make sure nobody finds us on the bdi_list anymore */
  	bdi_remove_from_list(bdi);
  	wb_shutdown(&bdi->wb);
  	cgwb_bdi_destroy(bdi);

  	if (bdi->dev) {
  		bdi_debug_unregister(bdi);
  		device_unregister(bdi->dev);
  		bdi->dev = NULL;
  	}
  
  	if (bdi->owner) {
  		put_device(bdi->owner);
  		bdi->owner = NULL;
  	}
  }

  void bdi_exit(struct backing_dev_info *bdi)
  {
  	WARN_ON_ONCE(bdi->dev);
  	wb_exit(&bdi->wb);
  }
  
  void bdi_destroy(struct backing_dev_info *bdi)
  {
  	bdi_unregister(bdi);
  	bdi_exit(bdi);
  }
  EXPORT_SYMBOL(bdi_destroy);
  /*
   * For use from filesystems to quickly init and register a bdi associated
   * with dirty writeback
   */
  int bdi_setup_and_register(struct backing_dev_info *bdi, char *name)
  {
  	int err;
  
  	bdi->name = name;
  	bdi->capabilities = 0;
  	err = bdi_init(bdi);
  	if (err)
  		return err;
  	err = bdi_register(bdi, NULL, "%.28s-%ld", name,
  			   atomic_long_inc_return(&bdi_seq));
  	if (err) {
  		bdi_destroy(bdi);
  		return err;
  	}
  
  	return 0;
  }
  EXPORT_SYMBOL(bdi_setup_and_register);
  static wait_queue_head_t congestion_wqh[2] = {
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
  		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
  	};
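  /*
   * Indexed by the sync flag: congestion_wqh[0] serves BLK_RW_ASYNC
   * waiters, congestion_wqh[1] serves BLK_RW_SYNC waiters.
   */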
  static atomic_t nr_wb_congested[2];

  void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
  {
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
  	enum wb_congested_state bit;

  	bit = sync ? WB_sync_congested : WB_async_congested;
  	if (test_and_clear_bit(bit, &congested->state))
  		atomic_dec(&nr_wb_congested[sync]);
  	smp_mb__after_atomic();
  	if (waitqueue_active(wqh))
  		wake_up(wqh);
  }
  EXPORT_SYMBOL(clear_wb_congested);

  void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
  {
  	enum wb_congested_state bit;

  	bit = sync ? WB_sync_congested : WB_async_congested;
  	if (!test_and_set_bit(bit, &congested->state))
  		atomic_inc(&nr_wb_congested[sync]);
  }
  EXPORT_SYMBOL(set_wb_congested);
  
  /**
   * congestion_wait - wait for a backing_dev to become uncongested
   * @sync: SYNC or ASYNC IO
   * @timeout: timeout in jiffies
   *
   * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
   * write congestion.  If no backing_devs are congested then just wait for the
   * next write to be completed.
   */
  long congestion_wait(int sync, long timeout)
  {
  	long ret;
  	unsigned long start = jiffies;
  	DEFINE_WAIT(wait);
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
  
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
  
  	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
  	return ret;
  }
  EXPORT_SYMBOL(congestion_wait);
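  /*
   * Illustrative use, as in the reclaim paths: back off briefly when
   * writeback is congested, e.g.
   *
   *	congestion_wait(BLK_RW_ASYNC, HZ/10);
   */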

  /**
   * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
   * @pgdat: A pgdat to check if it is heavily congested
   * @sync: SYNC or ASYNC IO
   * @timeout: timeout in jiffies
   *
   * In the event of a congested backing_dev (any backing_dev) and the given
   * @pgdat has experienced recent congestion, this waits for up to @timeout
   * jiffies for either a BDI to exit congestion of the given @sync queue
   * or a write to complete.
   *
   * In the absence of pgdat congestion, cond_resched() is called to yield
   * the processor if necessary but otherwise does not sleep.
   *
   * The return value is 0 if the sleep is for the full timeout. Otherwise,
   * it is the number of jiffies that were still remaining when the function
   * returned. return_value == timeout implies the function did not sleep.
   */
  long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
  {
  	long ret;
  	unsigned long start = jiffies;
  	DEFINE_WAIT(wait);
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
  
  	/*
  	 * If there is no congestion, or heavy congestion is not being
  	 * encountered in the current pgdat, yield if necessary instead
  	 * of sleeping on the congestion queue
  	 */
  	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
  	    !test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
  		cond_resched();

  		/* In case we scheduled, work out time remaining */
  		ret = timeout - (jiffies - start);
  		if (ret < 0)
  			ret = 0;
  
  		goto out;
  	}
  
  	/* Sleep until uncongested or a write happens */
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
  
  out:
  	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
  
  	return ret;
  }
  EXPORT_SYMBOL(wait_iff_congested);
  
  int pdflush_proc_obsolete(struct ctl_table *table, int write,
  			void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	char kbuf[] = "0
  ";
  	if (*ppos || *lenp < sizeof(kbuf)) {
  		*lenp = 0;
  		return 0;
  	}
  
  	if (copy_to_user(buffer, kbuf, sizeof(kbuf)))
  		return -EFAULT;
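  	/*
  	 * The copy above transfers sizeof(kbuf) == 3 bytes including the
  	 * NUL, while *lenp and the return value below report the two
  	 * visible characters of "0\n".
  	 */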
  	pr_warn_once("%s exported in /proc is scheduled for removal\n",
  		     table->procname);
  
  	*lenp = 2;
  	*ppos += *lenp;
  	return 2;
  }