mm/backing-dev.c

#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>

struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;

/*
 * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
 * locking.
 */
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);

/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *bdi_debug_root;

static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}

static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
	struct backing_dev_info *bdi = m->private;
	struct bdi_writeback *wb = &bdi->wb;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long wb_thresh;
	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
	struct inode *inode;

	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
	spin_lock(&wb->list_lock);
	list_for_each_entry(inode, &wb->b_dirty, i_io_list)
		nr_dirty++;
	list_for_each_entry(inode, &wb->b_io, i_io_list)
		nr_io++;
	list_for_each_entry(inode, &wb->b_more_io, i_io_list)
		nr_more_io++;
	list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
		if (inode->i_state & I_DIRTY_TIME)
			nr_dirty_time++;
	spin_unlock(&wb->list_lock);

	global_dirty_limits(&background_thresh, &dirty_thresh);
	wb_thresh = wb_calc_thresh(wb, dirty_thresh);

#define K(x) ((x) << (PAGE_SHIFT - 10))
	seq_printf(m,
		   "BdiWriteback:       %10lu kB\n"
		   "BdiReclaimable:     %10lu kB\n"
		   "BdiDirtyThresh:     %10lu kB\n"
		   "DirtyThresh:        %10lu kB\n"
		   "BackgroundThresh:   %10lu kB\n"
		   "BdiDirtied:         %10lu kB\n"
		   "BdiWritten:         %10lu kB\n"
		   "BdiWriteBandwidth:  %10lu kBps\n"
		   "b_dirty:            %10lu\n"
		   "b_io:               %10lu\n"
		   "b_more_io:          %10lu\n"
		   "b_dirty_time:       %10lu\n"
		   "bdi_list:           %10u\n"
		   "state:              %10lx\n",
		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
		   K(wb_thresh),
		   K(dirty_thresh),
		   K(background_thresh),
		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
		   (unsigned long) K(wb->write_bandwidth),
		   nr_dirty,
		   nr_io,
		   nr_more_io,
		   nr_dirty_time,
		   !list_empty(&bdi->bdi_list), bdi->wb.state);
  #undef K
  
  	return 0;
  }
  
  static int bdi_debug_stats_open(struct inode *inode, struct file *file)
  {
  	return single_open(file, bdi_debug_stats_show, inode->i_private);
  }
  
  static const struct file_operations bdi_debug_stats_fops = {
  	.open		= bdi_debug_stats_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= single_release,
  };
  
  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
  {
  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
  	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
  					       bdi, &bdi_debug_stats_fops);
  }
  
  static void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  	debugfs_remove(bdi->debug_stats);
  	debugfs_remove(bdi->debug_dir);
  }
  #else
  static inline void bdi_debug_init(void)
  {
  }
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
  				      const char *name)
  {
  }
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  }
  #endif
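
/*
 * Example (added comment, not in the original file): with CONFIG_DEBUG_FS
 * enabled, each registered bdi gets a directory under the "bdi" debugfs
 * root created above, so its counters are readable from userspace at
 * /sys/kernel/debug/bdi/<dev>/stats ("8:0" below is a hypothetical
 * device name):
 *
 *	FILE *f = fopen("/sys/kernel/debug/bdi/8:0/stats", "r");
 *	char line[128];
 *
 *	while (f && fgets(line, sizeof(line), f))
 *		fputs(line, stdout);
 *	if (f)
 *		fclose(f);
 */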

static ssize_t read_ahead_kb_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned long read_ahead_kb;
	ssize_t ret;

	ret = kstrtoul(buf, 10, &read_ahead_kb);
	if (ret < 0)
		return ret;

	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);

	return count;
}

#define K(pages) ((pages) << (PAGE_SHIFT - 10))

#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
}									\
static DEVICE_ATTR_RW(name);

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
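
/*
 * For reference (added comment, not in the original file): the BDI_SHOW()
 * invocation above expands to roughly the following; DEVICE_ATTR_RW() then
 * binds the generated read_ahead_kb_show() and the hand-written
 * read_ahead_kb_store() into dev_attr_read_ahead_kb:
 *
 *	static ssize_t read_ahead_kb_show(struct device *dev,
 *				struct device_attribute *attr, char *page)
 *	{
 *		struct backing_dev_info *bdi = dev_get_drvdata(dev);
 *
 *		return snprintf(page, PAGE_SIZE-1, "%lld\n",
 *				(long long)K(bdi->ra_pages));
 *	}
 *	static DEVICE_ATTR_RW(read_ahead_kb);
 */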

static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned int ratio;
	ssize_t ret;

	ret = kstrtouint(buf, 10, &ratio);
	if (ret < 0)
		return ret;

	ret = bdi_set_min_ratio(bdi, ratio);
	if (!ret)
		ret = count;

	return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned int ratio;
	ssize_t ret;

	ret = kstrtouint(buf, 10, &ratio);
	if (ret < 0)
		return ret;

	ret = bdi_set_max_ratio(bdi, ratio);
	if (!ret)
		ret = count;

	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

static ssize_t stable_pages_required_show(struct device *dev,
					  struct device_attribute *attr,
					  char *page)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);

	return snprintf(page, PAGE_SIZE-1, "%d\n",
			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
}
static DEVICE_ATTR_RO(stable_pages_required);

static struct attribute *bdi_dev_attrs[] = {
	&dev_attr_read_ahead_kb.attr,
	&dev_attr_min_ratio.attr,
	&dev_attr_max_ratio.attr,
	&dev_attr_stable_pages_required.attr,
	NULL,
};
ATTRIBUTE_GROUPS(bdi_dev);
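
/*
 * Usage note (added comment, not in the original file): because bdi devices
 * belong to the "bdi" class created below, these attributes appear under
 * /sys/class/bdi/<dev>/, e.g. (hypothetical device name):
 *
 *	echo 512 > /sys/class/bdi/8:0/read_ahead_kb
 */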

static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_groups = bdi_dev_groups;
	bdi_debug_init();

	return 0;
}
postcore_initcall(bdi_class_init);

static int bdi_init(struct backing_dev_info *bdi);

static int __init default_bdi_init(void)
{
	int err;

	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
					      WQ_UNBOUND | WQ_SYSFS, 0);
	if (!bdi_wq)
		return -ENOMEM;

	err = bdi_init(&noop_backing_dev_info);

	return err;
}
subsys_initcall(default_bdi_init);

/*
 * This function is used when the first inode for this wb is marked dirty. It
 * wakes up the corresponding bdi thread which should then take care of the
 * periodic background write-out of dirty inodes. Since the write-out would
 * start only 'dirty_writeback_interval' centisecs from now anyway, we just
 * set up a timer which wakes the bdi thread up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast-path (used by '__mark_inode_dirty()'), so we save a few context
 * switches by delaying the wake-up.
 *
 * We have to be careful not to postpone flush work if it is scheduled for
 * earlier. Thus we use queue_delayed_work().
 */
void wb_wakeup_delayed(struct bdi_writeback *wb)
{
	unsigned long timeout;

	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
	spin_lock_bh(&wb->work_lock);
	if (test_bit(WB_registered, &wb->state))
		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
	spin_unlock_bh(&wb->work_lock);
}
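
/*
 * Worked example (added comment): dirty_writeback_interval is kept in
 * centisecs, so the "* 10" above converts it to milliseconds.  With the
 * default interval of 500 centisecs the wake-up is deferred by five
 * seconds:
 *
 *	timeout = msecs_to_jiffies(500 * 10);	// 5000 ms
 */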

/*
 * Initial write bandwidth: 100 MB/s
 */
#define INIT_BW		(100 << (20 - PAGE_SHIFT))
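
/*
 * Worked example (added comment): INIT_BW is in pages per second.  Assuming
 * the common PAGE_SHIFT of 12 (4 KiB pages):
 *
 *	100 << (20 - 12) == 100 * 256 == 25600 pages/s
 *	25600 pages/s * 4 KiB/page == 100 MB/s
 */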

static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
		   int blkcg_id, gfp_t gfp)
{
	int i, err;

	memset(wb, 0, sizeof(*wb));

	if (wb != &bdi->wb)
		bdi_get(bdi);
	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	INIT_LIST_HEAD(&wb->b_dirty_time);
	spin_lock_init(&wb->list_lock);

	wb->bw_time_stamp = jiffies;
	wb->balanced_dirty_ratelimit = INIT_BW;
	wb->dirty_ratelimit = INIT_BW;
	wb->write_bandwidth = INIT_BW;
	wb->avg_write_bandwidth = INIT_BW;

	spin_lock_init(&wb->work_lock);
	INIT_LIST_HEAD(&wb->work_list);
	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
	wb->dirty_sleep = jiffies;

	wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
	if (!wb->congested) {
		err = -ENOMEM;
		goto out_put_bdi;
	}

	err = fprop_local_init_percpu(&wb->completions, gfp);
	if (err)
		goto out_put_cong;

	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
		err = percpu_counter_init(&wb->stat[i], 0, gfp);
		if (err)
			goto out_destroy_stat;
	}

	return 0;

out_destroy_stat:
	while (i--)
		percpu_counter_destroy(&wb->stat[i]);
	fprop_local_destroy_percpu(&wb->completions);
out_put_cong:
	wb_congested_put(wb->congested);
out_put_bdi:
	if (wb != &bdi->wb)
		bdi_put(bdi);
	return err;
}

static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);

/*
 * Remove bdi from the global list and shutdown any threads we have running
 */
static void wb_shutdown(struct bdi_writeback *wb)
{
	/* Make sure nobody queues further work */
	spin_lock_bh(&wb->work_lock);
	if (!test_and_clear_bit(WB_registered, &wb->state)) {
		spin_unlock_bh(&wb->work_lock);
		return;
	}
	spin_unlock_bh(&wb->work_lock);

	cgwb_remove_from_bdi_list(wb);

	/*
	 * Drain work list and shutdown the delayed_work.  !WB_registered
	 * tells wb_workfn() that @wb is dying and its work_list needs to
	 * be drained no matter what.
	 */
	mod_delayed_work(bdi_wq, &wb->dwork, 0);
	flush_delayed_work(&wb->dwork);
	WARN_ON(!list_empty(&wb->work_list));
}

static void wb_exit(struct bdi_writeback *wb)
{
	int i;

	WARN_ON(delayed_work_pending(&wb->dwork));

	for (i = 0; i < NR_WB_STAT_ITEMS; i++)
		percpu_counter_destroy(&wb->stat[i]);

	fprop_local_destroy_percpu(&wb->completions);
	wb_congested_put(wb->congested);
	if (wb != &wb->bdi->wb)
		bdi_put(wb->bdi);
}

#ifdef CONFIG_CGROUP_WRITEBACK

#include <linux/memcontrol.h>

/*
 * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
 * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
 * protected.
 */
static DEFINE_SPINLOCK(cgwb_lock);
static struct workqueue_struct *cgwb_release_wq;

/**
 * wb_congested_get_create - get or create a wb_congested
 * @bdi: associated bdi
 * @blkcg_id: ID of the associated blkcg
 * @gfp: allocation mask
 *
 * Look up the wb_congested for @blkcg_id on @bdi.  If missing, create one.
 * The returned wb_congested has its reference count incremented.  Returns
 * NULL on failure.
 */
struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
	struct bdi_writeback_congested *new_congested = NULL, *congested;
	struct rb_node **node, *parent;
	unsigned long flags;

retry:
	spin_lock_irqsave(&cgwb_lock, flags);

	node = &bdi->cgwb_congested_tree.rb_node;
	parent = NULL;

	while (*node != NULL) {
		parent = *node;
		congested = rb_entry(parent, struct bdi_writeback_congested,
				     rb_node);
		if (congested->blkcg_id < blkcg_id)
			node = &parent->rb_left;
		else if (congested->blkcg_id > blkcg_id)
			node = &parent->rb_right;
		else
			goto found;
	}

	if (new_congested) {
		/* !found and storage for new one already allocated, insert */
		congested = new_congested;
		new_congested = NULL;
		rb_link_node(&congested->rb_node, parent, node);
		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
		goto found;
	}

	spin_unlock_irqrestore(&cgwb_lock, flags);

	/* allocate storage for new one and retry */
	new_congested = kzalloc(sizeof(*new_congested), gfp);
	if (!new_congested)
		return NULL;

	atomic_set(&new_congested->refcnt, 0);
	new_congested->__bdi = bdi;
	new_congested->blkcg_id = blkcg_id;
	goto retry;

found:
	atomic_inc(&congested->refcnt);
	spin_unlock_irqrestore(&cgwb_lock, flags);
	kfree(new_congested);
	return congested;
}
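
/*
 * Usage sketch (added comment, not in the original file): as in wb_init()
 * above, a lookup is paired with wb_congested_put() when the wb is torn
 * down:
 *
 *	struct bdi_writeback_congested *congested;
 *
 *	congested = wb_congested_get_create(bdi, blkcg_id, GFP_KERNEL);
 *	if (!congested)
 *		return -ENOMEM;
 *	...
 *	wb_congested_put(congested);
 */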
  
/**
 * wb_congested_put - put a wb_congested
 * @congested: wb_congested to put
 *
 * Put @congested and destroy it if the refcnt reaches zero.
 */
void wb_congested_put(struct bdi_writeback_congested *congested)
{
	unsigned long flags;

	local_irq_save(flags);
	if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
		local_irq_restore(flags);
		return;
	}

	/* bdi might already have been destroyed leaving @congested unlinked */
	if (congested->__bdi) {
		rb_erase(&congested->rb_node,
			 &congested->__bdi->cgwb_congested_tree);
		congested->__bdi = NULL;
	}

	spin_unlock_irqrestore(&cgwb_lock, flags);
	kfree(congested);
}
  
static void cgwb_release_workfn(struct work_struct *work)
{
	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
						release_work);

	mutex_lock(&wb->bdi->cgwb_release_mutex);
	wb_shutdown(wb);

	css_put(wb->memcg_css);
	css_put(wb->blkcg_css);
	mutex_unlock(&wb->bdi->cgwb_release_mutex);

	fprop_local_destroy_percpu(&wb->memcg_completions);
	percpu_ref_exit(&wb->refcnt);
	wb_exit(wb);
	kfree_rcu(wb, rcu);
}

static void cgwb_release(struct percpu_ref *refcnt)
{
	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
						refcnt);
	queue_work(cgwb_release_wq, &wb->release_work);
}

static void cgwb_kill(struct bdi_writeback *wb)
{
	lockdep_assert_held(&cgwb_lock);

	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
	list_del(&wb->memcg_node);
	list_del(&wb->blkcg_node);
	percpu_ref_kill(&wb->refcnt);
}
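
/*
 * Summary (added comment): the cgwb teardown chain is: cgwb_kill() drops
 * the initial percpu ref and unlinks the wb from the lookup structures;
 * when the ref hits zero, cgwb_release() queues the wb on cgwb_release_wq;
 * cgwb_release_workfn() then shuts the wb down and frees it via RCU.
 */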

static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
{
	spin_lock_irq(&cgwb_lock);
	list_del_rcu(&wb->bdi_node);
	spin_unlock_irq(&cgwb_lock);
}

static int cgwb_create(struct backing_dev_info *bdi,
		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
{
	struct mem_cgroup *memcg;
	struct cgroup_subsys_state *blkcg_css;
	struct blkcg *blkcg;
	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
	struct bdi_writeback *wb;
	unsigned long flags;
	int ret = 0;

	memcg = mem_cgroup_from_css(memcg_css);
	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
	blkcg = css_to_blkcg(blkcg_css);
	memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
	blkcg_cgwb_list = &blkcg->cgwb_list;

	/* look up again under lock and discard on blkcg mismatch */
	spin_lock_irqsave(&cgwb_lock, flags);
	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
	if (wb && wb->blkcg_css != blkcg_css) {
		cgwb_kill(wb);
		wb = NULL;
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (wb)
		goto out_put;

	/* need to create a new one */
	wb = kmalloc(sizeof(*wb), gfp);
	if (!wb) {
		ret = -ENOMEM;
		goto out_put;
	}

	ret = wb_init(wb, bdi, blkcg_css->id, gfp);
	if (ret)
		goto err_free;

	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
	if (ret)
		goto err_wb_exit;

	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
	if (ret)
		goto err_ref_exit;

	wb->memcg_css = memcg_css;
	wb->blkcg_css = blkcg_css;
	INIT_WORK(&wb->release_work, cgwb_release_workfn);
	set_bit(WB_registered, &wb->state);

	/*
	 * The root wb determines the registered state of the whole bdi and
	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
	 * whether they're still online.  Don't link @wb if any is dead.
	 * See wb_memcg_offline() and wb_blkcg_offline().
	 */
	ret = -ENODEV;
	spin_lock_irqsave(&cgwb_lock, flags);
	if (test_bit(WB_registered, &bdi->wb.state) &&
	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
		/* we might have raced another instance of this function */
		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
		if (!ret) {
			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
			list_add(&wb->memcg_node, memcg_cgwb_list);
			list_add(&wb->blkcg_node, blkcg_cgwb_list);
			css_get(memcg_css);
			css_get(blkcg_css);
		}
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (ret) {
		if (ret == -EEXIST)
			ret = 0;
		goto err_fprop_exit;
	}
	goto out_put;

err_fprop_exit:
	fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
	percpu_ref_exit(&wb->refcnt);
err_wb_exit:
	wb_exit(wb);
err_free:
	kfree(wb);
out_put:
	css_put(blkcg_css);
	return ret;
}

/**
 * wb_get_create - get wb for a given memcg, create if necessary
 * @bdi: target bdi
 * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
 * @gfp: allocation mask to use
 *
 * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
 * create one.  The returned wb has its refcount incremented.
 *
 * This function uses css_get() on @memcg_css and thus expects its refcnt
 * to be positive on invocation.  IOW, rcu_read_lock() protection on
 * @memcg_css isn't enough.  try_get it before calling this function.
 *
 * A wb is keyed by its associated memcg.  As blkcg implicitly enables
 * memcg on the default hierarchy, memcg association is guaranteed to be
 * more specific (equal or descendant to the associated blkcg) and thus can
 * identify both the memcg and blkcg associations.
 *
 * Because the blkcg associated with a memcg may change as blkcg is enabled
 * and disabled closer to root in the hierarchy, each wb keeps track of
 * both the memcg and blkcg associated with it and verifies the blkcg on
 * each lookup.  On mismatch, the existing wb is discarded and a new one is
 * created.
 */
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
				    struct cgroup_subsys_state *memcg_css,
				    gfp_t gfp)
{
	struct bdi_writeback *wb;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	if (!memcg_css->parent)
		return &bdi->wb;

	do {
		rcu_read_lock();
		wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
		if (wb) {
			struct cgroup_subsys_state *blkcg_css;

			/* see whether the blkcg association has changed */
			blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
						     &io_cgrp_subsys);
			if (unlikely(wb->blkcg_css != blkcg_css ||
				     !wb_tryget(wb)))
				wb = NULL;
			css_put(blkcg_css);
		}
		rcu_read_unlock();
	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));

	return wb;
}
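
/*
 * Usage sketch (added comment, not in the original file): callers such as
 * inode_attach_wb() look up the wb for an inode's memcg and drop the ref
 * with wb_put() (from backing-dev-defs.h) when done:
 *
 *	struct bdi_writeback *wb;
 *
 *	wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
 *	if (wb) {
 *		...
 *		wb_put(wb);
 *	}
 */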

static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
	int ret;

	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
	bdi->cgwb_congested_tree = RB_ROOT;
	mutex_init(&bdi->cgwb_release_mutex);

	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
	if (!ret) {
		bdi->wb.memcg_css = &root_mem_cgroup->css;
		bdi->wb.blkcg_css = blkcg_root_css;
	}
	return ret;
}

static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
{
	struct radix_tree_iter iter;
	void **slot;
	struct bdi_writeback *wb;

	WARN_ON(test_bit(WB_registered, &bdi->wb.state));

	spin_lock_irq(&cgwb_lock);
	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
		cgwb_kill(*slot);
	spin_unlock_irq(&cgwb_lock);

	mutex_lock(&bdi->cgwb_release_mutex);
	spin_lock_irq(&cgwb_lock);
	while (!list_empty(&bdi->wb_list)) {
		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
				      bdi_node);
		spin_unlock_irq(&cgwb_lock);
		wb_shutdown(wb);
		spin_lock_irq(&cgwb_lock);
	}
	spin_unlock_irq(&cgwb_lock);
	mutex_unlock(&bdi->cgwb_release_mutex);
}
  
/**
 * wb_memcg_offline - kill all wb's associated with a memcg being offlined
 * @memcg: memcg being offlined
 *
 * Also prevents creation of any new wb's associated with @memcg.
 */
void wb_memcg_offline(struct mem_cgroup *memcg)
{
	LIST_HEAD(to_destroy);
	struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
	struct bdi_writeback *wb, *next;

	spin_lock_irq(&cgwb_lock);
	list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
		cgwb_kill(wb);
	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
	spin_unlock_irq(&cgwb_lock);
}

/**
 * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
 * @blkcg: blkcg being offlined
 *
 * Also prevents creation of any new wb's associated with @blkcg.
 */
void wb_blkcg_offline(struct blkcg *blkcg)
{
	LIST_HEAD(to_destroy);
	struct bdi_writeback *wb, *next;

	spin_lock_irq(&cgwb_lock);
	list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
		cgwb_kill(wb);
	blkcg->cgwb_list.next = NULL;	/* prevent new wb's */
	spin_unlock_irq(&cgwb_lock);
}

static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
	struct rb_node *rbn;

	spin_lock_irq(&cgwb_lock);
	while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
		struct bdi_writeback_congested *congested =
			rb_entry(rbn, struct bdi_writeback_congested, rb_node);

		rb_erase(rbn, &bdi->cgwb_congested_tree);
		congested->__bdi = NULL;	/* mark @congested unlinked */
	}
	spin_unlock_irq(&cgwb_lock);
}

static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	spin_lock_irq(&cgwb_lock);
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
	spin_unlock_irq(&cgwb_lock);
}

static int __init cgwb_init(void)
{
	/*
	 * There can be many concurrent release work items overwhelming
	 * system_wq.  Put them in a separate wq and limit concurrency.
	 * There's no point in executing many of these in parallel.
	 */
	cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1);
	if (!cgwb_release_wq)
		return -ENOMEM;

	return 0;
}
subsys_initcall(cgwb_init);

#else	/* CONFIG_CGROUP_WRITEBACK */

static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
	int err;

	bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
	if (!bdi->wb_congested)
		return -ENOMEM;

	atomic_set(&bdi->wb_congested->refcnt, 1);

	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
	if (err) {
		wb_congested_put(bdi->wb_congested);
		return err;
	}
	return 0;
}

static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }

static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
	wb_congested_put(bdi->wb_congested);
}

static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
}

static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
{
	list_del_rcu(&wb->bdi_node);
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

static int bdi_init(struct backing_dev_info *bdi)
{
	int ret;

	bdi->dev = NULL;

	kref_init(&bdi->refcnt);
	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = FPROP_FRAC_BASE;
	INIT_LIST_HEAD(&bdi->bdi_list);
	INIT_LIST_HEAD(&bdi->wb_list);
	init_waitqueue_head(&bdi->wb_waitq);

	ret = cgwb_bdi_init(bdi);

	return ret;
}

struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
{
	struct backing_dev_info *bdi;

	bdi = kmalloc_node(sizeof(struct backing_dev_info),
			   gfp_mask | __GFP_ZERO, node_id);
	if (!bdi)
		return NULL;

	if (bdi_init(bdi)) {
		kfree(bdi);
		return NULL;
	}
	return bdi;
}
EXPORT_SYMBOL(bdi_alloc_node);

int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	cgwb_bdi_register(bdi);
	bdi->dev = dev;

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(WB_registered, &bdi->wb.state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register_va);

int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = bdi_register_va(bdi, fmt, args);
	va_end(args);
	return ret;
}
EXPORT_SYMBOL(bdi_register);

int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
{
	int rc;

	rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
	if (rc)
		return rc;
	/* Leaking owner reference... */
	WARN_ON(bdi->owner);
	bdi->owner = owner;
	get_device(owner);
	return 0;
}
EXPORT_SYMBOL(bdi_register_owner);

/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	synchronize_rcu_expedited();
}

void bdi_unregister(struct backing_dev_info *bdi)
{
	/* make sure nobody finds us on the bdi_list anymore */
	bdi_remove_from_list(bdi);
	wb_shutdown(&bdi->wb);
	cgwb_bdi_unregister(bdi);

	if (bdi->dev) {
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}

	if (bdi->owner) {
		put_device(bdi->owner);
		bdi->owner = NULL;
	}
}

static void release_bdi(struct kref *ref)
{
	struct backing_dev_info *bdi =
			container_of(ref, struct backing_dev_info, refcnt);

	if (test_bit(WB_registered, &bdi->wb.state))
		bdi_unregister(bdi);
	WARN_ON_ONCE(bdi->dev);
	wb_exit(&bdi->wb);
	cgwb_bdi_exit(bdi);
	kfree(bdi);
}

void bdi_put(struct backing_dev_info *bdi)
{
	kref_put(&bdi->refcnt, release_bdi);
}
EXPORT_SYMBOL(bdi_put);
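
/*
 * Lifecycle sketch (added comment, not in the original file): a typical
 * user allocates, registers, and finally drops its reference; the name
 * format and variables here are hypothetical:
 *
 *	struct backing_dev_info *bdi;
 *
 *	bdi = bdi_alloc_node(GFP_KERNEL, NUMA_NO_NODE);
 *	if (!bdi)
 *		return -ENOMEM;
 *	if (bdi_register(bdi, "mydev%d", id)) {
 *		bdi_put(bdi);
 *		return -ENODEV;
 *	}
 *	...
 *	bdi_unregister(bdi);
 *	bdi_put(bdi);	// release_bdi() frees everything on the last ref
 */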

static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};
static atomic_t nr_wb_congested[2];

void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	wait_queue_head_t *wqh = &congestion_wqh[sync];
	enum wb_congested_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (test_and_clear_bit(bit, &congested->state))
		atomic_dec(&nr_wb_congested[sync]);
	smp_mb__after_atomic();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_wb_congested);

void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	enum wb_congested_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (!test_and_set_bit(bit, &congested->state))
		atomic_inc(&nr_wb_congested[sync]);
}
EXPORT_SYMBOL(set_wb_congested);
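
/*
 * Usage note (added comment): include/linux/backing-dev.h is believed to
 * wrap these in set_bdi_congested()/clear_bdi_congested() helpers that
 * operate on a bdi's root wb, e.g. (sketch):
 *
 *	set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC);
 *	...
 *	clear_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC);
 */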

/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

  	return ret;
  }
  EXPORT_SYMBOL(congestion_wait);
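
/*
 * Usage sketch (added comment, not in the original file): reclaim paths
 * commonly back off for a tenth of a second when writeback is congested:
 *
 *	congestion_wait(BLK_RW_ASYNC, HZ/10);
 */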

/**
 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
 * @pgdat: A pgdat to check if it is heavily congested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * If any backing_dev is congested and the given @pgdat has experienced
 * recent congestion, this waits for up to @timeout jiffies for either a
 * BDI to exit congestion of the given @sync queue or a write to complete.
 *
 * In the absence of pgdat congestion, cond_resched() is called to yield
 * the processor if necessary but otherwise does not sleep.
 *
 * The return value is 0 if the sleep is for the full timeout. Otherwise,
 * it is the number of jiffies that were still remaining when the function
 * returned. return_value == timeout implies the function did not sleep.
 */
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * If there is no congestion, or heavy congestion is not being
	 * encountered in the current pgdat, yield if necessary instead
	 * of sleeping on the congestion queue
	 */
	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
	    !test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
		cond_resched();

		/* In case we scheduled, work out time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;

		goto out;
	}

	/* Sleep until uncongested or a write happens */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

out:
	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(wait_iff_congested);

int pdflush_proc_obsolete(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char kbuf[] = "0\n";

	if (*ppos || *lenp < sizeof(kbuf)) {
		*lenp = 0;
		return 0;
	}

	if (copy_to_user(buffer, kbuf, sizeof(kbuf)))
		return -EFAULT;
	pr_warn_once("%s exported in /proc is scheduled for removal\n",
		     table->procname);

	*lenp = 2;
	*ppos += *lenp;
	return 2;
}