mm/backing-dev.c
#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>

struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;

/*
 * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
 * locking.
 */
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);

/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;

  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
  static void bdi_debug_init(void)
  {
  	bdi_debug_root = debugfs_create_dir("bdi", NULL);
  }
  
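/*
 * Dump per-bdi writeback statistics to the debugfs "stats" file: the
 * number of inodes on each writeback list plus the dirty thresholds
 * and bandwidth estimates the flusher maintains.
 */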
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
	struct bdi_writeback *wb = &bdi->wb;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long wb_thresh;
	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
	struct inode *inode;

	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
	spin_lock(&wb->list_lock);
	list_for_each_entry(inode, &wb->b_dirty, i_io_list)
		nr_dirty++;
	list_for_each_entry(inode, &wb->b_io, i_io_list)
		nr_io++;
	list_for_each_entry(inode, &wb->b_more_io, i_io_list)
		nr_more_io++;
	list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
		if (inode->i_state & I_DIRTY_TIME)
			nr_dirty_time++;
	spin_unlock(&wb->list_lock);

	global_dirty_limits(&background_thresh, &dirty_thresh);
	wb_thresh = wb_calc_thresh(wb, dirty_thresh);

#define K(x) ((x) << (PAGE_SHIFT - 10))
	seq_printf(m,
		   "BdiWriteback:       %10lu kB\n"
		   "BdiReclaimable:     %10lu kB\n"
		   "BdiDirtyThresh:     %10lu kB\n"
		   "DirtyThresh:        %10lu kB\n"
		   "BackgroundThresh:   %10lu kB\n"
		   "BdiDirtied:         %10lu kB\n"
		   "BdiWritten:         %10lu kB\n"
		   "BdiWriteBandwidth:  %10lu kBps\n"
		   "b_dirty:            %10lu\n"
		   "b_io:               %10lu\n"
		   "b_more_io:          %10lu\n"
		   "b_dirty_time:       %10lu\n"
		   "bdi_list:           %10u\n"
		   "state:              %10lx\n",
		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
		   K(wb_thresh),
		   K(dirty_thresh),
		   K(background_thresh),
		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
		   (unsigned long) K(wb->write_bandwidth),
		   nr_dirty,
		   nr_io,
		   nr_more_io,
		   nr_dirty_time,
		   !list_empty(&bdi->bdi_list), bdi->wb.state);
#undef K

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);

static int bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	if (!bdi_debug_root)
		return -ENOMEM;
	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
	if (!bdi->debug_dir)
		return -ENOMEM;
	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
					       bdi, &bdi_debug_stats_fops);
	if (!bdi->debug_stats) {
		debugfs_remove(bdi->debug_dir);
		bdi->debug_dir = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
#else
static inline void bdi_debug_init(void)
{
}
static inline int bdi_debug_register(struct backing_dev_info *bdi,
				      const char *name)
{
	return 0;
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif
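
/*
 * sysfs attributes published under /sys/class/bdi/<name>/:
 * read_ahead_kb, min_ratio, max_ratio and stable_pages_required.
 */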
static ssize_t read_ahead_kb_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned long read_ahead_kb;
	ssize_t ret;

	ret = kstrtoul(buf, 10, &read_ahead_kb);
	if (ret < 0)
		return ret;

	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);

	return count;
}

#define K(pages) ((pages) << (PAGE_SHIFT - 10))

#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
}									\
static DEVICE_ATTR_RW(name);

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))

static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned int ratio;
	ssize_t ret;

	ret = kstrtouint(buf, 10, &ratio);
	if (ret < 0)
		return ret;

	ret = bdi_set_min_ratio(bdi, ratio);
	if (!ret)
		ret = count;

	return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned int ratio;
	ssize_t ret;

	ret = kstrtouint(buf, 10, &ratio);
	if (ret < 0)
		return ret;

	ret = bdi_set_max_ratio(bdi, ratio);
	if (!ret)
		ret = count;

	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

static ssize_t stable_pages_required_show(struct device *dev,
					  struct device_attribute *attr,
					  char *page)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);

	return snprintf(page, PAGE_SIZE-1, "%d\n",
			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
}
static DEVICE_ATTR_RO(stable_pages_required);

static struct attribute *bdi_dev_attrs[] = {
	&dev_attr_read_ahead_kb.attr,
	&dev_attr_min_ratio.attr,
	&dev_attr_max_ratio.attr,
	&dev_attr_stable_pages_required.attr,
	NULL,
};
ATTRIBUTE_GROUPS(bdi_dev);

static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_groups = bdi_dev_groups;
	bdi_debug_init();

	return 0;
}
postcore_initcall(bdi_class_init);

static int bdi_init(struct backing_dev_info *bdi);

static int __init default_bdi_init(void)
{
	int err;

	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
					      WQ_UNBOUND | WQ_SYSFS, 0);
	if (!bdi_wq)
		return -ENOMEM;

	err = bdi_init(&noop_backing_dev_info);

	return err;
}
subsys_initcall(default_bdi_init);

/*
 * This function is used when the first inode for this wb is marked dirty. It
 * wakes up the corresponding bdi thread which should then take care of the
 * periodic background write-out of dirty inodes. Since the write-out would
 * start only 'dirty_writeback_interval' centisecs from now anyway, we just
 * set up a timer which wakes the bdi thread up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast-path (used by '__mark_inode_dirty()'), so we save a few context
 * switches by delaying the wake-up.
 *
 * We have to be careful not to postpone flush work if it is scheduled for
 * earlier. Thus we use queue_delayed_work().
 */
void wb_wakeup_delayed(struct bdi_writeback *wb)
{
	unsigned long timeout;

	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
	spin_lock_bh(&wb->work_lock);
	if (test_bit(WB_registered, &wb->state))
		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
	spin_unlock_bh(&wb->work_lock);
}

/*
 * Initial write bandwidth: 100 MB/s
 */
#define INIT_BW		(100 << (20 - PAGE_SHIFT))

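/*
 * Set up one bdi_writeback: the dirty inode lists, bandwidth estimates,
 * the delayed work that runs wb_workfn(), the per-cpu stat counters and
 * the congestion state shared with the owning blkcg.
 */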
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
		   int blkcg_id, gfp_t gfp)
{
	int i, err;

	memset(wb, 0, sizeof(*wb));

	if (wb != &bdi->wb)
		bdi_get(bdi);
	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	INIT_LIST_HEAD(&wb->b_dirty_time);
	spin_lock_init(&wb->list_lock);

	wb->bw_time_stamp = jiffies;
	wb->balanced_dirty_ratelimit = INIT_BW;
	wb->dirty_ratelimit = INIT_BW;
	wb->write_bandwidth = INIT_BW;
	wb->avg_write_bandwidth = INIT_BW;

	spin_lock_init(&wb->work_lock);
	INIT_LIST_HEAD(&wb->work_list);
	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
	wb->dirty_sleep = jiffies;

	wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
	if (!wb->congested) {
		err = -ENOMEM;
		goto out_put_bdi;
	}

	err = fprop_local_init_percpu(&wb->completions, gfp);
	if (err)
		goto out_put_cong;

	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
		err = percpu_counter_init(&wb->stat[i], 0, gfp);
		if (err)
			goto out_destroy_stat;
	}

	return 0;

out_destroy_stat:
	while (i--)
		percpu_counter_destroy(&wb->stat[i]);
	fprop_local_destroy_percpu(&wb->completions);
out_put_cong:
	wb_congested_put(wb->congested);
out_put_bdi:
	if (wb != &bdi->wb)
		bdi_put(bdi);
	return err;
}

static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);

/*
 * Remove bdi from the global list and shut down any threads we have running
 */
static void wb_shutdown(struct bdi_writeback *wb)
{
	/* Make sure nobody queues further work */
	spin_lock_bh(&wb->work_lock);
	if (!test_and_clear_bit(WB_registered, &wb->state)) {
		spin_unlock_bh(&wb->work_lock);
		return;
	}
	spin_unlock_bh(&wb->work_lock);

	cgwb_remove_from_bdi_list(wb);
	/*
	 * Drain work list and shutdown the delayed_work.  !WB_registered
	 * tells wb_workfn() that @wb is dying and its work_list needs to
	 * be drained no matter what.
	 */
	mod_delayed_work(bdi_wq, &wb->dwork, 0);
	flush_delayed_work(&wb->dwork);
	WARN_ON(!list_empty(&wb->work_list));
}
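
/*
 * Release everything wb_init() allocated.  The wb must already have been
 * shut down, i.e. no delayed work may still be pending.
 */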
static void wb_exit(struct bdi_writeback *wb)
{
	int i;

	WARN_ON(delayed_work_pending(&wb->dwork));

	for (i = 0; i < NR_WB_STAT_ITEMS; i++)
		percpu_counter_destroy(&wb->stat[i]);

	fprop_local_destroy_percpu(&wb->completions);
	wb_congested_put(wb->congested);
	if (wb != &wb->bdi->wb)
		bdi_put(wb->bdi);
}

#ifdef CONFIG_CGROUP_WRITEBACK

#include <linux/memcontrol.h>

/*
 * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
 * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
 * protected.
 */
static DEFINE_SPINLOCK(cgwb_lock);
static struct workqueue_struct *cgwb_release_wq;

/**
 * wb_congested_get_create - get or create a wb_congested
 * @bdi: associated bdi
 * @blkcg_id: ID of the associated blkcg
 * @gfp: allocation mask
 *
 * Look up the wb_congested for @blkcg_id on @bdi.  If missing, create one.
 * The returned wb_congested has its reference count incremented.  Returns
 * NULL on failure.
 */
struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
	struct bdi_writeback_congested *new_congested = NULL, *congested;
	struct rb_node **node, *parent;
	unsigned long flags;

retry:
	spin_lock_irqsave(&cgwb_lock, flags);

	node = &bdi->cgwb_congested_tree.rb_node;
	parent = NULL;

	while (*node != NULL) {
		parent = *node;
		congested = rb_entry(parent, struct bdi_writeback_congested,
				     rb_node);
		if (congested->blkcg_id < blkcg_id)
			node = &parent->rb_left;
		else if (congested->blkcg_id > blkcg_id)
			node = &parent->rb_right;
		else
			goto found;
	}

	if (new_congested) {
		/* !found and storage for new one already allocated, insert */
		congested = new_congested;
		rb_link_node(&congested->rb_node, parent, node);
		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
		spin_unlock_irqrestore(&cgwb_lock, flags);
		return congested;
	}

	spin_unlock_irqrestore(&cgwb_lock, flags);

	/* allocate storage for new one and retry */
	new_congested = kzalloc(sizeof(*new_congested), gfp);
	if (!new_congested)
		return NULL;

	refcount_set(&new_congested->refcnt, 1);
	new_congested->__bdi = bdi;
	new_congested->blkcg_id = blkcg_id;
	goto retry;

found:
	refcount_inc(&congested->refcnt);
	spin_unlock_irqrestore(&cgwb_lock, flags);
	kfree(new_congested);
	return congested;
}
  
  /**
   * wb_congested_put - put a wb_congested
   * @congested: wb_congested to put
   *
   * Put @congested and destroy it if the refcnt reaches zero.
   */
  void wb_congested_put(struct bdi_writeback_congested *congested)
  {
	unsigned long flags;

	if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
		return;

	/* bdi might already have been destroyed leaving @congested unlinked */
	if (congested->__bdi) {
		rb_erase(&congested->rb_node,
			 &congested->__bdi->cgwb_congested_tree);
		congested->__bdi = NULL;
	}

	spin_unlock_irqrestore(&cgwb_lock, flags);
	kfree(congested);
  }
  
  static void cgwb_release_workfn(struct work_struct *work)
  {
  	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
  						release_work);
	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);

	mutex_lock(&wb->bdi->cgwb_release_mutex);
	wb_shutdown(wb);

	css_put(wb->memcg_css);
	css_put(wb->blkcg_css);
	mutex_unlock(&wb->bdi->cgwb_release_mutex);

	/* triggers blkg destruction if cgwb_refcnt becomes zero */
	blkcg_cgwb_put(blkcg);

	fprop_local_destroy_percpu(&wb->memcg_completions);
	percpu_ref_exit(&wb->refcnt);
	wb_exit(wb);
	kfree_rcu(wb, rcu);
}

static void cgwb_release(struct percpu_ref *refcnt)
{
	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
						refcnt);
	queue_work(cgwb_release_wq, &wb->release_work);
  }
  
  static void cgwb_kill(struct bdi_writeback *wb)
  {
  	lockdep_assert_held(&cgwb_lock);
  
  	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
  	list_del(&wb->memcg_node);
  	list_del(&wb->blkcg_node);
  	percpu_ref_kill(&wb->refcnt);
  }
  static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
  {
  	spin_lock_irq(&cgwb_lock);
  	list_del_rcu(&wb->bdi_node);
  	spin_unlock_irq(&cgwb_lock);
  }
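/*
 * Allocate and install a new wb for @memcg_css on @bdi.  Called when a
 * lookup in wb_get_create() misses; a racing creator is tolerated, so
 * -EEXIST from the radix tree insert is treated as success.
 */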
  static int cgwb_create(struct backing_dev_info *bdi,
  		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
  {
  	struct mem_cgroup *memcg;
  	struct cgroup_subsys_state *blkcg_css;
  	struct blkcg *blkcg;
  	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
  	struct bdi_writeback *wb;
  	unsigned long flags;
  	int ret = 0;
  
  	memcg = mem_cgroup_from_css(memcg_css);
	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
	blkcg = css_to_blkcg(blkcg_css);
	memcg_cgwb_list = &memcg->cgwb_list;
	blkcg_cgwb_list = &blkcg->cgwb_list;

	/* look up again under lock and discard on blkcg mismatch */
	spin_lock_irqsave(&cgwb_lock, flags);
	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
	if (wb && wb->blkcg_css != blkcg_css) {
		cgwb_kill(wb);
		wb = NULL;
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (wb)
		goto out_put;

	/* need to create a new one */
	wb = kmalloc(sizeof(*wb), gfp);
	if (!wb) {
		ret = -ENOMEM;
		goto out_put;
	}

	ret = wb_init(wb, bdi, blkcg_css->id, gfp);
	if (ret)
		goto err_free;

	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
	if (ret)
		goto err_wb_exit;

	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
	if (ret)
		goto err_ref_exit;

	wb->memcg_css = memcg_css;
	wb->blkcg_css = blkcg_css;
	INIT_WORK(&wb->release_work, cgwb_release_workfn);
	set_bit(WB_registered, &wb->state);

	/*
	 * The root wb determines the registered state of the whole bdi and
	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
	 * whether they're still online.  Don't link @wb if any is dead.
	 * See wb_memcg_offline() and wb_blkcg_offline().
	 */
	ret = -ENODEV;
	spin_lock_irqsave(&cgwb_lock, flags);
	if (test_bit(WB_registered, &bdi->wb.state) &&
	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
		/* we might have raced another instance of this function */
		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
		if (!ret) {
			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
			list_add(&wb->memcg_node, memcg_cgwb_list);
			list_add(&wb->blkcg_node, blkcg_cgwb_list);
			blkcg_cgwb_get(blkcg);
			css_get(memcg_css);
			css_get(blkcg_css);
		}
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (ret) {
		if (ret == -EEXIST)
			ret = 0;
		goto err_fprop_exit;
	}
	goto out_put;

err_fprop_exit:
	fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
	percpu_ref_exit(&wb->refcnt);
err_wb_exit:
	wb_exit(wb);
err_free:
	kfree(wb);
out_put:
	css_put(blkcg_css);
	return ret;
}
  /**
   * wb_get_create - get wb for a given memcg, create if necessary
   * @bdi: target bdi
   * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
   * @gfp: allocation mask to use
   *
   * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
   * create one.  The returned wb has its refcount incremented.
   *
   * This function uses css_get() on @memcg_css and thus expects its refcnt
   * to be positive on invocation.  IOW, rcu_read_lock() protection on
   * @memcg_css isn't enough.  try_get it before calling this function.
   *
   * A wb is keyed by its associated memcg.  As blkcg implicitly enables
   * memcg on the default hierarchy, memcg association is guaranteed to be
   * more specific (equal or descendant to the associated blkcg) and thus can
   * identify both the memcg and blkcg associations.
   *
   * Because the blkcg associated with a memcg may change as blkcg is enabled
   * and disabled closer to root in the hierarchy, each wb keeps track of
   * both the memcg and blkcg associated with it and verifies the blkcg on
   * each lookup.  On mismatch, the existing wb is discarded and a new one is
   * created.
   */
  struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
  				    struct cgroup_subsys_state *memcg_css,
  				    gfp_t gfp)
{
	struct bdi_writeback *wb;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	if (!memcg_css->parent)
		return &bdi->wb;

	do {
		rcu_read_lock();
		wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
		if (wb) {
			struct cgroup_subsys_state *blkcg_css;

			/* see whether the blkcg association has changed */
			blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
						     &io_cgrp_subsys);
			if (unlikely(wb->blkcg_css != blkcg_css ||
				     !wb_tryget(wb)))
				wb = NULL;
			css_put(blkcg_css);
		}
		rcu_read_unlock();
	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));

	return wb;
}

static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
	int ret;

	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
	bdi->cgwb_congested_tree = RB_ROOT;
	mutex_init(&bdi->cgwb_release_mutex);
	init_rwsem(&bdi->wb_switch_rwsem);

	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
	if (!ret) {
		bdi->wb.memcg_css = &root_mem_cgroup->css;
		bdi->wb.blkcg_css = blkcg_root_css;
	}
	return ret;
}
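
/*
 * Kill all cgwbs on @bdi and shut them down.  Holding cgwb_release_mutex
 * keeps this from racing with the asynchronous release work items, and
 * cgwb_lock is dropped around each wb_shutdown() since it can sleep.
 */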
static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
{
	struct radix_tree_iter iter;
	void **slot;
	struct bdi_writeback *wb;

	WARN_ON(test_bit(WB_registered, &bdi->wb.state));

	spin_lock_irq(&cgwb_lock);
	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
		cgwb_kill(*slot);
	spin_unlock_irq(&cgwb_lock);

	mutex_lock(&bdi->cgwb_release_mutex);
	spin_lock_irq(&cgwb_lock);
	while (!list_empty(&bdi->wb_list)) {
		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
				      bdi_node);
		spin_unlock_irq(&cgwb_lock);
		wb_shutdown(wb);
		spin_lock_irq(&cgwb_lock);
	}
	spin_unlock_irq(&cgwb_lock);
	mutex_unlock(&bdi->cgwb_release_mutex);
  }
  
  /**
   * wb_memcg_offline - kill all wb's associated with a memcg being offlined
   * @memcg: memcg being offlined
   *
   * Also prevents creation of any new wb's associated with @memcg.
 */
  void wb_memcg_offline(struct mem_cgroup *memcg)
  {
  	struct list_head *memcg_cgwb_list = &memcg->cgwb_list;
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
  		cgwb_kill(wb);
  	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
  
  /**
   * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
   * @blkcg: blkcg being offlined
   *
   * Also prevents creation of any new wb's associated with @blkcg.
   */
  void wb_blkcg_offline(struct blkcg *blkcg)
  {
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
  		cgwb_kill(wb);
  	blkcg->cgwb_list.next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
  static void cgwb_bdi_exit(struct backing_dev_info *bdi)
  {
  	struct rb_node *rbn;
  
  	spin_lock_irq(&cgwb_lock);
  	while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
  		struct bdi_writeback_congested *congested =
  			rb_entry(rbn, struct bdi_writeback_congested, rb_node);
  
  		rb_erase(rbn, &bdi->cgwb_congested_tree);
  		congested->__bdi = NULL;	/* mark @congested unlinked */
  	}
  	spin_unlock_irq(&cgwb_lock);
  }
  static void cgwb_bdi_register(struct backing_dev_info *bdi)
  {
  	spin_lock_irq(&cgwb_lock);
  	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
  	spin_unlock_irq(&cgwb_lock);
  }
  static int __init cgwb_init(void)
  {
  	/*
  	 * There can be many concurrent release work items overwhelming
  	 * system_wq.  Put them in a separate wq and limit concurrency.
  	 * There's no point in executing many of these in parallel.
  	 */
  	cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1);
  	if (!cgwb_release_wq)
  		return -ENOMEM;
  
  	return 0;
  }
  subsys_initcall(cgwb_init);

#else	/* CONFIG_CGROUP_WRITEBACK */

static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
	int err;

	bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
	if (!bdi->wb_congested)
		return -ENOMEM;

	refcount_set(&bdi->wb_congested->refcnt, 1);

	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
	if (err) {
		wb_congested_put(bdi->wb_congested);
		return err;
	}
	return 0;
}

static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }

static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
	wb_congested_put(bdi->wb_congested);
}

static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
}

static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
{
	list_del_rcu(&wb->bdi_node);
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

static int bdi_init(struct backing_dev_info *bdi)
{
	int ret;

	bdi->dev = NULL;

	kref_init(&bdi->refcnt);
	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = FPROP_FRAC_BASE;
	INIT_LIST_HEAD(&bdi->bdi_list);
	INIT_LIST_HEAD(&bdi->wb_list);
	init_waitqueue_head(&bdi->wb_waitq);

	ret = cgwb_bdi_init(bdi);

	return ret;
}
  struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
  {
  	struct backing_dev_info *bdi;
  
  	bdi = kmalloc_node(sizeof(struct backing_dev_info),
  			   gfp_mask | __GFP_ZERO, node_id);
  	if (!bdi)
  		return NULL;
  
  	if (bdi_init(bdi)) {
  		kfree(bdi);
  		return NULL;
  	}
  	return bdi;
  }
  EXPORT_SYMBOL(bdi_alloc_node);

int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	cgwb_bdi_register(bdi);
	bdi->dev = dev;

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(WB_registered, &bdi->wb.state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register_va);

int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = bdi_register_va(bdi, fmt, args);
	va_end(args);
	return ret;
}
EXPORT_SYMBOL(bdi_register);

int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
{
	int rc;

	rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
	if (rc)
		return rc;

	/* Leaking owner reference... */
	WARN_ON(bdi->owner);
	bdi->owner = owner;
	get_device(owner);
	return 0;
}
EXPORT_SYMBOL(bdi_register_owner);

/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	synchronize_rcu_expedited();
}

void bdi_unregister(struct backing_dev_info *bdi)
{
	/* make sure nobody finds us on the bdi_list anymore */
	bdi_remove_from_list(bdi);
	wb_shutdown(&bdi->wb);
	cgwb_bdi_unregister(bdi);

	if (bdi->dev) {
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}

	if (bdi->owner) {
		put_device(bdi->owner);
		bdi->owner = NULL;
	}
}
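
/* Final kref release: unregister the bdi if still registered, then free it. */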
static void release_bdi(struct kref *ref)
{
	struct backing_dev_info *bdi =
			container_of(ref, struct backing_dev_info, refcnt);

	if (test_bit(WB_registered, &bdi->wb.state))
		bdi_unregister(bdi);
	WARN_ON_ONCE(bdi->dev);
	wb_exit(&bdi->wb);
	cgwb_bdi_exit(bdi);
	kfree(bdi);
}

void bdi_put(struct backing_dev_info *bdi)
{
	kref_put(&bdi->refcnt, release_bdi);
}
EXPORT_SYMBOL(bdi_put);

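/*
 * Congestion tracking: set_wb_congested()/clear_wb_congested() maintain
 * per-wb congested bits and the global counters below, and clearing
 * congestion wakes anyone sleeping in congestion_wait() or
 * wait_iff_congested() on congestion_wqh.
 */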
static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};
static atomic_t nr_wb_congested[2];

void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	wait_queue_head_t *wqh = &congestion_wqh[sync];
	enum wb_congested_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (test_and_clear_bit(bit, &congested->state))
		atomic_dec(&nr_wb_congested[sync]);
	smp_mb__after_atomic();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_wb_congested);

void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	enum wb_congested_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (!test_and_set_bit(bit, &congested->state))
		atomic_inc(&nr_wb_congested[sync]);
}
EXPORT_SYMBOL(set_wb_congested);

/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(congestion_wait);

/**
 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * In the event of a congested backing_dev (any backing_dev) this waits
 * for up to @timeout jiffies for either a BDI to exit congestion of the
 * given @sync queue or a write to complete.
 *
 * The return value is 0 if the sleep is for the full timeout. Otherwise,
 * it is the number of jiffies that were still remaining when the function
 * returned. return_value == timeout implies the function did not sleep.
 */
long wait_iff_congested(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * If there is no congestion, yield if necessary instead
	 * of sleeping on the congestion queue
	 */
	if (atomic_read(&nr_wb_congested[sync]) == 0) {
		cond_resched();

		/* In case we scheduled, work out time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;

		goto out;
	}

	/* Sleep until uncongested or a write happens */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

out:
	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(wait_iff_congested);