Blame view

mm/backing-dev.c 28.5 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
3fcfab16c   Andrew Morton   [PATCH] separate ...
2
3
  
  #include <linux/wait.h>
34f8fe501   Tejun Heo   bdi: Add bdi->id
4
  #include <linux/rbtree.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
5
  #include <linux/backing-dev.h>
03ba3782e   Jens Axboe   writeback: switch...
6
7
  #include <linux/kthread.h>
  #include <linux/freezer.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
8
  #include <linux/fs.h>
26160158d   Jens Axboe   Move the default_...
9
  #include <linux/pagemap.h>
03ba3782e   Jens Axboe   writeback: switch...
10
  #include <linux/mm.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
11
12
  #include <linux/sched.h>
  #include <linux/module.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
13
14
  #include <linux/writeback.h>
  #include <linux/device.h>
455b28646   Dave Chinner   writeback: Initia...
15
  #include <trace/events/writeback.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
16

5129a469a   Jörn Engel   Catch filesystems...
17
18
  struct backing_dev_info noop_backing_dev_info = {
  	.name		= "noop",
976e48f8a   Jan Kara   bdi: Initialize n...
19
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
5129a469a   Jörn Engel   Catch filesystems...
20
  };
a212b105b   Tejun Heo   bdi: make inode_t...
21
  EXPORT_SYMBOL_GPL(noop_backing_dev_info);
5129a469a   Jörn Engel   Catch filesystems...
22

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
23
  static struct class *bdi_class;
c2c814fc9   Theodore Ts'o   memcg: fix a cras...
24
  const char *bdi_unknown_name = "(unknown)";
cfc4ba536   Jens Axboe   writeback: use RC...
25
26
  
  /*
34f8fe501   Tejun Heo   bdi: Add bdi->id
27
28
   * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
   * reader side locking.
cfc4ba536   Jens Axboe   writeback: use RC...
29
   */
03ba3782e   Jens Axboe   writeback: switch...
30
  DEFINE_SPINLOCK(bdi_lock);
34f8fe501   Tejun Heo   bdi: Add bdi->id
31
32
  static u64 bdi_id_cursor;
  static struct rb_root bdi_tree = RB_ROOT;
66f3b8e2e   Jens Axboe   writeback: move d...
33
  LIST_HEAD(bdi_list);
03ba3782e   Jens Axboe   writeback: switch...
34

839a8e866   Tejun Heo   writeback: replac...
35
36
  /* bdi_wq serves all asynchronous writeback tasks */
  struct workqueue_struct *bdi_wq;
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
  static void bdi_debug_init(void)
  {
  	bdi_debug_root = debugfs_create_dir("bdi", NULL);
  }
  
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
c1955ce32   Christoph Hellwig   writeback: remove...
51
  	struct bdi_writeback *wb = &bdi->wb;
364aeb284   David Rientjes   mm: change dirty ...
52
53
  	unsigned long background_thresh;
  	unsigned long dirty_thresh;
0d960a383   Tejun Heo   writeback: clean ...
54
  	unsigned long wb_thresh;
0ae45f63d   Theodore Ts'o   vfs: add support ...
55
  	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
f09b00d3e   Jens Axboe   writeback: add so...
56
  	struct inode *inode;
0ae45f63d   Theodore Ts'o   vfs: add support ...
57
  	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
f758eeabe   Christoph Hellwig   writeback: split ...
58
  	spin_lock(&wb->list_lock);
c7f540849   Dave Chinner   inode: rename i_w...
59
  	list_for_each_entry(inode, &wb->b_dirty, i_io_list)
c1955ce32   Christoph Hellwig   writeback: remove...
60
  		nr_dirty++;
c7f540849   Dave Chinner   inode: rename i_w...
61
  	list_for_each_entry(inode, &wb->b_io, i_io_list)
c1955ce32   Christoph Hellwig   writeback: remove...
62
  		nr_io++;
c7f540849   Dave Chinner   inode: rename i_w...
63
  	list_for_each_entry(inode, &wb->b_more_io, i_io_list)
c1955ce32   Christoph Hellwig   writeback: remove...
64
  		nr_more_io++;
c7f540849   Dave Chinner   inode: rename i_w...
65
  	list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
0ae45f63d   Theodore Ts'o   vfs: add support ...
66
67
  		if (inode->i_state & I_DIRTY_TIME)
  			nr_dirty_time++;
f758eeabe   Christoph Hellwig   writeback: split ...
68
  	spin_unlock(&wb->list_lock);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
69

16c4042f0   Wu Fengguang   writeback: avoid ...
70
  	global_dirty_limits(&background_thresh, &dirty_thresh);
0d960a383   Tejun Heo   writeback: clean ...
71
  	wb_thresh = wb_calc_thresh(wb, dirty_thresh);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
72
73
74
  
  #define K(x) ((x) << (PAGE_SHIFT - 10))
  	seq_printf(m,
00821b002   Wu Fengguang   writeback: show b...
75
76
77
78
79
80
81
82
83
84
  		   "BdiWriteback:       %10lu kB
  "
  		   "BdiReclaimable:     %10lu kB
  "
  		   "BdiDirtyThresh:     %10lu kB
  "
  		   "DirtyThresh:        %10lu kB
  "
  		   "BackgroundThresh:   %10lu kB
  "
c8e28ce04   Wu Fengguang   writeback: accoun...
85
86
  		   "BdiDirtied:         %10lu kB
  "
00821b002   Wu Fengguang   writeback: show b...
87
88
89
90
91
92
93
94
95
96
  		   "BdiWritten:         %10lu kB
  "
  		   "BdiWriteBandwidth:  %10lu kBps
  "
  		   "b_dirty:            %10lu
  "
  		   "b_io:               %10lu
  "
  		   "b_more_io:          %10lu
  "
0ae45f63d   Theodore Ts'o   vfs: add support ...
97
98
  		   "b_dirty_time:       %10lu
  "
00821b002   Wu Fengguang   writeback: show b...
99
100
101
102
  		   "bdi_list:           %10u
  "
  		   "state:              %10lx
  ",
93f78d882   Tejun Heo   writeback: move b...
103
104
  		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
  		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
0d960a383   Tejun Heo   writeback: clean ...
105
  		   K(wb_thresh),
f7d2b1ecd   Jan Kara   writeback: accoun...
106
107
  		   K(dirty_thresh),
  		   K(background_thresh),
93f78d882   Tejun Heo   writeback: move b...
108
109
  		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
  		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
a88a341a7   Tejun Heo   writeback: move b...
110
  		   (unsigned long) K(wb->write_bandwidth),
f7d2b1ecd   Jan Kara   writeback: accoun...
111
112
113
  		   nr_dirty,
  		   nr_io,
  		   nr_more_io,
0ae45f63d   Theodore Ts'o   vfs: add support ...
114
  		   nr_dirty_time,
4452226ea   Tejun Heo   writeback: move b...
115
  		   !list_empty(&bdi->bdi_list), bdi->wb.state);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
116
117
118
119
  #undef K
  
  	return 0;
  }
5ad350936   Andy Shevchenko   mm: reuse DEFINE_...
120
  DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
121

2d146b924   Greg Kroah-Hartman   backing-dev: no n...
122
  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
123
124
  {
  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
97f076979   weiping zhang   bdi: convert bdi_...
125

2d146b924   Greg Kroah-Hartman   backing-dev: no n...
126
127
  	debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
  			    &bdi_debug_stats_fops);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
128
129
130
131
  }
  
  static void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
2d146b924   Greg Kroah-Hartman   backing-dev: no n...
132
  	debugfs_remove_recursive(bdi->debug_dir);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
133
134
135
136
137
  }
  #else
  static inline void bdi_debug_init(void)
  {
  }
2d146b924   Greg Kroah-Hartman   backing-dev: no n...
138
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
139
140
141
142
143
144
145
  				      const char *name)
  {
  }
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  }
  #endif
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
146
147
148
149
150
  static ssize_t read_ahead_kb_store(struct device *dev,
  				  struct device_attribute *attr,
  				  const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
151
  	unsigned long read_ahead_kb;
7034ed132   Namjae Jeon   backing-dev: use ...
152
  	ssize_t ret;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
153

7034ed132   Namjae Jeon   backing-dev: use ...
154
155
156
157
158
159
160
  	ret = kstrtoul(buf, 10, &read_ahead_kb);
  	if (ret < 0)
  		return ret;
  
  	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
  
  	return count;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
161
162
163
164
165
166
167
168
169
170
171
172
  }
  
  #define K(pages) ((pages) << (PAGE_SHIFT - 10))
  
  #define BDI_SHOW(name, expr)						\
  static ssize_t name##_show(struct device *dev,				\
  			   struct device_attribute *attr, char *page)	\
  {									\
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
  									\
  	return snprintf(page, PAGE_SIZE-1, "%lld
  ", (long long)expr);	\
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
173
174
  }									\
  static DEVICE_ATTR_RW(name);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
175
176
  
  BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
177
178
179
180
  static ssize_t min_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
181
  	unsigned int ratio;
7034ed132   Namjae Jeon   backing-dev: use ...
182
183
184
185
186
187
188
189
190
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_min_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
191

189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
192
193
194
  	return ret;
  }
  BDI_SHOW(min_ratio, bdi->min_ratio)
a42dde041   Peter Zijlstra   mm: bdi: allow se...
195
196
197
198
  static ssize_t max_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
a42dde041   Peter Zijlstra   mm: bdi: allow se...
199
  	unsigned int ratio;
7034ed132   Namjae Jeon   backing-dev: use ...
200
201
202
203
204
205
206
207
208
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_max_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;
a42dde041   Peter Zijlstra   mm: bdi: allow se...
209

a42dde041   Peter Zijlstra   mm: bdi: allow se...
210
211
212
  	return ret;
  }
  BDI_SHOW(max_ratio, bdi->max_ratio)
7d311cdab   Darrick J. Wong   bdi: allow block ...
213
214
215
216
217
218
219
220
221
222
  static ssize_t stable_pages_required_show(struct device *dev,
  					  struct device_attribute *attr,
  					  char *page)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
  
  	return snprintf(page, PAGE_SIZE-1, "%d
  ",
  			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
  }
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
223
224
225
226
227
228
229
230
  static DEVICE_ATTR_RO(stable_pages_required);
  
  static struct attribute *bdi_dev_attrs[] = {
  	&dev_attr_read_ahead_kb.attr,
  	&dev_attr_min_ratio.attr,
  	&dev_attr_max_ratio.attr,
  	&dev_attr_stable_pages_required.attr,
  	NULL,
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
231
  };
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
232
  ATTRIBUTE_GROUPS(bdi_dev);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
233
234
235
236
  
  static __init int bdi_class_init(void)
  {
  	bdi_class = class_create(THIS_MODULE, "bdi");
144214537   Anton Blanchard   backing-dev: Hand...
237
238
  	if (IS_ERR(bdi_class))
  		return PTR_ERR(bdi_class);
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
239
  	bdi_class->dev_groups = bdi_dev_groups;
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
240
  	bdi_debug_init();
d03f6cdc1   Jan Kara   block: Dynamicall...
241

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
242
243
  	return 0;
  }
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
244
  postcore_initcall(bdi_class_init);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
245

2e82b84c0   Jan Kara   block: Remove unu...
246
  static int bdi_init(struct backing_dev_info *bdi);
26160158d   Jens Axboe   Move the default_...
247
248
249
  static int __init default_bdi_init(void)
  {
  	int err;
a2b90f112   Mika Westerberg   bdi: Do not use f...
250
251
  	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
  				 WQ_SYSFS, 0);
839a8e866   Tejun Heo   writeback: replac...
252
253
  	if (!bdi_wq)
  		return -ENOMEM;
976e48f8a   Jan Kara   bdi: Initialize n...
254
  	err = bdi_init(&noop_backing_dev_info);
26160158d   Jens Axboe   Move the default_...
255
256
257
258
  
  	return err;
  }
  subsys_initcall(default_bdi_init);
6467716a3   Artem Bityutskiy   writeback: optimi...
259
  /*
f0054bb1e   Tejun Heo   writeback: move b...
260
   * This function is used when the first inode for this wb is marked dirty. It
6467716a3   Artem Bityutskiy   writeback: optimi...
261
262
263
264
265
266
267
268
   * wakes-up the corresponding bdi thread which should then take care of the
   * periodic background write-out of dirty inodes. Since the write-out would
   * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
   * set up a timer which wakes the bdi thread up later.
   *
   * Note, we wouldn't bother setting up the timer, but this function is on the
   * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
   * by delaying the wake-up.
6ca738d60   Derek Basehore   backing_dev: fix ...
269
270
271
   *
   * We have to be careful not to postpone flush work if it is scheduled for
   * earlier. Thus we use queue_delayed_work().
6467716a3   Artem Bityutskiy   writeback: optimi...
272
   */
f0054bb1e   Tejun Heo   writeback: move b...
273
  void wb_wakeup_delayed(struct bdi_writeback *wb)
6467716a3   Artem Bityutskiy   writeback: optimi...
274
275
276
277
  {
  	unsigned long timeout;
  
  	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
f0054bb1e   Tejun Heo   writeback: move b...
278
279
280
281
  	spin_lock_bh(&wb->work_lock);
  	if (test_bit(WB_registered, &wb->state))
  		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
  	spin_unlock_bh(&wb->work_lock);
03ba3782e   Jens Axboe   writeback: switch...
282
  }
cfc4ba536   Jens Axboe   writeback: use RC...
283
  /*
a88a341a7   Tejun Heo   writeback: move b...
284
   * Initial write bandwidth: 100 MB/s
cfc4ba536   Jens Axboe   writeback: use RC...
285
   */
a88a341a7   Tejun Heo   writeback: move b...
286
  #define INIT_BW		(100 << (20 - PAGE_SHIFT))
cfc4ba536   Jens Axboe   writeback: use RC...
287

8395cd9f8   Tejun Heo   writeback: add @g...
288
  static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
a13f35e87   Tejun Heo   writeback: don't ...
289
  		   int blkcg_id, gfp_t gfp)
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
290
  {
93f78d882   Tejun Heo   writeback: move b...
291
  	int i, err;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
292

6467716a3   Artem Bityutskiy   writeback: optimi...
293
  	memset(wb, 0, sizeof(*wb));
f1d0b063d   Kay Sievers   bdi: register sys...
294

810df54a6   Jan Kara   bdi: Make wb->bdi...
295
296
  	if (wb != &bdi->wb)
  		bdi_get(bdi);
6467716a3   Artem Bityutskiy   writeback: optimi...
297
298
299
300
301
  	wb->bdi = bdi;
  	wb->last_old_flush = jiffies;
  	INIT_LIST_HEAD(&wb->b_dirty);
  	INIT_LIST_HEAD(&wb->b_io);
  	INIT_LIST_HEAD(&wb->b_more_io);
0ae45f63d   Theodore Ts'o   vfs: add support ...
302
  	INIT_LIST_HEAD(&wb->b_dirty_time);
f758eeabe   Christoph Hellwig   writeback: split ...
303
  	spin_lock_init(&wb->list_lock);
66f3b8e2e   Jens Axboe   writeback: move d...
304

a88a341a7   Tejun Heo   writeback: move b...
305
306
307
308
309
  	wb->bw_time_stamp = jiffies;
  	wb->balanced_dirty_ratelimit = INIT_BW;
  	wb->dirty_ratelimit = INIT_BW;
  	wb->write_bandwidth = INIT_BW;
  	wb->avg_write_bandwidth = INIT_BW;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
310

f0054bb1e   Tejun Heo   writeback: move b...
311
312
313
  	spin_lock_init(&wb->work_lock);
  	INIT_LIST_HEAD(&wb->work_list);
  	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
b57d74aff   Jens Axboe   writeback: track ...
314
  	wb->dirty_sleep = jiffies;
c284de61d   Artem Bityutskiy   writeback: cleanu...
315

a13f35e87   Tejun Heo   writeback: don't ...
316
  	wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
810df54a6   Jan Kara   bdi: Make wb->bdi...
317
318
319
320
  	if (!wb->congested) {
  		err = -ENOMEM;
  		goto out_put_bdi;
  	}
a13f35e87   Tejun Heo   writeback: don't ...
321

8395cd9f8   Tejun Heo   writeback: add @g...
322
  	err = fprop_local_init_percpu(&wb->completions, gfp);
a88a341a7   Tejun Heo   writeback: move b...
323
  	if (err)
a13f35e87   Tejun Heo   writeback: don't ...
324
  		goto out_put_cong;
c284de61d   Artem Bityutskiy   writeback: cleanu...
325

93f78d882   Tejun Heo   writeback: move b...
326
  	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
8395cd9f8   Tejun Heo   writeback: add @g...
327
  		err = percpu_counter_init(&wb->stat[i], 0, gfp);
a13f35e87   Tejun Heo   writeback: don't ...
328
329
  		if (err)
  			goto out_destroy_stat;
93f78d882   Tejun Heo   writeback: move b...
330
  	}
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
331

93f78d882   Tejun Heo   writeback: move b...
332
  	return 0;
a13f35e87   Tejun Heo   writeback: don't ...
333
334
  
  out_destroy_stat:
078c6c3a5   Rasmus Villemoes   mm/backing-dev.c:...
335
  	while (i--)
a13f35e87   Tejun Heo   writeback: don't ...
336
337
338
339
  		percpu_counter_destroy(&wb->stat[i]);
  	fprop_local_destroy_percpu(&wb->completions);
  out_put_cong:
  	wb_congested_put(wb->congested);
810df54a6   Jan Kara   bdi: Make wb->bdi...
340
341
342
  out_put_bdi:
  	if (wb != &bdi->wb)
  		bdi_put(bdi);
a13f35e87   Tejun Heo   writeback: don't ...
343
  	return err;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
344
  }
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
345

e8cb72b32   Jan Kara   bdi: Unify bdi->w...
346
  static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
03ba3782e   Jens Axboe   writeback: switch...
347
348
349
  /*
   * Remove bdi from the global list and shutdown any threads we have running
   */
461000714   Tejun Heo   writeback: reorga...
350
  static void wb_shutdown(struct bdi_writeback *wb)
66f3b8e2e   Jens Axboe   writeback: move d...
351
  {
c4db59d31   Christoph Hellwig   fs: don't reassig...
352
  	/* Make sure nobody queues further work */
461000714   Tejun Heo   writeback: reorga...
353
354
355
  	spin_lock_bh(&wb->work_lock);
  	if (!test_and_clear_bit(WB_registered, &wb->state)) {
  		spin_unlock_bh(&wb->work_lock);
03ba3782e   Jens Axboe   writeback: switch...
356
  		return;
c4db59d31   Christoph Hellwig   fs: don't reassig...
357
  	}
461000714   Tejun Heo   writeback: reorga...
358
  	spin_unlock_bh(&wb->work_lock);
03ba3782e   Jens Axboe   writeback: switch...
359

e8cb72b32   Jan Kara   bdi: Unify bdi->w...
360
  	cgwb_remove_from_bdi_list(wb);
03ba3782e   Jens Axboe   writeback: switch...
361
  	/*
461000714   Tejun Heo   writeback: reorga...
362
363
364
  	 * Drain work list and shutdown the delayed_work.  !WB_registered
  	 * tells wb_workfn() that @wb is dying and its work_list needs to
  	 * be drained no matter what.
03ba3782e   Jens Axboe   writeback: switch...
365
  	 */
461000714   Tejun Heo   writeback: reorga...
366
367
368
369
  	mod_delayed_work(bdi_wq, &wb->dwork, 0);
  	flush_delayed_work(&wb->dwork);
  	WARN_ON(!list_empty(&wb->work_list));
  }
f0054bb1e   Tejun Heo   writeback: move b...
370
  static void wb_exit(struct bdi_writeback *wb)
93f78d882   Tejun Heo   writeback: move b...
371
372
373
374
375
376
377
  {
  	int i;
  
  	WARN_ON(delayed_work_pending(&wb->dwork));
  
  	for (i = 0; i < NR_WB_STAT_ITEMS; i++)
  		percpu_counter_destroy(&wb->stat[i]);
6467716a3   Artem Bityutskiy   writeback: optimi...
378

a88a341a7   Tejun Heo   writeback: move b...
379
  	fprop_local_destroy_percpu(&wb->completions);
a13f35e87   Tejun Heo   writeback: don't ...
380
  	wb_congested_put(wb->congested);
810df54a6   Jan Kara   bdi: Make wb->bdi...
381
382
  	if (wb != &wb->bdi->wb)
  		bdi_put(wb->bdi);
a88a341a7   Tejun Heo   writeback: move b...
383
  }
e98be2d59   Wu Fengguang   writeback: bdi wr...
384

52ebea749   Tejun Heo   writeback: make b...
385
386
387
388
389
390
391
  #ifdef CONFIG_CGROUP_WRITEBACK
  
  #include <linux/memcontrol.h>
  
  /*
   * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
   * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
4514451e7   Jan Kara   bdi: Do not wait ...
392
   * protected.
52ebea749   Tejun Heo   writeback: make b...
393
394
   */
  static DEFINE_SPINLOCK(cgwb_lock);
f18346468   Tejun Heo   bdi: Move cgroup ...
395
  static struct workqueue_struct *cgwb_release_wq;
52ebea749   Tejun Heo   writeback: make b...
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
  
  /**
   * wb_congested_get_create - get or create a wb_congested
   * @bdi: associated bdi
   * @blkcg_id: ID of the associated blkcg
   * @gfp: allocation mask
   *
   * Look up the wb_congested for @blkcg_id on @bdi.  If missing, create one.
   * The returned wb_congested has its reference count incremented.  Returns
   * NULL on failure.
   */
  struct bdi_writeback_congested *
  wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
  {
  	struct bdi_writeback_congested *new_congested = NULL, *congested;
  	struct rb_node **node, *parent;
  	unsigned long flags;
52ebea749   Tejun Heo   writeback: make b...
413
414
415
416
417
418
419
420
  retry:
  	spin_lock_irqsave(&cgwb_lock, flags);
  
  	node = &bdi->cgwb_congested_tree.rb_node;
  	parent = NULL;
  
  	while (*node != NULL) {
  		parent = *node;
bc71226b0   Geliang Tang   mm/backing-dev.c:...
421
422
  		congested = rb_entry(parent, struct bdi_writeback_congested,
  				     rb_node);
52ebea749   Tejun Heo   writeback: make b...
423
424
425
426
427
428
429
430
431
432
433
  		if (congested->blkcg_id < blkcg_id)
  			node = &parent->rb_left;
  		else if (congested->blkcg_id > blkcg_id)
  			node = &parent->rb_right;
  		else
  			goto found;
  	}
  
  	if (new_congested) {
  		/* !found and storage for new one already allocated, insert */
  		congested = new_congested;
52ebea749   Tejun Heo   writeback: make b...
434
435
  		rb_link_node(&congested->rb_node, parent, node);
  		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
e58dd0de5   Sebastian Andrzej Siewior   bdi: use refcount...
436
437
  		spin_unlock_irqrestore(&cgwb_lock, flags);
  		return congested;
52ebea749   Tejun Heo   writeback: make b...
438
439
440
441
442
443
444
445
  	}
  
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  
  	/* allocate storage for new one and retry */
  	new_congested = kzalloc(sizeof(*new_congested), gfp);
  	if (!new_congested)
  		return NULL;
e58dd0de5   Sebastian Andrzej Siewior   bdi: use refcount...
446
  	refcount_set(&new_congested->refcnt, 1);
b7d680d7b   Jan Kara   bdi: Mark congest...
447
  	new_congested->__bdi = bdi;
52ebea749   Tejun Heo   writeback: make b...
448
449
450
451
  	new_congested->blkcg_id = blkcg_id;
  	goto retry;
  
  found:
e58dd0de5   Sebastian Andrzej Siewior   bdi: use refcount...
452
  	refcount_inc(&congested->refcnt);
52ebea749   Tejun Heo   writeback: make b...
453
454
455
456
457
458
459
460
461
462
463
464
465
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	kfree(new_congested);
  	return congested;
  }
  
  /**
   * wb_congested_put - put a wb_congested
   * @congested: wb_congested to put
   *
   * Put @congested and destroy it if the refcnt reaches zero.
   */
  void wb_congested_put(struct bdi_writeback_congested *congested)
  {
52ebea749   Tejun Heo   writeback: make b...
466
  	unsigned long flags;
060288a73   Anna-Maria Gleixner   bdi: use irqsave ...
467
  	if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
52ebea749   Tejun Heo   writeback: make b...
468
  		return;
52ebea749   Tejun Heo   writeback: make b...
469

a20135ffb   Tejun Heo   writeback: don't ...
470
  	/* bdi might already have been destroyed leaving @congested unlinked */
b7d680d7b   Jan Kara   bdi: Mark congest...
471
  	if (congested->__bdi) {
a20135ffb   Tejun Heo   writeback: don't ...
472
  		rb_erase(&congested->rb_node,
b7d680d7b   Jan Kara   bdi: Mark congest...
473
474
  			 &congested->__bdi->cgwb_congested_tree);
  		congested->__bdi = NULL;
a20135ffb   Tejun Heo   writeback: don't ...
475
  	}
52ebea749   Tejun Heo   writeback: make b...
476
477
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	kfree(congested);
52ebea749   Tejun Heo   writeback: make b...
478
479
480
481
482
483
  }
  
  static void cgwb_release_workfn(struct work_struct *work)
  {
  	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
  						release_work);
59b57717f   Dennis Zhou (Facebook)   blkcg: delay blkg...
484
  	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
52ebea749   Tejun Heo   writeback: make b...
485

3ee7e8697   Jan Kara   bdi: Fix another ...
486
  	mutex_lock(&wb->bdi->cgwb_release_mutex);
52ebea749   Tejun Heo   writeback: make b...
487
488
489
490
  	wb_shutdown(wb);
  
  	css_put(wb->memcg_css);
  	css_put(wb->blkcg_css);
3ee7e8697   Jan Kara   bdi: Fix another ...
491
  	mutex_unlock(&wb->bdi->cgwb_release_mutex);
52ebea749   Tejun Heo   writeback: make b...
492

59b57717f   Dennis Zhou (Facebook)   blkcg: delay blkg...
493
494
  	/* triggers blkg destruction if cgwb_refcnt becomes zero */
  	blkcg_cgwb_put(blkcg);
841710aa6   Tejun Heo   writeback: implem...
495
  	fprop_local_destroy_percpu(&wb->memcg_completions);
52ebea749   Tejun Heo   writeback: make b...
496
497
498
  	percpu_ref_exit(&wb->refcnt);
  	wb_exit(wb);
  	kfree_rcu(wb, rcu);
52ebea749   Tejun Heo   writeback: make b...
499
500
501
502
503
504
  }
  
  static void cgwb_release(struct percpu_ref *refcnt)
  {
  	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
  						refcnt);
f18346468   Tejun Heo   bdi: Move cgroup ...
505
  	queue_work(cgwb_release_wq, &wb->release_work);
52ebea749   Tejun Heo   writeback: make b...
506
507
508
509
510
511
512
513
514
515
516
  }
  
  static void cgwb_kill(struct bdi_writeback *wb)
  {
  	lockdep_assert_held(&cgwb_lock);
  
  	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
  	list_del(&wb->memcg_node);
  	list_del(&wb->blkcg_node);
  	percpu_ref_kill(&wb->refcnt);
  }
e8cb72b32   Jan Kara   bdi: Unify bdi->w...
517
518
519
520
521
522
  static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
  {
  	spin_lock_irq(&cgwb_lock);
  	list_del_rcu(&wb->bdi_node);
  	spin_unlock_irq(&cgwb_lock);
  }
52ebea749   Tejun Heo   writeback: make b...
523
524
525
526
527
528
529
530
531
532
533
534
  static int cgwb_create(struct backing_dev_info *bdi,
  		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
  {
  	struct mem_cgroup *memcg;
  	struct cgroup_subsys_state *blkcg_css;
  	struct blkcg *blkcg;
  	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
  	struct bdi_writeback *wb;
  	unsigned long flags;
  	int ret = 0;
  
  	memcg = mem_cgroup_from_css(memcg_css);
c165b3e3c   Tejun Heo   blkcg: rename sub...
535
  	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
52ebea749   Tejun Heo   writeback: make b...
536
  	blkcg = css_to_blkcg(blkcg_css);
9ccc36171   Wang Long   memcg: writeback:...
537
  	memcg_cgwb_list = &memcg->cgwb_list;
52ebea749   Tejun Heo   writeback: make b...
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
  	blkcg_cgwb_list = &blkcg->cgwb_list;
  
  	/* look up again under lock and discard on blkcg mismatch */
  	spin_lock_irqsave(&cgwb_lock, flags);
  	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
  	if (wb && wb->blkcg_css != blkcg_css) {
  		cgwb_kill(wb);
  		wb = NULL;
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	if (wb)
  		goto out_put;
  
  	/* need to create a new one */
  	wb = kmalloc(sizeof(*wb), gfp);
0b045bd1c   Christophe JAILLET   mm/backing-dev.c:...
553
554
555
556
  	if (!wb) {
  		ret = -ENOMEM;
  		goto out_put;
  	}
52ebea749   Tejun Heo   writeback: make b...
557

a13f35e87   Tejun Heo   writeback: don't ...
558
  	ret = wb_init(wb, bdi, blkcg_css->id, gfp);
52ebea749   Tejun Heo   writeback: make b...
559
560
561
562
563
564
  	if (ret)
  		goto err_free;
  
  	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
  	if (ret)
  		goto err_wb_exit;
841710aa6   Tejun Heo   writeback: implem...
565
566
567
  	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
  	if (ret)
  		goto err_ref_exit;
52ebea749   Tejun Heo   writeback: make b...
568
569
570
571
  	wb->memcg_css = memcg_css;
  	wb->blkcg_css = blkcg_css;
  	INIT_WORK(&wb->release_work, cgwb_release_workfn);
  	set_bit(WB_registered, &wb->state);
03ba3782e   Jens Axboe   writeback: switch...
572
573
  
  	/*
52ebea749   Tejun Heo   writeback: make b...
574
575
576
577
  	 * The root wb determines the registered state of the whole bdi and
  	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
  	 * whether they're still online.  Don't link @wb if any is dead.
  	 * See wb_memcg_offline() and wb_blkcg_offline().
03ba3782e   Jens Axboe   writeback: switch...
578
  	 */
52ebea749   Tejun Heo   writeback: make b...
579
580
581
582
583
584
585
  	ret = -ENODEV;
  	spin_lock_irqsave(&cgwb_lock, flags);
  	if (test_bit(WB_registered, &bdi->wb.state) &&
  	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
  		/* we might have raced another instance of this function */
  		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
  		if (!ret) {
b817525a4   Tejun Heo   writeback: bdi_wr...
586
  			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
52ebea749   Tejun Heo   writeback: make b...
587
588
  			list_add(&wb->memcg_node, memcg_cgwb_list);
  			list_add(&wb->blkcg_node, blkcg_cgwb_list);
59b57717f   Dennis Zhou (Facebook)   blkcg: delay blkg...
589
  			blkcg_cgwb_get(blkcg);
52ebea749   Tejun Heo   writeback: make b...
590
591
592
593
594
595
596
597
  			css_get(memcg_css);
  			css_get(blkcg_css);
  		}
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	if (ret) {
  		if (ret == -EEXIST)
  			ret = 0;
a13f35e87   Tejun Heo   writeback: don't ...
598
  		goto err_fprop_exit;
52ebea749   Tejun Heo   writeback: make b...
599
600
  	}
  	goto out_put;
841710aa6   Tejun Heo   writeback: implem...
601
602
  err_fprop_exit:
  	fprop_local_destroy_percpu(&wb->memcg_completions);
52ebea749   Tejun Heo   writeback: make b...
603
604
605
606
607
608
609
610
611
  err_ref_exit:
  	percpu_ref_exit(&wb->refcnt);
  err_wb_exit:
  	wb_exit(wb);
  err_free:
  	kfree(wb);
  out_put:
  	css_put(blkcg_css);
  	return ret;
66f3b8e2e   Jens Axboe   writeback: move d...
612
  }
52ebea749   Tejun Heo   writeback: make b...
613
/**
 * wb_get_lookup - get wb for a given memcg
 * @bdi: target bdi
 * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
 *
 * Try to get the wb for @memcg_css on @bdi.  The returned wb has its
 * refcount incremented.
 *
 * This function uses css_get() on @memcg_css and thus expects its refcnt
 * to be positive on invocation.  IOW, rcu_read_lock() protection on
 * @memcg_css isn't enough.  try_get it before calling this function.
 *
 * A wb is keyed by its associated memcg.  As blkcg implicitly enables
 * memcg on the default hierarchy, memcg association is guaranteed to be
 * more specific (equal or descendant to the associated blkcg) and thus can
 * identify both the memcg and blkcg associations.
 *
 * Because the blkcg associated with a memcg may change as blkcg is enabled
 * and disabled closer to root in the hierarchy, each wb keeps track of
 * both the memcg and blkcg associated with it and verifies the blkcg on
 * each lookup.  On mismatch, the existing wb is discarded and a new one is
 * created.
 */
struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
				    struct cgroup_subsys_state *memcg_css)
{
	struct bdi_writeback *wb;

	/* the root memcg is always served by bdi's embedded wb */
	if (!memcg_css->parent)
		return &bdi->wb;

	rcu_read_lock();
	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
	if (wb) {
		struct cgroup_subsys_state *blkcg_css;

		/* see whether the blkcg association has changed */
		blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
		/*
		 * Report no wb on a blkcg mismatch or when the tryget
		 * fails (wb is dying); the caller is expected to create
		 * a fresh one in that case.
		 */
		if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb)))
			wb = NULL;
		css_put(blkcg_css);
	}
	rcu_read_unlock();

	return wb;
}
  
  /**
   * wb_get_create - get wb for a given memcg, create if necessary
   * @bdi: target bdi
   * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
   * @gfp: allocation mask to use
   *
   * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
   * create one.  See wb_get_lookup() for more details.
   */
52ebea749   Tejun Heo   writeback: make b...
669
670
671
  struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
  				    struct cgroup_subsys_state *memcg_css,
  				    gfp_t gfp)
6467716a3   Artem Bityutskiy   writeback: optimi...
672
  {
52ebea749   Tejun Heo   writeback: make b...
673
  	struct bdi_writeback *wb;
d0164adc8   Mel Gorman   mm, page_alloc: d...
674
  	might_sleep_if(gfpflags_allow_blocking(gfp));
52ebea749   Tejun Heo   writeback: make b...
675
676
677
678
679
  
  	if (!memcg_css->parent)
  		return &bdi->wb;
  
  	do {
ed288dc0d   Tejun Heo   writeback: Separa...
680
  		wb = wb_get_lookup(bdi, memcg_css);
52ebea749   Tejun Heo   writeback: make b...
681
682
683
684
  	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
  
  	return wb;
  }
6467716a3   Artem Bityutskiy   writeback: optimi...
685

a13f35e87   Tejun Heo   writeback: don't ...
686
  static int cgwb_bdi_init(struct backing_dev_info *bdi)
52ebea749   Tejun Heo   writeback: make b...
687
  {
a13f35e87   Tejun Heo   writeback: don't ...
688
  	int ret;
52ebea749   Tejun Heo   writeback: make b...
689
690
  	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
  	bdi->cgwb_congested_tree = RB_ROOT;
3ee7e8697   Jan Kara   bdi: Fix another ...
691
  	mutex_init(&bdi->cgwb_release_mutex);
7fc5854f8   Tejun Heo   writeback: synchr...
692
  	init_rwsem(&bdi->wb_switch_rwsem);
a13f35e87   Tejun Heo   writeback: don't ...
693
694
695
  
  	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
  	if (!ret) {
7d828602e   Johannes Weiner   mm: memcontrol: e...
696
  		bdi->wb.memcg_css = &root_mem_cgroup->css;
a13f35e87   Tejun Heo   writeback: don't ...
697
698
699
  		bdi->wb.blkcg_css = blkcg_root_css;
  	}
  	return ret;
6467716a3   Artem Bityutskiy   writeback: optimi...
700
  }
b1c51afc0   Jan Kara   bdi: Rename cgwb_...
701
/*
 * Kill and shut down all cgroup wb's of @bdi on unregistration.  Must be
 * called after the bdi's own wb is no longer WB_registered.
 */
static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
{
	struct radix_tree_iter iter;
	void **slot;
	struct bdi_writeback *wb;

	WARN_ON(test_bit(WB_registered, &bdi->wb.state));

	/* first pass: kill every wb still in the radix tree */
	spin_lock_irq(&cgwb_lock);
	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
		cgwb_kill(*slot);
	spin_unlock_irq(&cgwb_lock);

	/*
	 * NOTE(review): cgwb_release_mutex presumably serializes against
	 * the asynchronous wb release path removing entries from
	 * bdi->wb_list (release path not visible here) — confirm.
	 */
	mutex_lock(&bdi->cgwb_release_mutex);
	spin_lock_irq(&cgwb_lock);
	/*
	 * Drain bdi->wb_list, dropping cgwb_lock around each
	 * wb_shutdown() since shutdown may sleep.
	 */
	while (!list_empty(&bdi->wb_list)) {
		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
				      bdi_node);
		spin_unlock_irq(&cgwb_lock);
		wb_shutdown(wb);
		spin_lock_irq(&cgwb_lock);
	}
	spin_unlock_irq(&cgwb_lock);
	mutex_unlock(&bdi->cgwb_release_mutex);
}
  
  /**
   * wb_memcg_offline - kill all wb's associated with a memcg being offlined
   * @memcg: memcg being offlined
   *
   * Also prevents creation of any new wb's associated with @memcg.
e98be2d59   Wu Fengguang   writeback: bdi wr...
732
   */
52ebea749   Tejun Heo   writeback: make b...
733
734
  void wb_memcg_offline(struct mem_cgroup *memcg)
  {
9ccc36171   Wang Long   memcg: writeback:...
735
  	struct list_head *memcg_cgwb_list = &memcg->cgwb_list;
52ebea749   Tejun Heo   writeback: make b...
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
  		cgwb_kill(wb);
  	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
  
  /**
   * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
   * @blkcg: blkcg being offlined
   *
   * Also prevents creation of any new wb's associated with @blkcg.
   */
  void wb_blkcg_offline(struct blkcg *blkcg)
  {
52ebea749   Tejun Heo   writeback: make b...
753
754
755
756
757
758
759
760
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
  		cgwb_kill(wb);
  	blkcg->cgwb_list.next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
df23de556   Jan Kara   bdi: Fix use-afte...
761
762
763
764
765
766
767
768
769
770
/*
 * Final cgwb teardown on bdi release: unlink every remaining
 * bdi_writeback_congested from the congested tree.
 */
static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
	struct rb_node *rbn;

	spin_lock_irq(&cgwb_lock);
	while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
		struct bdi_writeback_congested *congested =
			rb_entry(rbn, struct bdi_writeback_congested, rb_node);

		rb_erase(rbn, &bdi->cgwb_congested_tree);
		/*
		 * @congested may outlive the bdi (it is refcounted
		 * elsewhere); clearing __bdi marks it unlinked.
		 */
		congested->__bdi = NULL;	/* mark @congested unlinked */
	}
	spin_unlock_irq(&cgwb_lock);
}
e8cb72b32   Jan Kara   bdi: Unify bdi->w...
775
776
777
778
779
780
/* Hook the bdi's embedded wb onto bdi->wb_list under cgwb_lock. */
static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	spin_lock_irq(&cgwb_lock);
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
	spin_unlock_irq(&cgwb_lock);
}
f18346468   Tejun Heo   bdi: Move cgroup ...
781
782
783
784
785
786
787
788
789
790
791
792
793
794
  static int __init cgwb_init(void)
  {
  	/*
  	 * There can be many concurrent release work items overwhelming
  	 * system_wq.  Put them in a separate wq and limit concurrency.
  	 * There's no point in executing many of these in parallel.
  	 */
  	cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1);
  	if (!cgwb_release_wq)
  		return -ENOMEM;
  
  	return 0;
  }
  subsys_initcall(cgwb_init);
52ebea749   Tejun Heo   writeback: make b...
795
  #else	/* CONFIG_CGROUP_WRITEBACK */
a13f35e87   Tejun Heo   writeback: don't ...
796
797
798
799
800
801
802
  static int cgwb_bdi_init(struct backing_dev_info *bdi)
  {
  	int err;
  
  	bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
  	if (!bdi->wb_congested)
  		return -ENOMEM;
e58dd0de5   Sebastian Andrzej Siewior   bdi: use refcount...
803
  	refcount_set(&bdi->wb_congested->refcnt, 1);
5f478e4ea   Tejun Heo   block: fix double...
804

a13f35e87   Tejun Heo   writeback: don't ...
805
806
  	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
  	if (err) {
5f478e4ea   Tejun Heo   block: fix double...
807
  		wb_congested_put(bdi->wb_congested);
a13f35e87   Tejun Heo   writeback: don't ...
808
809
810
811
  		return err;
  	}
  	return 0;
  }
b1c51afc0   Jan Kara   bdi: Rename cgwb_...
812
/* No per-cgroup wb's to tear down without CONFIG_CGROUP_WRITEBACK. */
static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }

/* Drop the single wb_congested allocated by cgwb_bdi_init() above. */
static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
	wb_congested_put(bdi->wb_congested);
}

/* Only the embedded wb exists; no cgwb_lock needed in this config. */
static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
}

static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
{
	list_del_rcu(&wb->bdi_node);
}
52ebea749   Tejun Heo   writeback: make b...
828
  #endif	/* CONFIG_CGROUP_WRITEBACK */
e98be2d59   Wu Fengguang   writeback: bdi wr...
829

2e82b84c0   Jan Kara   block: Remove unu...
830
  static int bdi_init(struct backing_dev_info *bdi)
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
831
  {
b817525a4   Tejun Heo   writeback: bdi_wr...
832
  	int ret;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
833
  	bdi->dev = NULL;
d03f6cdc1   Jan Kara   block: Dynamicall...
834
  	kref_init(&bdi->refcnt);
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
835
  	bdi->min_ratio = 0;
a42dde041   Peter Zijlstra   mm: bdi: allow se...
836
  	bdi->max_ratio = 100;
eb608e3a3   Jan Kara   block: Convert BD...
837
  	bdi->max_prop_frac = FPROP_FRAC_BASE;
66f3b8e2e   Jens Axboe   writeback: move d...
838
  	INIT_LIST_HEAD(&bdi->bdi_list);
b817525a4   Tejun Heo   writeback: bdi_wr...
839
  	INIT_LIST_HEAD(&bdi->wb_list);
cc395d7f1   Tejun Heo   writeback: implem...
840
  	init_waitqueue_head(&bdi->wb_waitq);
03ba3782e   Jens Axboe   writeback: switch...
841

b817525a4   Tejun Heo   writeback: bdi_wr...
842
  	ret = cgwb_bdi_init(bdi);
b817525a4   Tejun Heo   writeback: bdi_wr...
843
  	return ret;
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
844
  }
e98be2d59   Wu Fengguang   writeback: bdi wr...
845

d03f6cdc1   Jan Kara   block: Dynamicall...
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
  struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
  {
  	struct backing_dev_info *bdi;
  
  	bdi = kmalloc_node(sizeof(struct backing_dev_info),
  			   gfp_mask | __GFP_ZERO, node_id);
  	if (!bdi)
  		return NULL;
  
  	if (bdi_init(bdi)) {
  		kfree(bdi);
  		return NULL;
  	}
  	return bdi;
  }
62bf42adc   Jan Kara   bdi: Export bdi_a...
861
  EXPORT_SYMBOL(bdi_alloc_node);
d03f6cdc1   Jan Kara   block: Dynamicall...
862

34f8fe501   Tejun Heo   bdi: Add bdi->id
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
  static struct rb_node **bdi_lookup_rb_node(u64 id, struct rb_node **parentp)
  {
  	struct rb_node **p = &bdi_tree.rb_node;
  	struct rb_node *parent = NULL;
  	struct backing_dev_info *bdi;
  
  	lockdep_assert_held(&bdi_lock);
  
  	while (*p) {
  		parent = *p;
  		bdi = rb_entry(parent, struct backing_dev_info, rb_node);
  
  		if (bdi->id > id)
  			p = &(*p)->rb_left;
  		else if (bdi->id < id)
  			p = &(*p)->rb_right;
  		else
  			break;
  	}
  
  	if (parentp)
  		*parentp = parent;
  	return p;
  }
  
  /**
   * bdi_get_by_id - lookup and get bdi from its id
   * @id: bdi id to lookup
   *
   * Find bdi matching @id and get it.  Returns NULL if the matching bdi
   * doesn't exist or is already unregistered.
   */
  struct backing_dev_info *bdi_get_by_id(u64 id)
  {
  	struct backing_dev_info *bdi = NULL;
  	struct rb_node **p;
  
  	spin_lock_bh(&bdi_lock);
  	p = bdi_lookup_rb_node(id, NULL);
  	if (*p) {
  		bdi = rb_entry(*p, struct backing_dev_info, rb_node);
  		bdi_get(bdi);
  	}
  	spin_unlock_bh(&bdi_lock);
  
  	return bdi;
  }
7c4cc3002   Jan Kara   bdi: Drop 'parent...
910
/*
 * Register @bdi: create its device node, assign an id, and link it into
 * bdi_tree and bdi_list.  Returns 0 on success (including when already
 * registered) or a negative errno.
 */
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
	struct device *dev;
	struct rb_node *parent, **p;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	/* link the embedded wb before the bdi becomes visible */
	cgwb_bdi_register(bdi);
	bdi->dev = dev;

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(WB_registered, &bdi->wb.state);

	spin_lock_bh(&bdi_lock);

	/* ids are assigned monotonically from a global cursor */
	bdi->id = ++bdi_id_cursor;

	p = bdi_lookup_rb_node(bdi->id, &parent);
	rb_link_node(&bdi->rb_node, parent, p);
	rb_insert_color(&bdi->rb_node, &bdi_tree);

	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);

	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register_va);
7c4cc3002   Jan Kara   bdi: Drop 'parent...
943
/* Varargs convenience wrapper around bdi_register_va(). */
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = bdi_register_va(bdi, fmt, args);
	va_end(args);
	return ret;
}
EXPORT_SYMBOL(bdi_register);
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
954

df08c32ce   Dan Williams   block: fix bdi vs...
955
956
957
  int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
  {
  	int rc;
7c4cc3002   Jan Kara   bdi: Drop 'parent...
958
  	rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
df08c32ce   Dan Williams   block: fix bdi vs...
959
960
  	if (rc)
  		return rc;
b6f8fec44   Jan Kara   block: Allow bdi ...
961
962
  	/* Leaking owner reference... */
  	WARN_ON(bdi->owner);
df08c32ce   Dan Williams   block: fix bdi vs...
963
964
965
966
967
  	bdi->owner = owner;
  	get_device(owner);
  	return 0;
  }
  EXPORT_SYMBOL(bdi_register_owner);
461000714   Tejun Heo   writeback: reorga...
968
969
970
971
972
973
/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	rb_erase(&bdi->rb_node, &bdi_tree);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	/* wait out RCU readers traversing bdi_list before returning */
	synchronize_rcu_expedited();
}
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
980

b02176f30   Tejun Heo   block: don't rele...
981
/*
 * Unregister @bdi: unlink it, shut down writeback, tear down cgroup
 * wb's, and release the device and owner references taken at
 * registration time.  Safe to call on a bdi whose device is already
 * gone (bdi->dev check below).
 */
void bdi_unregister(struct backing_dev_info *bdi)
{
	/* make sure nobody finds us on the bdi_list anymore */
	bdi_remove_from_list(bdi);
	wb_shutdown(&bdi->wb);
	cgwb_bdi_unregister(bdi);

	if (bdi->dev) {
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}

	/* drop the reference taken in bdi_register_owner() */
	if (bdi->owner) {
		put_device(bdi->owner);
		bdi->owner = NULL;
	}
}
c4db59d31   Christoph Hellwig   fs: don't reassig...
999

d03f6cdc1   Jan Kara   block: Dynamicall...
1000
1001
1002
1003
/*
 * kref release callback for bdi->refcnt: unregister if still
 * registered, then free everything set up by bdi_init().
 */
static void release_bdi(struct kref *ref)
{
	struct backing_dev_info *bdi =
			container_of(ref, struct backing_dev_info, refcnt);

	if (test_bit(WB_registered, &bdi->wb.state))
		bdi_unregister(bdi);
	/* bdi_unregister() clears ->dev; a live dev here is a bug */
	WARN_ON_ONCE(bdi->dev);
	wb_exit(&bdi->wb);
	cgwb_bdi_exit(bdi);
	kfree(bdi);
}
  
/* Drop a reference to @bdi; frees it via release_bdi() on last put. */
void bdi_put(struct backing_dev_info *bdi)
{
	kref_put(&bdi->refcnt, release_bdi);
}
EXPORT_SYMBOL(bdi_put);
d03f6cdc1   Jan Kara   block: Dynamicall...
1017

3fcfab16c   Andrew Morton   [PATCH] separate ...
1018
1019
1020
1021
/*
 * Wait queues and congested-wb counters for throttled writers, indexed
 * by the sync flag (0 == async, 1 == sync) as used below.
 */
static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};
static atomic_t nr_wb_congested[2];
3fcfab16c   Andrew Morton   [PATCH] separate ...
1023

ec8a6f264   Tejun Heo   writeback: make c...
1024
/*
 * Clear the (a)sync congested bit on @congested and wake anyone waiting
 * in congestion_wait()/wait_iff_congested().
 */
void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	wait_queue_head_t *wqh = &congestion_wqh[sync];
	enum wb_congested_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	/* only adjust the global counter on an actual 1->0 transition */
	if (test_and_clear_bit(bit, &congested->state))
		atomic_dec(&nr_wb_congested[sync]);
	/* order the bit clear before the waitqueue_active() check */
	smp_mb__after_atomic();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_wb_congested);
3fcfab16c   Andrew Morton   [PATCH] separate ...
1037

ec8a6f264   Tejun Heo   writeback: make c...
1038
  void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
3fcfab16c   Andrew Morton   [PATCH] separate ...
1039
  {
c877ef8ae   Kaixu Xia   writeback: fix th...
1040
  	enum wb_congested_state bit;
3fcfab16c   Andrew Morton   [PATCH] separate ...
1041

4452226ea   Tejun Heo   writeback: move b...
1042
  	bit = sync ? WB_sync_congested : WB_async_congested;
ec8a6f264   Tejun Heo   writeback: make c...
1043
1044
  	if (!test_and_set_bit(bit, &congested->state))
  		atomic_inc(&nr_wb_congested[sync]);
3fcfab16c   Andrew Morton   [PATCH] separate ...
1045
  }
ec8a6f264   Tejun Heo   writeback: make c...
1046
  EXPORT_SYMBOL(set_wb_congested);
3fcfab16c   Andrew Morton   [PATCH] separate ...
1047
1048
1049
  
  /**
   * congestion_wait - wait for a backing_dev to become uncongested
8aa7e847d   Jens Axboe   Fix congestion_wa...
1050
   * @sync: SYNC or ASYNC IO
3fcfab16c   Andrew Morton   [PATCH] separate ...
1051
1052
1053
1054
1055
1056
   * @timeout: timeout in jiffies
   *
   * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
   * write congestion.  If no backing_devs are congested then just wait for the
   * next write to be completed.
   */
8aa7e847d   Jens Axboe   Fix congestion_wa...
1057
  long congestion_wait(int sync, long timeout)
3fcfab16c   Andrew Morton   [PATCH] separate ...
1058
1059
  {
  	long ret;
52bb91986   Mel Gorman   writeback: accoun...
1060
  	unsigned long start = jiffies;
3fcfab16c   Andrew Morton   [PATCH] separate ...
1061
  	DEFINE_WAIT(wait);
8aa7e847d   Jens Axboe   Fix congestion_wa...
1062
  	wait_queue_head_t *wqh = &congestion_wqh[sync];
3fcfab16c   Andrew Morton   [PATCH] separate ...
1063
1064
1065
1066
  
  	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
  	ret = io_schedule_timeout(timeout);
  	finish_wait(wqh, &wait);
52bb91986   Mel Gorman   writeback: accoun...
1067
1068
1069
  
  	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
  					jiffies_to_usecs(jiffies - start));
3fcfab16c   Andrew Morton   [PATCH] separate ...
1070
1071
1072
  	return ret;
  }
  EXPORT_SYMBOL(congestion_wait);
04fbfdc14   Peter Zijlstra   mm: per device di...
1073

0e093d997   Mel Gorman   writeback: do not...
1074
/**
 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * In the event of a congested backing_dev (any backing_dev) this waits
 * for up to @timeout jiffies for either a BDI to exit congestion of the
 * given @sync queue or a write to complete.
 *
 * The return value is 0 if the sleep is for the full timeout. Otherwise,
 * it is the number of jiffies that were still remaining when the function
 * returned. return_value == timeout implies the function did not sleep.
 */
long wait_iff_congested(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * If there is no congestion, yield if necessary instead
	 * of sleeping on the congestion queue
	 */
	if (atomic_read(&nr_wb_congested[sync]) == 0) {
		cond_resched();

		/* In case we scheduled, work out time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;

		goto out;
	}

	/* Sleep until uncongested or a write happens */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

out:
	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(wait_iff_congested);