Blame view

mm/backing-dev.c 25.2 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
3fcfab16c   Andrew Morton   [PATCH] separate ...
2
3
  
  #include <linux/wait.h>
34f8fe501   Tejun Heo   bdi: Add bdi->id
4
  #include <linux/rbtree.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
5
  #include <linux/backing-dev.h>
03ba3782e   Jens Axboe   writeback: switch...
6
7
  #include <linux/kthread.h>
  #include <linux/freezer.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
8
  #include <linux/fs.h>
26160158d   Jens Axboe   Move the default_...
9
  #include <linux/pagemap.h>
03ba3782e   Jens Axboe   writeback: switch...
10
  #include <linux/mm.h>
3fcfab16c   Andrew Morton   [PATCH] separate ...
11
12
  #include <linux/sched.h>
  #include <linux/module.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
13
14
  #include <linux/writeback.h>
  #include <linux/device.h>
455b28646   Dave Chinner   writeback: Initia...
15
  #include <trace/events/writeback.h>
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
16

f56753ac2   Christoph Hellwig   bdi: replace BDI_...
17
  struct backing_dev_info noop_backing_dev_info;
a212b105b   Tejun Heo   bdi: make inode_t...
18
  EXPORT_SYMBOL_GPL(noop_backing_dev_info);
5129a469a   Jörn Engel   Catch filesystems...
19

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
20
  static struct class *bdi_class;
eb7ae5e06   Christoph Hellwig   bdi: move bdi_dev...
21
  static const char *bdi_unknown_name = "(unknown)";
cfc4ba536   Jens Axboe   writeback: use RC...
22
23
  
  /*
34f8fe501   Tejun Heo   bdi: Add bdi->id
24
25
   * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
   * reader side locking.
cfc4ba536   Jens Axboe   writeback: use RC...
26
   */
03ba3782e   Jens Axboe   writeback: switch...
27
  DEFINE_SPINLOCK(bdi_lock);
34f8fe501   Tejun Heo   bdi: Add bdi->id
28
29
  static u64 bdi_id_cursor;
  static struct rb_root bdi_tree = RB_ROOT;
66f3b8e2e   Jens Axboe   writeback: move d...
30
  LIST_HEAD(bdi_list);
03ba3782e   Jens Axboe   writeback: switch...
31

839a8e866   Tejun Heo   writeback: replac...
32
33
  /* bdi_wq serves all asynchronous writeback tasks */
  struct workqueue_struct *bdi_wq;
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
34
35
36
37
38
39
40
41
42
43
44
45
46
47
  #ifdef CONFIG_DEBUG_FS
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
  
  static struct dentry *bdi_debug_root;
  
  static void bdi_debug_init(void)
  {
  	bdi_debug_root = debugfs_create_dir("bdi", NULL);
  }
  
  static int bdi_debug_stats_show(struct seq_file *m, void *v)
  {
  	struct backing_dev_info *bdi = m->private;
c1955ce32   Christoph Hellwig   writeback: remove...
48
  	struct bdi_writeback *wb = &bdi->wb;
364aeb284   David Rientjes   mm: change dirty ...
49
50
  	unsigned long background_thresh;
  	unsigned long dirty_thresh;
0d960a383   Tejun Heo   writeback: clean ...
51
  	unsigned long wb_thresh;
0ae45f63d   Theodore Ts'o   vfs: add support ...
52
  	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
f09b00d3e   Jens Axboe   writeback: add so...
53
  	struct inode *inode;
0ae45f63d   Theodore Ts'o   vfs: add support ...
54
  	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
f758eeabe   Christoph Hellwig   writeback: split ...
55
  	spin_lock(&wb->list_lock);
c7f540849   Dave Chinner   inode: rename i_w...
56
  	list_for_each_entry(inode, &wb->b_dirty, i_io_list)
c1955ce32   Christoph Hellwig   writeback: remove...
57
  		nr_dirty++;
c7f540849   Dave Chinner   inode: rename i_w...
58
  	list_for_each_entry(inode, &wb->b_io, i_io_list)
c1955ce32   Christoph Hellwig   writeback: remove...
59
  		nr_io++;
c7f540849   Dave Chinner   inode: rename i_w...
60
  	list_for_each_entry(inode, &wb->b_more_io, i_io_list)
c1955ce32   Christoph Hellwig   writeback: remove...
61
  		nr_more_io++;
c7f540849   Dave Chinner   inode: rename i_w...
62
  	list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
0ae45f63d   Theodore Ts'o   vfs: add support ...
63
64
  		if (inode->i_state & I_DIRTY_TIME)
  			nr_dirty_time++;
f758eeabe   Christoph Hellwig   writeback: split ...
65
  	spin_unlock(&wb->list_lock);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
66

16c4042f0   Wu Fengguang   writeback: avoid ...
67
  	global_dirty_limits(&background_thresh, &dirty_thresh);
0d960a383   Tejun Heo   writeback: clean ...
68
  	wb_thresh = wb_calc_thresh(wb, dirty_thresh);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
69
70
71
  
  #define K(x) ((x) << (PAGE_SHIFT - 10))
  	seq_printf(m,
00821b002   Wu Fengguang   writeback: show b...
72
73
74
75
76
77
78
79
80
81
  		   "BdiWriteback:       %10lu kB
  "
  		   "BdiReclaimable:     %10lu kB
  "
  		   "BdiDirtyThresh:     %10lu kB
  "
  		   "DirtyThresh:        %10lu kB
  "
  		   "BackgroundThresh:   %10lu kB
  "
c8e28ce04   Wu Fengguang   writeback: accoun...
82
83
  		   "BdiDirtied:         %10lu kB
  "
00821b002   Wu Fengguang   writeback: show b...
84
85
86
87
88
89
90
91
92
93
  		   "BdiWritten:         %10lu kB
  "
  		   "BdiWriteBandwidth:  %10lu kBps
  "
  		   "b_dirty:            %10lu
  "
  		   "b_io:               %10lu
  "
  		   "b_more_io:          %10lu
  "
0ae45f63d   Theodore Ts'o   vfs: add support ...
94
95
  		   "b_dirty_time:       %10lu
  "
00821b002   Wu Fengguang   writeback: show b...
96
97
98
99
  		   "bdi_list:           %10u
  "
  		   "state:              %10lx
  ",
93f78d882   Tejun Heo   writeback: move b...
100
101
  		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
  		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
0d960a383   Tejun Heo   writeback: clean ...
102
  		   K(wb_thresh),
f7d2b1ecd   Jan Kara   writeback: accoun...
103
104
  		   K(dirty_thresh),
  		   K(background_thresh),
93f78d882   Tejun Heo   writeback: move b...
105
106
  		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
  		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
a88a341a7   Tejun Heo   writeback: move b...
107
  		   (unsigned long) K(wb->write_bandwidth),
f7d2b1ecd   Jan Kara   writeback: accoun...
108
109
110
  		   nr_dirty,
  		   nr_io,
  		   nr_more_io,
0ae45f63d   Theodore Ts'o   vfs: add support ...
111
  		   nr_dirty_time,
4452226ea   Tejun Heo   writeback: move b...
112
  		   !list_empty(&bdi->bdi_list), bdi->wb.state);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
113
114
115
116
  #undef K
  
  	return 0;
  }
5ad350936   Andy Shevchenko   mm: reuse DEFINE_...
117
  DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
118

2d146b924   Greg Kroah-Hartman   backing-dev: no n...
119
  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
120
121
  {
  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
97f076979   weiping zhang   bdi: convert bdi_...
122

2d146b924   Greg Kroah-Hartman   backing-dev: no n...
123
124
  	debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
  			    &bdi_debug_stats_fops);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
125
126
127
128
  }
  
  static void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
2d146b924   Greg Kroah-Hartman   backing-dev: no n...
129
  	debugfs_remove_recursive(bdi->debug_dir);
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
130
131
132
133
134
  }
  #else
  static inline void bdi_debug_init(void)
  {
  }
2d146b924   Greg Kroah-Hartman   backing-dev: no n...
135
  static inline void bdi_debug_register(struct backing_dev_info *bdi,
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
136
137
138
139
140
141
142
  				      const char *name)
  {
  }
  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
  {
  }
  #endif
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
143
144
145
146
147
  static ssize_t read_ahead_kb_store(struct device *dev,
  				  struct device_attribute *attr,
  				  const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
148
  	unsigned long read_ahead_kb;
7034ed132   Namjae Jeon   backing-dev: use ...
149
  	ssize_t ret;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
150

7034ed132   Namjae Jeon   backing-dev: use ...
151
152
153
154
155
156
157
  	ret = kstrtoul(buf, 10, &read_ahead_kb);
  	if (ret < 0)
  		return ret;
  
  	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
  
  	return count;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
158
159
160
161
162
163
164
165
166
167
168
169
  }
  
  #define K(pages) ((pages) << (PAGE_SHIFT - 10))
  
  #define BDI_SHOW(name, expr)						\
  static ssize_t name##_show(struct device *dev,				\
  			   struct device_attribute *attr, char *page)	\
  {									\
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
  									\
  	return snprintf(page, PAGE_SIZE-1, "%lld
  ", (long long)expr);	\
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
170
171
  }									\
  static DEVICE_ATTR_RW(name);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
172
173
  
  BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
174
175
176
177
  static ssize_t min_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
178
  	unsigned int ratio;
7034ed132   Namjae Jeon   backing-dev: use ...
179
180
181
182
183
184
185
186
187
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_min_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
188

189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
189
190
191
  	return ret;
  }
  BDI_SHOW(min_ratio, bdi->min_ratio)
a42dde041   Peter Zijlstra   mm: bdi: allow se...
192
193
194
195
  static ssize_t max_ratio_store(struct device *dev,
  		struct device_attribute *attr, const char *buf, size_t count)
  {
  	struct backing_dev_info *bdi = dev_get_drvdata(dev);
a42dde041   Peter Zijlstra   mm: bdi: allow se...
196
  	unsigned int ratio;
7034ed132   Namjae Jeon   backing-dev: use ...
197
198
199
200
201
202
203
204
205
  	ssize_t ret;
  
  	ret = kstrtouint(buf, 10, &ratio);
  	if (ret < 0)
  		return ret;
  
  	ret = bdi_set_max_ratio(bdi, ratio);
  	if (!ret)
  		ret = count;
a42dde041   Peter Zijlstra   mm: bdi: allow se...
206

a42dde041   Peter Zijlstra   mm: bdi: allow se...
207
208
209
  	return ret;
  }
  BDI_SHOW(max_ratio, bdi->max_ratio)
7d311cdab   Darrick J. Wong   bdi: allow block ...
210
211
212
213
  static ssize_t stable_pages_required_show(struct device *dev,
  					  struct device_attribute *attr,
  					  char *page)
  {
1cb039f3d   Christoph Hellwig   bdi: replace BDI_...
214
215
216
217
218
  	dev_warn_once(dev,
  		"the stable_pages_required attribute has been removed. Use the stable_writes queue attribute instead.
  ");
  	return snprintf(page, PAGE_SIZE-1, "%d
  ", 0);
7d311cdab   Darrick J. Wong   bdi: allow block ...
219
  }
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
220
221
222
223
224
225
226
227
  static DEVICE_ATTR_RO(stable_pages_required);
  
  static struct attribute *bdi_dev_attrs[] = {
  	&dev_attr_read_ahead_kb.attr,
  	&dev_attr_min_ratio.attr,
  	&dev_attr_max_ratio.attr,
  	&dev_attr_stable_pages_required.attr,
  	NULL,
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
228
  };
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
229
  ATTRIBUTE_GROUPS(bdi_dev);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
230
231
232
233
  
  static __init int bdi_class_init(void)
  {
  	bdi_class = class_create(THIS_MODULE, "bdi");
144214537   Anton Blanchard   backing-dev: Hand...
234
235
  	if (IS_ERR(bdi_class))
  		return PTR_ERR(bdi_class);
d9e1241e4   Greg Kroah-Hartman   backing-dev: conv...
236
  	bdi_class->dev_groups = bdi_dev_groups;
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
237
  	bdi_debug_init();
d03f6cdc1   Jan Kara   block: Dynamicall...
238

cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
239
240
  	return 0;
  }
76f1418b4   Miklos Szeredi   mm: bdi: move sta...
241
  postcore_initcall(bdi_class_init);
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
242

2e82b84c0   Jan Kara   block: Remove unu...
243
  static int bdi_init(struct backing_dev_info *bdi);
26160158d   Jens Axboe   Move the default_...
244
245
246
  static int __init default_bdi_init(void)
  {
  	int err;
a2b90f112   Mika Westerberg   bdi: Do not use f...
247
248
  	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
  				 WQ_SYSFS, 0);
839a8e866   Tejun Heo   writeback: replac...
249
250
  	if (!bdi_wq)
  		return -ENOMEM;
976e48f8a   Jan Kara   bdi: Initialize n...
251
  	err = bdi_init(&noop_backing_dev_info);
26160158d   Jens Axboe   Move the default_...
252
253
254
255
  
  	return err;
  }
  subsys_initcall(default_bdi_init);
6467716a3   Artem Bityutskiy   writeback: optimi...
256
  /*
f0054bb1e   Tejun Heo   writeback: move b...
257
   * This function is used when the first inode for this wb is marked dirty. It
6467716a3   Artem Bityutskiy   writeback: optimi...
258
259
260
261
262
263
264
265
   * wakes-up the corresponding bdi thread which should then take care of the
   * periodic background write-out of dirty inodes. Since the write-out would
   * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
   * set up a timer which wakes the bdi thread up later.
   *
   * Note, we wouldn't bother setting up the timer, but this function is on the
   * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
   * by delaying the wake-up.
6ca738d60   Derek Basehore   backing_dev: fix ...
266
267
268
   *
   * We have to be careful not to postpone flush work if it is scheduled for
   * earlier. Thus we use queue_delayed_work().
6467716a3   Artem Bityutskiy   writeback: optimi...
269
   */
f0054bb1e   Tejun Heo   writeback: move b...
270
  void wb_wakeup_delayed(struct bdi_writeback *wb)
6467716a3   Artem Bityutskiy   writeback: optimi...
271
272
273
274
  {
  	unsigned long timeout;
  
  	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
f0054bb1e   Tejun Heo   writeback: move b...
275
276
277
278
  	spin_lock_bh(&wb->work_lock);
  	if (test_bit(WB_registered, &wb->state))
  		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
  	spin_unlock_bh(&wb->work_lock);
03ba3782e   Jens Axboe   writeback: switch...
279
  }
cfc4ba536   Jens Axboe   writeback: use RC...
280
  /*
a88a341a7   Tejun Heo   writeback: move b...
281
   * Initial write bandwidth: 100 MB/s
cfc4ba536   Jens Axboe   writeback: use RC...
282
   */
a88a341a7   Tejun Heo   writeback: move b...
283
  #define INIT_BW		(100 << (20 - PAGE_SHIFT))
cfc4ba536   Jens Axboe   writeback: use RC...
284

8395cd9f8   Tejun Heo   writeback: add @g...
285
  static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
8c911f3d4   Christoph Hellwig   writeback: remove...
286
  		   gfp_t gfp)
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
287
  {
93f78d882   Tejun Heo   writeback: move b...
288
  	int i, err;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
289

6467716a3   Artem Bityutskiy   writeback: optimi...
290
  	memset(wb, 0, sizeof(*wb));
f1d0b063d   Kay Sievers   bdi: register sys...
291

810df54a6   Jan Kara   bdi: Make wb->bdi...
292
293
  	if (wb != &bdi->wb)
  		bdi_get(bdi);
6467716a3   Artem Bityutskiy   writeback: optimi...
294
295
296
297
298
  	wb->bdi = bdi;
  	wb->last_old_flush = jiffies;
  	INIT_LIST_HEAD(&wb->b_dirty);
  	INIT_LIST_HEAD(&wb->b_io);
  	INIT_LIST_HEAD(&wb->b_more_io);
0ae45f63d   Theodore Ts'o   vfs: add support ...
299
  	INIT_LIST_HEAD(&wb->b_dirty_time);
f758eeabe   Christoph Hellwig   writeback: split ...
300
  	spin_lock_init(&wb->list_lock);
66f3b8e2e   Jens Axboe   writeback: move d...
301

a88a341a7   Tejun Heo   writeback: move b...
302
303
304
305
306
  	wb->bw_time_stamp = jiffies;
  	wb->balanced_dirty_ratelimit = INIT_BW;
  	wb->dirty_ratelimit = INIT_BW;
  	wb->write_bandwidth = INIT_BW;
  	wb->avg_write_bandwidth = INIT_BW;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
307

f0054bb1e   Tejun Heo   writeback: move b...
308
309
310
  	spin_lock_init(&wb->work_lock);
  	INIT_LIST_HEAD(&wb->work_list);
  	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
b57d74aff   Jens Axboe   writeback: track ...
311
  	wb->dirty_sleep = jiffies;
c284de61d   Artem Bityutskiy   writeback: cleanu...
312

8395cd9f8   Tejun Heo   writeback: add @g...
313
  	err = fprop_local_init_percpu(&wb->completions, gfp);
a88a341a7   Tejun Heo   writeback: move b...
314
  	if (err)
8c911f3d4   Christoph Hellwig   writeback: remove...
315
  		goto out_put_bdi;
c284de61d   Artem Bityutskiy   writeback: cleanu...
316

93f78d882   Tejun Heo   writeback: move b...
317
  	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
8395cd9f8   Tejun Heo   writeback: add @g...
318
  		err = percpu_counter_init(&wb->stat[i], 0, gfp);
a13f35e87   Tejun Heo   writeback: don't ...
319
320
  		if (err)
  			goto out_destroy_stat;
93f78d882   Tejun Heo   writeback: move b...
321
  	}
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
322

93f78d882   Tejun Heo   writeback: move b...
323
  	return 0;
a13f35e87   Tejun Heo   writeback: don't ...
324
325
  
  out_destroy_stat:
078c6c3a5   Rasmus Villemoes   mm/backing-dev.c:...
326
  	while (i--)
a13f35e87   Tejun Heo   writeback: don't ...
327
328
  		percpu_counter_destroy(&wb->stat[i]);
  	fprop_local_destroy_percpu(&wb->completions);
810df54a6   Jan Kara   bdi: Make wb->bdi...
329
330
331
  out_put_bdi:
  	if (wb != &bdi->wb)
  		bdi_put(bdi);
a13f35e87   Tejun Heo   writeback: don't ...
332
  	return err;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
333
  }
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
334

e8cb72b32   Jan Kara   bdi: Unify bdi->w...
335
  static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
03ba3782e   Jens Axboe   writeback: switch...
336
337
338
  /*
   * Remove bdi from the global list and shutdown any threads we have running
   */
461000714   Tejun Heo   writeback: reorga...
339
  static void wb_shutdown(struct bdi_writeback *wb)
66f3b8e2e   Jens Axboe   writeback: move d...
340
  {
c4db59d31   Christoph Hellwig   fs: don't reassig...
341
  	/* Make sure nobody queues further work */
461000714   Tejun Heo   writeback: reorga...
342
343
344
  	spin_lock_bh(&wb->work_lock);
  	if (!test_and_clear_bit(WB_registered, &wb->state)) {
  		spin_unlock_bh(&wb->work_lock);
03ba3782e   Jens Axboe   writeback: switch...
345
  		return;
c4db59d31   Christoph Hellwig   fs: don't reassig...
346
  	}
461000714   Tejun Heo   writeback: reorga...
347
  	spin_unlock_bh(&wb->work_lock);
03ba3782e   Jens Axboe   writeback: switch...
348

e8cb72b32   Jan Kara   bdi: Unify bdi->w...
349
  	cgwb_remove_from_bdi_list(wb);
03ba3782e   Jens Axboe   writeback: switch...
350
  	/*
461000714   Tejun Heo   writeback: reorga...
351
352
353
  	 * Drain work list and shutdown the delayed_work.  !WB_registered
  	 * tells wb_workfn() that @wb is dying and its work_list needs to
  	 * be drained no matter what.
03ba3782e   Jens Axboe   writeback: switch...
354
  	 */
461000714   Tejun Heo   writeback: reorga...
355
356
357
358
  	mod_delayed_work(bdi_wq, &wb->dwork, 0);
  	flush_delayed_work(&wb->dwork);
  	WARN_ON(!list_empty(&wb->work_list));
  }
f0054bb1e   Tejun Heo   writeback: move b...
359
  static void wb_exit(struct bdi_writeback *wb)
93f78d882   Tejun Heo   writeback: move b...
360
361
362
363
364
365
366
  {
  	int i;
  
  	WARN_ON(delayed_work_pending(&wb->dwork));
  
  	for (i = 0; i < NR_WB_STAT_ITEMS; i++)
  		percpu_counter_destroy(&wb->stat[i]);
6467716a3   Artem Bityutskiy   writeback: optimi...
367

a88a341a7   Tejun Heo   writeback: move b...
368
  	fprop_local_destroy_percpu(&wb->completions);
810df54a6   Jan Kara   bdi: Make wb->bdi...
369
370
  	if (wb != &wb->bdi->wb)
  		bdi_put(wb->bdi);
a88a341a7   Tejun Heo   writeback: move b...
371
  }
e98be2d59   Wu Fengguang   writeback: bdi wr...
372

52ebea749   Tejun Heo   writeback: make b...
373
374
375
376
377
  #ifdef CONFIG_CGROUP_WRITEBACK
  
  #include <linux/memcontrol.h>
  
  /*
8c911f3d4   Christoph Hellwig   writeback: remove...
378
379
   * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, and memcg->cgwb_list.
   * bdi->cgwb_tree is also RCU protected.
52ebea749   Tejun Heo   writeback: make b...
380
381
   */
  static DEFINE_SPINLOCK(cgwb_lock);
f18346468   Tejun Heo   bdi: Move cgroup ...
382
  static struct workqueue_struct *cgwb_release_wq;
52ebea749   Tejun Heo   writeback: make b...
383

52ebea749   Tejun Heo   writeback: make b...
384
385
386
387
  static void cgwb_release_workfn(struct work_struct *work)
  {
  	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
  						release_work);
59b57717f   Dennis Zhou (Facebook)   blkcg: delay blkg...
388
  	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
52ebea749   Tejun Heo   writeback: make b...
389

3ee7e8697   Jan Kara   bdi: Fix another ...
390
  	mutex_lock(&wb->bdi->cgwb_release_mutex);
52ebea749   Tejun Heo   writeback: make b...
391
392
393
394
  	wb_shutdown(wb);
  
  	css_put(wb->memcg_css);
  	css_put(wb->blkcg_css);
3ee7e8697   Jan Kara   bdi: Fix another ...
395
  	mutex_unlock(&wb->bdi->cgwb_release_mutex);
52ebea749   Tejun Heo   writeback: make b...
396

d866dbf61   Tejun Heo   blkcg: rename blk...
397
398
  	/* triggers blkg destruction if no online users left */
  	blkcg_unpin_online(blkcg);
59b57717f   Dennis Zhou (Facebook)   blkcg: delay blkg...
399

841710aa6   Tejun Heo   writeback: implem...
400
  	fprop_local_destroy_percpu(&wb->memcg_completions);
52ebea749   Tejun Heo   writeback: make b...
401
402
403
  	percpu_ref_exit(&wb->refcnt);
  	wb_exit(wb);
  	kfree_rcu(wb, rcu);
52ebea749   Tejun Heo   writeback: make b...
404
405
406
407
408
409
  }
  
  static void cgwb_release(struct percpu_ref *refcnt)
  {
  	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
  						refcnt);
f18346468   Tejun Heo   bdi: Move cgroup ...
410
  	queue_work(cgwb_release_wq, &wb->release_work);
52ebea749   Tejun Heo   writeback: make b...
411
412
413
414
415
416
417
418
419
420
421
  }
  
  static void cgwb_kill(struct bdi_writeback *wb)
  {
  	lockdep_assert_held(&cgwb_lock);
  
  	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
  	list_del(&wb->memcg_node);
  	list_del(&wb->blkcg_node);
  	percpu_ref_kill(&wb->refcnt);
  }
e8cb72b32   Jan Kara   bdi: Unify bdi->w...
422
423
424
425
426
427
  static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
  {
  	spin_lock_irq(&cgwb_lock);
  	list_del_rcu(&wb->bdi_node);
  	spin_unlock_irq(&cgwb_lock);
  }
52ebea749   Tejun Heo   writeback: make b...
428
429
430
431
432
433
434
435
436
437
438
439
  static int cgwb_create(struct backing_dev_info *bdi,
  		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
  {
  	struct mem_cgroup *memcg;
  	struct cgroup_subsys_state *blkcg_css;
  	struct blkcg *blkcg;
  	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
  	struct bdi_writeback *wb;
  	unsigned long flags;
  	int ret = 0;
  
  	memcg = mem_cgroup_from_css(memcg_css);
c165b3e3c   Tejun Heo   blkcg: rename sub...
440
  	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
52ebea749   Tejun Heo   writeback: make b...
441
  	blkcg = css_to_blkcg(blkcg_css);
9ccc36171   Wang Long   memcg: writeback:...
442
  	memcg_cgwb_list = &memcg->cgwb_list;
52ebea749   Tejun Heo   writeback: make b...
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
  	blkcg_cgwb_list = &blkcg->cgwb_list;
  
  	/* look up again under lock and discard on blkcg mismatch */
  	spin_lock_irqsave(&cgwb_lock, flags);
  	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
  	if (wb && wb->blkcg_css != blkcg_css) {
  		cgwb_kill(wb);
  		wb = NULL;
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	if (wb)
  		goto out_put;
  
  	/* need to create a new one */
  	wb = kmalloc(sizeof(*wb), gfp);
0b045bd1c   Christophe JAILLET   mm/backing-dev.c:...
458
459
460
461
  	if (!wb) {
  		ret = -ENOMEM;
  		goto out_put;
  	}
52ebea749   Tejun Heo   writeback: make b...
462

8c911f3d4   Christoph Hellwig   writeback: remove...
463
  	ret = wb_init(wb, bdi, gfp);
52ebea749   Tejun Heo   writeback: make b...
464
465
466
467
468
469
  	if (ret)
  		goto err_free;
  
  	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
  	if (ret)
  		goto err_wb_exit;
841710aa6   Tejun Heo   writeback: implem...
470
471
472
  	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
  	if (ret)
  		goto err_ref_exit;
52ebea749   Tejun Heo   writeback: make b...
473
474
475
476
  	wb->memcg_css = memcg_css;
  	wb->blkcg_css = blkcg_css;
  	INIT_WORK(&wb->release_work, cgwb_release_workfn);
  	set_bit(WB_registered, &wb->state);
03ba3782e   Jens Axboe   writeback: switch...
477
478
  
  	/*
52ebea749   Tejun Heo   writeback: make b...
479
480
481
482
  	 * The root wb determines the registered state of the whole bdi and
  	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
  	 * whether they're still online.  Don't link @wb if any is dead.
  	 * See wb_memcg_offline() and wb_blkcg_offline().
03ba3782e   Jens Axboe   writeback: switch...
483
  	 */
52ebea749   Tejun Heo   writeback: make b...
484
485
486
487
488
489
490
  	ret = -ENODEV;
  	spin_lock_irqsave(&cgwb_lock, flags);
  	if (test_bit(WB_registered, &bdi->wb.state) &&
  	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
  		/* we might have raced another instance of this function */
  		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
  		if (!ret) {
b817525a4   Tejun Heo   writeback: bdi_wr...
491
  			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
52ebea749   Tejun Heo   writeback: make b...
492
493
  			list_add(&wb->memcg_node, memcg_cgwb_list);
  			list_add(&wb->blkcg_node, blkcg_cgwb_list);
d866dbf61   Tejun Heo   blkcg: rename blk...
494
  			blkcg_pin_online(blkcg);
52ebea749   Tejun Heo   writeback: make b...
495
496
497
498
499
500
501
502
  			css_get(memcg_css);
  			css_get(blkcg_css);
  		}
  	}
  	spin_unlock_irqrestore(&cgwb_lock, flags);
  	if (ret) {
  		if (ret == -EEXIST)
  			ret = 0;
a13f35e87   Tejun Heo   writeback: don't ...
503
  		goto err_fprop_exit;
52ebea749   Tejun Heo   writeback: make b...
504
505
  	}
  	goto out_put;
841710aa6   Tejun Heo   writeback: implem...
506
507
  err_fprop_exit:
  	fprop_local_destroy_percpu(&wb->memcg_completions);
52ebea749   Tejun Heo   writeback: make b...
508
509
510
511
512
513
514
515
516
  err_ref_exit:
  	percpu_ref_exit(&wb->refcnt);
  err_wb_exit:
  	wb_exit(wb);
  err_free:
  	kfree(wb);
  out_put:
  	css_put(blkcg_css);
  	return ret;
66f3b8e2e   Jens Axboe   writeback: move d...
517
  }
52ebea749   Tejun Heo   writeback: make b...
518
  /**
ed288dc0d   Tejun Heo   writeback: Separa...
519
   * wb_get_lookup - get wb for a given memcg
52ebea749   Tejun Heo   writeback: make b...
520
521
   * @bdi: target bdi
   * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
52ebea749   Tejun Heo   writeback: make b...
522
   *
ed288dc0d   Tejun Heo   writeback: Separa...
523
524
   * Try to get the wb for @memcg_css on @bdi.  The returned wb has its
   * refcount incremented.
52ebea749   Tejun Heo   writeback: make b...
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
   *
   * This function uses css_get() on @memcg_css and thus expects its refcnt
   * to be positive on invocation.  IOW, rcu_read_lock() protection on
   * @memcg_css isn't enough.  try_get it before calling this function.
   *
   * A wb is keyed by its associated memcg.  As blkcg implicitly enables
   * memcg on the default hierarchy, memcg association is guaranteed to be
   * more specific (equal or descendant to the associated blkcg) and thus can
   * identify both the memcg and blkcg associations.
   *
   * Because the blkcg associated with a memcg may change as blkcg is enabled
   * and disabled closer to root in the hierarchy, each wb keeps track of
   * both the memcg and blkcg associated with it and verifies the blkcg on
   * each lookup.  On mismatch, the existing wb is discarded and a new one is
   * created.
   */
ed288dc0d   Tejun Heo   writeback: Separa...
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
  struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
  				    struct cgroup_subsys_state *memcg_css)
  {
  	struct bdi_writeback *wb;
  
  	if (!memcg_css->parent)
  		return &bdi->wb;
  
  	rcu_read_lock();
  	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
  	if (wb) {
  		struct cgroup_subsys_state *blkcg_css;
  
  		/* see whether the blkcg association has changed */
  		blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
  		if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb)))
  			wb = NULL;
  		css_put(blkcg_css);
  	}
  	rcu_read_unlock();
  
  	return wb;
  }
  
  /**
   * wb_get_create - get wb for a given memcg, create if necessary
   * @bdi: target bdi
   * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
   * @gfp: allocation mask to use
   *
   * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
   * create one.  See wb_get_lookup() for more details.
   */
52ebea749   Tejun Heo   writeback: make b...
574
575
576
  struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
  				    struct cgroup_subsys_state *memcg_css,
  				    gfp_t gfp)
6467716a3   Artem Bityutskiy   writeback: optimi...
577
  {
52ebea749   Tejun Heo   writeback: make b...
578
  	struct bdi_writeback *wb;
d0164adc8   Mel Gorman   mm, page_alloc: d...
579
  	might_sleep_if(gfpflags_allow_blocking(gfp));
52ebea749   Tejun Heo   writeback: make b...
580
581
582
583
584
  
  	if (!memcg_css->parent)
  		return &bdi->wb;
  
  	do {
ed288dc0d   Tejun Heo   writeback: Separa...
585
  		wb = wb_get_lookup(bdi, memcg_css);
52ebea749   Tejun Heo   writeback: make b...
586
587
588
589
  	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
  
  	return wb;
  }
6467716a3   Artem Bityutskiy   writeback: optimi...
590

a13f35e87   Tejun Heo   writeback: don't ...
591
  static int cgwb_bdi_init(struct backing_dev_info *bdi)
52ebea749   Tejun Heo   writeback: make b...
592
  {
a13f35e87   Tejun Heo   writeback: don't ...
593
  	int ret;
52ebea749   Tejun Heo   writeback: make b...
594
  	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
3ee7e8697   Jan Kara   bdi: Fix another ...
595
  	mutex_init(&bdi->cgwb_release_mutex);
7fc5854f8   Tejun Heo   writeback: synchr...
596
  	init_rwsem(&bdi->wb_switch_rwsem);
a13f35e87   Tejun Heo   writeback: don't ...
597

8c911f3d4   Christoph Hellwig   writeback: remove...
598
  	ret = wb_init(&bdi->wb, bdi, GFP_KERNEL);
a13f35e87   Tejun Heo   writeback: don't ...
599
  	if (!ret) {
7d828602e   Johannes Weiner   mm: memcontrol: e...
600
  		bdi->wb.memcg_css = &root_mem_cgroup->css;
a13f35e87   Tejun Heo   writeback: don't ...
601
602
603
  		bdi->wb.blkcg_css = blkcg_root_css;
  	}
  	return ret;
6467716a3   Artem Bityutskiy   writeback: optimi...
604
  }
b1c51afc0   Jan Kara   bdi: Rename cgwb_...
605
/*
 * Shut down all cgroup wb's of @bdi as part of bdi unregistration.
 * First kills every wb in the radix tree (which also prevents further
 * lookups from creating new ones), then synchronously shuts down every
 * wb still on ->wb_list.
 */
static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
{
	struct radix_tree_iter iter;
	void **slot;
	struct bdi_writeback *wb;

	WARN_ON(test_bit(WB_registered, &bdi->wb.state));

	spin_lock_irq(&cgwb_lock);
	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
		cgwb_kill(*slot);
	spin_unlock_irq(&cgwb_lock);

	/*
	 * NOTE(review): ->cgwb_release_mutex presumably serializes against
	 * the async wb release path so entries can't vanish while we walk
	 * ->wb_list below — confirm against cgwb_release_workfn().
	 */
	mutex_lock(&bdi->cgwb_release_mutex);
	spin_lock_irq(&cgwb_lock);
	while (!list_empty(&bdi->wb_list)) {
		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
				      bdi_node);
		/*
		 * wb_shutdown() can sleep, so the spinlock is dropped around
		 * it and the list is re-checked from the top each iteration.
		 */
		spin_unlock_irq(&cgwb_lock);
		wb_shutdown(wb);
		spin_lock_irq(&cgwb_lock);
	}
	spin_unlock_irq(&cgwb_lock);
	mutex_unlock(&bdi->cgwb_release_mutex);
}
  
  /**
   * wb_memcg_offline - kill all wb's associated with a memcg being offlined
   * @memcg: memcg being offlined
   *
   * Also prevents creation of any new wb's associated with @memcg.
e98be2d59   Wu Fengguang   writeback: bdi wr...
636
   */
52ebea749   Tejun Heo   writeback: make b...
637
638
  void wb_memcg_offline(struct mem_cgroup *memcg)
  {
9ccc36171   Wang Long   memcg: writeback:...
639
  	struct list_head *memcg_cgwb_list = &memcg->cgwb_list;
52ebea749   Tejun Heo   writeback: make b...
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
  		cgwb_kill(wb);
  	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
  
  /**
   * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
   * @blkcg: blkcg being offlined
   *
   * Also prevents creation of any new wb's associated with @blkcg.
   */
  void wb_blkcg_offline(struct blkcg *blkcg)
  {
52ebea749   Tejun Heo   writeback: make b...
657
658
659
660
661
662
663
664
  	struct bdi_writeback *wb, *next;
  
  	spin_lock_irq(&cgwb_lock);
  	list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
  		cgwb_kill(wb);
  	blkcg->cgwb_list.next = NULL;	/* prevent new wb's */
  	spin_unlock_irq(&cgwb_lock);
  }
e8cb72b32   Jan Kara   bdi: Unify bdi->w...
665
666
667
668
669
670
/* Add the bdi-embedded root wb to the bdi's wb_list under cgwb_lock. */
static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	spin_lock_irq(&cgwb_lock);
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
	spin_unlock_irq(&cgwb_lock);
}
f18346468   Tejun Heo   bdi: Move cgroup ...
671
672
673
674
675
676
677
678
679
680
681
682
683
684
  static int __init cgwb_init(void)
  {
  	/*
  	 * There can be many concurrent release work items overwhelming
  	 * system_wq.  Put them in a separate wq and limit concurrency.
  	 * There's no point in executing many of these in parallel.
  	 */
  	cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1);
  	if (!cgwb_release_wq)
  		return -ENOMEM;
  
  	return 0;
  }
  subsys_initcall(cgwb_init);
52ebea749   Tejun Heo   writeback: make b...
685
  #else	/* CONFIG_CGROUP_WRITEBACK */
a13f35e87   Tejun Heo   writeback: don't ...
686
687
/* Without cgroup writeback, only the embedded root wb needs setup. */
static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
	return wb_init(&bdi->wb, bdi, GFP_KERNEL);
}
b1c51afc0   Jan Kara   bdi: Rename cgwb_...
690
  static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }
df23de556   Jan Kara   bdi: Fix use-afte...
691

e8cb72b32   Jan Kara   bdi: Unify bdi->w...
692
693
694
695
696
697
698
699
700
/*
 * Add the root wb to the bdi's wb_list.  No locking here, unlike the
 * CONFIG_CGROUP_WRITEBACK variant which takes cgwb_lock.
 */
static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
	list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
}
  
/* RCU-safe removal of @wb from its bdi's wb_list. */
static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
{
	list_del_rcu(&wb->bdi_node);
}
52ebea749   Tejun Heo   writeback: make b...
701
  #endif	/* CONFIG_CGROUP_WRITEBACK */
e98be2d59   Wu Fengguang   writeback: bdi wr...
702

2e82b84c0   Jan Kara   block: Remove unu...
703
  static int bdi_init(struct backing_dev_info *bdi)
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
704
  {
b817525a4   Tejun Heo   writeback: bdi_wr...
705
  	int ret;
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
706
  	bdi->dev = NULL;
d03f6cdc1   Jan Kara   block: Dynamicall...
707
  	kref_init(&bdi->refcnt);
189d3c4a9   Peter Zijlstra   mm: bdi: allow se...
708
  	bdi->min_ratio = 0;
a42dde041   Peter Zijlstra   mm: bdi: allow se...
709
  	bdi->max_ratio = 100;
eb608e3a3   Jan Kara   block: Convert BD...
710
  	bdi->max_prop_frac = FPROP_FRAC_BASE;
66f3b8e2e   Jens Axboe   writeback: move d...
711
  	INIT_LIST_HEAD(&bdi->bdi_list);
b817525a4   Tejun Heo   writeback: bdi_wr...
712
  	INIT_LIST_HEAD(&bdi->wb_list);
cc395d7f1   Tejun Heo   writeback: implem...
713
  	init_waitqueue_head(&bdi->wb_waitq);
03ba3782e   Jens Axboe   writeback: switch...
714

b817525a4   Tejun Heo   writeback: bdi_wr...
715
  	ret = cgwb_bdi_init(bdi);
b817525a4   Tejun Heo   writeback: bdi_wr...
716
  	return ret;
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
717
  }
e98be2d59   Wu Fengguang   writeback: bdi wr...
718

aef33c2ff   Christoph Hellwig   bdi: simplify bdi...
719
  struct backing_dev_info *bdi_alloc(int node_id)
d03f6cdc1   Jan Kara   block: Dynamicall...
720
721
  {
  	struct backing_dev_info *bdi;
aef33c2ff   Christoph Hellwig   bdi: simplify bdi...
722
  	bdi = kzalloc_node(sizeof(*bdi), GFP_KERNEL, node_id);
d03f6cdc1   Jan Kara   block: Dynamicall...
723
724
725
726
727
728
729
  	if (!bdi)
  		return NULL;
  
  	if (bdi_init(bdi)) {
  		kfree(bdi);
  		return NULL;
  	}
f56753ac2   Christoph Hellwig   bdi: replace BDI_...
730
  	bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
55b2598e8   Christoph Hellwig   bdi: initialize -...
731
732
  	bdi->ra_pages = VM_READAHEAD_PAGES;
  	bdi->io_pages = VM_READAHEAD_PAGES;
d03f6cdc1   Jan Kara   block: Dynamicall...
733
734
  	return bdi;
  }
aef33c2ff   Christoph Hellwig   bdi: simplify bdi...
735
  EXPORT_SYMBOL(bdi_alloc);
d03f6cdc1   Jan Kara   block: Dynamicall...
736

34f8fe501   Tejun Heo   bdi: Add bdi->id
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
  static struct rb_node **bdi_lookup_rb_node(u64 id, struct rb_node **parentp)
  {
  	struct rb_node **p = &bdi_tree.rb_node;
  	struct rb_node *parent = NULL;
  	struct backing_dev_info *bdi;
  
  	lockdep_assert_held(&bdi_lock);
  
  	while (*p) {
  		parent = *p;
  		bdi = rb_entry(parent, struct backing_dev_info, rb_node);
  
  		if (bdi->id > id)
  			p = &(*p)->rb_left;
  		else if (bdi->id < id)
  			p = &(*p)->rb_right;
  		else
  			break;
  	}
  
  	if (parentp)
  		*parentp = parent;
  	return p;
  }
  
/**
 * bdi_get_by_id - lookup and get bdi from its id
 * @id: bdi id to lookup
 *
 * Find bdi matching @id and get it.  Returns NULL if the matching bdi
 * doesn't exist or is already unregistered.
 */
struct backing_dev_info *bdi_get_by_id(u64 id)
{
	struct backing_dev_info *bdi = NULL;
	struct rb_node **p;

	/* bdi_lock keeps the tree stable while we look up and take a ref */
	spin_lock_bh(&bdi_lock);
	p = bdi_lookup_rb_node(id, NULL);
	if (*p) {
		bdi = rb_entry(*p, struct backing_dev_info, rb_node);
		bdi_get(bdi);
	}
	spin_unlock_bh(&bdi_lock);

	return bdi;
}
7c4cc3002   Jan Kara   bdi: Drop 'parent...
784
/*
 * Register @bdi under a device name formatted from @fmt/@args: create
 * the sysfs device, hook up cgroup writeback and debugfs, assign a
 * unique id and insert the bdi into bdi_tree/bdi_list.
 *
 * Returns 0 on success (including when already registered) or the
 * PTR_ERR from device_create() on failure.
 */
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
	struct device *dev;
	struct rb_node *parent, **p;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
	dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	cgwb_bdi_register(bdi);
	bdi->dev = dev;

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(WB_registered, &bdi->wb.state);

	spin_lock_bh(&bdi_lock);

	/* ids are handed out monotonically; cursor is protected by bdi_lock */
	bdi->id = ++bdi_id_cursor;

	p = bdi_lookup_rb_node(bdi->id, &parent);
	rb_link_node(&bdi->rb_node, parent, p);
	rb_insert_color(&bdi->rb_node, &bdi_tree);

	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);

	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
baf7a616d   Jan Kara   bdi: Provide bdi_...
817

7c4cc3002   Jan Kara   bdi: Drop 'parent...
818
/* printf-style convenience wrapper around bdi_register_va(). */
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = bdi_register_va(bdi, fmt, args);
	va_end(args);
	return ret;
}
EXPORT_SYMBOL(bdi_register);
b2e8fb6ef   Peter Zijlstra   mm: scalable bdi ...
829

3c5d202b5   Christoph Hellwig   bdi: remove bdi_r...
830
/*
 * Record @owner as the device owning @bdi, pinning it with a device
 * reference that bdi_unregister() drops.  May only be set once.
 */
void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner)
{
	WARN_ON_ONCE(bdi->owner);
	bdi->owner = owner;
	get_device(owner);
}
df08c32ce   Dan Williams   block: fix bdi vs...
836

461000714   Tejun Heo   writeback: reorga...
837
838
839
840
841
842
/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	rb_erase(&bdi->rb_node, &bdi_tree);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	/* wait out RCU readers traversing bdi_list before we return */
	synchronize_rcu_expedited();
}
cf0ca9fe5   Peter Zijlstra   mm: bdi: export B...
849

b02176f30   Tejun Heo   block: don't rele...
850
/*
 * Tear down a registered bdi: hide it from lookups, shut down its
 * writeback machinery, and release the sysfs device and owner ref.
 * Safe to call on an already-unregistered bdi (->dev / ->owner are
 * NULL-checked).
 */
void bdi_unregister(struct backing_dev_info *bdi)
{
	/* make sure nobody finds us on the bdi_list anymore */
	bdi_remove_from_list(bdi);
	wb_shutdown(&bdi->wb);
	cgwb_bdi_unregister(bdi);

	if (bdi->dev) {
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}

	if (bdi->owner) {
		/* drop the ref taken in bdi_set_owner() */
		put_device(bdi->owner);
		bdi->owner = NULL;
	}
}
c4db59d31   Christoph Hellwig   fs: don't reassig...
868

d03f6cdc1   Jan Kara   block: Dynamicall...
869
870
871
872
/*
 * kref release callback: unregister (if the caller never did) and free
 * the bdi once the last reference is dropped via bdi_put().
 */
static void release_bdi(struct kref *ref)
{
	struct backing_dev_info *bdi =
			container_of(ref, struct backing_dev_info, refcnt);

	if (test_bit(WB_registered, &bdi->wb.state))
		bdi_unregister(bdi);
	WARN_ON_ONCE(bdi->dev);
	wb_exit(&bdi->wb);
	kfree(bdi);
}
  
/* Drop a reference on @bdi; frees it via release_bdi() at zero. */
void bdi_put(struct backing_dev_info *bdi)
{
	kref_put(&bdi->refcnt, release_bdi);
}
EXPORT_SYMBOL(bdi_put);
d03f6cdc1   Jan Kara   block: Dynamicall...
885

eb7ae5e06   Christoph Hellwig   bdi: move bdi_dev...
886
887
888
889
  const char *bdi_dev_name(struct backing_dev_info *bdi)
  {
  	if (!bdi || !bdi->dev)
  		return bdi_unknown_name;
6bd87eec2   Christoph Hellwig   bdi: add a ->dev_...
890
  	return bdi->dev_name;
eb7ae5e06   Christoph Hellwig   bdi: move bdi_dev...
891
892
  }
  EXPORT_SYMBOL_GPL(bdi_dev_name);
3fcfab16c   Andrew Morton   [PATCH] separate ...
893
894
895
896
/*
 * Waitqueues for tasks throttling on congestion, indexed by the @sync
 * flag passed to set/clear_bdi_congested().
 */
static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};
/* Count of currently congested wb's per @sync class. */
static atomic_t nr_wb_congested[2];
3fcfab16c   Andrew Morton   [PATCH] separate ...
898

492d76b21   Christoph Hellwig   writeback: remove...
899
/*
 * Clear the @sync congestion state on @bdi's root wb and wake anyone
 * sleeping in congestion_wait()/wait_iff_congested().
 */
void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	wait_queue_head_t *wqh = &congestion_wqh[sync];
	enum wb_congested_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (test_and_clear_bit(bit, &bdi->wb.congested))
		atomic_dec(&nr_wb_congested[sync]);
	/*
	 * Order the bit clear/atomic_dec against the waitqueue_active()
	 * check so a sleeper's prepare_to_wait() can't be missed.
	 */
	smp_mb__after_atomic();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_bdi_congested);
3fcfab16c   Andrew Morton   [PATCH] separate ...
912

492d76b21   Christoph Hellwig   writeback: remove...
913
  void set_bdi_congested(struct backing_dev_info *bdi, int sync)
3fcfab16c   Andrew Morton   [PATCH] separate ...
914
  {
c877ef8ae   Kaixu Xia   writeback: fix th...
915
  	enum wb_congested_state bit;
3fcfab16c   Andrew Morton   [PATCH] separate ...
916

4452226ea   Tejun Heo   writeback: move b...
917
  	bit = sync ? WB_sync_congested : WB_async_congested;
8c911f3d4   Christoph Hellwig   writeback: remove...
918
  	if (!test_and_set_bit(bit, &bdi->wb.congested))
ec8a6f264   Tejun Heo   writeback: make c...
919
  		atomic_inc(&nr_wb_congested[sync]);
3fcfab16c   Andrew Morton   [PATCH] separate ...
920
  }
492d76b21   Christoph Hellwig   writeback: remove...
921
  EXPORT_SYMBOL(set_bdi_congested);
3fcfab16c   Andrew Morton   [PATCH] separate ...
922
923
924
  
/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/* sleep until clear_bdi_congested() wakes us or @timeout expires */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(congestion_wait);
04fbfdc14   Peter Zijlstra   mm: per device di...
948

0e093d997   Mel Gorman   writeback: do not...
949
/**
 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * In the event of a congested backing_dev (any backing_dev) this waits
 * for up to @timeout jiffies for either a BDI to exit congestion of the
 * given @sync queue or a write to complete.
 *
 * The return value is 0 if the sleep is for the full timeout. Otherwise,
 * it is the number of jiffies that were still remaining when the function
 * returned. return_value == timeout implies the function did not sleep.
 */
long wait_iff_congested(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * If there is no congestion, yield if necessary instead
	 * of sleeping on the congestion queue
	 */
	if (atomic_read(&nr_wb_congested[sync]) == 0) {
		cond_resched();

		/* In case we scheduled, work out time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;

		goto out;
	}

	/* Sleep until uncongested or a write happens */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

out:
	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(wait_iff_congested);