Blame view

mm/vmpressure.c 13.8 KB
d2912cb15   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
70ddf637e   Anton Vorontsov   memcg: add memory...
2
3
4
5
6
7
8
9
  /*
   * Linux VM pressure
   *
   * Copyright 2012 Linaro Ltd.
   *		  Anton Vorontsov <anton.vorontsov@linaro.org>
   *
   * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro,
   * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg.
70ddf637e   Anton Vorontsov   memcg: add memory...
10
11
12
13
14
15
16
17
18
   */
  
  #include <linux/cgroup.h>
  #include <linux/fs.h>
  #include <linux/log2.h>
  #include <linux/sched.h>
  #include <linux/mm.h>
  #include <linux/vmstat.h>
  #include <linux/eventfd.h>
1ff6bbfd1   Tejun Heo   arm, pm, vmpressu...
19
  #include <linux/slab.h>
70ddf637e   Anton Vorontsov   memcg: add memory...
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  #include <linux/swap.h>
  #include <linux/printk.h>
  #include <linux/vmpressure.h>
  
  /*
   * The window size (vmpressure_win) is the number of scanned pages before
   * we try to analyze the scanned/reclaimed ratio. So the window is used as
   * a rate-limit tunable for the "low" level notification, and also for
   * averaging the ratio for medium/critical levels. Using small window
   * sizes can cause a lot of false positives, but too big a window size
   * will delay the notifications.
   *
   * As the vmscan reclaimer logic works with chunks which are multiples of
   * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well.
   *
   * TODO: Make the window size depend on machine size, as we do for vmstat
   * thresholds. Currently we set it to 512 pages (2MB for 4KB pages).
   */
  static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
  
  /*
   * These thresholds are used when we account memory pressure through the
   * scanned/reclaimed ratio. The current values were chosen empirically. In
   * essence, they are percentages: the higher the value, the more
   * unsuccessful reclaims there were.
   */
  static const unsigned int vmpressure_level_med = 60;
  static const unsigned int vmpressure_level_critical = 95;
  
  /*
   * When there are too few pages left to scan, vmpressure() may miss the
   * critical pressure as the number of pages will be less than the "window
   * size". However, in that case the vmscan priority will escalate quickly
   * as the reclaimer will try to scan LRUs more deeply.
   *
   * The vmscan logic considers these special priorities:
   *
   * prio == DEF_PRIORITY (12): reclaimer starts with that value
   * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed
   * prio == 0                : close to OOM, kernel scans every page in an lru
   *
   * Any value in this range is acceptable for this tunable (i.e. from 12 to
   * 0). The current value of vmpressure_level_critical_prio is chosen
   * empirically, but the number, in essence, means that we consider the
   * critical level reached when the scanning depth is ~10% of the lru size
   * (vmscan scans 'lru_size >> prio' pages, so it is actually 12.5%, or one
   * eighth).
   */
  static const unsigned int vmpressure_level_critical_prio = ilog2(100 / 10);
  
  /*
   * Map a work item back to the struct vmpressure that embeds it as its
   * ->work member.
   */
  static struct vmpressure *work_to_vmpressure(struct work_struct *work)
  {
  	return container_of(work, struct vmpressure, work);
  }
70ddf637e   Anton Vorontsov   memcg: add memory...
74
75
  /*
   * Step one level up the memory cgroup hierarchy: return the vmpressure
   * that belongs to the parent of @vmpr's memcg, or NULL when
   * parent_mem_cgroup() reports that there is no parent.
   */
  static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
  {
  	struct mem_cgroup *parent;

  	parent = parent_mem_cgroup(vmpressure_to_memcg(vmpr));
  	return parent ? memcg_to_vmpressure(parent) : NULL;
  }
  
  /*
   * Pressure levels, listed from least to most severe. The numeric order
   * matters: vmpressure_event() compares levels with '<', and the values
   * are used as indices into vmpressure_str_levels[].
   * VMPRESSURE_NUM_LEVELS is the number of real levels, not a level itself.
   */
  enum vmpressure_levels {
  	VMPRESSURE_LOW = 0,
  	VMPRESSURE_MEDIUM,
  	VMPRESSURE_CRITICAL,
  	VMPRESSURE_NUM_LEVELS,
  };
b6bb98114   David Rientjes   mm, vmpressure: p...
90
91
92
93
94
95
  /*
   * Delivery modes a listener can register with; see the filtering done in
   * vmpressure_event():
   *
   * VMPRESSURE_NO_PASSTHROUGH: skipped once a vmpressure visited earlier in
   *                            the upward walk has already signalled.
   * VMPRESSURE_HIERARCHY:      never filtered out because of its mode.
   * VMPRESSURE_LOCAL:          skipped when the event is being delivered to
   *                            an ancestor of the originating vmpressure.
   *
   * The values index vmpressure_str_modes[]; VMPRESSURE_NUM_MODES is the
   * number of real modes.
   */
  enum vmpressure_modes {
  	VMPRESSURE_NO_PASSTHROUGH = 0,
  	VMPRESSURE_HIERARCHY,
  	VMPRESSURE_LOCAL,
  	VMPRESSURE_NUM_MODES,
  };
70ddf637e   Anton Vorontsov   memcg: add memory...
96
97
98
99
100
  /*
   * Names of the pressure levels, indexed by enum vmpressure_levels.
   * vmpressure_register_event() matches the first comma-separated token of
   * its @args against this table.
   */
  static const char * const vmpressure_str_levels[] = {
  	[VMPRESSURE_LOW] = "low",
  	[VMPRESSURE_MEDIUM] = "medium",
  	[VMPRESSURE_CRITICAL] = "critical",
  };
b6bb98114   David Rientjes   mm, vmpressure: p...
101
102
103
104
105
  /*
   * Names of the delivery modes, indexed by enum vmpressure_modes.
   * vmpressure_register_event() matches the optional second token of its
   * @args against this table.
   */
  static const char * const vmpressure_str_modes[] = {
  	[VMPRESSURE_NO_PASSTHROUGH] = "default",
  	[VMPRESSURE_HIERARCHY] = "hierarchy",
  	[VMPRESSURE_LOCAL] = "local",
  };
70ddf637e   Anton Vorontsov   memcg: add memory...
106
107
108
109
110
111
112
113
114
115
116
117
118
  static enum vmpressure_levels vmpressure_level(unsigned long pressure)
  {
  	if (pressure >= vmpressure_level_critical)
  		return VMPRESSURE_CRITICAL;
  	else if (pressure >= vmpressure_level_med)
  		return VMPRESSURE_MEDIUM;
  	return VMPRESSURE_LOW;
  }
  
  /*
   * Turn a scanned/reclaimed sample into a pressure level.
   *
   * The pressure index is the percentage of scanned pages that were not
   * reclaimed. It stays 0 when @reclaimed >= @scanned, which also covers
   * @scanned == 0, so the division below never sees a zero divisor.
   */
  static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
  						    unsigned long reclaimed)
  {
  	unsigned long scale = scanned + reclaimed;
  	unsigned long pressure = 0;

  	/*
  	 * reclaimed can be greater than scanned for things such as reclaimed
  	 * slab pages. shrink_node() just adds reclaimed pages without a
  	 * related increment to scanned pages.
  	 */
  	if (reclaimed >= scanned)
  		goto out;
  	/*
  	 * We calculate the ratio (in percents) of how many pages were
  	 * scanned vs. reclaimed in a given time frame (window). Note that
  	 * time is in VM reclaimer's "ticks", i.e. number of pages
  	 * scanned. This makes it possible to set desired reaction time
  	 * and serves as a ratelimit.
  	 */
  	pressure = scale - (reclaimed * scale / scanned);
  	pressure = pressure * 100 / scale;

  out:
  	/* The format string must end in an escaped "\n", not a raw line break. */
  	pr_debug("%s: %3lu  (s: %lu  r: %lu)\n", __func__, pressure,
  		 scanned, reclaimed);

  	return vmpressure_level(pressure);
  }
  
  struct vmpressure_event {
  	struct eventfd_ctx *efd;
  	enum vmpressure_levels level;
b6bb98114   David Rientjes   mm, vmpressure: p...
148
  	enum vmpressure_modes mode;
70ddf637e   Anton Vorontsov   memcg: add memory...
149
150
151
152
  	struct list_head node;
  };
  
  /*
   * Signal every listener registered on @vmpr that wants an event of @level.
   *
   * @ancestor:  true when @vmpr is an ancestor of the vmpressure the event
   *             originated from; VMPRESSURE_LOCAL listeners are then skipped.
   * @signalled: true when a vmpressure visited earlier already signalled;
   *             VMPRESSURE_NO_PASSTHROUGH listeners are then skipped.
   *
   * Listeners whose threshold is above @level are skipped as well. The list
   * is walked with ->events_lock held.
   *
   * Returns true if at least one eventfd was signalled.
   */
  static bool vmpressure_event(struct vmpressure *vmpr,
  			     const enum vmpressure_levels level,
  			     bool ancestor, bool signalled)
  {
  	struct vmpressure_event *ev;
  	bool notified = false;

  	mutex_lock(&vmpr->events_lock);
  	list_for_each_entry(ev, &vmpr->events, node) {
  		bool local_only = ancestor && ev->mode == VMPRESSURE_LOCAL;
  		bool already_told = signalled &&
  				    ev->mode == VMPRESSURE_NO_PASSTHROUGH;

  		if (local_only || already_told || level < ev->level)
  			continue;

  		eventfd_signal(ev->efd, 1);
  		notified = true;
  	}
  	mutex_unlock(&vmpr->events_lock);

  	return notified;
  }
  
  static void vmpressure_work_fn(struct work_struct *work)
  {
  	struct vmpressure *vmpr = work_to_vmpressure(work);
  	unsigned long scanned;
  	unsigned long reclaimed;
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
179
  	enum vmpressure_levels level;
b6bb98114   David Rientjes   mm, vmpressure: p...
180
181
  	bool ancestor = false;
  	bool signalled = false;
70ddf637e   Anton Vorontsov   memcg: add memory...
182

91b57191c   Andrew Morton   mm/vmpressure.c: ...
183
  	spin_lock(&vmpr->sr_lock);
70ddf637e   Anton Vorontsov   memcg: add memory...
184
185
186
187
188
189
190
191
  	/*
  	 * Several contexts might be calling vmpressure(), so it is
  	 * possible that the work was rescheduled again before the old
  	 * work context cleared the counters. In that case we will run
  	 * just after the old work returns, but then scanned might be zero
  	 * here. No need for any locks here since we don't care if
  	 * vmpr->reclaimed is in sync.
  	 */
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
192
  	scanned = vmpr->tree_scanned;
91b57191c   Andrew Morton   mm/vmpressure.c: ...
193
194
  	if (!scanned) {
  		spin_unlock(&vmpr->sr_lock);
70ddf637e   Anton Vorontsov   memcg: add memory...
195
  		return;
91b57191c   Andrew Morton   mm/vmpressure.c: ...
196
  	}
70ddf637e   Anton Vorontsov   memcg: add memory...
197

8e8ae6452   Johannes Weiner   mm: memcontrol: h...
198
199
200
  	reclaimed = vmpr->tree_reclaimed;
  	vmpr->tree_scanned = 0;
  	vmpr->tree_reclaimed = 0;
22f2020f8   Michal Hocko   vmpressure: chang...
201
  	spin_unlock(&vmpr->sr_lock);
70ddf637e   Anton Vorontsov   memcg: add memory...
202

8e8ae6452   Johannes Weiner   mm: memcontrol: h...
203
  	level = vmpressure_calc_level(scanned, reclaimed);
70ddf637e   Anton Vorontsov   memcg: add memory...
204
  	do {
b6bb98114   David Rientjes   mm, vmpressure: p...
205
206
207
  		if (vmpressure_event(vmpr, level, ancestor, signalled))
  			signalled = true;
  		ancestor = true;
70ddf637e   Anton Vorontsov   memcg: add memory...
208
209
210
211
212
213
214
  	} while ((vmpr = vmpressure_parent(vmpr)));
  }
  
  /**
   * vmpressure() - Account memory pressure through scanned/reclaimed ratio
   * @gfp:	reclaimer's gfp mask
   * @memcg:	cgroup memory controller handle
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
215
   * @tree:	legacy subtree mode
70ddf637e   Anton Vorontsov   memcg: add memory...
216
217
218
219
220
221
222
   * @scanned:	number of pages scanned
   * @reclaimed:	number of pages reclaimed
   *
   * This function should be called from the vmscan reclaim path to account
   * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
   * pressure index is then further refined and averaged over time.
   *
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
223
224
225
226
227
228
229
   * If @tree is set, vmpressure is in traditional userspace reporting
   * mode: @memcg is considered the pressure root and userspace is
   * notified of the entire subtree's reclaim efficiency.
   *
   * If @tree is not set, reclaim efficiency is recorded for @memcg, and
   * only in-kernel users are notified.
   *
70ddf637e   Anton Vorontsov   memcg: add memory...
230
231
   * This function does not return any value.
   */
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
232
  void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
70ddf637e   Anton Vorontsov   memcg: add memory...
233
234
  		unsigned long scanned, unsigned long reclaimed)
  {
56cab2859   Suren Baghdasaryan   mm, memcg: add me...
235
236
237
238
239
240
  	struct vmpressure *vmpr;
  
  	if (mem_cgroup_disabled())
  		return;
  
  	vmpr = memcg_to_vmpressure(memcg);
70ddf637e   Anton Vorontsov   memcg: add memory...
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
  
  	/*
  	 * Here we only want to account pressure that userland is able to
  	 * help us with. For example, suppose that DMA zone is under
  	 * pressure; if we notify userland about that kind of pressure,
  	 * then it will be mostly a waste as it will trigger unnecessary
  	 * freeing of memory by userland (since userland is more likely to
  	 * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That
  	 * is why we include only movable, highmem and FS/IO pages.
  	 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
  	 * we account it too.
  	 */
  	if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
  		return;
  
  	/*
  	 * If we got here with no pages scanned, then that is an indicator
  	 * that reclaimer was unable to find any shrinkable LRUs at the
  	 * current scanning depth. But it does not mean that we should
  	 * report the critical pressure, yet. If the scanning priority
  	 * (scanning depth) goes too high (deep), we will be notified
  	 * through vmpressure_prio(). But so far, keep calm.
  	 */
  	if (!scanned)
  		return;
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
266
267
  	if (tree) {
  		spin_lock(&vmpr->sr_lock);
3c1da7bee   Vladimir Davydov   mm/vmpressure.c: ...
268
  		scanned = vmpr->tree_scanned += scanned;
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
269
  		vmpr->tree_reclaimed += reclaimed;
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
270
  		spin_unlock(&vmpr->sr_lock);
70ddf637e   Anton Vorontsov   memcg: add memory...
271

8e8ae6452   Johannes Weiner   mm: memcontrol: h...
272
273
274
275
276
277
278
  		if (scanned < vmpressure_win)
  			return;
  		schedule_work(&vmpr->work);
  	} else {
  		enum vmpressure_levels level;
  
  		/* For now, no users for root-level efficiency */
d8a1c03ff   Yang Shi   mm: vmpressure: u...
279
  		if (!memcg || mem_cgroup_is_root(memcg))
8e8ae6452   Johannes Weiner   mm: memcontrol: h...
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
  			return;
  
  		spin_lock(&vmpr->sr_lock);
  		scanned = vmpr->scanned += scanned;
  		reclaimed = vmpr->reclaimed += reclaimed;
  		if (scanned < vmpressure_win) {
  			spin_unlock(&vmpr->sr_lock);
  			return;
  		}
  		vmpr->scanned = vmpr->reclaimed = 0;
  		spin_unlock(&vmpr->sr_lock);
  
  		level = vmpressure_calc_level(scanned, reclaimed);
  
  		if (level > VMPRESSURE_LOW) {
  			/*
  			 * Let the socket buffer allocator know that
  			 * we are having trouble reclaiming LRU pages.
  			 *
  			 * For hysteresis keep the pressure state
  			 * asserted for a second in which subsequent
  			 * pressure events can occur.
  			 */
  			memcg->socket_pressure = jiffies + HZ;
  		}
  	}
70ddf637e   Anton Vorontsov   memcg: add memory...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
  }
  
  /**
   * vmpressure_prio() - Account memory pressure through reclaimer priority level
   * @gfp:	reclaimer's gfp mask
   * @memcg:	cgroup memory controller handle
   * @prio:	reclaimer's priority
   *
   * To be called from the reclaim path whenever the vmscan reclaiming
   * priority (scanning depth) changes.
   *
   * This function does not return any value.
   */
  void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
  {
  	/*
  	 * prio is only used for accounting the critical level; see the
  	 * comment above vmpressure_level_critical_prio. Once prio is at or
  	 * below that threshold, update the vmpressure information before
  	 * the shrinker dives into a long, deep scan: feeding a full window
  	 * (scanned = vmpressure_win, reclaimed = 0) to vmpressure()
  	 * basically means that we signal the 'critical' level.
  	 */
  	if (prio <= vmpressure_level_critical_prio)
  		vmpressure(gfp, memcg, true, vmpressure_win, 0);
  }
b6bb98114   David Rientjes   mm, vmpressure: p...
337
  /*
   * Upper bound on how many characters of @args
   * vmpressure_register_event() copies: the longest level name, the longest
   * mode name, plus two more characters (one of which is needed for the ','
   * separator).
   */
  #define MAX_VMPRESSURE_ARGS_LEN	(strlen("critical") + strlen("hierarchy") + 2)
70ddf637e   Anton Vorontsov   memcg: add memory...
338
339
  /**
   * vmpressure_register_event() - Bind vmpressure notifications to an eventfd
59b6f8734   Tejun Heo   memcg: make cgrou...
340
   * @memcg:	memcg that is interested in vmpressure notifications
70ddf637e   Anton Vorontsov   memcg: add memory...
341
   * @eventfd:	eventfd context to link notifications with
b6bb98114   David Rientjes   mm, vmpressure: p...
342
   * @args:	event arguments (pressure level threshold, optional mode)
70ddf637e   Anton Vorontsov   memcg: add memory...
343
344
345
   *
   * This function associates eventfd context with the vmpressure
   * infrastructure, so that the notifications will be delivered to the
b6bb98114   David Rientjes   mm, vmpressure: p...
346
347
348
349
   * @eventfd. The @args parameter is a comma-delimited string that denotes a
   * pressure level threshold (one of vmpressure_str_levels, i.e. "low", "medium",
   * or "critical") and an optional mode (one of vmpressure_str_modes, i.e.
   * "hierarchy" or "local").
70ddf637e   Anton Vorontsov   memcg: add memory...
350
   *
347c4a874   Tejun Heo   memcg: remove cgr...
351
   * To be used as memcg event method.
518a86713   Dan Carpenter   mm/vmpressure.c: ...
352
353
354
   *
   * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could
   * not be parsed.
70ddf637e   Anton Vorontsov   memcg: add memory...
355
   */
59b6f8734   Tejun Heo   memcg: make cgrou...
356
  int vmpressure_register_event(struct mem_cgroup *memcg,
347c4a874   Tejun Heo   memcg: remove cgr...
357
  			      struct eventfd_ctx *eventfd, const char *args)
70ddf637e   Anton Vorontsov   memcg: add memory...
358
  {
59b6f8734   Tejun Heo   memcg: make cgrou...
359
  	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
70ddf637e   Anton Vorontsov   memcg: add memory...
360
  	struct vmpressure_event *ev;
b6bb98114   David Rientjes   mm, vmpressure: p...
361
  	enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
518a86713   Dan Carpenter   mm/vmpressure.c: ...
362
  	enum vmpressure_levels level;
b6bb98114   David Rientjes   mm, vmpressure: p...
363
364
365
  	char *spec, *spec_orig;
  	char *token;
  	int ret = 0;
d62ff365b   Andy Shevchenko   mm/vmpressure.c: ...
366
  	spec_orig = spec = kstrndup(args, MAX_VMPRESSURE_ARGS_LEN, GFP_KERNEL);
565dc8423   Yang Shi   mm: vmpressure: d...
367
368
  	if (!spec)
  		return -ENOMEM;
70ddf637e   Anton Vorontsov   memcg: add memory...
369

b6bb98114   David Rientjes   mm, vmpressure: p...
370
371
  	/* Find required level */
  	token = strsep(&spec, ",");
518a86713   Dan Carpenter   mm/vmpressure.c: ...
372
373
  	ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
  	if (ret < 0)
b6bb98114   David Rientjes   mm, vmpressure: p...
374
  		goto out;
518a86713   Dan Carpenter   mm/vmpressure.c: ...
375
  	level = ret;
70ddf637e   Anton Vorontsov   memcg: add memory...
376

b6bb98114   David Rientjes   mm, vmpressure: p...
377
378
379
  	/* Find optional mode */
  	token = strsep(&spec, ",");
  	if (token) {
518a86713   Dan Carpenter   mm/vmpressure.c: ...
380
381
  		ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
  		if (ret < 0)
b6bb98114   David Rientjes   mm, vmpressure: p...
382
  			goto out;
518a86713   Dan Carpenter   mm/vmpressure.c: ...
383
  		mode = ret;
b6bb98114   David Rientjes   mm, vmpressure: p...
384
  	}
70ddf637e   Anton Vorontsov   memcg: add memory...
385
386
  
  	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
b6bb98114   David Rientjes   mm, vmpressure: p...
387
388
389
390
  	if (!ev) {
  		ret = -ENOMEM;
  		goto out;
  	}
70ddf637e   Anton Vorontsov   memcg: add memory...
391
392
393
  
  	ev->efd = eventfd;
  	ev->level = level;
b6bb98114   David Rientjes   mm, vmpressure: p...
394
  	ev->mode = mode;
70ddf637e   Anton Vorontsov   memcg: add memory...
395
396
397
398
  
  	mutex_lock(&vmpr->events_lock);
  	list_add(&ev->node, &vmpr->events);
  	mutex_unlock(&vmpr->events_lock);
518a86713   Dan Carpenter   mm/vmpressure.c: ...
399
  	ret = 0;
b6bb98114   David Rientjes   mm, vmpressure: p...
400
401
402
  out:
  	kfree(spec_orig);
  	return ret;
70ddf637e   Anton Vorontsov   memcg: add memory...
403
404
405
406
  }
  
  /**
   * vmpressure_unregister_event() - Unbind eventfd from vmpressure
59b6f8734   Tejun Heo   memcg: make cgrou...
407
   * @memcg:	memcg handle
70ddf637e   Anton Vorontsov   memcg: add memory...
408
409
410
411
412
413
   * @eventfd:	eventfd context that was used to link vmpressure with the @cg
   *
   * This function does internal manipulations to detach the @eventfd from
   * the vmpressure notifications, and then frees internal resources
   * associated with the @eventfd (but the @eventfd itself is not freed).
   *
347c4a874   Tejun Heo   memcg: remove cgr...
414
   * To be used as memcg event method.
70ddf637e   Anton Vorontsov   memcg: add memory...
415
   */
59b6f8734   Tejun Heo   memcg: make cgrou...
416
  void vmpressure_unregister_event(struct mem_cgroup *memcg,
70ddf637e   Anton Vorontsov   memcg: add memory...
417
418
  				 struct eventfd_ctx *eventfd)
  {
59b6f8734   Tejun Heo   memcg: make cgrou...
419
  	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
70ddf637e   Anton Vorontsov   memcg: add memory...
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
  	struct vmpressure_event *ev;
  
  	mutex_lock(&vmpr->events_lock);
  	list_for_each_entry(ev, &vmpr->events, node) {
  		if (ev->efd != eventfd)
  			continue;
  		list_del(&ev->node);
  		kfree(ev);
  		break;
  	}
  	mutex_unlock(&vmpr->events_lock);
  }
  
  /**
   * vmpressure_init() - Initialize vmpressure control structure
   * @vmpr:	Structure to be initialized
   *
   * Must be called on every allocated vmpressure structure before any
   * usage. Sets up the listener list and its mutex, the spinlock taken
   * around the scanned/reclaimed counters, and the work item bound to
   * vmpressure_work_fn().
   */
  void vmpressure_init(struct vmpressure *vmpr)
  {
  	INIT_LIST_HEAD(&vmpr->events);
  	mutex_init(&vmpr->events_lock);
  	spin_lock_init(&vmpr->sr_lock);
  	INIT_WORK(&vmpr->work, vmpressure_work_fn);
  }
33cb876e9   Michal Hocko   vmpressure: make ...
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
  
  /**
   * vmpressure_cleanup() - shuts down vmpressure control structure
   * @vmpr:	Structure to be cleaned up
   *
   * This function should be called before the structure in which @vmpr is
   * embedded is cleaned up.
   */
  void vmpressure_cleanup(struct vmpressure *vmpr)
  {
  	/*
  	 * Make sure there is no pending work before eventfd infrastructure
  	 * goes away. The work item flushed here is the one vmpressure_init()
  	 * bound to vmpressure_work_fn().
  	 */
  	flush_work(&vmpr->work);
  }