mm/memcontrol.c

  /* memcontrol.c - Memory Controller
   *
   * Copyright IBM Corporation, 2007
   * Author Balbir Singh <balbir@linux.vnet.ibm.com>
   *
   * Copyright 2007 OpenVZ SWsoft Inc
   * Author: Pavel Emelianov <xemul@openvz.org>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   */
  
  #include <linux/res_counter.h>
  #include <linux/memcontrol.h>
  #include <linux/cgroup.h>
  #include <linux/mm.h>
  #include <linux/pagemap.h>
  #include <linux/smp.h>
  #include <linux/page-flags.h>
  #include <linux/backing-dev.h>
  #include <linux/bit_spinlock.h>
  #include <linux/rcupdate.h>
  #include <linux/limits.h>
  #include <linux/mutex.h>
  #include <linux/slab.h>
  #include <linux/swap.h>
  #include <linux/spinlock.h>
  #include <linux/fs.h>
  #include <linux/seq_file.h>
  #include <linux/vmalloc.h>
  #include <linux/mm_inline.h>
  #include <linux/page_cgroup.h>
  #include "internal.h"

  #include <asm/uaccess.h>
  struct cgroup_subsys mem_cgroup_subsys __read_mostly;
  #define MEM_CGROUP_RECLAIM_RETRIES	5

  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
int do_swap_account __read_mostly;
static int really_do_swap_account __initdata = 1; /* for remembering boot option */
  #else
  #define do_swap_account		(0)
  #endif
static DEFINE_MUTEX(memcg_tasklist);	/* can be held under cgroup_mutex */

  /*
   * Statistics for memory cgroup.
   */
  enum mem_cgroup_stat_index {
  	/*
  	 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
  	 */
  	MEM_CGROUP_STAT_CACHE, 	   /* # of pages charged as cache */
  	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as rss */
  	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
  	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
  
  	MEM_CGROUP_STAT_NSTATS,
  };
  
  struct mem_cgroup_stat_cpu {
  	s64 count[MEM_CGROUP_STAT_NSTATS];
  } ____cacheline_aligned_in_smp;
  
  struct mem_cgroup_stat {
  	struct mem_cgroup_stat_cpu cpustat[0];
  };
  
  /*
 * For accounting under irq disable, no need to increment the preempt count.
   */
  static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
  		enum mem_cgroup_stat_index idx, int val)
  {
  	stat->count[idx] += val;
  }
  
  static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
  		enum mem_cgroup_stat_index idx)
  {
  	int cpu;
  	s64 ret = 0;
  	for_each_possible_cpu(cpu)
  		ret += stat->cpustat[cpu].count[idx];
  	return ret;
  }
  static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat)
  {
  	s64 ret;
  
  	ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE);
  	ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS);
  	return ret;
  }
  /*
   * per-zone information in memory controller.
   */
  struct mem_cgroup_per_zone {
  	/*
  	 * spin_lock to protect the per cgroup LRU
  	 */
  	struct list_head	lists[NR_LRU_LISTS];
  	unsigned long		count[NR_LRU_LISTS];
  
  	struct zone_reclaim_stat reclaim_stat;
  };
  /* Macro for accessing counter */
  #define MEM_CGROUP_ZSTAT(mz, idx)	((mz)->count[(idx)])
  
  struct mem_cgroup_per_node {
  	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
  };
  
  struct mem_cgroup_lru_info {
  	struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
  };
  
  /*
   * The memory controller data structure. The memory controller controls both
   * page cache and RSS per cgroup. We would eventually like to provide
   * statistics based on the statistics developed by Rik Van Riel for clock-pro,
   * to help the administrator determine what knobs to tune.
   *
   * TODO: Add a water mark for the memory controller. Reclaim will begin when
 * we hit the water mark. Maybe even add a low water mark, such that
 * no reclaim occurs from a cgroup at its low water mark; this is
 * a feature that will be implemented much later in the future.
   */
  struct mem_cgroup {
  	struct cgroup_subsys_state css;
  	/*
  	 * the counter to account for memory usage
  	 */
  	struct res_counter res;
  	/*
  	 * the counter to account for mem+swap usage.
  	 */
  	struct res_counter memsw;
  	/*
  	 * Per cgroup active and inactive list, similar to the
  	 * per zone LRU lists.
  	 */
  	struct mem_cgroup_lru_info info;

	/*
	 * protects reclaim-related members.
	 */
	spinlock_t reclaim_param_lock;
  	int	prev_priority;	/* for recording reclaim priority */
  
  	/*
	 * While reclaiming in a hierarchy, we cache the last child we
  	 * reclaimed from.
  	 */
  	int last_scanned_child;
  	/*
  	 * Should the accounting and control be hierarchical, per subtree?
  	 */
  	bool use_hierarchy;
  	unsigned long	last_oom_jiffies;
  	atomic_t	refcnt;

  	unsigned int	swappiness;
  	/*
  	 * statistics. This must be placed at the end of memcg.
  	 */
  	struct mem_cgroup_stat stat;
  };
  enum charge_type {
  	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
  	MEM_CGROUP_CHARGE_TYPE_MAPPED,
  	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
  	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
  	MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */
  	NR_CHARGE_TYPE,
  };
  /* only for here (for easy reading.) */
  #define PCGF_CACHE	(1UL << PCG_CACHE)
  #define PCGF_USED	(1UL << PCG_USED)
  #define PCGF_LOCK	(1UL << PCG_LOCK)
  static const unsigned long
  pcg_default_flags[NR_CHARGE_TYPE] = {
  	PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
  	PCGF_USED | PCGF_LOCK, /* Anon */
  	PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
  	0, /* FORCE */
  };
  /* for encoding cft->private value on file */
  #define _MEM			(0)
  #define _MEMSWAP		(1)
  #define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
  #define MEMFILE_TYPE(val)	(((val) >> 16) & 0xffff)
  #define MEMFILE_ATTR(val)	((val) & 0xffff)
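/*
 * For example, MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT) packs the counter type
 * into the upper 16 bits and the res_counter member into the lower 16 bits;
 * MEMFILE_TYPE() and MEMFILE_ATTR() recover the two halves.
 */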
  
  static void mem_cgroup_get(struct mem_cgroup *mem);
  static void mem_cgroup_put(struct mem_cgroup *mem);
  static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);

  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
  					 struct page_cgroup *pc,
  					 bool charge)
  {
  	int val = (charge)? 1 : -1;
  	struct mem_cgroup_stat *stat = &mem->stat;
  	struct mem_cgroup_stat_cpu *cpustat;
  	int cpu = get_cpu();

  	cpustat = &stat->cpustat[cpu];
  	if (PageCgroupCache(pc))
  		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
  	else
  		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
  
  	if (charge)
  		__mem_cgroup_stat_add_safe(cpustat,
  				MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
  	else
  		__mem_cgroup_stat_add_safe(cpustat,
  				MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
  	put_cpu();
  }
  static struct mem_cgroup_per_zone *
  mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
  {
  	return &mem->info.nodeinfo[nid]->zoneinfo[zid];
  }
  static struct mem_cgroup_per_zone *
  page_cgroup_zoneinfo(struct page_cgroup *pc)
  {
  	struct mem_cgroup *mem = pc->mem_cgroup;
  	int nid = page_cgroup_nid(pc);
  	int zid = page_cgroup_zid(pc);

  	if (!mem)
  		return NULL;
  	return mem_cgroup_zoneinfo(mem, nid, zid);
  }
  static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
  					enum lru_list idx)
  {
  	int nid, zid;
  	struct mem_cgroup_per_zone *mz;
  	u64 total = 0;
  
  	for_each_online_node(nid)
  		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
  			mz = mem_cgroup_zoneinfo(mem, nid, zid);
  			total += MEM_CGROUP_ZSTAT(mz, idx);
  		}
  	return total;
  }
  static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
  {
  	return container_of(cgroup_subsys_state(cont,
  				mem_cgroup_subsys_id), struct mem_cgroup,
  				css);
  }
  struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
  {
  	/*
  	 * mm_update_next_owner() may clear mm->owner to NULL
  	 * if it races with swapoff, page migration, etc.
  	 * So this can be called with p == NULL.
  	 */
  	if (unlikely(!p))
  		return NULL;
  	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
  				struct mem_cgroup, css);
  }
  static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
  {
  	struct mem_cgroup *mem = NULL;
  
  	if (!mm)
  		return NULL;
	/*
	 * Because we have no locks, mm->owner may be being moved to another
	 * cgroup. We use css_tryget() here even if this looks
	 * pessimistic (rather than adding locks here).
	 */
  	rcu_read_lock();
  	do {
  		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
  		if (unlikely(!mem))
  			break;
  	} while (!css_tryget(&mem->css));
  	rcu_read_unlock();
  	return mem;
  }
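
/*
 * Note: on success the returned mem_cgroup carries a css reference taken by
 * css_tryget() above; the caller is expected to drop it with css_put() when
 * it is done with the group.
 */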
  /*
   * Call callback function against all cgroup under hierarchy tree.
   */
  static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
  			  int (*func)(struct mem_cgroup *, void *))
  {
  	int found, ret, nextid;
  	struct cgroup_subsys_state *css;
  	struct mem_cgroup *mem;
  
  	if (!root->use_hierarchy)
  		return (*func)(root, data);
  
  	nextid = 1;
  	do {
  		ret = 0;
  		mem = NULL;
  
  		rcu_read_lock();
  		css = css_get_next(&mem_cgroup_subsys, nextid, &root->css,
  				   &found);
  		if (css && css_tryget(css))
  			mem = container_of(css, struct mem_cgroup, css);
  		rcu_read_unlock();
  
  		if (mem) {
  			ret = (*func)(mem, data);
  			css_put(&mem->css);
  		}
  		nextid = found + 1;
  	} while (!ret && css);
  
  	return ret;
  }
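
/*
 * A minimal user of this walker is mem_cgroup_count_children() below, whose
 * callback simply increments a counter for every memcg visited.
 */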
  /*
   * Following LRU functions are allowed to be used without PCG_LOCK.
   * Operations are called by routine of global LRU independently from memcg.
 * What we have to take care of here is the validity of pc->mem_cgroup.
   *
   * Changes to pc->mem_cgroup happens when
   * 1. charge
   * 2. moving account
 * In the typical case, "charge" is done before add-to-lru. The exception is
 * SwapCache, which is added to the LRU before being charged.
   * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
   * When moving account, the page is not on LRU. It's isolated.
   */

  void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
  {
  	struct page_cgroup *pc;
  	struct mem_cgroup *mem;
  	struct mem_cgroup_per_zone *mz;

  	if (mem_cgroup_disabled())
  		return;
  	pc = lookup_page_cgroup(page);
  	/* can happen while we handle swapcache. */
  	if (list_empty(&pc->lru) || !pc->mem_cgroup)
  		return;
  	/*
  	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
  	 * removed from global LRU.
  	 */
  	mz = page_cgroup_zoneinfo(pc);
  	mem = pc->mem_cgroup;
  	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
  	list_del_init(&pc->lru);
  	return;
  }
  void mem_cgroup_del_lru(struct page *page)
  {
  	mem_cgroup_del_lru_list(page, page_lru(page));
  }

  void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
  {
  	struct mem_cgroup_per_zone *mz;
  	struct page_cgroup *pc;

  	if (mem_cgroup_disabled())
  		return;

  	pc = lookup_page_cgroup(page);
  	/*
	 * The Used bit is set without atomic ops, but after smp_wmb().
	 * To make pc->mem_cgroup visible, insert smp_rmb() here.
  	 */
  	smp_rmb();
  	/* unused page is not rotated. */
  	if (!PageCgroupUsed(pc))
  		return;
  	mz = page_cgroup_zoneinfo(pc);
  	list_move(&pc->lru, &mz->lists[lru]);
  }
  void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
  {
  	struct page_cgroup *pc;
  	struct mem_cgroup_per_zone *mz;

  	if (mem_cgroup_disabled())
  		return;
  	pc = lookup_page_cgroup(page);
  	/*
	 * The Used bit is set without atomic ops, but after smp_wmb().
	 * To make pc->mem_cgroup visible, insert smp_rmb() here.
  	 */
  	smp_rmb();
  	if (!PageCgroupUsed(pc))
  		return;

  	mz = page_cgroup_zoneinfo(pc);
  	MEM_CGROUP_ZSTAT(mz, lru) += 1;
  	list_add(&pc->lru, &mz->lists[lru]);
  }

  /*
 * While handling SwapCache, pc->mem_cgroup may be changed while it's linked to
 * the LRU because the page may be reused after it's fully uncharged (because of
 * SwapCache behavior). To handle that, unlink page_cgroup from the LRU when we
 * charge it again. This function is only used to charge SwapCache. It's done
 * under lock_page and it's expected that zone->lru_lock is never held.
   */
  static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
  {
  	unsigned long flags;
  	struct zone *zone = page_zone(page);
  	struct page_cgroup *pc = lookup_page_cgroup(page);
  
  	spin_lock_irqsave(&zone->lru_lock, flags);
  	/*
  	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
  	 * is guarded by lock_page() because the page is SwapCache.
  	 */
  	if (!PageCgroupUsed(pc))
  		mem_cgroup_del_lru_list(page, page_lru(page));
  	spin_unlock_irqrestore(&zone->lru_lock, flags);
  }
  static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
  {
  	unsigned long flags;
  	struct zone *zone = page_zone(page);
  	struct page_cgroup *pc = lookup_page_cgroup(page);
  
  	spin_lock_irqsave(&zone->lru_lock, flags);
  	/* link when the page is linked to LRU but page_cgroup isn't */
  	if (PageLRU(page) && list_empty(&pc->lru))
  		mem_cgroup_add_lru_list(page, page_lru(page));
  	spin_unlock_irqrestore(&zone->lru_lock, flags);
  }
  void mem_cgroup_move_lists(struct page *page,
  			   enum lru_list from, enum lru_list to)
  {
  	if (mem_cgroup_disabled())
  		return;
  	mem_cgroup_del_lru_list(page, from);
  	mem_cgroup_add_lru_list(page, to);
  }
  int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
  {
  	int ret;
  	struct mem_cgroup *curr = NULL;
  
  	task_lock(task);
  	rcu_read_lock();
  	curr = try_get_mem_cgroup_from_mm(task->mm);
  	rcu_read_unlock();
  	task_unlock(task);
  	if (!curr)
  		return 0;
  	if (curr->use_hierarchy)
  		ret = css_is_ancestor(&curr->css, &mem->css);
  	else
  		ret = (curr == mem);
  	css_put(&curr->css);
  	return ret;
  }
  /*
   * prev_priority control...this will be used in memory reclaim path.
   */
  int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
  {
  	int prev_priority;
  
  	spin_lock(&mem->reclaim_param_lock);
  	prev_priority = mem->prev_priority;
  	spin_unlock(&mem->reclaim_param_lock);
  
  	return prev_priority;
  }
  
  void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
  {
  	spin_lock(&mem->reclaim_param_lock);
  	if (priority < mem->prev_priority)
  		mem->prev_priority = priority;
  	spin_unlock(&mem->reclaim_param_lock);
  }
  
  void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
  {
  	spin_lock(&mem->reclaim_param_lock);
  	mem->prev_priority = priority;
  	spin_unlock(&mem->reclaim_param_lock);
  }
  static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
  {
  	unsigned long active;
  	unsigned long inactive;
  	unsigned long gb;
  	unsigned long inactive_ratio;

  	inactive = mem_cgroup_get_local_zonestat(memcg, LRU_INACTIVE_ANON);
  	active = mem_cgroup_get_local_zonestat(memcg, LRU_ACTIVE_ANON);

  	gb = (inactive + active) >> (30 - PAGE_SHIFT);
  	if (gb)
  		inactive_ratio = int_sqrt(10 * gb);
  	else
  		inactive_ratio = 1;
  
  	if (present_pages) {
  		present_pages[0] = inactive;
  		present_pages[1] = active;
  	}
  
  	return inactive_ratio;
  }
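
/*
 * Worked example (assuming 4KB pages): with about 4GB of anon pages on the
 * LRU lists, gb = 4 and inactive_ratio = int_sqrt(10 * 4) = 6, so inactive
 * anon is considered low once inactive * 6 < active; below 1GB the ratio
 * stays at 1.
 */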
  
  int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
  {
  	unsigned long active;
  	unsigned long inactive;
  	unsigned long present_pages[2];
  	unsigned long inactive_ratio;
  
  	inactive_ratio = calc_inactive_ratio(memcg, present_pages);
  
  	inactive = present_pages[0];
  	active = present_pages[1];
  
  	if (inactive * inactive_ratio < active)
  		return 1;
  
  	return 0;
  }
  unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
  				       struct zone *zone,
  				       enum lru_list lru)
  {
  	int nid = zone->zone_pgdat->node_id;
  	int zid = zone_idx(zone);
  	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
  
  	return MEM_CGROUP_ZSTAT(mz, lru);
  }
  struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
  						      struct zone *zone)
  {
  	int nid = zone->zone_pgdat->node_id;
  	int zid = zone_idx(zone);
  	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
  
  	return &mz->reclaim_stat;
  }
  
  struct zone_reclaim_stat *
  mem_cgroup_get_reclaim_stat_from_page(struct page *page)
  {
  	struct page_cgroup *pc;
  	struct mem_cgroup_per_zone *mz;
  
  	if (mem_cgroup_disabled())
  		return NULL;
  
  	pc = lookup_page_cgroup(page);
  	/*
	 * The Used bit is set without atomic ops, but after smp_wmb().
	 * To make pc->mem_cgroup visible, insert smp_rmb() here.
  	 */
  	smp_rmb();
  	if (!PageCgroupUsed(pc))
  		return NULL;
  	mz = page_cgroup_zoneinfo(pc);
  	if (!mz)
  		return NULL;
  
  	return &mz->reclaim_stat;
  }
  unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  					struct list_head *dst,
  					unsigned long *scanned, int order,
  					int mode, struct zone *z,
  					struct mem_cgroup *mem_cont,
  					int active, int file)
  {
  	unsigned long nr_taken = 0;
  	struct page *page;
  	unsigned long scan;
  	LIST_HEAD(pc_list);
  	struct list_head *src;
  	struct page_cgroup *pc, *tmp;
  	int nid = z->zone_pgdat->node_id;
  	int zid = zone_idx(z);
  	struct mem_cgroup_per_zone *mz;
  	int lru = LRU_FILE * !!file + !!active;

  	BUG_ON(!mem_cont);
  	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
  	src = &mz->lists[lru];

  	scan = 0;
  	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
  		if (scan >= nr_to_scan)
  			break;
  
  		page = pc->page;
  		if (unlikely(!PageCgroupUsed(pc)))
  			continue;
  		if (unlikely(!PageLRU(page)))
  			continue;

  		scan++;
  		if (__isolate_lru_page(page, mode, file) == 0) {
  			list_move(&page->lru, dst);
  			nr_taken++;
  		}
  	}
  	*scanned = scan;
  	return nr_taken;
  }
  #define mem_cgroup_from_res_counter(counter, member)	\
  	container_of(counter, struct mem_cgroup, member)
  static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
  {
  	if (do_swap_account) {
  		if (res_counter_check_under_limit(&mem->res) &&
  			res_counter_check_under_limit(&mem->memsw))
  			return true;
  	} else
  		if (res_counter_check_under_limit(&mem->res))
  			return true;
  	return false;
  }
  static unsigned int get_swappiness(struct mem_cgroup *memcg)
  {
  	struct cgroup *cgrp = memcg->css.cgroup;
  	unsigned int swappiness;
  
  	/* root ? */
  	if (cgrp->parent == NULL)
  		return vm_swappiness;
  
  	spin_lock(&memcg->reclaim_param_lock);
  	swappiness = memcg->swappiness;
  	spin_unlock(&memcg->reclaim_param_lock);
  
  	return swappiness;
  }
  static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
  {
  	int *val = data;
  	(*val)++;
  	return 0;
  }
  
  /**
 * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
   * @memcg: The memory cgroup that went over limit
   * @p: Task that is going to be killed
   *
   * NOTE: @memcg and @p's mem_cgroup can be different when hierarchy is
   * enabled
   */
  void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
  {
  	struct cgroup *task_cgrp;
  	struct cgroup *mem_cgrp;
  	/*
  	 * Need a buffer in BSS, can't rely on allocations. The code relies
  	 * on the assumption that OOM is serialized for memory controller.
  	 * If this assumption is broken, revisit this code.
  	 */
  	static char memcg_name[PATH_MAX];
  	int ret;
  
  	if (!memcg)
  		return;
  
  
  	rcu_read_lock();
  
  	mem_cgrp = memcg->css.cgroup;
  	task_cgrp = task_cgroup(p, mem_cgroup_subsys_id);
  
  	ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX);
  	if (ret < 0) {
  		/*
  		 * Unfortunately, we are unable to convert to a useful name
  		 * But we'll still print out the usage information
  		 */
  		rcu_read_unlock();
  		goto done;
  	}
  	rcu_read_unlock();
  
  	printk(KERN_INFO "Task in %s killed", memcg_name);
  
  	rcu_read_lock();
  	ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX);
  	if (ret < 0) {
  		rcu_read_unlock();
  		goto done;
  	}
  	rcu_read_unlock();
  
	/*
	 * Continues from above, so we don't need a KERN_ level
	 */
	printk(KERN_CONT " as a result of limit of %s\n", memcg_name);
  done:
  
  	printk(KERN_INFO "memory: usage %llukB, limit %llukB, failcnt %llu
  ",
  		res_counter_read_u64(&memcg->res, RES_USAGE) >> 10,
  		res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10,
  		res_counter_read_u64(&memcg->res, RES_FAILCNT));
  	printk(KERN_INFO "memory+swap: usage %llukB, limit %llukB, "
  		"failcnt %llu
  ",
  		res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10,
  		res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10,
  		res_counter_read_u64(&memcg->memsw, RES_FAILCNT));
  }
  /*
 * This function returns the number of memcgs under the hierarchy tree. Returns
 * 1 (self count) if there are no children.
   */
  static int mem_cgroup_count_children(struct mem_cgroup *mem)
  {
  	int num = 0;
   	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
  	return num;
  }
  /*
   * Visit the first child (need not be the first child as per the ordering
   * of the cgroup list, since we track last_scanned_child) of @mem and use
   * that to reclaim free pages from.
   */
  static struct mem_cgroup *
  mem_cgroup_select_victim(struct mem_cgroup *root_mem)
  {
  	struct mem_cgroup *ret = NULL;
  	struct cgroup_subsys_state *css;
  	int nextid, found;
  
  	if (!root_mem->use_hierarchy) {
  		css_get(&root_mem->css);
  		ret = root_mem;
  	}
  
  	while (!ret) {
  		rcu_read_lock();
  		nextid = root_mem->last_scanned_child + 1;
  		css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css,
  				   &found);
  		if (css && css_tryget(css))
  			ret = container_of(css, struct mem_cgroup, css);
  
  		rcu_read_unlock();
  		/* Updates scanning parameter */
  		spin_lock(&root_mem->reclaim_param_lock);
  		if (!css) {
  			/* this means start scan from ID:1 */
  			root_mem->last_scanned_child = 0;
  		} else
  			root_mem->last_scanned_child = found;
  		spin_unlock(&root_mem->reclaim_param_lock);
  	}
  
  	return ret;
  }
  
  /*
   * Scan the hierarchy if needed to reclaim memory. We remember the last child
   * we reclaimed from, so that we don't end up penalizing one child extensively
   * based on its position in the children list.
   *
 * root_mem is the original ancestor that we've been reclaiming from.
   *
   * We give up and return to the caller when we visit root_mem twice.
   * (other groups can be removed while we're walking....)
   *
 * If shrink==true, this returns immediately to avoid freeing too much.
   */
  static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  				   gfp_t gfp_mask, bool noswap, bool shrink)
  {
  	struct mem_cgroup *victim;
  	int ret, total = 0;
  	int loop = 0;
  
  	while (loop < 2) {
  		victim = mem_cgroup_select_victim(root_mem);
  		if (victim == root_mem)
  			loop++;
  		if (!mem_cgroup_local_usage(&victim->stat)) {
  			/* this cgroup's local usage == 0 */
  			css_put(&victim->css);
  			continue;
  		}
  		/* we use swappiness of local cgroup */
  		ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
  						   get_swappiness(victim));
  		css_put(&victim->css);
		/*
		 * When shrinking usage, we can't check whether we should stop
		 * here or reclaim more; that depends on the callers.
		 * last_scanned_child is enough to keep fairness across the tree.
		 */
  		if (shrink)
  			return ret;
  		total += ret;
  		if (mem_cgroup_check_under_limit(root_mem))
  			return 1 + total;
  	}
  	return total;
  }
  bool mem_cgroup_oom_called(struct task_struct *task)
  {
  	bool ret = false;
  	struct mem_cgroup *mem;
  	struct mm_struct *mm;
  
  	rcu_read_lock();
  	mm = task->mm;
  	if (!mm)
  		mm = &init_mm;
  	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
  	if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
  		ret = true;
  	rcu_read_unlock();
  	return ret;
  }
  
  static int record_last_oom_cb(struct mem_cgroup *mem, void *data)
  {
  	mem->last_oom_jiffies = jiffies;
  	return 0;
  }
  
  static void record_last_oom(struct mem_cgroup *mem)
  {
  	mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
  }
  /*
   * Unlike exported interface, "oom" parameter is added. if oom==true,
   * oom-killer can be invoked.
   */
  static int __mem_cgroup_try_charge(struct mm_struct *mm,
  			gfp_t gfp_mask, struct mem_cgroup **memcg,
  			bool oom)
  {
  	struct mem_cgroup *mem, *mem_over_limit;
  	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
  	struct res_counter *fail_res;
  
  	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
  		/* Don't account this! */
  		*memcg = NULL;
  		return 0;
  	}
  	/*
  	 * We always charge the cgroup the mm_struct belongs to.
  	 * The mm_struct's mem_cgroup changes on task migration if the
  	 * thread group leader migrates. It's possible that mm is not
  	 * set, if so charge the init_mm (happens for pagecache usage).
  	 */
  	mem = *memcg;
  	if (likely(!mem)) {
  		mem = try_get_mem_cgroup_from_mm(mm);
  		*memcg = mem;
  	} else {
  		css_get(&mem->css);
  	}
  	if (unlikely(!mem))
  		return 0;
  	VM_BUG_ON(css_is_removed(&mem->css));

  	while (1) {
  		int ret;
  		bool noswap = false;

  		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
  		if (likely(!ret)) {
  			if (!do_swap_account)
  				break;
  			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
  							&fail_res);
  			if (likely(!ret))
  				break;
  			/* mem+swap counter fails */
  			res_counter_uncharge(&mem->res, PAGE_SIZE);
  			noswap = true;
  			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
  									memsw);
  		} else
  			/* mem counter fails */
  			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
  									res);
  		if (!(gfp_mask & __GFP_WAIT))
  			goto nomem;

  		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
  							noswap, false);
  		if (ret)
  			continue;
  
  		/*
  		 * try_to_free_mem_cgroup_pages() might not give us a full
  		 * picture of reclaim. Some pages are reclaimed and might be
  		 * moved to swap cache or just unmapped from the cgroup.
  		 * Check the limit again to see if the reclaim reduced the
  		 * current usage of the cgroup before giving up
  		 *
  		 */
  		if (mem_cgroup_check_under_limit(mem_over_limit))
  			continue;
  
  		if (!nr_retries--) {
  			if (oom) {
  				mutex_lock(&memcg_tasklist);
  				mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
  				mutex_unlock(&memcg_tasklist);
  				record_last_oom(mem_over_limit);
  			}
  			goto nomem;
  		}
  	}
  	return 0;
  nomem:
  	css_put(&mem->css);
  	return -ENOMEM;
  }

  
  /*
 * A helper function to get a mem_cgroup from an ID. Must be called under
 * rcu_read_lock(). The caller must check css_is_removed() if that is a
 * concern (dropping a refcnt from swap can happen against a removed memcg).
   */
  static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
  {
  	struct cgroup_subsys_state *css;
  
  	/* ID 0 is unused ID */
  	if (!id)
  		return NULL;
  	css = css_lookup(&mem_cgroup_subsys, id);
  	if (!css)
  		return NULL;
  	return container_of(css, struct mem_cgroup, css);
  }
  static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
  {
  	struct mem_cgroup *mem;
  	struct page_cgroup *pc;
  	unsigned short id;
  	swp_entry_t ent;
  	VM_BUG_ON(!PageLocked(page));
  	if (!PageSwapCache(page))
  		return NULL;
  	pc = lookup_page_cgroup(page);
  	lock_page_cgroup(pc);
  	if (PageCgroupUsed(pc)) {
  		mem = pc->mem_cgroup;
  		if (mem && !css_tryget(&mem->css))
  			mem = NULL;
  	} else {
  		ent.val = page_private(page);
  		id = lookup_swap_cgroup(ent);
  		rcu_read_lock();
  		mem = mem_cgroup_lookup(id);
  		if (mem && !css_tryget(&mem->css))
  			mem = NULL;
  		rcu_read_unlock();
  	}
  	unlock_page_cgroup(pc);
  	return mem;
  }
  /*
 * commit a charge obtained by __mem_cgroup_try_charge() and make page_cgroup
 * USED. If it is already USED, uncharge and return.
   */
  
  static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  				     struct page_cgroup *pc,
  				     enum charge_type ctype)
  {
  	/* try_charge() can return NULL to *memcg, taking care of it. */
  	if (!mem)
  		return;
  
  	lock_page_cgroup(pc);
  	if (unlikely(PageCgroupUsed(pc))) {
  		unlock_page_cgroup(pc);
  		res_counter_uncharge(&mem->res, PAGE_SIZE);
  		if (do_swap_account)
  			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
  		css_put(&mem->css);
  		return;
  	}
  	pc->mem_cgroup = mem;
  	smp_wmb();
  	pc->flags = pcg_default_flags[ctype];

  	mem_cgroup_charge_statistics(mem, pc, true);

  	unlock_page_cgroup(pc);
  }

  /**
   * mem_cgroup_move_account - move account of the page
   * @pc:	page_cgroup of the page.
   * @from: mem_cgroup which the page is moved from.
   * @to:	mem_cgroup which the page is moved to. @from != @to.
   *
 * The caller must confirm the following.
   * - page is not on LRU (isolate_page() is useful.)
   *
 * returns 0 on success,
 * returns -EBUSY when the lock is busy or "pc" is unstable.
 *
 * This function does "uncharge" from the old cgroup but doesn't do "charge" to
 * the new cgroup. That should be done by the caller.
   */
  
  static int mem_cgroup_move_account(struct page_cgroup *pc,
  	struct mem_cgroup *from, struct mem_cgroup *to)
  {
  	struct mem_cgroup_per_zone *from_mz, *to_mz;
  	int nid, zid;
  	int ret = -EBUSY;
  	VM_BUG_ON(from == to);
  	VM_BUG_ON(PageLRU(pc->page));
  
  	nid = page_cgroup_nid(pc);
  	zid = page_cgroup_zid(pc);
  	from_mz =  mem_cgroup_zoneinfo(from, nid, zid);
  	to_mz =  mem_cgroup_zoneinfo(to, nid, zid);
  	if (!trylock_page_cgroup(pc))
  		return ret;
  
  	if (!PageCgroupUsed(pc))
  		goto out;
  
  	if (pc->mem_cgroup != from)
  		goto out;
  	res_counter_uncharge(&from->res, PAGE_SIZE);
  	mem_cgroup_charge_statistics(from, pc, false);
  	if (do_swap_account)
  		res_counter_uncharge(&from->memsw, PAGE_SIZE);
  	css_put(&from->css);
  
  	css_get(&to->css);
  	pc->mem_cgroup = to;
  	mem_cgroup_charge_statistics(to, pc, true);
  	ret = 0;
  out:
  	unlock_page_cgroup(pc);
  	return ret;
  }
  
  /*
   * move charges to its parent.
   */
  
  static int mem_cgroup_move_parent(struct page_cgroup *pc,
  				  struct mem_cgroup *child,
  				  gfp_t gfp_mask)
  {
  	struct page *page = pc->page;
  	struct cgroup *cg = child->css.cgroup;
  	struct cgroup *pcg = cg->parent;
  	struct mem_cgroup *parent;
  	int ret;
  
  	/* Is ROOT ? */
  	if (!pcg)
  		return -EINVAL;

  	parent = mem_cgroup_from_cont(pcg);

  	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
  	if (ret || !parent)
  		return ret;
  	if (!get_page_unless_zero(page)) {
  		ret = -EBUSY;
  		goto uncharge;
  	}
  
  	ret = isolate_lru_page(page);
  
  	if (ret)
  		goto cancel;

  	ret = mem_cgroup_move_account(pc, child, parent);

  	putback_lru_page(page);
  	if (!ret) {
  		put_page(page);
  		/* drop extra refcnt by try_charge() */
  		css_put(&parent->css);
  		return 0;
  	}

  cancel:
  	put_page(page);
  uncharge:
  	/* drop extra refcnt by try_charge() */
  	css_put(&parent->css);
  	/* uncharge if move fails */
  	res_counter_uncharge(&parent->res, PAGE_SIZE);
  	if (do_swap_account)
  		res_counter_uncharge(&parent->memsw, PAGE_SIZE);
  	return ret;
  }
  /*
   * Charge the memory controller for page usage.
   * Return
   * 0 if the charge was successful
   * < 0 if the cgroup is over its limit
   */
  static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
  				gfp_t gfp_mask, enum charge_type ctype,
  				struct mem_cgroup *memcg)
  {
  	struct mem_cgroup *mem;
  	struct page_cgroup *pc;
  	int ret;
  
  	pc = lookup_page_cgroup(page);
  	/* can happen at boot */
  	if (unlikely(!pc))
  		return 0;
  	prefetchw(pc);
  
  	mem = memcg;
  	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
  	if (ret || !mem)
  		return ret;
  
  	__mem_cgroup_commit_charge(mem, pc, ctype);
  	return 0;
  }
  int mem_cgroup_newpage_charge(struct page *page,
  			      struct mm_struct *mm, gfp_t gfp_mask)
  {
  	if (mem_cgroup_disabled())
cede86acd   Li Zefan   memcg: clean up c...
1160
  		return 0;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1161
1162
  	if (PageCompound(page))
  		return 0;
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
  	/*
  	 * If the page is already mapped, we don't have to account for it.
  	 * If it is page cache, page->mapping points to an address_space.
  	 * But page->mapping may hold a stale anon_vma pointer; detect that
  	 * with the PageAnon() check, since a newly mapped anonymous page's
  	 * page->mapping is NULL here.
  	 */
  	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
  		return 0;
  	if (unlikely(!mm))
  		mm = &init_mm;
217bc3194   KAMEZAWA Hiroyuki   memory cgroup enh...
1174
  	return mem_cgroup_charge_common(page, mm, gfp_mask,
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1175
  				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
217bc3194   KAMEZAWA Hiroyuki   memory cgroup enh...
1176
  }
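  /*
   * Caller note (descriptive, added for clarity; this paraphrases how the
   * fault paths are generally expected to use the hook, it is not quoted
   * from them): a newly allocated anonymous page is charged here after
   * allocation and before it is mapped; if the charge fails the caller
   * frees the page and reports OOM to the fault handler.
   */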
83aae4c73   Daisuke Nishimura   memcg: cleanup ca...
1177
1178
1179
  static void
  __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
  					enum charge_type ctype);
e1a1cd590   Balbir Singh   Memory controller...
1180
1181
  int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
  				gfp_t gfp_mask)
8697d3319   Balbir Singh   Memory controller...
1182
  {
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1183
1184
  	struct mem_cgroup *mem = NULL;
  	int ret;
f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1185
  	if (mem_cgroup_disabled())
cede86acd   Li Zefan   memcg: clean up c...
1186
  		return 0;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1187
1188
  	if (PageCompound(page))
  		return 0;
accf163e6   KAMEZAWA Hiroyuki   memcg: remove a r...
1189
1190
1191
1192
1193
1194
1195
1196
  	/*
  	 * Corner case handling. This is usually called from
  	 * add_to_page_cache(), but some filesystems (shmem) precharge the
  	 * page before calling it and then call add_to_page_cache() with
  	 * GFP_NOWAIT.
  	 *
  	 * In the GFP_NOWAIT case the page may already be charged (see
  	 * shmem.c); check for that here and avoid charging it twice.
  	 * (This works, but at a slightly higher cost.)
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1197
1198
  	 * Also, when the page is SwapCache, swap information must be taken
  	 * into account. We are under lock_page() here.
accf163e6   KAMEZAWA Hiroyuki   memcg: remove a r...
1199
1200
1201
  	 */
  	if (!(gfp_mask & __GFP_WAIT)) {
  		struct page_cgroup *pc;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1202
1203
1204
1205
1206
1207
1208
  
  		pc = lookup_page_cgroup(page);
  		if (!pc)
  			return 0;
  		lock_page_cgroup(pc);
  		if (PageCgroupUsed(pc)) {
  			unlock_page_cgroup(pc);
accf163e6   KAMEZAWA Hiroyuki   memcg: remove a r...
1209
1210
  			return 0;
  		}
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1211
  		unlock_page_cgroup(pc);
accf163e6   KAMEZAWA Hiroyuki   memcg: remove a r...
1212
  	}
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1213
  	if (unlikely(!mm && !mem))
8697d3319   Balbir Singh   Memory controller...
1214
  		mm = &init_mm;
accf163e6   KAMEZAWA Hiroyuki   memcg: remove a r...
1215

c05555b57   KAMEZAWA Hiroyuki   memcg: atomic ops...
1216
1217
  	if (page_is_file_cache(page))
  		return mem_cgroup_charge_common(page, mm, gfp_mask,
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1218
  				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1219

83aae4c73   Daisuke Nishimura   memcg: cleanup ca...
1220
1221
1222
1223
1224
1225
1226
1227
1228
  	/* shmem */
  	if (PageSwapCache(page)) {
  		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
  		if (!ret)
  			__mem_cgroup_commit_charge_swapin(page, mem,
  					MEM_CGROUP_CHARGE_TYPE_SHMEM);
  	} else
  		ret = mem_cgroup_charge_common(page, mm, gfp_mask,
  					MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1229

b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1230
  	return ret;
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1231
  }
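  /*
   * Note on the GFP_NOWAIT corner case above (descriptive sketch; the
   * exact shmem call chain is paraphrased, not quoted): shmem charges the
   * page first and then inserts it with add_to_page_cache(..., GFP_NOWAIT).
   * That second, GFP_NOWAIT call lands back in mem_cgroup_cache_charge(),
   * finds PageCgroupUsed(pc) already set, and returns 0 without charging
   * the page a second time.
   */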
54595fe26   KAMEZAWA Hiroyuki   memcg: use css_tr...
1232
1233
1234
1235
1236
1237
  /*
   * During swap-in (try_charge -> commit or cancel) the page is locked.
   * When try_charge() returns successfully, one reference to the memcg is
   * acquired without a struct page_cgroup; that reference is consumed by
   * "commit()" or dropped by "cancel()".
   */
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1238
1239
1240
1241
1242
  int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
  				 struct page *page,
  				 gfp_t mask, struct mem_cgroup **ptr)
  {
  	struct mem_cgroup *mem;
54595fe26   KAMEZAWA Hiroyuki   memcg: use css_tr...
1243
  	int ret;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1244

f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1245
  	if (mem_cgroup_disabled())
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1246
1247
1248
1249
  		return 0;
  
  	if (!do_swap_account)
  		goto charge_cur_mm;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1250
1251
1252
1253
1254
1255
1256
  	/*
  	 * A racing thread's fault, or swapoff, may have already updated
  	 * the pte, and even removed page from swap cache: return success
  	 * to go on to do_swap_page()'s pte_same() test, which should fail.
  	 */
  	if (!PageSwapCache(page))
  		return 0;
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1257
  	mem = try_get_mem_cgroup_from_swapcache(page);
54595fe26   KAMEZAWA Hiroyuki   memcg: use css_tr...
1258
1259
  	if (!mem)
  		goto charge_cur_mm;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1260
  	*ptr = mem;
54595fe26   KAMEZAWA Hiroyuki   memcg: use css_tr...
1261
1262
1263
1264
  	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
  	/* drop extra refcnt from tryget */
  	css_put(&mem->css);
  	return ret;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1265
1266
1267
1268
1269
  charge_cur_mm:
  	if (unlikely(!mm))
  		mm = &init_mm;
  	return __mem_cgroup_try_charge(mm, mask, ptr, true);
  }
83aae4c73   Daisuke Nishimura   memcg: cleanup ca...
1270
1271
1272
  static void
  __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
  					enum charge_type ctype)
7a81b88cb   KAMEZAWA Hiroyuki   memcg: introduce ...
1273
1274
  {
  	struct page_cgroup *pc;
f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1275
  	if (mem_cgroup_disabled())
7a81b88cb   KAMEZAWA Hiroyuki   memcg: introduce ...
1276
1277
1278
1279
  		return;
  	if (!ptr)
  		return;
  	pc = lookup_page_cgroup(page);
544122e5e   KAMEZAWA Hiroyuki   memcg: fix LRU ac...
1280
  	mem_cgroup_lru_del_before_commit_swapcache(page);
83aae4c73   Daisuke Nishimura   memcg: cleanup ca...
1281
  	__mem_cgroup_commit_charge(ptr, pc, ctype);
544122e5e   KAMEZAWA Hiroyuki   memcg: fix LRU ac...
1282
  	mem_cgroup_lru_add_after_commit_swapcache(page);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1283
1284
1285
  	/*
  	 * Now the swap entry is in memory, so this page may be
  	 * counted both as mem and as swap, i.e. double-counted.
03f3c4336   KAMEZAWA Hiroyuki   memcg: fix swap a...
1286
1287
1288
  	 * Fix it by uncharging from memsw. Basically, this SwapCache is stable
  	 * under lock_page(), but reuse_swap_page() in do_swap_page() (memory.c)
  	 * may call delete_from_swap_cache() before we reach here.
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1289
  	 */
03f3c4336   KAMEZAWA Hiroyuki   memcg: fix swap a...
1290
  	if (do_swap_account && PageSwapCache(page)) {
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1291
  		swp_entry_t ent = {.val = page_private(page)};
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1292
  		unsigned short id;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1293
  		struct mem_cgroup *memcg;
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1294
1295
1296
1297
  
  		id = swap_cgroup_record(ent, 0);
  		rcu_read_lock();
  		memcg = mem_cgroup_lookup(id);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1298
  		if (memcg) {
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1299
1300
1301
1302
  			/*
  			 * The recorded memcg can be an obsolete one, so avoid
  			 * calling css_tryget().
  			 */
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1303
1304
1305
  			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
  			mem_cgroup_put(memcg);
  		}
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1306
  		rcu_read_unlock();
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1307
  	}
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1308
  	/* add this page(page_cgroup) to the LRU we want. */
544122e5e   KAMEZAWA Hiroyuki   memcg: fix LRU ac...
1309

7a81b88cb   KAMEZAWA Hiroyuki   memcg: introduce ...
1310
  }
83aae4c73   Daisuke Nishimura   memcg: cleanup ca...
1311
1312
1313
1314
1315
  void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
  {
  	__mem_cgroup_commit_charge_swapin(page, ptr,
  					MEM_CGROUP_CHARGE_TYPE_MAPPED);
  }
7a81b88cb   KAMEZAWA Hiroyuki   memcg: introduce ...
1316
1317
  void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
  {
f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1318
  	if (mem_cgroup_disabled())
7a81b88cb   KAMEZAWA Hiroyuki   memcg: introduce ...
1319
1320
1321
1322
  		return;
  	if (!mem)
  		return;
  	res_counter_uncharge(&mem->res, PAGE_SIZE);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1323
1324
  	if (do_swap_account)
  		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
7a81b88cb   KAMEZAWA Hiroyuki   memcg: introduce ...
1325
1326
  	css_put(&mem->css);
  }
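  /*
   * Usage sketch (illustrative only; everything except the mem_cgroup_*
   * calls is a placeholder for what the swap-in fault path does):
   *
   *	struct mem_cgroup *ptr = NULL;
   *
   *	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr))
   *		goto out_oom;				(over limit)
   *	... take the page table lock, pte_same() check, map the page ...
   *	if (mapping_failed)
   *		mem_cgroup_cancel_charge_swapin(ptr);	(drops charge + ref)
   *	else
   *		mem_cgroup_commit_charge_swapin(page, ptr); (consumes the ref)
   */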
8697d3319   Balbir Singh   Memory controller...
1327
  /*
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1328
   * uncharge if !page_mapped(page)
8a9f3ccd2   Balbir Singh   Memory controller...
1329
   */
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1330
  static struct mem_cgroup *
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1331
  __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
8a9f3ccd2   Balbir Singh   Memory controller...
1332
  {
8289546e5   Hugh Dickins   memcg: remove mem...
1333
  	struct page_cgroup *pc;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1334
  	struct mem_cgroup *mem = NULL;
072c56c13   KAMEZAWA Hiroyuki   per-zone and recl...
1335
  	struct mem_cgroup_per_zone *mz;
8a9f3ccd2   Balbir Singh   Memory controller...
1336

f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1337
  	if (mem_cgroup_disabled())
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1338
  		return NULL;
4077960e2   Balbir Singh   memory controller...
1339

d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1340
  	if (PageSwapCache(page))
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1341
  		return NULL;
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1342

8697d3319   Balbir Singh   Memory controller...
1343
  	/*
3c541e14b   Balbir Singh   Memory controller...
1344
  	 * Check if our page_cgroup is valid
8697d3319   Balbir Singh   Memory controller...
1345
  	 */
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1346
1347
  	pc = lookup_page_cgroup(page);
  	if (unlikely(!pc || !PageCgroupUsed(pc)))
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1348
  		return NULL;
b9c565d5a   Hugh Dickins   memcg: remove cle...
1349

52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1350
  	lock_page_cgroup(pc);
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1351

8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1352
  	mem = pc->mem_cgroup;
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
  	if (!PageCgroupUsed(pc))
  		goto unlock_out;
  
  	switch (ctype) {
  	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
  		if (page_mapped(page))
  			goto unlock_out;
  		break;
  	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
  		if (!PageAnon(page)) {	/* Shared memory */
  			if (page->mapping && !page_is_file_cache(page))
  				goto unlock_out;
  		} else if (page_mapped(page)) /* Anon */
  				goto unlock_out;
  		break;
  	default:
  		break;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1370
  	}
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1371

8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1372
1373
1374
  	res_counter_uncharge(&mem->res, PAGE_SIZE);
  	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
  		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1375
  	mem_cgroup_charge_statistics(mem, pc, false);
04046e1a0   KAMEZAWA Hiroyuki   memcg: use CSS ID
1376

52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1377
  	ClearPageCgroupUsed(pc);
544122e5e   KAMEZAWA Hiroyuki   memcg: fix LRU ac...
1378
1379
1380
1381
1382
1383
  	/*
  	 * pc->mem_cgroup is not cleared here. It will still be accessed when
  	 * the page is freed from the LRU. This is safe because an uncharged
  	 * page is not expected to be reused (it is freed soon). The exception
  	 * is SwapCache, which is handled by special functions.
  	 */
b9c565d5a   Hugh Dickins   memcg: remove cle...
1384

69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1385
  	mz = page_cgroup_zoneinfo(pc);
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1386
  	unlock_page_cgroup(pc);
fb59e9f1e   Hugh Dickins   memcg: fix oops o...
1387

a7fe942e9   KAMEZAWA Hiroyuki   memcg: swapout re...
1388
1389
1390
  	/* at swapout, this memcg will still be accessed to record swap info */
  	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
  		css_put(&mem->css);
6d12e2d8d   KAMEZAWA Hiroyuki   per-zone and recl...
1391

8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1392
  	return mem;
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1393
1394
1395
  
  unlock_out:
  	unlock_page_cgroup(pc);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1396
  	return NULL;
3c541e14b   Balbir Singh   Memory controller...
1397
  }
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1398
1399
  void mem_cgroup_uncharge_page(struct page *page)
  {
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1400
1401
1402
1403
1404
  	/* early check. */
  	if (page_mapped(page))
  		return;
  	if (page->mapping && !PageAnon(page))
  		return;
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1405
1406
1407
1408
1409
1410
  	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
  }
  
  void mem_cgroup_uncharge_cache_page(struct page *page)
  {
  	VM_BUG_ON(page_mapped(page));
b7abea963   KAMEZAWA Hiroyuki   memcg: make page-...
1411
  	VM_BUG_ON(page->mapping);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1412
1413
  	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
  }
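  /*
   * Summary (descriptive, added for clarity): the uncharge entry points
   * map onto the charge types handled above as follows:
   *	mem_cgroup_uncharge_page()	 MAPPED, anon pages at last unmap
   *	mem_cgroup_uncharge_cache_page() CACHE, removal from page cache
   *	mem_cgroup_uncharge_swapcache()	 SWAPOUT, after __delete_from_swap_cache()
   */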
e767e0561   Daisuke Nishimura   memcg: fix deadlo...
1414
  #ifdef CONFIG_SWAP
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1415
  /*
e767e0561   Daisuke Nishimura   memcg: fix deadlo...
1416
   * Called after __delete_from_swap_cache() to drop the "page" account;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
   * the memcg information is recorded in the swap_cgroup of "ent".
   */
  void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
  {
  	struct mem_cgroup *memcg;
  
  	memcg = __mem_cgroup_uncharge_common(page,
  					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
  	/* record memcg information */
  	if (do_swap_account && memcg) {
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1427
  		swap_cgroup_record(ent, css_id(&memcg->css));
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1428
1429
  		mem_cgroup_get(memcg);
  	}
a7fe942e9   KAMEZAWA Hiroyuki   memcg: swapout re...
1430
1431
  	if (memcg)
  		css_put(&memcg->css);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1432
  }
e767e0561   Daisuke Nishimura   memcg: fix deadlo...
1433
  #endif
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1434
1435
1436
1437
1438
1439
1440
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  /*
   * called from swap_entry_free(). remove record in swap_cgroup and
   * uncharge "memsw" account.
   */
  void mem_cgroup_uncharge_swap(swp_entry_t ent)
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1441
  {
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1442
  	struct mem_cgroup *memcg;
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1443
  	unsigned short id;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1444
1445
1446
  
  	if (!do_swap_account)
  		return;
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1447
1448
1449
  	id = swap_cgroup_record(ent, 0);
  	rcu_read_lock();
  	memcg = mem_cgroup_lookup(id);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1450
  	if (memcg) {
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1451
1452
1453
1454
  		/*
  		 * We uncharge this because swap is freed.
  		 * This memcg can be obsolete one. We avoid calling css_tryget
  		 */
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1455
1456
1457
  		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
  		mem_cgroup_put(memcg);
  	}
a3b2d6926   KAMEZAWA Hiroyuki   cgroups: use css ...
1458
  	rcu_read_unlock();
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1459
  }
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1460
  #endif
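  /*
   * Lifecycle note (descriptive, added for clarity): at swap-out,
   * __mem_cgroup_uncharge_common() keeps both the memsw charge and the css
   * reference for the SWAPOUT case. mem_cgroup_uncharge_swapcache() then
   * records the css_id in swap_cgroup, takes a mem_cgroup_get() reference
   * and drops the css reference; mem_cgroup_uncharge_swap() finally
   * uncharges memsw and drops that reference when the swap entry is freed.
   */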
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1461

ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
1462
  /*
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1463
1464
   * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
   * page belongs to.
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
1465
   */
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1466
  int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
1467
1468
  {
  	struct page_cgroup *pc;
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1469
  	struct mem_cgroup *mem = NULL;
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1470
  	int ret = 0;
8869b8f6e   Hugh Dickins   memcg: memcontrol...
1471

f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1472
  	if (mem_cgroup_disabled())
4077960e2   Balbir Singh   memory controller...
1473
  		return 0;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1474
1475
1476
  	pc = lookup_page_cgroup(page);
  	lock_page_cgroup(pc);
  	if (PageCgroupUsed(pc)) {
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1477
1478
  		mem = pc->mem_cgroup;
  		css_get(&mem->css);
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1479
  	}
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1480
  	unlock_page_cgroup(pc);
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1481

e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1482
  	if (mem) {
3bb4edf24   Daisuke Nishimura   memcg: don't trig...
1483
  		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1484
1485
  		css_put(&mem->css);
  	}
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1486
  	*ptr = mem;
e8589cc18   KAMEZAWA Hiroyuki   memcg: better mig...
1487
  	return ret;
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
1488
  }
8869b8f6e   Hugh Dickins   memcg: memcontrol...
1489

69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1490
  /* remove redundant charge if migration failed */
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1491
1492
  void mem_cgroup_end_migration(struct mem_cgroup *mem,
  		struct page *oldpage, struct page *newpage)
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
1493
  {
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
  	struct page *target, *unused;
  	struct page_cgroup *pc;
  	enum charge_type ctype;
  
  	if (!mem)
  		return;
  
  	/* at migration success, oldpage->mapping is NULL. */
  	if (oldpage->mapping) {
  		target = oldpage;
  		unused = NULL;
  	} else {
  		target = newpage;
  		unused = oldpage;
  	}
  
  	if (PageAnon(target))
  		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
  	else if (page_is_file_cache(target))
  		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
  	else
  		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
  
  	/* unused page is not on radix-tree now. */
d13d14430   KAMEZAWA Hiroyuki   memcg: handle swa...
1518
  	if (unused)
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1519
1520
1521
  		__mem_cgroup_uncharge_common(unused, ctype);
  
  	pc = lookup_page_cgroup(target);
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1522
  	/*
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
  	 * __mem_cgroup_commit_charge() checks the PCG_USED bit of page_cgroup,
  	 * so double-counting is effectively avoided.
  	 */
  	__mem_cgroup_commit_charge(mem, pc, ctype);
  
  	/*
  	 * Both oldpage and newpage are still under lock_page(), so we don't
  	 * have to worry about races in the radix-tree. But we do have to be
  	 * careful about whether this page is still mapped.
  	 *
  	 * !page_mapped() can happen: at the start of migration oldpage was
  	 * mapped, but by now it may have been zapped. We do know the *target*
  	 * page is not freed/reused under us, and
  	 * mem_cgroup_uncharge_page() does all the necessary checks.
69029cd55   KAMEZAWA Hiroyuki   memcg: remove ref...
1537
  	 */
01b1ae63c   KAMEZAWA Hiroyuki   memcg: simple mig...
1538
1539
  	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
  		mem_cgroup_uncharge_page(target);
ae41be374   KAMEZAWA Hiroyuki   bugfix for memory...
1540
  }
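  /*
   * Usage sketch (illustrative only; names other than the mem_cgroup_*
   * calls are placeholders for the page-migration caller):
   *
   *	struct mem_cgroup *mem = NULL;
   *
   *	if (mem_cgroup_prepare_migration(oldpage, &mem))
   *		goto out;		(could not charge against old memcg)
   *	... try to migrate oldpage to newpage; may succeed or fail ...
   *	mem_cgroup_end_migration(mem, oldpage, newpage);
   *		(commits the charge to whichever page survived and
   *		 uncharges the unused one)
   */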
78fb74669   Pavel Emelianov   Memory controller...
1541

cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1542
  /*
ae3abae64   Daisuke Nishimura   memcg: fix mem_cg...
1543
1544
1545
1546
1547
1548
   * A call to try to shrink memory usage on charge failure at shmem's swapin.
   * Calling hierarchical_reclaim is not enough because we should also update
   * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
   * Moreover, considering hierarchy, we should reclaim from the mem_over_limit,
   * not from the memcg this page would be charged to.
   * try_charge_swapin does all of this work properly.
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1549
   */
ae3abae64   Daisuke Nishimura   memcg: fix mem_cg...
1550
  int mem_cgroup_shmem_charge_fallback(struct page *page,
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1551
1552
  			    struct mm_struct *mm,
  			    gfp_t gfp_mask)
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1553
  {
b5a84319a   KAMEZAWA Hiroyuki   memcg: fix shmem'...
1554
  	struct mem_cgroup *mem = NULL;
ae3abae64   Daisuke Nishimura   memcg: fix mem_cg...
1555
  	int ret;
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1556

f8d665422   Hirokazu Takahashi   memcg: add mem_cg...
1557
  	if (mem_cgroup_disabled())
cede86acd   Li Zefan   memcg: clean up c...
1558
  		return 0;
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1559

ae3abae64   Daisuke Nishimura   memcg: fix mem_cg...
1560
1561
1562
  	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
  	if (!ret)
  		mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1563

ae3abae64   Daisuke Nishimura   memcg: fix mem_cg...
1564
  	return ret;
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1565
  }
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1566
  static DEFINE_MUTEX(set_limit_mutex);
d38d2a758   KOSAKI Motohiro   mm: make mem_cgro...
1567
  static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1568
  				unsigned long long val)
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1569
  {
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1570
  	int retry_count;
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1571
  	int progress;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1572
  	u64 memswlimit;
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1573
  	int ret = 0;
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
  	int children = mem_cgroup_count_children(memcg);
  	u64 curusage, oldusage;
  
  	/*
  	 * To keep hierarchical_reclaim simple, how long we should retry
  	 * depends on the caller. We set our retry count to be a function
  	 * of the number of children we should visit in this loop.
  	 */
  	retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
  
  	oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1585

8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1586
  	while (retry_count) {
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1587
1588
1589
1590
  		if (signal_pending(current)) {
  			ret = -EINTR;
  			break;
  		}
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
  		/*
  		 * Rather than hiding this in some helper, do it open-coded so
  		 * it is clear what really happens. We have to guarantee
  		 * mem->res.limit <= mem->memsw.limit.
  		 */
  		mutex_lock(&set_limit_mutex);
  		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
  		if (memswlimit < val) {
  			ret = -EINVAL;
  			mutex_unlock(&set_limit_mutex);
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1601
1602
  			break;
  		}
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1603
1604
1605
1606
1607
  		ret = res_counter_set_limit(&memcg->res, val);
  		mutex_unlock(&set_limit_mutex);
  
  		if (!ret)
  			break;
42e9abb62   Daisuke Nishimura   memcg: change try...
1608
  		progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1609
1610
1611
1612
1613
1614
1615
  						   false, true);
  		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
  		/* Was usage reduced? */
  		if (curusage >= oldusage)
  			retry_count--;
  		else
  			oldusage = curusage;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1616
  	}
14797e236   KOSAKI Motohiro   memcg: add inacti...
1617

8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1618
1619
1620
1621
1622
1623
  	return ret;
  }
  
  int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
  				unsigned long long val)
  {
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1624
  	int retry_count;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1625
  	u64 memlimit, oldusage, curusage;
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1626
1627
  	int children = mem_cgroup_count_children(memcg);
  	int ret = -EBUSY;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1628
1629
1630
  
  	if (!do_swap_account)
  		return -EINVAL;
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1631
1632
1633
  	/* see mem_cgroup_resize_limit */
  	retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
  	oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
  	while (retry_count) {
  		if (signal_pending(current)) {
  			ret = -EINTR;
  			break;
  		}
  		/*
  		 * Rather than hiding this in some helper, do it open-coded so
  		 * it is clear what really happens. We have to guarantee
  		 * mem->res.limit <= mem->memsw.limit.
  		 */
  		mutex_lock(&set_limit_mutex);
  		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
  		if (memlimit > val) {
  			ret = -EINVAL;
  			mutex_unlock(&set_limit_mutex);
  			break;
  		}
  		ret = res_counter_set_limit(&memcg->memsw, val);
  		mutex_unlock(&set_limit_mutex);
  
  		if (!ret)
  			break;
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1656
  		mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1657
  		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1658
  	/* Was usage reduced? */
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1659
  		if (curusage >= oldusage)
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1660
  			retry_count--;
81d39c20f   KAMEZAWA Hiroyuki   memcg: fix shrink...
1661
1662
  		else
  			oldusage = curusage;
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1663
1664
1665
  	}
  	return ret;
  }
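  /*
   * Ordering note with an example (descriptive; the values are only
   * illustrative): the two resize helpers above enforce
   * mem->res.limit <= mem->memsw.limit, so when both limits are changed
   * from user space the order matters:
   *
   *	raising both:	echo 2G > memory.memsw.limit_in_bytes
   *			echo 1G > memory.limit_in_bytes
   *	lowering both:	echo 256M > memory.limit_in_bytes
   *			echo 512M > memory.memsw.limit_in_bytes
   */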
c9b0ed514   KAMEZAWA Hiroyuki   memcg: helper fun...
1666
  /*
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1667
   * This routine traverses the page_cgroups on the given list and drops them all.
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1668
1669
   * *And* this routine doesn't reclaim the pages themselves, it just removes
   * their page_cgroups.
   */
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1670
  static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1671
  				int node, int zid, enum lru_list lru)
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1672
  {
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1673
1674
  	struct zone *zone;
  	struct mem_cgroup_per_zone *mz;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1675
  	struct page_cgroup *pc, *busy;
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1676
  	unsigned long flags, loop;
072c56c13   KAMEZAWA Hiroyuki   per-zone and recl...
1677
  	struct list_head *list;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1678
  	int ret = 0;
072c56c13   KAMEZAWA Hiroyuki   per-zone and recl...
1679

08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1680
1681
  	zone = &NODE_DATA(node)->node_zones[zid];
  	mz = mem_cgroup_zoneinfo(mem, node, zid);
b69408e88   Christoph Lameter   vmscan: Use an in...
1682
  	list = &mz->lists[lru];
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1683

f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1684
1685
1686
1687
1688
1689
  	loop = MEM_CGROUP_ZSTAT(mz, lru);
  	/* give some margin against -EBUSY etc... */
  	loop += 256;
  	busy = NULL;
  	while (loop--) {
  		ret = 0;
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1690
  		spin_lock_irqsave(&zone->lru_lock, flags);
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1691
  		if (list_empty(list)) {
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1692
  			spin_unlock_irqrestore(&zone->lru_lock, flags);
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1693
  			break;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1694
1695
1696
1697
1698
  		}
  		pc = list_entry(list->prev, struct page_cgroup, lru);
  		if (busy == pc) {
  			list_move(&pc->lru, list);
  			busy = NULL;
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1699
  			spin_unlock_irqrestore(&zone->lru_lock, flags);
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1700
1701
  			continue;
  		}
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1702
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1703

2c26fdd70   KAMEZAWA Hiroyuki   memcg: revert gfp...
1704
  		ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL);
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1705
  		if (ret == -ENOMEM)
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1706
  			break;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1707
1708
1709
1710
1711
1712
1713
  
  		if (ret == -EBUSY || ret == -EINVAL) {
  			/* found lock contention or "pc" is obsolete. */
  			busy = pc;
  			cond_resched();
  		} else
  			busy = NULL;
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1714
  	}
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1715

f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1716
1717
1718
  	if (!ret && !list_empty(list))
  		return -EBUSY;
  	return ret;
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1719
1720
1721
1722
1723
1724
  }
  
  /*
   * Make the mem_cgroup's charge 0 if there is no task.
   * This enables deleting this mem_cgroup.
   */
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1725
  static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1726
  {
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1727
1728
1729
  	int ret;
  	int node, zid, shrink;
  	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1730
  	struct cgroup *cgrp = mem->css.cgroup;
8869b8f6e   Hugh Dickins   memcg: memcontrol...
1731

cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1732
  	css_get(&mem->css);
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1733
1734
  
  	shrink = 0;
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1735
1736
1737
  	/* should free all ? */
  	if (free_all)
  		goto try_to_free;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1738
  move_account:
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
1739
  	while (mem->res.usage > 0) {
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1740
  		ret = -EBUSY;
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1741
1742
1743
1744
  		if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
  			goto out;
  		ret = -EINTR;
  		if (signal_pending(current))
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1745
  			goto out;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1746
1747
  		/* This is for making sure all *used* pages are on an LRU list. */
  		lru_add_drain_all();
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1748
  		ret = 0;
299b4eaa3   KAMEZAWA Hiroyuki   memcg: NULL point...
1749
  		for_each_node_state(node, N_HIGH_MEMORY) {
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1750
  			for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
b69408e88   Christoph Lameter   vmscan: Use an in...
1751
  				enum lru_list l;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1752
1753
  				for_each_lru(l) {
  					ret = mem_cgroup_force_empty_list(mem,
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1754
  							node, zid, l);
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1755
1756
1757
  					if (ret)
  						break;
  				}
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
1758
  			}
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1759
1760
1761
1762
1763
1764
  			if (ret)
  				break;
  		}
  		/* it seems parent cgroup doesn't have enough mem */
  		if (ret == -ENOMEM)
  			goto try_to_free;
52d4b9ac0   KAMEZAWA Hiroyuki   memcg: allocate a...
1765
  		cond_resched();
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1766
1767
1768
1769
1770
  	}
  	ret = 0;
  out:
  	css_put(&mem->css);
  	return ret;
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1771
1772
  
  try_to_free:
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1773
1774
  	/* returns -EBUSY if there is a task or if we come here twice. */
  	if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1775
1776
1777
  		ret = -EBUSY;
  		goto out;
  	}
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1778
1779
  	/* we call try-to-free pages to make this cgroup empty */
  	lru_add_drain_all();
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1780
1781
1782
1783
  	/* try to free all pages in this cgroup */
  	shrink = 1;
  	while (nr_retries && mem->res.usage > 0) {
  		int progress;
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1784
1785
1786
1787
1788
  
  		if (signal_pending(current)) {
  			ret = -EINTR;
  			goto out;
  		}
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
1789
1790
  		progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
  						false, get_swappiness(mem));
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1791
  		if (!progress) {
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1792
  			nr_retries--;
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1793
1794
1795
  			/* maybe some writeback is necessary */
  			congestion_wait(WRITE, HZ/10);
  		}
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1796
1797
  
  	}
08e552c69   KAMEZAWA Hiroyuki   memcg: synchroniz...
1798
  	lru_add_drain();
f817ed485   KAMEZAWA Hiroyuki   memcg: move all a...
1799
1800
1801
1802
1803
  	/* try move_account...there may be some *locked* pages. */
  	if (mem->res.usage)
  		goto move_account;
  	ret = 0;
  	goto out;
cc8475822   KAMEZAWA Hiroyuki   memory cgroup enh...
1804
  }
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
1805
1806
1807
1808
  int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
  {
  	return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
  }
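  /*
   * Usage sketch (illustrative): "force_empty" is registered below as a
   * trigger file, so writing to it (conventionally "echo 0") drains the
   * group's charges; it returns -EBUSY while tasks or child groups are
   * still attached:
   *
   *	echo 0 > memory.force_empty
   */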
18f59ea7d   Balbir Singh   memcg: memory cgr...
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
  static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
  {
  	return mem_cgroup_from_cont(cont)->use_hierarchy;
  }
  
  static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
  					u64 val)
  {
  	int retval = 0;
  	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
  	struct cgroup *parent = cont->parent;
  	struct mem_cgroup *parent_mem = NULL;
  
  	if (parent)
  		parent_mem = mem_cgroup_from_cont(parent);
  
  	cgroup_lock();
  	/*
  	 * If the parent's use_hierarchy is set, we can't make any modifications
  	 * in the child subtrees. If it is unset, then the change can
  	 * occur, provided the current cgroup has no children.
  	 *
  	 * For the root cgroup, parent_mem is NULL; we allow the value to be
  	 * set if there are no children.
  	 */
  	if ((!parent_mem || !parent_mem->use_hierarchy) &&
  				(val == 1 || val == 0)) {
  		if (list_empty(&cont->children))
  			mem->use_hierarchy = val;
  		else
  			retval = -EBUSY;
  	} else
  		retval = -EINVAL;
  	cgroup_unlock();
  
  	return retval;
  }
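  /*
   * Usage sketch (illustrative; the mount point is an assumption): because
   * of the checks above, use_hierarchy can only be flipped while the group
   * has no children and its parent has not already enabled hierarchy, so
   * it is typically set right after the group is created:
   *
   *	mkdir /cgroup/memory/A
   *	echo 1 > /cgroup/memory/A/memory.use_hierarchy
   *	mkdir /cgroup/memory/A/B	(B now accounts into A as well)
   */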
2c3daa722   Paul Menage   CGroup API files:...
1846
  static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
8cdea7c05   Balbir Singh   Memory controller...
1847
  {
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
  	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
  	u64 val = 0;
  	int type, name;
  
  	type = MEMFILE_TYPE(cft->private);
  	name = MEMFILE_ATTR(cft->private);
  	switch (type) {
  	case _MEM:
  		val = res_counter_read_u64(&mem->res, name);
  		break;
  	case _MEMSWAP:
  		if (do_swap_account)
  			val = res_counter_read_u64(&mem->memsw, name);
  		break;
  	default:
  		BUG();
  		break;
  	}
  	return val;
8cdea7c05   Balbir Singh   Memory controller...
1867
  }
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1868
1869
1870
1871
  /*
   * This function is only used for the RES_LIMIT files
   * (limit_in_bytes and memsw.limit_in_bytes).
   */
856c13aa1   Paul Menage   cgroup files: con...
1872
1873
  static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
  			    const char *buffer)
8cdea7c05   Balbir Singh   Memory controller...
1874
  {
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1875
  	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1876
  	int type, name;
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1877
1878
  	unsigned long long val;
  	int ret;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1879
1880
1881
  	type = MEMFILE_TYPE(cft->private);
  	name = MEMFILE_ATTR(cft->private);
  	switch (name) {
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1882
1883
1884
  	case RES_LIMIT:
  		/* This function does all the necessary parsing; reuse it. */
  		ret = res_counter_memparse_write_strategy(buffer, &val);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1885
1886
1887
  		if (ret)
  			break;
  		if (type == _MEM)
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1888
  			ret = mem_cgroup_resize_limit(memcg, val);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1889
1890
  		else
  			ret = mem_cgroup_resize_memsw_limit(memcg, val);
628f42355   KAMEZAWA Hiroyuki   memcg: limit chan...
1891
1892
1893
1894
1895
1896
  		break;
  	default:
  		ret = -EINVAL; /* should be BUG() ? */
  		break;
  	}
  	return ret;
8cdea7c05   Balbir Singh   Memory controller...
1897
  }
fee7b548e   KAMEZAWA Hiroyuki   memcg: show real ...
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
  static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
  		unsigned long long *mem_limit, unsigned long long *memsw_limit)
  {
  	struct cgroup *cgroup;
  	unsigned long long min_limit, min_memsw_limit, tmp;
  
  	min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
  	min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
  	cgroup = memcg->css.cgroup;
  	if (!memcg->use_hierarchy)
  		goto out;
  
  	while (cgroup->parent) {
  		cgroup = cgroup->parent;
  		memcg = mem_cgroup_from_cont(cgroup);
  		if (!memcg->use_hierarchy)
  			break;
  		tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
  		min_limit = min(min_limit, tmp);
  		tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
  		min_memsw_limit = min(min_memsw_limit, tmp);
  	}
  out:
  	*mem_limit = min_limit;
  	*memsw_limit = min_memsw_limit;
  	return;
  }
29f2a4dac   Pavel Emelyanov   memcgroup: implem...
1925
  static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
c84872e16   Pavel Emelyanov   memcgroup: add th...
1926
1927
  {
  	struct mem_cgroup *mem;
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1928
  	int type, name;
c84872e16   Pavel Emelyanov   memcgroup: add th...
1929
1930
  
  	mem = mem_cgroup_from_cont(cont);
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1931
1932
1933
  	type = MEMFILE_TYPE(event);
  	name = MEMFILE_ATTR(event);
  	switch (name) {
29f2a4dac   Pavel Emelyanov   memcgroup: implem...
1934
  	case RES_MAX_USAGE:
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1935
1936
1937
1938
  		if (type == _MEM)
  			res_counter_reset_max(&mem->res);
  		else
  			res_counter_reset_max(&mem->memsw);
29f2a4dac   Pavel Emelyanov   memcgroup: implem...
1939
1940
  		break;
  	case RES_FAILCNT:
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
1941
1942
1943
1944
  		if (type == _MEM)
  			res_counter_reset_failcnt(&mem->res);
  		else
  			res_counter_reset_failcnt(&mem->memsw);
29f2a4dac   Pavel Emelyanov   memcgroup: implem...
1945
1946
  		break;
  	}
85cc59db1   Pavel Emelyanov   memcgroup: use tr...
1947
  	return 0;
c84872e16   Pavel Emelyanov   memcgroup: add th...
1948
  }
14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
  
  /* For reading statistics */
  enum {
  	MCS_CACHE,
  	MCS_RSS,
  	MCS_PGPGIN,
  	MCS_PGPGOUT,
  	MCS_INACTIVE_ANON,
  	MCS_ACTIVE_ANON,
  	MCS_INACTIVE_FILE,
  	MCS_ACTIVE_FILE,
  	MCS_UNEVICTABLE,
  	NR_MCS_STAT,
  };
  
  struct mcs_total_stat {
  	s64 stat[NR_MCS_STAT];
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
1966
  };
14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
  struct {
  	char *local_name;
  	char *total_name;
  } memcg_stat_strings[NR_MCS_STAT] = {
  	{"cache", "total_cache"},
  	{"rss", "total_rss"},
  	{"pgpgin", "total_pgpgin"},
  	{"pgpgout", "total_pgpgout"},
  	{"inactive_anon", "total_inactive_anon"},
  	{"active_anon", "total_active_anon"},
  	{"inactive_file", "total_inactive_file"},
  	{"active_file", "total_active_file"},
  	{"unevictable", "total_unevictable"}
  };
  
  
  static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
  {
  	struct mcs_total_stat *s = data;
  	s64 val;
  
  	/* per cpu stat */
  	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);
  	s->stat[MCS_CACHE] += val * PAGE_SIZE;
  	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
  	s->stat[MCS_RSS] += val * PAGE_SIZE;
  	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
  	s->stat[MCS_PGPGIN] += val;
  	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
  	s->stat[MCS_PGPGOUT] += val;
  
  	/* per zone stat */
  	val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
  	s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;
  	val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_ANON);
  	s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;
  	val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_FILE);
  	s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;
  	val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_FILE);
  	s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
  	val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
  	s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
  	return 0;
  }
  
  static void
  mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
  {
  	mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat);
  }
c64745cf0   Paul Menage   CGroup API files:...
2017
2018
  static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
  				 struct cgroup_map_cb *cb)
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2019
  {
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2020
  	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
2021
  	struct mcs_total_stat mystat;
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2022
  	int i;
14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
2023
2024
  	memset(&mystat, 0, sizeof(mystat));
  	mem_cgroup_get_local_stat(mem_cont, &mystat);
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2025

14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
2026
2027
  	for (i = 0; i < NR_MCS_STAT; i++)
  		cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
7b854121e   Lee Schermerhorn   Unevictable LRU P...
2028

14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
2029
  	/* Hierarchical information */
fee7b548e   KAMEZAWA Hiroyuki   memcg: show real ...
2030
2031
2032
2033
2034
2035
2036
  	{
  		unsigned long long limit, memsw_limit;
  		memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit);
  		cb->fill(cb, "hierarchical_memory_limit", limit);
  		if (do_swap_account)
  			cb->fill(cb, "hierarchical_memsw_limit", memsw_limit);
  	}
7f016ee8b   KOSAKI Motohiro   memcg: show recla...
2037

14067bb3e   KAMEZAWA Hiroyuki   memcg: hierarchic...
2038
2039
2040
2041
  	memset(&mystat, 0, sizeof(mystat));
  	mem_cgroup_get_total_stat(mem_cont, &mystat);
  	for (i = 0; i < NR_MCS_STAT; i++)
  		cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
7f016ee8b   KOSAKI Motohiro   memcg: show recla...
2042
  #ifdef CONFIG_DEBUG_VM
c772be939   KOSAKI Motohiro   memcg: fix calcul...
2043
  	cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
7f016ee8b   KOSAKI Motohiro   memcg: show recla...
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
  
  	{
  		int nid, zid;
  		struct mem_cgroup_per_zone *mz;
  		unsigned long recent_rotated[2] = {0, 0};
  		unsigned long recent_scanned[2] = {0, 0};
  
  		for_each_online_node(nid)
  			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
  				mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
  
  				recent_rotated[0] +=
  					mz->reclaim_stat.recent_rotated[0];
  				recent_rotated[1] +=
  					mz->reclaim_stat.recent_rotated[1];
  				recent_scanned[0] +=
  					mz->reclaim_stat.recent_scanned[0];
  				recent_scanned[1] +=
  					mz->reclaim_stat.recent_scanned[1];
  			}
  		cb->fill(cb, "recent_rotated_anon", recent_rotated[0]);
  		cb->fill(cb, "recent_rotated_file", recent_rotated[1]);
  		cb->fill(cb, "recent_scanned_anon", recent_scanned[0]);
  		cb->fill(cb, "recent_scanned_file", recent_scanned[1]);
  	}
  #endif
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2070
2071
  	return 0;
  }
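  /*
   * Output sketch (illustrative; the numbers are made up): reading
   * memory.stat prints the local counters, then the hierarchical limits,
   * then the "total_*" counters accumulated over the subtree:
   *
   *	cache 1048576
   *	rss 2097152
   *	pgpgin 1024
   *	pgpgout 256
   *	...
   *	hierarchical_memory_limit 268435456
   *	total_cache 4194304
   *	total_rss 8388608
   *	...
   */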
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
  static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
  {
  	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
  
  	return get_swappiness(memcg);
  }
  
  static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
  				       u64 val)
  {
  	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
  	struct mem_cgroup *parent;
068b38c1f   Li Zefan   memcg: fix a race...
2084

a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2085
2086
2087
2088
2089
2090
2091
  	if (val > 100)
  		return -EINVAL;
  
  	if (cgrp->parent == NULL)
  		return -EINVAL;
  
  	parent = mem_cgroup_from_cont(cgrp->parent);
068b38c1f   Li Zefan   memcg: fix a race...
2092
2093
  
  	cgroup_lock();
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2094
2095
  	/* If under hierarchy, only empty-root can set this value */
  	if ((parent->use_hierarchy) ||
068b38c1f   Li Zefan   memcg: fix a race...
2096
2097
  	    (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
  		cgroup_unlock();
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2098
  		return -EINVAL;
068b38c1f   Li Zefan   memcg: fix a race...
2099
  	}
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2100
2101
2102
2103
  
  	spin_lock(&memcg->reclaim_param_lock);
  	memcg->swappiness = val;
  	spin_unlock(&memcg->reclaim_param_lock);
068b38c1f   Li Zefan   memcg: fix a race...
2104
  	cgroup_unlock();
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2105
2106
  	return 0;
  }
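  /*
   * Usage sketch (illustrative): per the checks above, memory.swappiness
   * accepts values 0..100, cannot be set on the root group, and cannot be
   * changed for a group that is under a hierarchy or is a hierarchy root
   * that already has children:
   *
   *	echo 10 > memory.swappiness	(bias reclaim away from anon pages)
   */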
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
2107

8cdea7c05   Balbir Singh   Memory controller...
2108
2109
  static struct cftype mem_cgroup_files[] = {
  	{
0eea10301   Balbir Singh   Memory controller...
2110
  		.name = "usage_in_bytes",
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
2111
  		.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
2c3daa722   Paul Menage   CGroup API files:...
2112
  		.read_u64 = mem_cgroup_read,
8cdea7c05   Balbir Singh   Memory controller...
2113
2114
  	},
  	{
c84872e16   Pavel Emelyanov   memcgroup: add th...
2115
  		.name = "max_usage_in_bytes",
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
2116
  		.private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
29f2a4dac   Pavel Emelyanov   memcgroup: implem...
2117
  		.trigger = mem_cgroup_reset,
c84872e16   Pavel Emelyanov   memcgroup: add th...
2118
2119
2120
  		.read_u64 = mem_cgroup_read,
  	},
  	{
0eea10301   Balbir Singh   Memory controller...
2121
  		.name = "limit_in_bytes",
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
2122
  		.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
856c13aa1   Paul Menage   cgroup files: con...
2123
  		.write_string = mem_cgroup_write,
2c3daa722   Paul Menage   CGroup API files:...
2124
  		.read_u64 = mem_cgroup_read,
8cdea7c05   Balbir Singh   Memory controller...
2125
2126
2127
  	},
  	{
  		.name = "failcnt",
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
2128
  		.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
29f2a4dac   Pavel Emelyanov   memcgroup: implem...
2129
  		.trigger = mem_cgroup_reset,
2c3daa722   Paul Menage   CGroup API files:...
2130
  		.read_u64 = mem_cgroup_read,
8cdea7c05   Balbir Singh   Memory controller...
2131
  	},
8697d3319   Balbir Singh   Memory controller...
2132
  	{
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2133
  		.name = "stat",
c64745cf0   Paul Menage   CGroup API files:...
2134
  		.read_map = mem_control_stat_show,
d2ceb9b7d   KAMEZAWA Hiroyuki   memory cgroup enh...
2135
  	},
c1e862c1f   KAMEZAWA Hiroyuki   memcg: new force_...
2136
2137
2138
2139
  	{
  		.name = "force_empty",
  		.trigger = mem_cgroup_force_empty_write,
  	},
18f59ea7d   Balbir Singh   memcg: memory cgr...
2140
2141
2142
2143
2144
  	{
  		.name = "use_hierarchy",
  		.write_u64 = mem_cgroup_hierarchy_write,
  		.read_u64 = mem_cgroup_hierarchy_read,
  	},
a7885eb8a   KOSAKI Motohiro   memcg: swappiness
2145
2146
2147
2148
2149
  	{
  		.name = "swappiness",
  		.read_u64 = mem_cgroup_swappiness_read,
  		.write_u64 = mem_cgroup_swappiness_write,
  	},
8cdea7c05   Balbir Singh   Memory controller...
2150
  };
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  static struct cftype memsw_cgroup_files[] = {
  	{
  		.name = "memsw.usage_in_bytes",
  		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
  		.read_u64 = mem_cgroup_read,
  	},
  	{
  		.name = "memsw.max_usage_in_bytes",
  		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
  		.trigger = mem_cgroup_reset,
  		.read_u64 = mem_cgroup_read,
  	},
  	{
  		.name = "memsw.limit_in_bytes",
  		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
  		.write_string = mem_cgroup_write,
  		.read_u64 = mem_cgroup_read,
  	},
  	{
  		.name = "memsw.failcnt",
  		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
  		.trigger = mem_cgroup_reset,
  		.read_u64 = mem_cgroup_read,
  	},
  };
  
  static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
  {
  	if (!do_swap_account)
  		return 0;
  	return cgroup_add_files(cont, ss, memsw_cgroup_files,
  				ARRAY_SIZE(memsw_cgroup_files));
  };
  #else
  static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
  {
  	return 0;
  }
  #endif
6d12e2d8d   KAMEZAWA Hiroyuki   per-zone and recl...
2191
2192
2193
  static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
  {
  	struct mem_cgroup_per_node *pn;
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
2194
  	struct mem_cgroup_per_zone *mz;
b69408e88   Christoph Lameter   vmscan: Use an in...
2195
  	enum lru_list l;
41e3355de   KAMEZAWA Hiroyuki   memcg: fix node_s...
2196
  	int zone, tmp = node;
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
2197
2198
2199
2200
2201
2202
2203
2204
  	/*
  	 * This routine is called for every possible node, but it is a BUG
  	 * to call kmalloc() against an offline node.
  	 *
  	 * TODO: this routine can waste a lot of memory on nodes which will
  	 *       never be onlined. It would be better to use a memory hotplug
  	 *       callback function.
  	 */
41e3355de   KAMEZAWA Hiroyuki   memcg: fix node_s...
2205
2206
2207
  	if (!node_state(node, N_NORMAL_MEMORY))
  		tmp = -1;
  	pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp);
6d12e2d8d   KAMEZAWA Hiroyuki   per-zone and recl...
2208
2209
  	if (!pn)
  		return 1;
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
2210

6d12e2d8d   KAMEZAWA Hiroyuki   per-zone and recl...
2211
2212
  	mem->info.nodeinfo[node] = pn;
  	memset(pn, 0, sizeof(*pn));
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
2213
2214
2215
  
  	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
  		mz = &pn->zoneinfo[zone];
b69408e88   Christoph Lameter   vmscan: Use an in...
2216
2217
  		for_each_lru(l)
  			INIT_LIST_HEAD(&mz->lists[l]);
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
2218
  	}
6d12e2d8d   KAMEZAWA Hiroyuki   per-zone and recl...
2219
2220
  	return 0;
  }
1ecaab2bd   KAMEZAWA Hiroyuki   per-zone and recl...
2221
2222
2223
2224
  static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
  {
  	kfree(mem->info.nodeinfo[node]);
  }
c8dad2bb6   Jan Blunck   memcg: reduce siz...
2225
2226
2227
2228
2229
  static int mem_cgroup_size(void)
  {
  	int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);
  	return sizeof(struct mem_cgroup) + cpustat_size;
  }

  static struct mem_cgroup *mem_cgroup_alloc(void)
  {
  	struct mem_cgroup *mem;
  	int size = mem_cgroup_size();

  	if (size < PAGE_SIZE)
  		mem = kmalloc(size, GFP_KERNEL);
  	else
  		mem = vmalloc(size);

  	if (mem)
  		memset(mem, 0, size);
  	return mem;
  }
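
  /*
   * The PAGE_SIZE threshold above decides between kmalloc() and vmalloc();
   * __mem_cgroup_free() below recomputes mem_cgroup_size() and makes the
   * matching kfree()/vfree() choice, so the two paths must stay in sync.
   */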

  /*
   * At destroying mem_cgroup, references from swap_cgroup can remain.
   * (scanning all at force_empty is too costly...)
   *
   * Instead of clearing all references at force_empty, we remember
   * the number of references from swap_cgroup and free mem_cgroup when
   * it goes down to 0.
   *
   * Removal of cgroup itself succeeds regardless of refs from swap.
   */
  static void __mem_cgroup_free(struct mem_cgroup *mem)
  {
  	int node;

  	free_css_id(&mem_cgroup_subsys, &mem->css);

  	for_each_node_state(node, N_POSSIBLE)
  		free_mem_cgroup_per_zone_info(mem, node);

  	if (mem_cgroup_size() < PAGE_SIZE)
  		kfree(mem);
  	else
  		vfree(mem);
  }
  static void mem_cgroup_get(struct mem_cgroup *mem)
  {
  	atomic_inc(&mem->refcnt);
  }
  
  static void mem_cgroup_put(struct mem_cgroup *mem)
  {
  	if (atomic_dec_and_test(&mem->refcnt)) {
  		struct mem_cgroup *parent = parent_mem_cgroup(mem);
  		__mem_cgroup_free(mem);
  		if (parent)
  			mem_cgroup_put(parent);
  	}
  }
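
  /*
   * Reference-count sketch (from the code above and the comment before
   * __mem_cgroup_free()): mem_cgroup_create() starts the count at 1, swap
   * entries charged to the group hold references, and a child created with
   * use_hierarchy takes one on its parent.  When the final put drops the
   * count to zero the memcg is freed and the put is propagated to the
   * parent, so an otherwise-unused hierarchy unwinds from the leaves up.
   */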

  /*
   * Returns the parent mem_cgroup of @mem, or NULL.  The parent pointer is
   * only set up when the hierarchy is enabled, so for a non-hierarchical
   * group this always returns NULL.
   */
  static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
  {
  	if (!mem->res.parent)
  		return NULL;
  	return mem_cgroup_from_res_counter(mem->res.parent, res);
  }

  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  static void __init enable_swap_cgroup(void)
  {
  	if (!mem_cgroup_disabled() && really_do_swap_account)
  		do_swap_account = 1;
  }
  #else
  static void __init enable_swap_cgroup(void)
  {
  }
  #endif
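
  /*
   * do_swap_account is latched exactly once: enable_swap_cgroup() is called
   * from mem_cgroup_create() when the root cgroup is set up, so whether
   * mem+swap accounting is active is effectively fixed at boot (subject to
   * the "noswapaccount" parameter handled at the bottom of this file).
   */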

  static struct cgroup_subsys_state * __ref
  mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
  {
  	struct mem_cgroup *mem, *parent;
  	long error = -ENOMEM;
  	int node;

  	mem = mem_cgroup_alloc();
  	if (!mem)
  		return ERR_PTR(error);

  	for_each_node_state(node, N_POSSIBLE)
  		if (alloc_mem_cgroup_per_zone_info(mem, node))
  			goto free_out;
  	/* root ? */
  	if (cont->parent == NULL) {
  		enable_swap_cgroup();
  		parent = NULL;
  	} else {
  		parent = mem_cgroup_from_cont(cont->parent);
  		mem->use_hierarchy = parent->use_hierarchy;
  	}

  	if (parent && parent->use_hierarchy) {
  		res_counter_init(&mem->res, &parent->res);
  		res_counter_init(&mem->memsw, &parent->memsw);
  		/*
  		 * We increment refcnt of the parent to ensure that we can
  		 * safely access it on res_counter_charge/uncharge.
  		 * This refcnt will be decremented when freeing this
  		 * mem_cgroup (see mem_cgroup_put).
  		 */
  		mem_cgroup_get(parent);
  	} else {
  		res_counter_init(&mem->res, NULL);
  		res_counter_init(&mem->memsw, NULL);
  	}
  	mem->last_scanned_child = 0;
  	spin_lock_init(&mem->reclaim_param_lock);

  	if (parent)
  		mem->swappiness = get_swappiness(parent);
  	atomic_set(&mem->refcnt, 1);
  	return &mem->css;
  free_out:
  	__mem_cgroup_free(mem);
  	return ERR_PTR(error);
  }
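
  /*
   * A rough picture of the hierarchical case above: with use_hierarchy set
   * on the parent, the child's res/memsw counters are initialized with the
   * parent's counters as their parent, so a charge in the child is also
   * accounted against every ancestor's limit by the res_counter code (e.g.
   * with /memory/a having use_hierarchy enabled, pages charged in
   * /memory/a/b also count toward a's memory.limit_in_bytes).  The child
   * pins its parent via mem_cgroup_get() for as long as it can still issue
   * such charges.
   */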

  static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
  					struct cgroup *cont)
  {
  	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);

  	return mem_cgroup_force_empty(mem, false);
  }
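
  /*
   * ->pre_destroy runs before the cgroup is actually removed; returning an
   * error here (e.g. when the group cannot be emptied) lets the rmdir fail
   * instead of leaving charges behind, while the final mem_cgroup_put() is
   * left to ->destroy below.
   */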

  static void mem_cgroup_destroy(struct cgroup_subsys *ss,
  				struct cgroup *cont)
  {
  	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);

  	mem_cgroup_put(mem);
  }
  
  static int mem_cgroup_populate(struct cgroup_subsys *ss,
  				struct cgroup *cont)
  {
  	int ret;

  	ret = cgroup_add_files(cont, ss, mem_cgroup_files,
  				ARRAY_SIZE(mem_cgroup_files));

  	if (!ret)
  		ret = register_memsw_files(cont, ss);
  	return ret;
  }
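
  /*
   * The base "memory.*" control files are always registered; the "memsw.*"
   * set is added only when swap accounting is active (see
   * register_memsw_files() above).
   */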

  static void mem_cgroup_move_task(struct cgroup_subsys *ss,
  				struct cgroup *cont,
  				struct cgroup *old_cont,
  				struct task_struct *p)
  {
  	mutex_lock(&memcg_tasklist);
  	/*
  	 * FIXME: It's better to move the charges of this process from the
  	 * old memcg to the new one.  But it's just on the TODO list now.
  	 */
  	mutex_unlock(&memcg_tasklist);
  }
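
  /*
   * memcg_tasklist (declared near the top of this file) appears to be taken
   * here to serialize task moves against the memcg OOM path rather than to
   * protect any state touched in this callback; nothing else happens here
   * until the charge-moving TODO above is implemented.
   */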

  struct cgroup_subsys mem_cgroup_subsys = {
  	.name = "memory",
  	.subsys_id = mem_cgroup_subsys_id,
  	.create = mem_cgroup_create,
  	.pre_destroy = mem_cgroup_pre_destroy,
  	.destroy = mem_cgroup_destroy,
  	.populate = mem_cgroup_populate,
  	.attach = mem_cgroup_move_task,
  	.early_init = 0,
  	.use_id = 1,
  };
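
  /*
   * Hook order as driven by the cgroup core: ->create when a group
   * directory is made, ->populate to add the control files, ->attach on
   * every task move, ->pre_destroy when rmdir starts and ->destroy once the
   * group is gone.  early_init = 0 defers setup of the root memcg to
   * cgroup_init() rather than cgroup_init_early(); use_id = 1 asks the
   * cgroup core to manage css IDs for this subsystem (see free_css_id()
   * above).
   */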
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  
  static int __init disable_swap_account(char *s)
  {
  	really_do_swap_account = 0;
  	return 1;
  }
  __setup("noswapaccount", disable_swap_account);
  #endif
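
  /*
   * Usage note: appending "noswapaccount" to the kernel command line clears
   * really_do_swap_account, so enable_swap_cgroup() never sets
   * do_swap_account and the memsw.* control files are never registered,
   * even when CONFIG_CGROUP_MEM_RES_CTLR_SWAP is built in.
   */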