Commit 9623e078c1f4692a91531af2f639ec8aff8f0472
Committed by Linus Torvalds
Parent (1): 3ee1062b4e
Exists in master and in 7 other branches
memcg: fix oops in mem_cgroup_shrink_usage
Got an oops in mem_cgroup_shrink_usage() when testing loop over tmpfs: yes, of course, loop0 has no mm: other entry points check but this didn't.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
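For quick reference, this is mem_cgroup_shrink_usage() as it reads with the patch applied, assembled from the new-side column of the diff below (not an independent implementation); the two added lines are the NULL-mm guard that stops the oops when a caller such as the loop driver has no mm:

	int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
	{
		struct mem_cgroup *mem;
		int progress = 0;
		int retry = MEM_CGROUP_RECLAIM_RETRIES;

		if (mem_cgroup_subsys.disabled)
			return 0;
		if (!mm)		/* added by this commit: e.g. loop0 over tmpfs has no mm */
			return 0;	/* added by this commit: nothing to charge against, bail out */

		rcu_read_lock();
		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
		css_get(&mem->css);
		rcu_read_unlock();

		do {
			progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
		} while (!progress && --retry);

		css_put(&mem->css);
		if (!retry)
			return -ENOMEM;
		return 0;
	}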
Showing 1 changed file with 2 additions and 0 deletions (inline diff).
mm/memcontrol.c
1 | /* memcontrol.c - Memory Controller | 1 | /* memcontrol.c - Memory Controller |
2 | * | 2 | * |
3 | * Copyright IBM Corporation, 2007 | 3 | * Copyright IBM Corporation, 2007 |
4 | * Author Balbir Singh <balbir@linux.vnet.ibm.com> | 4 | * Author Balbir Singh <balbir@linux.vnet.ibm.com> |
5 | * | 5 | * |
6 | * Copyright 2007 OpenVZ SWsoft Inc | 6 | * Copyright 2007 OpenVZ SWsoft Inc |
7 | * Author: Pavel Emelianov <xemul@openvz.org> | 7 | * Author: Pavel Emelianov <xemul@openvz.org> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
11 | * the Free Software Foundation; either version 2 of the License, or | 11 | * the Free Software Foundation; either version 2 of the License, or |
12 | * (at your option) any later version. | 12 | * (at your option) any later version. |
13 | * | 13 | * |
14 | * This program is distributed in the hope that it will be useful, | 14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. | 17 | * GNU General Public License for more details. |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/res_counter.h> | 20 | #include <linux/res_counter.h> |
21 | #include <linux/memcontrol.h> | 21 | #include <linux/memcontrol.h> |
22 | #include <linux/cgroup.h> | 22 | #include <linux/cgroup.h> |
23 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
24 | #include <linux/smp.h> | 24 | #include <linux/smp.h> |
25 | #include <linux/page-flags.h> | 25 | #include <linux/page-flags.h> |
26 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
27 | #include <linux/bit_spinlock.h> | 27 | #include <linux/bit_spinlock.h> |
28 | #include <linux/rcupdate.h> | 28 | #include <linux/rcupdate.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
31 | #include <linux/spinlock.h> | 31 | #include <linux/spinlock.h> |
32 | #include <linux/fs.h> | 32 | #include <linux/fs.h> |
33 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
34 | #include <linux/vmalloc.h> | 34 | #include <linux/vmalloc.h> |
35 | 35 | ||
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | 37 | ||
38 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; | 38 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; |
39 | static struct kmem_cache *page_cgroup_cache __read_mostly; | 39 | static struct kmem_cache *page_cgroup_cache __read_mostly; |
40 | #define MEM_CGROUP_RECLAIM_RETRIES 5 | 40 | #define MEM_CGROUP_RECLAIM_RETRIES 5 |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Statistics for memory cgroup. | 43 | * Statistics for memory cgroup. |
44 | */ | 44 | */ |
45 | enum mem_cgroup_stat_index { | 45 | enum mem_cgroup_stat_index { |
46 | /* | 46 | /* |
47 | * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. | 47 | * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. |
48 | */ | 48 | */ |
49 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ | 49 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ |
50 | MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */ | 50 | MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */ |
51 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | 51 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ |
52 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | 52 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
53 | 53 | ||
54 | MEM_CGROUP_STAT_NSTATS, | 54 | MEM_CGROUP_STAT_NSTATS, |
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct mem_cgroup_stat_cpu { | 57 | struct mem_cgroup_stat_cpu { |
58 | s64 count[MEM_CGROUP_STAT_NSTATS]; | 58 | s64 count[MEM_CGROUP_STAT_NSTATS]; |
59 | } ____cacheline_aligned_in_smp; | 59 | } ____cacheline_aligned_in_smp; |
60 | 60 | ||
61 | struct mem_cgroup_stat { | 61 | struct mem_cgroup_stat { |
62 | struct mem_cgroup_stat_cpu cpustat[NR_CPUS]; | 62 | struct mem_cgroup_stat_cpu cpustat[NR_CPUS]; |
63 | }; | 63 | }; |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * For accounting under irq disable, no need for increment preempt count. | 66 | * For accounting under irq disable, no need for increment preempt count. |
67 | */ | 67 | */ |
68 | static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat, | 68 | static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat, |
69 | enum mem_cgroup_stat_index idx, int val) | 69 | enum mem_cgroup_stat_index idx, int val) |
70 | { | 70 | { |
71 | int cpu = smp_processor_id(); | 71 | int cpu = smp_processor_id(); |
72 | stat->cpustat[cpu].count[idx] += val; | 72 | stat->cpustat[cpu].count[idx] += val; |
73 | } | 73 | } |
74 | 74 | ||
75 | static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat, | 75 | static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat, |
76 | enum mem_cgroup_stat_index idx) | 76 | enum mem_cgroup_stat_index idx) |
77 | { | 77 | { |
78 | int cpu; | 78 | int cpu; |
79 | s64 ret = 0; | 79 | s64 ret = 0; |
80 | for_each_possible_cpu(cpu) | 80 | for_each_possible_cpu(cpu) |
81 | ret += stat->cpustat[cpu].count[idx]; | 81 | ret += stat->cpustat[cpu].count[idx]; |
82 | return ret; | 82 | return ret; |
83 | } | 83 | } |
84 | 84 | ||
85 | /* | 85 | /* |
86 | * per-zone information in memory controller. | 86 | * per-zone information in memory controller. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | enum mem_cgroup_zstat_index { | 89 | enum mem_cgroup_zstat_index { |
90 | MEM_CGROUP_ZSTAT_ACTIVE, | 90 | MEM_CGROUP_ZSTAT_ACTIVE, |
91 | MEM_CGROUP_ZSTAT_INACTIVE, | 91 | MEM_CGROUP_ZSTAT_INACTIVE, |
92 | 92 | ||
93 | NR_MEM_CGROUP_ZSTAT, | 93 | NR_MEM_CGROUP_ZSTAT, |
94 | }; | 94 | }; |
95 | 95 | ||
96 | struct mem_cgroup_per_zone { | 96 | struct mem_cgroup_per_zone { |
97 | /* | 97 | /* |
98 | * spin_lock to protect the per cgroup LRU | 98 | * spin_lock to protect the per cgroup LRU |
99 | */ | 99 | */ |
100 | spinlock_t lru_lock; | 100 | spinlock_t lru_lock; |
101 | struct list_head active_list; | 101 | struct list_head active_list; |
102 | struct list_head inactive_list; | 102 | struct list_head inactive_list; |
103 | unsigned long count[NR_MEM_CGROUP_ZSTAT]; | 103 | unsigned long count[NR_MEM_CGROUP_ZSTAT]; |
104 | }; | 104 | }; |
105 | /* Macro for accessing counter */ | 105 | /* Macro for accessing counter */ |
106 | #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) | 106 | #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) |
107 | 107 | ||
108 | struct mem_cgroup_per_node { | 108 | struct mem_cgroup_per_node { |
109 | struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; | 109 | struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; |
110 | }; | 110 | }; |
111 | 111 | ||
112 | struct mem_cgroup_lru_info { | 112 | struct mem_cgroup_lru_info { |
113 | struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES]; | 113 | struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES]; |
114 | }; | 114 | }; |
115 | 115 | ||
116 | /* | 116 | /* |
117 | * The memory controller data structure. The memory controller controls both | 117 | * The memory controller data structure. The memory controller controls both |
118 | * page cache and RSS per cgroup. We would eventually like to provide | 118 | * page cache and RSS per cgroup. We would eventually like to provide |
119 | * statistics based on the statistics developed by Rik Van Riel for clock-pro, | 119 | * statistics based on the statistics developed by Rik Van Riel for clock-pro, |
120 | * to help the administrator determine what knobs to tune. | 120 | * to help the administrator determine what knobs to tune. |
121 | * | 121 | * |
122 | * TODO: Add a water mark for the memory controller. Reclaim will begin when | 122 | * TODO: Add a water mark for the memory controller. Reclaim will begin when |
123 | * we hit the water mark. May be even add a low water mark, such that | 123 | * we hit the water mark. May be even add a low water mark, such that |
124 | * no reclaim occurs from a cgroup at it's low water mark, this is | 124 | * no reclaim occurs from a cgroup at it's low water mark, this is |
125 | * a feature that will be implemented much later in the future. | 125 | * a feature that will be implemented much later in the future. |
126 | */ | 126 | */ |
127 | struct mem_cgroup { | 127 | struct mem_cgroup { |
128 | struct cgroup_subsys_state css; | 128 | struct cgroup_subsys_state css; |
129 | /* | 129 | /* |
130 | * the counter to account for memory usage | 130 | * the counter to account for memory usage |
131 | */ | 131 | */ |
132 | struct res_counter res; | 132 | struct res_counter res; |
133 | /* | 133 | /* |
134 | * Per cgroup active and inactive list, similar to the | 134 | * Per cgroup active and inactive list, similar to the |
135 | * per zone LRU lists. | 135 | * per zone LRU lists. |
136 | */ | 136 | */ |
137 | struct mem_cgroup_lru_info info; | 137 | struct mem_cgroup_lru_info info; |
138 | 138 | ||
139 | int prev_priority; /* for recording reclaim priority */ | 139 | int prev_priority; /* for recording reclaim priority */ |
140 | /* | 140 | /* |
141 | * statistics. | 141 | * statistics. |
142 | */ | 142 | */ |
143 | struct mem_cgroup_stat stat; | 143 | struct mem_cgroup_stat stat; |
144 | }; | 144 | }; |
145 | static struct mem_cgroup init_mem_cgroup; | 145 | static struct mem_cgroup init_mem_cgroup; |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * We use the lower bit of the page->page_cgroup pointer as a bit spin | 148 | * We use the lower bit of the page->page_cgroup pointer as a bit spin |
149 | * lock. We need to ensure that page->page_cgroup is at least two | 149 | * lock. We need to ensure that page->page_cgroup is at least two |
150 | * byte aligned (based on comments from Nick Piggin). But since | 150 | * byte aligned (based on comments from Nick Piggin). But since |
151 | * bit_spin_lock doesn't actually set that lock bit in a non-debug | 151 | * bit_spin_lock doesn't actually set that lock bit in a non-debug |
152 | * uniprocessor kernel, we should avoid setting it here too. | 152 | * uniprocessor kernel, we should avoid setting it here too. |
153 | */ | 153 | */ |
154 | #define PAGE_CGROUP_LOCK_BIT 0x0 | 154 | #define PAGE_CGROUP_LOCK_BIT 0x0 |
155 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | 155 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) |
156 | #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | 156 | #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) |
157 | #else | 157 | #else |
158 | #define PAGE_CGROUP_LOCK 0x0 | 158 | #define PAGE_CGROUP_LOCK 0x0 |
159 | #endif | 159 | #endif |
160 | 160 | ||
161 | /* | 161 | /* |
162 | * A page_cgroup page is associated with every page descriptor. The | 162 | * A page_cgroup page is associated with every page descriptor. The |
163 | * page_cgroup helps us identify information about the cgroup | 163 | * page_cgroup helps us identify information about the cgroup |
164 | */ | 164 | */ |
165 | struct page_cgroup { | 165 | struct page_cgroup { |
166 | struct list_head lru; /* per cgroup LRU list */ | 166 | struct list_head lru; /* per cgroup LRU list */ |
167 | struct page *page; | 167 | struct page *page; |
168 | struct mem_cgroup *mem_cgroup; | 168 | struct mem_cgroup *mem_cgroup; |
169 | int flags; | 169 | int flags; |
170 | }; | 170 | }; |
171 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ | 171 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
172 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ | 172 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ |
173 | 173 | ||
174 | static int page_cgroup_nid(struct page_cgroup *pc) | 174 | static int page_cgroup_nid(struct page_cgroup *pc) |
175 | { | 175 | { |
176 | return page_to_nid(pc->page); | 176 | return page_to_nid(pc->page); |
177 | } | 177 | } |
178 | 178 | ||
179 | static enum zone_type page_cgroup_zid(struct page_cgroup *pc) | 179 | static enum zone_type page_cgroup_zid(struct page_cgroup *pc) |
180 | { | 180 | { |
181 | return page_zonenum(pc->page); | 181 | return page_zonenum(pc->page); |
182 | } | 182 | } |
183 | 183 | ||
184 | enum charge_type { | 184 | enum charge_type { |
185 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, | 185 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, |
186 | MEM_CGROUP_CHARGE_TYPE_MAPPED, | 186 | MEM_CGROUP_CHARGE_TYPE_MAPPED, |
187 | MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ | 187 | MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ |
188 | }; | 188 | }; |
189 | 189 | ||
190 | /* | 190 | /* |
191 | * Always modified under lru lock. Then, not necessary to preempt_disable() | 191 | * Always modified under lru lock. Then, not necessary to preempt_disable() |
192 | */ | 192 | */ |
193 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, | 193 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, |
194 | bool charge) | 194 | bool charge) |
195 | { | 195 | { |
196 | int val = (charge)? 1 : -1; | 196 | int val = (charge)? 1 : -1; |
197 | struct mem_cgroup_stat *stat = &mem->stat; | 197 | struct mem_cgroup_stat *stat = &mem->stat; |
198 | 198 | ||
199 | VM_BUG_ON(!irqs_disabled()); | 199 | VM_BUG_ON(!irqs_disabled()); |
200 | if (flags & PAGE_CGROUP_FLAG_CACHE) | 200 | if (flags & PAGE_CGROUP_FLAG_CACHE) |
201 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val); | 201 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val); |
202 | else | 202 | else |
203 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); | 203 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); |
204 | 204 | ||
205 | if (charge) | 205 | if (charge) |
206 | __mem_cgroup_stat_add_safe(stat, | 206 | __mem_cgroup_stat_add_safe(stat, |
207 | MEM_CGROUP_STAT_PGPGIN_COUNT, 1); | 207 | MEM_CGROUP_STAT_PGPGIN_COUNT, 1); |
208 | else | 208 | else |
209 | __mem_cgroup_stat_add_safe(stat, | 209 | __mem_cgroup_stat_add_safe(stat, |
210 | MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); | 210 | MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); |
211 | } | 211 | } |
212 | 212 | ||
213 | static struct mem_cgroup_per_zone * | 213 | static struct mem_cgroup_per_zone * |
214 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) | 214 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) |
215 | { | 215 | { |
216 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; | 216 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; |
217 | } | 217 | } |
218 | 218 | ||
219 | static struct mem_cgroup_per_zone * | 219 | static struct mem_cgroup_per_zone * |
220 | page_cgroup_zoneinfo(struct page_cgroup *pc) | 220 | page_cgroup_zoneinfo(struct page_cgroup *pc) |
221 | { | 221 | { |
222 | struct mem_cgroup *mem = pc->mem_cgroup; | 222 | struct mem_cgroup *mem = pc->mem_cgroup; |
223 | int nid = page_cgroup_nid(pc); | 223 | int nid = page_cgroup_nid(pc); |
224 | int zid = page_cgroup_zid(pc); | 224 | int zid = page_cgroup_zid(pc); |
225 | 225 | ||
226 | return mem_cgroup_zoneinfo(mem, nid, zid); | 226 | return mem_cgroup_zoneinfo(mem, nid, zid); |
227 | } | 227 | } |
228 | 228 | ||
229 | static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem, | 229 | static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem, |
230 | enum mem_cgroup_zstat_index idx) | 230 | enum mem_cgroup_zstat_index idx) |
231 | { | 231 | { |
232 | int nid, zid; | 232 | int nid, zid; |
233 | struct mem_cgroup_per_zone *mz; | 233 | struct mem_cgroup_per_zone *mz; |
234 | u64 total = 0; | 234 | u64 total = 0; |
235 | 235 | ||
236 | for_each_online_node(nid) | 236 | for_each_online_node(nid) |
237 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { | 237 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
238 | mz = mem_cgroup_zoneinfo(mem, nid, zid); | 238 | mz = mem_cgroup_zoneinfo(mem, nid, zid); |
239 | total += MEM_CGROUP_ZSTAT(mz, idx); | 239 | total += MEM_CGROUP_ZSTAT(mz, idx); |
240 | } | 240 | } |
241 | return total; | 241 | return total; |
242 | } | 242 | } |
243 | 243 | ||
244 | static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) | 244 | static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) |
245 | { | 245 | { |
246 | return container_of(cgroup_subsys_state(cont, | 246 | return container_of(cgroup_subsys_state(cont, |
247 | mem_cgroup_subsys_id), struct mem_cgroup, | 247 | mem_cgroup_subsys_id), struct mem_cgroup, |
248 | css); | 248 | css); |
249 | } | 249 | } |
250 | 250 | ||
251 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | 251 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
252 | { | 252 | { |
253 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), | 253 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), |
254 | struct mem_cgroup, css); | 254 | struct mem_cgroup, css); |
255 | } | 255 | } |
256 | 256 | ||
257 | static inline int page_cgroup_locked(struct page *page) | 257 | static inline int page_cgroup_locked(struct page *page) |
258 | { | 258 | { |
259 | return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 259 | return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
260 | } | 260 | } |
261 | 261 | ||
262 | static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) | 262 | static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) |
263 | { | 263 | { |
264 | VM_BUG_ON(!page_cgroup_locked(page)); | 264 | VM_BUG_ON(!page_cgroup_locked(page)); |
265 | page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); | 265 | page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); |
266 | } | 266 | } |
267 | 267 | ||
268 | struct page_cgroup *page_get_page_cgroup(struct page *page) | 268 | struct page_cgroup *page_get_page_cgroup(struct page *page) |
269 | { | 269 | { |
270 | return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK); | 270 | return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK); |
271 | } | 271 | } |
272 | 272 | ||
273 | static void lock_page_cgroup(struct page *page) | 273 | static void lock_page_cgroup(struct page *page) |
274 | { | 274 | { |
275 | bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 275 | bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
276 | } | 276 | } |
277 | 277 | ||
278 | static int try_lock_page_cgroup(struct page *page) | 278 | static int try_lock_page_cgroup(struct page *page) |
279 | { | 279 | { |
280 | return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 280 | return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
281 | } | 281 | } |
282 | 282 | ||
283 | static void unlock_page_cgroup(struct page *page) | 283 | static void unlock_page_cgroup(struct page *page) |
284 | { | 284 | { |
285 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 285 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
286 | } | 286 | } |
287 | 287 | ||
288 | static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, | 288 | static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, |
289 | struct page_cgroup *pc) | 289 | struct page_cgroup *pc) |
290 | { | 290 | { |
291 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; | 291 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
292 | 292 | ||
293 | if (from) | 293 | if (from) |
294 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; | 294 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; |
295 | else | 295 | else |
296 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; | 296 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; |
297 | 297 | ||
298 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); | 298 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); |
299 | list_del(&pc->lru); | 299 | list_del(&pc->lru); |
300 | } | 300 | } |
301 | 301 | ||
302 | static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, | 302 | static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, |
303 | struct page_cgroup *pc) | 303 | struct page_cgroup *pc) |
304 | { | 304 | { |
305 | int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; | 305 | int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
306 | 306 | ||
307 | if (!to) { | 307 | if (!to) { |
308 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; | 308 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; |
309 | list_add(&pc->lru, &mz->inactive_list); | 309 | list_add(&pc->lru, &mz->inactive_list); |
310 | } else { | 310 | } else { |
311 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1; | 311 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1; |
312 | list_add(&pc->lru, &mz->active_list); | 312 | list_add(&pc->lru, &mz->active_list); |
313 | } | 313 | } |
314 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); | 314 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); |
315 | } | 315 | } |
316 | 316 | ||
317 | static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) | 317 | static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) |
318 | { | 318 | { |
319 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; | 319 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
320 | struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); | 320 | struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); |
321 | 321 | ||
322 | if (from) | 322 | if (from) |
323 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; | 323 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; |
324 | else | 324 | else |
325 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; | 325 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; |
326 | 326 | ||
327 | if (active) { | 327 | if (active) { |
328 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1; | 328 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1; |
329 | pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; | 329 | pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; |
330 | list_move(&pc->lru, &mz->active_list); | 330 | list_move(&pc->lru, &mz->active_list); |
331 | } else { | 331 | } else { |
332 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; | 332 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; |
333 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; | 333 | pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; |
334 | list_move(&pc->lru, &mz->inactive_list); | 334 | list_move(&pc->lru, &mz->inactive_list); |
335 | } | 335 | } |
336 | } | 336 | } |
337 | 337 | ||
338 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | 338 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) |
339 | { | 339 | { |
340 | int ret; | 340 | int ret; |
341 | 341 | ||
342 | task_lock(task); | 342 | task_lock(task); |
343 | ret = task->mm && mm_match_cgroup(task->mm, mem); | 343 | ret = task->mm && mm_match_cgroup(task->mm, mem); |
344 | task_unlock(task); | 344 | task_unlock(task); |
345 | return ret; | 345 | return ret; |
346 | } | 346 | } |
347 | 347 | ||
348 | /* | 348 | /* |
349 | * This routine assumes that the appropriate zone's lru lock is already held | 349 | * This routine assumes that the appropriate zone's lru lock is already held |
350 | */ | 350 | */ |
351 | void mem_cgroup_move_lists(struct page *page, bool active) | 351 | void mem_cgroup_move_lists(struct page *page, bool active) |
352 | { | 352 | { |
353 | struct page_cgroup *pc; | 353 | struct page_cgroup *pc; |
354 | struct mem_cgroup_per_zone *mz; | 354 | struct mem_cgroup_per_zone *mz; |
355 | unsigned long flags; | 355 | unsigned long flags; |
356 | 356 | ||
357 | if (mem_cgroup_subsys.disabled) | 357 | if (mem_cgroup_subsys.disabled) |
358 | return; | 358 | return; |
359 | 359 | ||
360 | /* | 360 | /* |
361 | * We cannot lock_page_cgroup while holding zone's lru_lock, | 361 | * We cannot lock_page_cgroup while holding zone's lru_lock, |
362 | * because other holders of lock_page_cgroup can be interrupted | 362 | * because other holders of lock_page_cgroup can be interrupted |
363 | * with an attempt to rotate_reclaimable_page. But we cannot | 363 | * with an attempt to rotate_reclaimable_page. But we cannot |
364 | * safely get to page_cgroup without it, so just try_lock it: | 364 | * safely get to page_cgroup without it, so just try_lock it: |
365 | * mem_cgroup_isolate_pages allows for page left on wrong list. | 365 | * mem_cgroup_isolate_pages allows for page left on wrong list. |
366 | */ | 366 | */ |
367 | if (!try_lock_page_cgroup(page)) | 367 | if (!try_lock_page_cgroup(page)) |
368 | return; | 368 | return; |
369 | 369 | ||
370 | pc = page_get_page_cgroup(page); | 370 | pc = page_get_page_cgroup(page); |
371 | if (pc) { | 371 | if (pc) { |
372 | mz = page_cgroup_zoneinfo(pc); | 372 | mz = page_cgroup_zoneinfo(pc); |
373 | spin_lock_irqsave(&mz->lru_lock, flags); | 373 | spin_lock_irqsave(&mz->lru_lock, flags); |
374 | __mem_cgroup_move_lists(pc, active); | 374 | __mem_cgroup_move_lists(pc, active); |
375 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 375 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
376 | } | 376 | } |
377 | unlock_page_cgroup(page); | 377 | unlock_page_cgroup(page); |
378 | } | 378 | } |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * Calculate mapped_ratio under memory controller. This will be used in | 381 | * Calculate mapped_ratio under memory controller. This will be used in |
382 | * vmscan.c for deteremining we have to reclaim mapped pages. | 382 | * vmscan.c for deteremining we have to reclaim mapped pages. |
383 | */ | 383 | */ |
384 | int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) | 384 | int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) |
385 | { | 385 | { |
386 | long total, rss; | 386 | long total, rss; |
387 | 387 | ||
388 | /* | 388 | /* |
389 | * usage is recorded in bytes. But, here, we assume the number of | 389 | * usage is recorded in bytes. But, here, we assume the number of |
390 | * physical pages can be represented by "long" on any arch. | 390 | * physical pages can be represented by "long" on any arch. |
391 | */ | 391 | */ |
392 | total = (long) (mem->res.usage >> PAGE_SHIFT) + 1L; | 392 | total = (long) (mem->res.usage >> PAGE_SHIFT) + 1L; |
393 | rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); | 393 | rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); |
394 | return (int)((rss * 100L) / total); | 394 | return (int)((rss * 100L) / total); |
395 | } | 395 | } |
396 | 396 | ||
397 | /* | 397 | /* |
398 | * This function is called from vmscan.c. In page reclaiming loop. balance | 398 | * This function is called from vmscan.c. In page reclaiming loop. balance |
399 | * between active and inactive list is calculated. For memory controller | 399 | * between active and inactive list is calculated. For memory controller |
400 | * page reclaiming, we should use using mem_cgroup's imbalance rather than | 400 | * page reclaiming, we should use using mem_cgroup's imbalance rather than |
401 | * zone's global lru imbalance. | 401 | * zone's global lru imbalance. |
402 | */ | 402 | */ |
403 | long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem) | 403 | long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem) |
404 | { | 404 | { |
405 | unsigned long active, inactive; | 405 | unsigned long active, inactive; |
406 | /* active and inactive are the number of pages. 'long' is ok.*/ | 406 | /* active and inactive are the number of pages. 'long' is ok.*/ |
407 | active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE); | 407 | active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE); |
408 | inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE); | 408 | inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE); |
409 | return (long) (active / (inactive + 1)); | 409 | return (long) (active / (inactive + 1)); |
410 | } | 410 | } |
411 | 411 | ||
412 | /* | 412 | /* |
413 | * prev_priority control...this will be used in memory reclaim path. | 413 | * prev_priority control...this will be used in memory reclaim path. |
414 | */ | 414 | */ |
415 | int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) | 415 | int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) |
416 | { | 416 | { |
417 | return mem->prev_priority; | 417 | return mem->prev_priority; |
418 | } | 418 | } |
419 | 419 | ||
420 | void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority) | 420 | void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority) |
421 | { | 421 | { |
422 | if (priority < mem->prev_priority) | 422 | if (priority < mem->prev_priority) |
423 | mem->prev_priority = priority; | 423 | mem->prev_priority = priority; |
424 | } | 424 | } |
425 | 425 | ||
426 | void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority) | 426 | void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority) |
427 | { | 427 | { |
428 | mem->prev_priority = priority; | 428 | mem->prev_priority = priority; |
429 | } | 429 | } |
430 | 430 | ||
431 | /* | 431 | /* |
432 | * Calculate # of pages to be scanned in this priority/zone. | 432 | * Calculate # of pages to be scanned in this priority/zone. |
433 | * See also vmscan.c | 433 | * See also vmscan.c |
434 | * | 434 | * |
435 | * priority starts from "DEF_PRIORITY" and decremented in each loop. | 435 | * priority starts from "DEF_PRIORITY" and decremented in each loop. |
436 | * (see include/linux/mmzone.h) | 436 | * (see include/linux/mmzone.h) |
437 | */ | 437 | */ |
438 | 438 | ||
439 | long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, | 439 | long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, |
440 | struct zone *zone, int priority) | 440 | struct zone *zone, int priority) |
441 | { | 441 | { |
442 | long nr_active; | 442 | long nr_active; |
443 | int nid = zone->zone_pgdat->node_id; | 443 | int nid = zone->zone_pgdat->node_id; |
444 | int zid = zone_idx(zone); | 444 | int zid = zone_idx(zone); |
445 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); | 445 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); |
446 | 446 | ||
447 | nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE); | 447 | nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE); |
448 | return (nr_active >> priority); | 448 | return (nr_active >> priority); |
449 | } | 449 | } |
450 | 450 | ||
451 | long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, | 451 | long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, |
452 | struct zone *zone, int priority) | 452 | struct zone *zone, int priority) |
453 | { | 453 | { |
454 | long nr_inactive; | 454 | long nr_inactive; |
455 | int nid = zone->zone_pgdat->node_id; | 455 | int nid = zone->zone_pgdat->node_id; |
456 | int zid = zone_idx(zone); | 456 | int zid = zone_idx(zone); |
457 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); | 457 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); |
458 | 458 | ||
459 | nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE); | 459 | nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE); |
460 | return (nr_inactive >> priority); | 460 | return (nr_inactive >> priority); |
461 | } | 461 | } |
462 | 462 | ||
463 | unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | 463 | unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, |
464 | struct list_head *dst, | 464 | struct list_head *dst, |
465 | unsigned long *scanned, int order, | 465 | unsigned long *scanned, int order, |
466 | int mode, struct zone *z, | 466 | int mode, struct zone *z, |
467 | struct mem_cgroup *mem_cont, | 467 | struct mem_cgroup *mem_cont, |
468 | int active) | 468 | int active) |
469 | { | 469 | { |
470 | unsigned long nr_taken = 0; | 470 | unsigned long nr_taken = 0; |
471 | struct page *page; | 471 | struct page *page; |
472 | unsigned long scan; | 472 | unsigned long scan; |
473 | LIST_HEAD(pc_list); | 473 | LIST_HEAD(pc_list); |
474 | struct list_head *src; | 474 | struct list_head *src; |
475 | struct page_cgroup *pc, *tmp; | 475 | struct page_cgroup *pc, *tmp; |
476 | int nid = z->zone_pgdat->node_id; | 476 | int nid = z->zone_pgdat->node_id; |
477 | int zid = zone_idx(z); | 477 | int zid = zone_idx(z); |
478 | struct mem_cgroup_per_zone *mz; | 478 | struct mem_cgroup_per_zone *mz; |
479 | 479 | ||
480 | BUG_ON(!mem_cont); | 480 | BUG_ON(!mem_cont); |
481 | mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); | 481 | mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); |
482 | if (active) | 482 | if (active) |
483 | src = &mz->active_list; | 483 | src = &mz->active_list; |
484 | else | 484 | else |
485 | src = &mz->inactive_list; | 485 | src = &mz->inactive_list; |
486 | 486 | ||
487 | 487 | ||
488 | spin_lock(&mz->lru_lock); | 488 | spin_lock(&mz->lru_lock); |
489 | scan = 0; | 489 | scan = 0; |
490 | list_for_each_entry_safe_reverse(pc, tmp, src, lru) { | 490 | list_for_each_entry_safe_reverse(pc, tmp, src, lru) { |
491 | if (scan >= nr_to_scan) | 491 | if (scan >= nr_to_scan) |
492 | break; | 492 | break; |
493 | page = pc->page; | 493 | page = pc->page; |
494 | 494 | ||
495 | if (unlikely(!PageLRU(page))) | 495 | if (unlikely(!PageLRU(page))) |
496 | continue; | 496 | continue; |
497 | 497 | ||
498 | if (PageActive(page) && !active) { | 498 | if (PageActive(page) && !active) { |
499 | __mem_cgroup_move_lists(pc, true); | 499 | __mem_cgroup_move_lists(pc, true); |
500 | continue; | 500 | continue; |
501 | } | 501 | } |
502 | if (!PageActive(page) && active) { | 502 | if (!PageActive(page) && active) { |
503 | __mem_cgroup_move_lists(pc, false); | 503 | __mem_cgroup_move_lists(pc, false); |
504 | continue; | 504 | continue; |
505 | } | 505 | } |
506 | 506 | ||
507 | scan++; | 507 | scan++; |
508 | list_move(&pc->lru, &pc_list); | 508 | list_move(&pc->lru, &pc_list); |
509 | 509 | ||
510 | if (__isolate_lru_page(page, mode) == 0) { | 510 | if (__isolate_lru_page(page, mode) == 0) { |
511 | list_move(&page->lru, dst); | 511 | list_move(&page->lru, dst); |
512 | nr_taken++; | 512 | nr_taken++; |
513 | } | 513 | } |
514 | } | 514 | } |
515 | 515 | ||
516 | list_splice(&pc_list, src); | 516 | list_splice(&pc_list, src); |
517 | spin_unlock(&mz->lru_lock); | 517 | spin_unlock(&mz->lru_lock); |
518 | 518 | ||
519 | *scanned = scan; | 519 | *scanned = scan; |
520 | return nr_taken; | 520 | return nr_taken; |
521 | } | 521 | } |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * Charge the memory controller for page usage. | 524 | * Charge the memory controller for page usage. |
525 | * Return | 525 | * Return |
526 | * 0 if the charge was successful | 526 | * 0 if the charge was successful |
527 | * < 0 if the cgroup is over its limit | 527 | * < 0 if the cgroup is over its limit |
528 | */ | 528 | */ |
529 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | 529 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, |
530 | gfp_t gfp_mask, enum charge_type ctype, | 530 | gfp_t gfp_mask, enum charge_type ctype, |
531 | struct mem_cgroup *memcg) | 531 | struct mem_cgroup *memcg) |
532 | { | 532 | { |
533 | struct mem_cgroup *mem; | 533 | struct mem_cgroup *mem; |
534 | struct page_cgroup *pc; | 534 | struct page_cgroup *pc; |
535 | unsigned long flags; | 535 | unsigned long flags; |
536 | unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 536 | unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
537 | struct mem_cgroup_per_zone *mz; | 537 | struct mem_cgroup_per_zone *mz; |
538 | 538 | ||
539 | pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask); | 539 | pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask); |
540 | if (unlikely(pc == NULL)) | 540 | if (unlikely(pc == NULL)) |
541 | goto err; | 541 | goto err; |
542 | 542 | ||
543 | /* | 543 | /* |
544 | * We always charge the cgroup the mm_struct belongs to. | 544 | * We always charge the cgroup the mm_struct belongs to. |
545 | * The mm_struct's mem_cgroup changes on task migration if the | 545 | * The mm_struct's mem_cgroup changes on task migration if the |
546 | * thread group leader migrates. It's possible that mm is not | 546 | * thread group leader migrates. It's possible that mm is not |
547 | * set, if so charge the init_mm (happens for pagecache usage). | 547 | * set, if so charge the init_mm (happens for pagecache usage). |
548 | */ | 548 | */ |
549 | if (likely(!memcg)) { | 549 | if (likely(!memcg)) { |
550 | rcu_read_lock(); | 550 | rcu_read_lock(); |
551 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 551 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); |
552 | /* | 552 | /* |
553 | * For every charge from the cgroup, increment reference count | 553 | * For every charge from the cgroup, increment reference count |
554 | */ | 554 | */ |
555 | css_get(&mem->css); | 555 | css_get(&mem->css); |
556 | rcu_read_unlock(); | 556 | rcu_read_unlock(); |
557 | } else { | 557 | } else { |
558 | mem = memcg; | 558 | mem = memcg; |
559 | css_get(&memcg->css); | 559 | css_get(&memcg->css); |
560 | } | 560 | } |
561 | 561 | ||
562 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { | 562 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { |
563 | if (!(gfp_mask & __GFP_WAIT)) | 563 | if (!(gfp_mask & __GFP_WAIT)) |
564 | goto out; | 564 | goto out; |
565 | 565 | ||
566 | if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) | 566 | if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) |
567 | continue; | 567 | continue; |
568 | 568 | ||
569 | /* | 569 | /* |
570 | * try_to_free_mem_cgroup_pages() might not give us a full | 570 | * try_to_free_mem_cgroup_pages() might not give us a full |
571 | * picture of reclaim. Some pages are reclaimed and might be | 571 | * picture of reclaim. Some pages are reclaimed and might be |
572 | * moved to swap cache or just unmapped from the cgroup. | 572 | * moved to swap cache or just unmapped from the cgroup. |
573 | * Check the limit again to see if the reclaim reduced the | 573 | * Check the limit again to see if the reclaim reduced the |
574 | * current usage of the cgroup before giving up | 574 | * current usage of the cgroup before giving up |
575 | */ | 575 | */ |
576 | if (res_counter_check_under_limit(&mem->res)) | 576 | if (res_counter_check_under_limit(&mem->res)) |
577 | continue; | 577 | continue; |
578 | 578 | ||
579 | if (!nr_retries--) { | 579 | if (!nr_retries--) { |
580 | mem_cgroup_out_of_memory(mem, gfp_mask); | 580 | mem_cgroup_out_of_memory(mem, gfp_mask); |
581 | goto out; | 581 | goto out; |
582 | } | 582 | } |
583 | } | 583 | } |
584 | 584 | ||
585 | pc->mem_cgroup = mem; | 585 | pc->mem_cgroup = mem; |
586 | pc->page = page; | 586 | pc->page = page; |
587 | /* | 587 | /* |
588 | * If a page is accounted as a page cache, insert to inactive list. | 588 | * If a page is accounted as a page cache, insert to inactive list. |
589 | * If anon, insert to active list. | 589 | * If anon, insert to active list. |
590 | */ | 590 | */ |
591 | if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) | 591 | if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) |
592 | pc->flags = PAGE_CGROUP_FLAG_CACHE; | 592 | pc->flags = PAGE_CGROUP_FLAG_CACHE; |
593 | else | 593 | else |
594 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; | 594 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; |
595 | 595 | ||
596 | lock_page_cgroup(page); | 596 | lock_page_cgroup(page); |
597 | if (unlikely(page_get_page_cgroup(page))) { | 597 | if (unlikely(page_get_page_cgroup(page))) { |
598 | unlock_page_cgroup(page); | 598 | unlock_page_cgroup(page); |
599 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 599 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
600 | css_put(&mem->css); | 600 | css_put(&mem->css); |
601 | kmem_cache_free(page_cgroup_cache, pc); | 601 | kmem_cache_free(page_cgroup_cache, pc); |
602 | goto done; | 602 | goto done; |
603 | } | 603 | } |
604 | page_assign_page_cgroup(page, pc); | 604 | page_assign_page_cgroup(page, pc); |
605 | 605 | ||
606 | mz = page_cgroup_zoneinfo(pc); | 606 | mz = page_cgroup_zoneinfo(pc); |
607 | spin_lock_irqsave(&mz->lru_lock, flags); | 607 | spin_lock_irqsave(&mz->lru_lock, flags); |
608 | __mem_cgroup_add_list(mz, pc); | 608 | __mem_cgroup_add_list(mz, pc); |
609 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 609 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
610 | 610 | ||
611 | unlock_page_cgroup(page); | 611 | unlock_page_cgroup(page); |
612 | done: | 612 | done: |
613 | return 0; | 613 | return 0; |
614 | out: | 614 | out: |
615 | css_put(&mem->css); | 615 | css_put(&mem->css); |
616 | kmem_cache_free(page_cgroup_cache, pc); | 616 | kmem_cache_free(page_cgroup_cache, pc); |
617 | err: | 617 | err: |
618 | return -ENOMEM; | 618 | return -ENOMEM; |
619 | } | 619 | } |
620 | 620 | ||
621 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) | 621 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) |
622 | { | 622 | { |
623 | if (mem_cgroup_subsys.disabled) | 623 | if (mem_cgroup_subsys.disabled) |
624 | return 0; | 624 | return 0; |
625 | 625 | ||
626 | /* | 626 | /* |
627 | * If already mapped, we don't have to account. | 627 | * If already mapped, we don't have to account. |
628 | * If page cache, page->mapping has address_space. | 628 | * If page cache, page->mapping has address_space. |
629 | * But page->mapping may have out-of-use anon_vma pointer, | 629 | * But page->mapping may have out-of-use anon_vma pointer, |
630 | * detecit it by PageAnon() check. newly-mapped-anon's page->mapping | 630 | * detecit it by PageAnon() check. newly-mapped-anon's page->mapping |
631 | * is NULL. | 631 | * is NULL. |
632 | */ | 632 | */ |
633 | if (page_mapped(page) || (page->mapping && !PageAnon(page))) | 633 | if (page_mapped(page) || (page->mapping && !PageAnon(page))) |
634 | return 0; | 634 | return 0; |
635 | if (unlikely(!mm)) | 635 | if (unlikely(!mm)) |
636 | mm = &init_mm; | 636 | mm = &init_mm; |
637 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 637 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
638 | MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); | 638 | MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); |
639 | } | 639 | } |
640 | 640 | ||
641 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 641 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
642 | gfp_t gfp_mask) | 642 | gfp_t gfp_mask) |
643 | { | 643 | { |
644 | if (mem_cgroup_subsys.disabled) | 644 | if (mem_cgroup_subsys.disabled) |
645 | return 0; | 645 | return 0; |
646 | 646 | ||
647 | /* | 647 | /* |
648 | * Corner case handling. This is called from add_to_page_cache() | 648 | * Corner case handling. This is called from add_to_page_cache() |
649 | * in usual. But some FS (shmem) precharges this page before calling it | 649 | * in usual. But some FS (shmem) precharges this page before calling it |
650 | * and call add_to_page_cache() with GFP_NOWAIT. | 650 | * and call add_to_page_cache() with GFP_NOWAIT. |
651 | * | 651 | * |
652 | * For GFP_NOWAIT case, the page may be pre-charged before calling | 652 | * For GFP_NOWAIT case, the page may be pre-charged before calling |
653 | * add_to_page_cache(). (See shmem.c) check it here and avoid to call | 653 | * add_to_page_cache(). (See shmem.c) check it here and avoid to call |
654 | * charge twice. (It works but has to pay a bit larger cost.) | 654 | * charge twice. (It works but has to pay a bit larger cost.) |
655 | */ | 655 | */ |
656 | if (!(gfp_mask & __GFP_WAIT)) { | 656 | if (!(gfp_mask & __GFP_WAIT)) { |
657 | struct page_cgroup *pc; | 657 | struct page_cgroup *pc; |
658 | 658 | ||
659 | lock_page_cgroup(page); | 659 | lock_page_cgroup(page); |
660 | pc = page_get_page_cgroup(page); | 660 | pc = page_get_page_cgroup(page); |
661 | if (pc) { | 661 | if (pc) { |
662 | VM_BUG_ON(pc->page != page); | 662 | VM_BUG_ON(pc->page != page); |
663 | VM_BUG_ON(!pc->mem_cgroup); | 663 | VM_BUG_ON(!pc->mem_cgroup); |
664 | unlock_page_cgroup(page); | 664 | unlock_page_cgroup(page); |
665 | return 0; | 665 | return 0; |
666 | } | 666 | } |
667 | unlock_page_cgroup(page); | 667 | unlock_page_cgroup(page); |
668 | } | 668 | } |
669 | 669 | ||
670 | if (unlikely(!mm)) | 670 | if (unlikely(!mm)) |
671 | mm = &init_mm; | 671 | mm = &init_mm; |
672 | 672 | ||
673 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 673 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
674 | MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); | 674 | MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); |
675 | } | 675 | } |
676 | 676 | ||
677 | /* | 677 | /* |
678 | * uncharge if !page_mapped(page) | 678 | * uncharge if !page_mapped(page) |
679 | */ | 679 | */ |
680 | static void | 680 | static void |
681 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | 681 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) |
682 | { | 682 | { |
683 | struct page_cgroup *pc; | 683 | struct page_cgroup *pc; |
684 | struct mem_cgroup *mem; | 684 | struct mem_cgroup *mem; |
685 | struct mem_cgroup_per_zone *mz; | 685 | struct mem_cgroup_per_zone *mz; |
686 | unsigned long flags; | 686 | unsigned long flags; |
687 | 687 | ||
688 | if (mem_cgroup_subsys.disabled) | 688 | if (mem_cgroup_subsys.disabled) |
689 | return; | 689 | return; |
690 | 690 | ||
691 | /* | 691 | /* |
692 | * Check if our page_cgroup is valid | 692 | * Check if our page_cgroup is valid |
693 | */ | 693 | */ |
694 | lock_page_cgroup(page); | 694 | lock_page_cgroup(page); |
695 | pc = page_get_page_cgroup(page); | 695 | pc = page_get_page_cgroup(page); |
696 | if (unlikely(!pc)) | 696 | if (unlikely(!pc)) |
697 | goto unlock; | 697 | goto unlock; |
698 | 698 | ||
699 | VM_BUG_ON(pc->page != page); | 699 | VM_BUG_ON(pc->page != page); |
700 | 700 | ||
701 | if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) | 701 | if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) |
702 | && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) | 702 | && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) |
703 | || page_mapped(page))) | 703 | || page_mapped(page))) |
704 | goto unlock; | 704 | goto unlock; |
705 | 705 | ||
706 | mz = page_cgroup_zoneinfo(pc); | 706 | mz = page_cgroup_zoneinfo(pc); |
707 | spin_lock_irqsave(&mz->lru_lock, flags); | 707 | spin_lock_irqsave(&mz->lru_lock, flags); |
708 | __mem_cgroup_remove_list(mz, pc); | 708 | __mem_cgroup_remove_list(mz, pc); |
709 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 709 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
710 | 710 | ||
711 | page_assign_page_cgroup(page, NULL); | 711 | page_assign_page_cgroup(page, NULL); |
712 | unlock_page_cgroup(page); | 712 | unlock_page_cgroup(page); |
713 | 713 | ||
714 | mem = pc->mem_cgroup; | 714 | mem = pc->mem_cgroup; |
715 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 715 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
716 | css_put(&mem->css); | 716 | css_put(&mem->css); |
717 | 717 | ||
718 | kmem_cache_free(page_cgroup_cache, pc); | 718 | kmem_cache_free(page_cgroup_cache, pc); |
719 | return; | 719 | return; |
720 | unlock: | 720 | unlock: |
721 | unlock_page_cgroup(page); | 721 | unlock_page_cgroup(page); |
722 | } | 722 | } |
723 | 723 | ||
724 | void mem_cgroup_uncharge_page(struct page *page) | 724 | void mem_cgroup_uncharge_page(struct page *page) |
725 | { | 725 | { |
726 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); | 726 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); |
727 | } | 727 | } |
728 | 728 | ||
729 | void mem_cgroup_uncharge_cache_page(struct page *page) | 729 | void mem_cgroup_uncharge_cache_page(struct page *page) |
730 | { | 730 | { |
731 | VM_BUG_ON(page_mapped(page)); | 731 | VM_BUG_ON(page_mapped(page)); |
732 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); | 732 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); |
733 | } | 733 | } |
734 | 734 | ||
735 | /* | 735 | /* |
736 | * Before starting migration, account against new page. | 736 | * Before starting migration, account against new page. |
737 | */ | 737 | */ |
738 | int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) | 738 | int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) |
739 | { | 739 | { |
740 | struct page_cgroup *pc; | 740 | struct page_cgroup *pc; |
741 | struct mem_cgroup *mem = NULL; | 741 | struct mem_cgroup *mem = NULL; |
742 | enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; | 742 | enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; |
743 | int ret = 0; | 743 | int ret = 0; |
744 | 744 | ||
745 | if (mem_cgroup_subsys.disabled) | 745 | if (mem_cgroup_subsys.disabled) |
746 | return 0; | 746 | return 0; |
747 | 747 | ||
748 | lock_page_cgroup(page); | 748 | lock_page_cgroup(page); |
749 | pc = page_get_page_cgroup(page); | 749 | pc = page_get_page_cgroup(page); |
750 | if (pc) { | 750 | if (pc) { |
751 | mem = pc->mem_cgroup; | 751 | mem = pc->mem_cgroup; |
752 | css_get(&mem->css); | 752 | css_get(&mem->css); |
753 | if (pc->flags & PAGE_CGROUP_FLAG_CACHE) | 753 | if (pc->flags & PAGE_CGROUP_FLAG_CACHE) |
754 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | 754 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; |
755 | } | 755 | } |
756 | unlock_page_cgroup(page); | 756 | unlock_page_cgroup(page); |
757 | if (mem) { | 757 | if (mem) { |
758 | ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, | 758 | ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, |
759 | ctype, mem); | 759 | ctype, mem); |
760 | css_put(&mem->css); | 760 | css_put(&mem->css); |
761 | } | 761 | } |
762 | return ret; | 762 | return ret; |
763 | } | 763 | } |
764 | 764 | ||
765 | /* remove redundant charge if migration failed*/ | 765 | /* remove redundant charge if migration failed*/ |
766 | void mem_cgroup_end_migration(struct page *newpage) | 766 | void mem_cgroup_end_migration(struct page *newpage) |
767 | { | 767 | { |
768 | /* | 768 | /* |
769 | * At success, page->mapping is not NULL. | 769 | * At success, page->mapping is not NULL. |
770 | * special rollback care is necessary when | 770 | * special rollback care is necessary when |
771 | * 1. at migration failure. (newpage->mapping is cleared in this case) | 771 | * 1. at migration failure. (newpage->mapping is cleared in this case) |
772 | * 2. the newpage was moved but not remapped again because the task | 772 | * 2. the newpage was moved but not remapped again because the task |
773 | * exits and the newpage is obsolete. In this case, the new page | 773 | * exits and the newpage is obsolete. In this case, the new page |
774 | * may be a swapcache. So, we just call mem_cgroup_uncharge_page() | 774 | * may be a swapcache. So, we just call mem_cgroup_uncharge_page() |
775 | * always for avoiding mess. The page_cgroup will be removed if | 775 | * always for avoiding mess. The page_cgroup will be removed if |
776 | * unnecessary. File cache pages is still on radix-tree. Don't | 776 | * unnecessary. File cache pages is still on radix-tree. Don't |
777 | * care it. | 777 | * care it. |
778 | */ | 778 | */ |
779 | if (!newpage->mapping) | 779 | if (!newpage->mapping) |
780 | __mem_cgroup_uncharge_common(newpage, | 780 | __mem_cgroup_uncharge_common(newpage, |
781 | MEM_CGROUP_CHARGE_TYPE_FORCE); | 781 | MEM_CGROUP_CHARGE_TYPE_FORCE); |
782 | else if (PageAnon(newpage)) | 782 | else if (PageAnon(newpage)) |
783 | mem_cgroup_uncharge_page(newpage); | 783 | mem_cgroup_uncharge_page(newpage); |
784 | } | 784 | } |
785 | 785 | ||
786 | /* | 786 | /* |
787 | * A call to try to shrink memory usage under specified resource controller. | 787 | * A call to try to shrink memory usage under specified resource controller. |
788 | * This is typically used for page reclaiming for shmem for reducing side | 788 | * This is typically used for page reclaiming for shmem for reducing side |
789 | * effect of page allocation from shmem, which is used by some mem_cgroup. | 789 | * effect of page allocation from shmem, which is used by some mem_cgroup. |
790 | */ | 790 | */ |
791 | int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | 791 | int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) |
792 | { | 792 | { |
793 | struct mem_cgroup *mem; | 793 | struct mem_cgroup *mem; |
794 | int progress = 0; | 794 | int progress = 0; |
795 | int retry = MEM_CGROUP_RECLAIM_RETRIES; | 795 | int retry = MEM_CGROUP_RECLAIM_RETRIES; |
796 | 796 | ||
797 | if (mem_cgroup_subsys.disabled) | 797 | if (mem_cgroup_subsys.disabled) |
798 | return 0; | 798 | return 0; |
| | 799 | if (!mm) |
| | 800 | return 0; |
799 | 801 | ||
800 | rcu_read_lock(); | 802 | rcu_read_lock(); |
801 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 803 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); |
802 | css_get(&mem->css); | 804 | css_get(&mem->css); |
803 | rcu_read_unlock(); | 805 | rcu_read_unlock(); |
804 | 806 | ||
805 | do { | 807 | do { |
806 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); | 808 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); |
807 | } while (!progress && --retry); | 809 | } while (!progress && --retry); |
808 | 810 | ||
809 | css_put(&mem->css); | 811 | css_put(&mem->css); |
810 | if (!retry) | 812 | if (!retry) |
811 | return -ENOMEM; | 813 | return -ENOMEM; |
812 | return 0; | 814 | return 0; |
813 | } | 815 | } |
814 | 816 | ||
815 | int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) | 817 | int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) |
816 | { | 818 | { |
817 | 819 | ||
818 | int retry_count = MEM_CGROUP_RECLAIM_RETRIES; | 820 | int retry_count = MEM_CGROUP_RECLAIM_RETRIES; |
819 | int progress; | 821 | int progress; |
820 | int ret = 0; | 822 | int ret = 0; |
821 | 823 | ||
822 | while (res_counter_set_limit(&memcg->res, val)) { | 824 | while (res_counter_set_limit(&memcg->res, val)) { |
823 | if (signal_pending(current)) { | 825 | if (signal_pending(current)) { |
824 | ret = -EINTR; | 826 | ret = -EINTR; |
825 | break; | 827 | break; |
826 | } | 828 | } |
827 | if (!retry_count) { | 829 | if (!retry_count) { |
828 | ret = -EBUSY; | 830 | ret = -EBUSY; |
829 | break; | 831 | break; |
830 | } | 832 | } |
831 | progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL); | 833 | progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL); |
832 | if (!progress) | 834 | if (!progress) |
833 | retry_count--; | 835 | retry_count--; |
834 | } | 836 | } |
835 | return ret; | 837 | return ret; |
836 | } | 838 | } |
837 | 839 | ||
838 | 840 | ||
839 | /* | 841 | /* |
840 | * This routine traverse page_cgroup in given list and drop them all. | 842 | * This routine traverse page_cgroup in given list and drop them all. |
841 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. | 843 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. |
842 | */ | 844 | */ |
843 | #define FORCE_UNCHARGE_BATCH (128) | 845 | #define FORCE_UNCHARGE_BATCH (128) |
844 | static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, | 846 | static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, |
845 | struct mem_cgroup_per_zone *mz, | 847 | struct mem_cgroup_per_zone *mz, |
846 | int active) | 848 | int active) |
847 | { | 849 | { |
848 | struct page_cgroup *pc; | 850 | struct page_cgroup *pc; |
849 | struct page *page; | 851 | struct page *page; |
850 | int count = FORCE_UNCHARGE_BATCH; | 852 | int count = FORCE_UNCHARGE_BATCH; |
851 | unsigned long flags; | 853 | unsigned long flags; |
852 | struct list_head *list; | 854 | struct list_head *list; |
853 | 855 | ||
854 | if (active) | 856 | if (active) |
855 | list = &mz->active_list; | 857 | list = &mz->active_list; |
856 | else | 858 | else |
857 | list = &mz->inactive_list; | 859 | list = &mz->inactive_list; |
858 | 860 | ||
859 | spin_lock_irqsave(&mz->lru_lock, flags); | 861 | spin_lock_irqsave(&mz->lru_lock, flags); |
860 | while (!list_empty(list)) { | 862 | while (!list_empty(list)) { |
861 | pc = list_entry(list->prev, struct page_cgroup, lru); | 863 | pc = list_entry(list->prev, struct page_cgroup, lru); |
862 | page = pc->page; | 864 | page = pc->page; |
863 | get_page(page); | 865 | get_page(page); |
864 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 866 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
865 | /* | 867 | /* |
866 | * Check if this page is on the LRU. A !LRU page can be | 868 | * Check if this page is on the LRU. A !LRU page can be |
867 | * found if it's under page migration. | 869 | * found if it's under page migration. |
868 | */ | 870 | */ |
869 | if (PageLRU(page)) { | 871 | if (PageLRU(page)) { |
870 | __mem_cgroup_uncharge_common(page, | 872 | __mem_cgroup_uncharge_common(page, |
871 | MEM_CGROUP_CHARGE_TYPE_FORCE); | 873 | MEM_CGROUP_CHARGE_TYPE_FORCE); |
872 | put_page(page); | 874 | put_page(page); |
873 | if (--count <= 0) { | 875 | if (--count <= 0) { |
874 | count = FORCE_UNCHARGE_BATCH; | 876 | count = FORCE_UNCHARGE_BATCH; |
875 | cond_resched(); | 877 | cond_resched(); |
876 | } | 878 | } |
877 | } else | 879 | } else |
878 | cond_resched(); | 880 | cond_resched(); |
879 | spin_lock_irqsave(&mz->lru_lock, flags); | 881 | spin_lock_irqsave(&mz->lru_lock, flags); |
880 | } | 882 | } |
881 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 883 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
882 | } | 884 | } |
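Note how the loop above drops mz->lru_lock for every page and calls cond_resched() after each FORCE_UNCHARGE_BATCH (128) uncharges, so emptying a long list never monopolizes the CPU with the spinlock held. The fragment below is a generic userspace illustration of the same batching idea; the array, the "drain" operation and sched_yield() are stand-ins, not kernel interfaces.

    #include <sched.h>
    #include <stdio.h>

    #define BATCH 128

    /* Drain a large array in batches, yielding the CPU between batches,
     * analogous to calling cond_resched() every FORCE_UNCHARGE_BATCH items. */
    static void drain(int *items, int n)
    {
        int count = BATCH;
        int i;

        for (i = 0; i < n; i++) {
            items[i] = 0;               /* "uncharge" one item */
            if (--count <= 0) {
                count = BATCH;
                sched_yield();          /* let other tasks run */
            }
        }
    }

    int main(void)
    {
        static int items[100000];

        drain(items, 100000);
        printf("drained\n");
        return 0;
    }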
883 | 885 | ||
884 | /* | 886 | /* |
885 | * Make the mem_cgroup's charge 0 if there is no task attached. | 887 | * Make the mem_cgroup's charge 0 if there is no task attached. |
886 | * This enables deleting this mem_cgroup. | 888 | * This enables deleting this mem_cgroup. |
887 | */ | 889 | */ |
888 | static int mem_cgroup_force_empty(struct mem_cgroup *mem) | 890 | static int mem_cgroup_force_empty(struct mem_cgroup *mem) |
889 | { | 891 | { |
890 | int ret = -EBUSY; | 892 | int ret = -EBUSY; |
891 | int node, zid; | 893 | int node, zid; |
892 | 894 | ||
893 | css_get(&mem->css); | 895 | css_get(&mem->css); |
894 | /* | 896 | /* |
895 | * page reclaim code (kswapd etc.) will move pages between the | 897 | * page reclaim code (kswapd etc.) will move pages between the |
896 | * active_list <-> inactive_list while we don't hold a lock. | 898 | * active_list <-> inactive_list while we don't hold a lock. |
897 | * So, we have to loop here until all lists are empty. | 899 | * So, we have to loop here until all lists are empty. |
898 | */ | 900 | */ |
899 | while (mem->res.usage > 0) { | 901 | while (mem->res.usage > 0) { |
900 | if (atomic_read(&mem->css.cgroup->count) > 0) | 902 | if (atomic_read(&mem->css.cgroup->count) > 0) |
901 | goto out; | 903 | goto out; |
902 | for_each_node_state(node, N_POSSIBLE) | 904 | for_each_node_state(node, N_POSSIBLE) |
903 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { | 905 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
904 | struct mem_cgroup_per_zone *mz; | 906 | struct mem_cgroup_per_zone *mz; |
905 | mz = mem_cgroup_zoneinfo(mem, node, zid); | 907 | mz = mem_cgroup_zoneinfo(mem, node, zid); |
906 | /* drop all page_cgroup in active_list */ | 908 | /* drop all page_cgroup in active_list */ |
907 | mem_cgroup_force_empty_list(mem, mz, 1); | 909 | mem_cgroup_force_empty_list(mem, mz, 1); |
908 | /* drop all page_cgroup in inactive_list */ | 910 | /* drop all page_cgroup in inactive_list */ |
909 | mem_cgroup_force_empty_list(mem, mz, 0); | 911 | mem_cgroup_force_empty_list(mem, mz, 0); |
910 | } | 912 | } |
911 | } | 913 | } |
912 | ret = 0; | 914 | ret = 0; |
913 | out: | 915 | out: |
914 | css_put(&mem->css); | 916 | css_put(&mem->css); |
915 | return ret; | 917 | return ret; |
916 | } | 918 | } |
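mem_cgroup_force_empty() is reachable from userspace through the memory.force_empty file wired up further down in this file: writing to it asks the kernel to drop every remaining page_cgroup so that the (task-less) group can be removed, and it returns -EBUSY while the group is still in use. A hedged usage sketch, again with a placeholder cgroup path:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    /* Ask the kernel to drop all remaining charges of a memory cgroup
     * before removing it; expected to fail with EBUSY while the group
     * is still in use (e.g. tasks attached). */
    int main(void)
    {
        /* Placeholder path; adjust to the actual cgroup mount. */
        int fd = open("/cgroup/demo/memory.force_empty", O_WRONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (write(fd, "1", 1) < 0)
            perror("write");
        close(fd);
        return 0;
    }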
917 | 919 | ||
918 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 920 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
919 | { | 921 | { |
920 | return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, | 922 | return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, |
921 | cft->private); | 923 | cft->private); |
922 | } | 924 | } |
923 | /* | 925 | /* |
924 | * The user of this function is... | 926 | * The user of this function is... |
925 | * RES_LIMIT. | 927 | * RES_LIMIT. |
926 | */ | 928 | */ |
927 | static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | 929 | static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, |
928 | const char *buffer) | 930 | const char *buffer) |
929 | { | 931 | { |
930 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 932 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
931 | unsigned long long val; | 933 | unsigned long long val; |
932 | int ret; | 934 | int ret; |
933 | 935 | ||
934 | switch (cft->private) { | 936 | switch (cft->private) { |
935 | case RES_LIMIT: | 937 | case RES_LIMIT: |
936 | /* This function does all the necessary parsing; reuse it */ | 938 | /* This function does all the necessary parsing; reuse it */ |
937 | ret = res_counter_memparse_write_strategy(buffer, &val); | 939 | ret = res_counter_memparse_write_strategy(buffer, &val); |
938 | if (!ret) | 940 | if (!ret) |
939 | ret = mem_cgroup_resize_limit(memcg, val); | 941 | ret = mem_cgroup_resize_limit(memcg, val); |
940 | break; | 942 | break; |
941 | default: | 943 | default: |
942 | ret = -EINVAL; /* should be BUG() ? */ | 944 | ret = -EINVAL; /* should be BUG() ? */ |
943 | break; | 945 | break; |
944 | } | 946 | } |
945 | return ret; | 947 | return ret; |
946 | } | 948 | } |
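The RES_LIMIT branch leans on res_counter_memparse_write_strategy(), which parses the written string memparse()-style, so limits can be given with K/M/G suffixes. The helper below is only a rough userspace approximation of that suffix convention, written for illustration; it is not the kernel parser and ignores the larger suffixes and edge cases memparse() handles.

    #include <stdio.h>
    #include <stdlib.h>
    #include <ctype.h>

    /* Rough userspace approximation of memparse(): parse a number with an
     * optional K/M/G suffix into bytes.  Not the kernel code. */
    static unsigned long long parse_bytes(const char *s)
    {
        char *end;
        unsigned long long val = strtoull(s, &end, 0);

        switch (tolower((unsigned char)*end)) {
        case 'g': val <<= 10;   /* fall through */
        case 'm': val <<= 10;   /* fall through */
        case 'k': val <<= 10;
        default:  break;
        }
        return val;
    }

    int main(void)
    {
        printf("%llu\n", parse_bytes("64M"));   /* 67108864 */
        printf("%llu\n", parse_bytes("1G"));    /* 1073741824 */
        return 0;
    }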
947 | 949 | ||
948 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | 950 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) |
949 | { | 951 | { |
950 | struct mem_cgroup *mem; | 952 | struct mem_cgroup *mem; |
951 | 953 | ||
952 | mem = mem_cgroup_from_cont(cont); | 954 | mem = mem_cgroup_from_cont(cont); |
953 | switch (event) { | 955 | switch (event) { |
954 | case RES_MAX_USAGE: | 956 | case RES_MAX_USAGE: |
955 | res_counter_reset_max(&mem->res); | 957 | res_counter_reset_max(&mem->res); |
956 | break; | 958 | break; |
957 | case RES_FAILCNT: | 959 | case RES_FAILCNT: |
958 | res_counter_reset_failcnt(&mem->res); | 960 | res_counter_reset_failcnt(&mem->res); |
959 | break; | 961 | break; |
960 | } | 962 | } |
961 | return 0; | 963 | return 0; |
962 | } | 964 | } |
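mem_cgroup_reset() backs two trigger files declared in the cftype table below: a write to memory.max_usage_in_bytes clears the usage high-water mark and a write to memory.failcnt clears the limit-failure counter. A minimal sketch of resetting both (the cgroup path is a placeholder):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    /* Reset a memory cgroup counter by writing to its trigger file. */
    static int reset_counter(const char *path)
    {
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
            perror("open");
            return -1;
        }
        if (write(fd, "0", 1) < 0)
            perror("write");
        close(fd);
        return 0;
    }

    int main(void)
    {
        /* Placeholder paths; adjust to the actual cgroup mount. */
        reset_counter("/cgroup/demo/memory.max_usage_in_bytes");
        reset_counter("/cgroup/demo/memory.failcnt");
        return 0;
    }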
963 | 965 | ||
964 | static int mem_force_empty_write(struct cgroup *cont, unsigned int event) | 966 | static int mem_force_empty_write(struct cgroup *cont, unsigned int event) |
965 | { | 967 | { |
966 | return mem_cgroup_force_empty(mem_cgroup_from_cont(cont)); | 968 | return mem_cgroup_force_empty(mem_cgroup_from_cont(cont)); |
967 | } | 969 | } |
968 | 970 | ||
969 | static const struct mem_cgroup_stat_desc { | 971 | static const struct mem_cgroup_stat_desc { |
970 | const char *msg; | 972 | const char *msg; |
971 | u64 unit; | 973 | u64 unit; |
972 | } mem_cgroup_stat_desc[] = { | 974 | } mem_cgroup_stat_desc[] = { |
973 | [MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, }, | 975 | [MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, }, |
974 | [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, | 976 | [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, |
975 | [MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, }, | 977 | [MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, }, |
976 | [MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, }, | 978 | [MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, }, |
977 | }; | 979 | }; |
978 | 980 | ||
979 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | 981 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, |
980 | struct cgroup_map_cb *cb) | 982 | struct cgroup_map_cb *cb) |
981 | { | 983 | { |
982 | struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); | 984 | struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); |
983 | struct mem_cgroup_stat *stat = &mem_cont->stat; | 985 | struct mem_cgroup_stat *stat = &mem_cont->stat; |
984 | int i; | 986 | int i; |
985 | 987 | ||
986 | for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) { | 988 | for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) { |
987 | s64 val; | 989 | s64 val; |
988 | 990 | ||
989 | val = mem_cgroup_read_stat(stat, i); | 991 | val = mem_cgroup_read_stat(stat, i); |
990 | val *= mem_cgroup_stat_desc[i].unit; | 992 | val *= mem_cgroup_stat_desc[i].unit; |
991 | cb->fill(cb, mem_cgroup_stat_desc[i].msg, val); | 993 | cb->fill(cb, mem_cgroup_stat_desc[i].msg, val); |
992 | } | 994 | } |
993 | /* showing # of active pages */ | 995 | /* showing # of active pages */ |
994 | { | 996 | { |
995 | unsigned long active, inactive; | 997 | unsigned long active, inactive; |
996 | 998 | ||
997 | inactive = mem_cgroup_get_all_zonestat(mem_cont, | 999 | inactive = mem_cgroup_get_all_zonestat(mem_cont, |
998 | MEM_CGROUP_ZSTAT_INACTIVE); | 1000 | MEM_CGROUP_ZSTAT_INACTIVE); |
999 | active = mem_cgroup_get_all_zonestat(mem_cont, | 1001 | active = mem_cgroup_get_all_zonestat(mem_cont, |
1000 | MEM_CGROUP_ZSTAT_ACTIVE); | 1002 | MEM_CGROUP_ZSTAT_ACTIVE); |
1001 | cb->fill(cb, "active", (active) * PAGE_SIZE); | 1003 | cb->fill(cb, "active", (active) * PAGE_SIZE); |
1002 | cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); | 1004 | cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); |
1003 | } | 1005 | } |
1004 | return 0; | 1006 | return 0; |
1005 | } | 1007 | } |
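mem_control_stat_show() emits "key value" pairs (cache, rss, pgpgin, pgpgout, plus the active/inactive byte counts) through the cgroup map callback; userspace reads them from the group's memory.stat file declared in the table below. A small reader, with the cgroup path again a placeholder:

    #include <stdio.h>

    /* Read memory.stat of a cgroup and print each "key value" line parsed
     * into its two fields.  The byte-counted keys (cache, rss, active,
     * inactive) are already multiplied by PAGE_SIZE by the kernel. */
    int main(void)
    {
        char key[64];
        unsigned long long val;
        FILE *f = fopen("/cgroup/demo/memory.stat", "r");   /* placeholder */

        if (!f) {
            perror("fopen");
            return 1;
        }
        while (fscanf(f, "%63s %llu", key, &val) == 2)
            printf("%-10s = %llu\n", key, val);
        fclose(f);
        return 0;
    }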
1006 | 1008 | ||
1007 | static struct cftype mem_cgroup_files[] = { | 1009 | static struct cftype mem_cgroup_files[] = { |
1008 | { | 1010 | { |
1009 | .name = "usage_in_bytes", | 1011 | .name = "usage_in_bytes", |
1010 | .private = RES_USAGE, | 1012 | .private = RES_USAGE, |
1011 | .read_u64 = mem_cgroup_read, | 1013 | .read_u64 = mem_cgroup_read, |
1012 | }, | 1014 | }, |
1013 | { | 1015 | { |
1014 | .name = "max_usage_in_bytes", | 1016 | .name = "max_usage_in_bytes", |
1015 | .private = RES_MAX_USAGE, | 1017 | .private = RES_MAX_USAGE, |
1016 | .trigger = mem_cgroup_reset, | 1018 | .trigger = mem_cgroup_reset, |
1017 | .read_u64 = mem_cgroup_read, | 1019 | .read_u64 = mem_cgroup_read, |
1018 | }, | 1020 | }, |
1019 | { | 1021 | { |
1020 | .name = "limit_in_bytes", | 1022 | .name = "limit_in_bytes", |
1021 | .private = RES_LIMIT, | 1023 | .private = RES_LIMIT, |
1022 | .write_string = mem_cgroup_write, | 1024 | .write_string = mem_cgroup_write, |
1023 | .read_u64 = mem_cgroup_read, | 1025 | .read_u64 = mem_cgroup_read, |
1024 | }, | 1026 | }, |
1025 | { | 1027 | { |
1026 | .name = "failcnt", | 1028 | .name = "failcnt", |
1027 | .private = RES_FAILCNT, | 1029 | .private = RES_FAILCNT, |
1028 | .trigger = mem_cgroup_reset, | 1030 | .trigger = mem_cgroup_reset, |
1029 | .read_u64 = mem_cgroup_read, | 1031 | .read_u64 = mem_cgroup_read, |
1030 | }, | 1032 | }, |
1031 | { | 1033 | { |
1032 | .name = "force_empty", | 1034 | .name = "force_empty", |
1033 | .trigger = mem_force_empty_write, | 1035 | .trigger = mem_force_empty_write, |
1034 | }, | 1036 | }, |
1035 | { | 1037 | { |
1036 | .name = "stat", | 1038 | .name = "stat", |
1037 | .read_map = mem_control_stat_show, | 1039 | .read_map = mem_control_stat_show, |
1038 | }, | 1040 | }, |
1039 | }; | 1041 | }; |
1040 | 1042 | ||
1041 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | 1043 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) |
1042 | { | 1044 | { |
1043 | struct mem_cgroup_per_node *pn; | 1045 | struct mem_cgroup_per_node *pn; |
1044 | struct mem_cgroup_per_zone *mz; | 1046 | struct mem_cgroup_per_zone *mz; |
1045 | int zone, tmp = node; | 1047 | int zone, tmp = node; |
1046 | /* | 1048 | /* |
1047 | * This routine is called for each possible node. | 1049 | * This routine is called for each possible node. |
1048 | * But it's a BUG to call kmalloc() against an offline node. | 1050 | * But it's a BUG to call kmalloc() against an offline node. |
1049 | * | 1051 | * |
1050 | * TODO: this routine can waste a lot of memory for nodes which will | 1052 | * TODO: this routine can waste a lot of memory for nodes which will |
1051 | * never be onlined. It would be better to use a memory hotplug | 1053 | * never be onlined. It would be better to use a memory hotplug |
1052 | * callback function. | 1054 | * callback function. |
1053 | */ | 1055 | */ |
1054 | if (!node_state(node, N_NORMAL_MEMORY)) | 1056 | if (!node_state(node, N_NORMAL_MEMORY)) |
1055 | tmp = -1; | 1057 | tmp = -1; |
1056 | pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp); | 1058 | pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp); |
1057 | if (!pn) | 1059 | if (!pn) |
1058 | return 1; | 1060 | return 1; |
1059 | 1061 | ||
1060 | mem->info.nodeinfo[node] = pn; | 1062 | mem->info.nodeinfo[node] = pn; |
1061 | memset(pn, 0, sizeof(*pn)); | 1063 | memset(pn, 0, sizeof(*pn)); |
1062 | 1064 | ||
1063 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | 1065 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { |
1064 | mz = &pn->zoneinfo[zone]; | 1066 | mz = &pn->zoneinfo[zone]; |
1065 | INIT_LIST_HEAD(&mz->active_list); | 1067 | INIT_LIST_HEAD(&mz->active_list); |
1066 | INIT_LIST_HEAD(&mz->inactive_list); | 1068 | INIT_LIST_HEAD(&mz->inactive_list); |
1067 | spin_lock_init(&mz->lru_lock); | 1069 | spin_lock_init(&mz->lru_lock); |
1068 | } | 1070 | } |
1069 | return 0; | 1071 | return 0; |
1070 | } | 1072 | } |
1071 | 1073 | ||
1072 | static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | 1074 | static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) |
1073 | { | 1075 | { |
1074 | kfree(mem->info.nodeinfo[node]); | 1076 | kfree(mem->info.nodeinfo[node]); |
1075 | } | 1077 | } |
1076 | 1078 | ||
1077 | static struct mem_cgroup *mem_cgroup_alloc(void) | 1079 | static struct mem_cgroup *mem_cgroup_alloc(void) |
1078 | { | 1080 | { |
1079 | struct mem_cgroup *mem; | 1081 | struct mem_cgroup *mem; |
1080 | 1082 | ||
1081 | if (sizeof(*mem) < PAGE_SIZE) | 1083 | if (sizeof(*mem) < PAGE_SIZE) |
1082 | mem = kmalloc(sizeof(*mem), GFP_KERNEL); | 1084 | mem = kmalloc(sizeof(*mem), GFP_KERNEL); |
1083 | else | 1085 | else |
1084 | mem = vmalloc(sizeof(*mem)); | 1086 | mem = vmalloc(sizeof(*mem)); |
1085 | 1087 | ||
1086 | if (mem) | 1088 | if (mem) |
1087 | memset(mem, 0, sizeof(*mem)); | 1089 | memset(mem, 0, sizeof(*mem)); |
1088 | return mem; | 1090 | return mem; |
1089 | } | 1091 | } |
1090 | 1092 | ||
1091 | static void mem_cgroup_free(struct mem_cgroup *mem) | 1093 | static void mem_cgroup_free(struct mem_cgroup *mem) |
1092 | { | 1094 | { |
1093 | if (sizeof(*mem) < PAGE_SIZE) | 1095 | if (sizeof(*mem) < PAGE_SIZE) |
1094 | kfree(mem); | 1096 | kfree(mem); |
1095 | else | 1097 | else |
1096 | vfree(mem); | 1098 | vfree(mem); |
1097 | } | 1099 | } |
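mem_cgroup_alloc()/mem_cgroup_free() pick kmalloc()/kfree() while struct mem_cgroup still fits in a page and fall back to vmalloc()/vfree() once it does not, avoiding a large physically contiguous allocation. The snippet below mirrors that decision with a userspace analogue (malloc() for small objects, anonymous mmap() for large ones); it illustrates the pattern only and is not kernel code.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Userspace analogue of the kmalloc-vs-vmalloc choice: small objects come
     * from malloc(), big ones from an anonymous mmap() (page-backed, never
     * needing a large contiguous heap chunk). */
    static void *alloc_obj(size_t size, long page_size)
    {
        void *p;

        if (size < (size_t)page_size)
            p = malloc(size);
        else
            p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p && p != MAP_FAILED)
            memset(p, 0, size);         /* like the memset() after allocation */
        return (p == MAP_FAILED) ? NULL : p;
    }

    static void free_obj(void *p, size_t size, long page_size)
    {
        if (size < (size_t)page_size)
            free(p);
        else
            munmap(p, size);
    }

    int main(void)
    {
        long page_size = sysconf(_SC_PAGESIZE);
        void *small = alloc_obj(128, page_size);
        void *big = alloc_obj(1 << 20, page_size);

        printf("small=%p big=%p\n", small, big);
        free_obj(small, 128, page_size);
        free_obj(big, 1 << 20, page_size);
        return 0;
    }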
1098 | 1100 | ||
1099 | 1101 | ||
1100 | static struct cgroup_subsys_state * | 1102 | static struct cgroup_subsys_state * |
1101 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | 1103 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) |
1102 | { | 1104 | { |
1103 | struct mem_cgroup *mem; | 1105 | struct mem_cgroup *mem; |
1104 | int node; | 1106 | int node; |
1105 | 1107 | ||
1106 | if (unlikely((cont->parent) == NULL)) { | 1108 | if (unlikely((cont->parent) == NULL)) { |
1107 | mem = &init_mem_cgroup; | 1109 | mem = &init_mem_cgroup; |
1108 | page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC); | 1110 | page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC); |
1109 | } else { | 1111 | } else { |
1110 | mem = mem_cgroup_alloc(); | 1112 | mem = mem_cgroup_alloc(); |
1111 | if (!mem) | 1113 | if (!mem) |
1112 | return ERR_PTR(-ENOMEM); | 1114 | return ERR_PTR(-ENOMEM); |
1113 | } | 1115 | } |
1114 | 1116 | ||
1115 | res_counter_init(&mem->res); | 1117 | res_counter_init(&mem->res); |
1116 | 1118 | ||
1117 | for_each_node_state(node, N_POSSIBLE) | 1119 | for_each_node_state(node, N_POSSIBLE) |
1118 | if (alloc_mem_cgroup_per_zone_info(mem, node)) | 1120 | if (alloc_mem_cgroup_per_zone_info(mem, node)) |
1119 | goto free_out; | 1121 | goto free_out; |
1120 | 1122 | ||
1121 | return &mem->css; | 1123 | return &mem->css; |
1122 | free_out: | 1124 | free_out: |
1123 | for_each_node_state(node, N_POSSIBLE) | 1125 | for_each_node_state(node, N_POSSIBLE) |
1124 | free_mem_cgroup_per_zone_info(mem, node); | 1126 | free_mem_cgroup_per_zone_info(mem, node); |
1125 | if (cont->parent != NULL) | 1127 | if (cont->parent != NULL) |
1126 | mem_cgroup_free(mem); | 1128 | mem_cgroup_free(mem); |
1127 | return ERR_PTR(-ENOMEM); | 1129 | return ERR_PTR(-ENOMEM); |
1128 | } | 1130 | } |
1129 | 1131 | ||
1130 | static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | 1132 | static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, |
1131 | struct cgroup *cont) | 1133 | struct cgroup *cont) |
1132 | { | 1134 | { |
1133 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 1135 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
1134 | mem_cgroup_force_empty(mem); | 1136 | mem_cgroup_force_empty(mem); |
1135 | } | 1137 | } |
1136 | 1138 | ||
1137 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 1139 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
1138 | struct cgroup *cont) | 1140 | struct cgroup *cont) |
1139 | { | 1141 | { |
1140 | int node; | 1142 | int node; |
1141 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 1143 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
1142 | 1144 | ||
1143 | for_each_node_state(node, N_POSSIBLE) | 1145 | for_each_node_state(node, N_POSSIBLE) |
1144 | free_mem_cgroup_per_zone_info(mem, node); | 1146 | free_mem_cgroup_per_zone_info(mem, node); |
1145 | 1147 | ||
1146 | mem_cgroup_free(mem_cgroup_from_cont(cont)); | 1148 | mem_cgroup_free(mem_cgroup_from_cont(cont)); |
1147 | } | 1149 | } |
1148 | 1150 | ||
1149 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 1151 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |
1150 | struct cgroup *cont) | 1152 | struct cgroup *cont) |
1151 | { | 1153 | { |
1152 | return cgroup_add_files(cont, ss, mem_cgroup_files, | 1154 | return cgroup_add_files(cont, ss, mem_cgroup_files, |
1153 | ARRAY_SIZE(mem_cgroup_files)); | 1155 | ARRAY_SIZE(mem_cgroup_files)); |
1154 | } | 1156 | } |
1155 | 1157 | ||
1156 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 1158 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
1157 | struct cgroup *cont, | 1159 | struct cgroup *cont, |
1158 | struct cgroup *old_cont, | 1160 | struct cgroup *old_cont, |
1159 | struct task_struct *p) | 1161 | struct task_struct *p) |
1160 | { | 1162 | { |
1161 | struct mm_struct *mm; | 1163 | struct mm_struct *mm; |
1162 | struct mem_cgroup *mem, *old_mem; | 1164 | struct mem_cgroup *mem, *old_mem; |
1163 | 1165 | ||
1164 | mm = get_task_mm(p); | 1166 | mm = get_task_mm(p); |
1165 | if (mm == NULL) | 1167 | if (mm == NULL) |
1166 | return; | 1168 | return; |
1167 | 1169 | ||
1168 | mem = mem_cgroup_from_cont(cont); | 1170 | mem = mem_cgroup_from_cont(cont); |
1169 | old_mem = mem_cgroup_from_cont(old_cont); | 1171 | old_mem = mem_cgroup_from_cont(old_cont); |
1170 | 1172 | ||
1171 | /* | 1173 | /* |
1172 | * Only thread group leaders are allowed to migrate; the mm_struct is | 1174 | * Only thread group leaders are allowed to migrate; the mm_struct is |
1173 | * in effect owned by the leader. | 1175 | * in effect owned by the leader. |
1174 | */ | 1176 | */ |
1175 | if (!thread_group_leader(p)) | 1177 | if (!thread_group_leader(p)) |
1176 | goto out; | 1178 | goto out; |
1177 | 1179 | ||
1178 | out: | 1180 | out: |
1179 | mmput(mm); | 1181 | mmput(mm); |
1180 | } | 1182 | } |
1181 | 1183 | ||
1182 | struct cgroup_subsys mem_cgroup_subsys = { | 1184 | struct cgroup_subsys mem_cgroup_subsys = { |
1183 | .name = "memory", | 1185 | .name = "memory", |
1184 | .subsys_id = mem_cgroup_subsys_id, | 1186 | .subsys_id = mem_cgroup_subsys_id, |
1185 | .create = mem_cgroup_create, | 1187 | .create = mem_cgroup_create, |
1186 | .pre_destroy = mem_cgroup_pre_destroy, | 1188 | .pre_destroy = mem_cgroup_pre_destroy, |
1187 | .destroy = mem_cgroup_destroy, | 1189 | .destroy = mem_cgroup_destroy, |
1188 | .populate = mem_cgroup_populate, | 1190 | .populate = mem_cgroup_populate, |
1189 | .attach = mem_cgroup_move_task, | 1191 | .attach = mem_cgroup_move_task, |
1190 | .early_init = 0, | 1192 | .early_init = 0, |
1191 | }; | 1193 | }; |
1192 | 1194 |