Commit e8ea14cc6eadfe2ea63e9989e16e62625a2619f8
Committed by
Linus Torvalds
1 parent
5ac8fb31ad
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
mm: memcontrol: take a css reference for each charged page
Charges currently pin the css indirectly by playing tricks during css_offline(): user pages stall the offlining process until all of them have been reparented, whereas kmemcg acquires a keep-alive reference if outstanding kernel pages are detected at that point. In preparation for removing all this complexity, make the pinning explicit and acquire a css reference for every charged page. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Vladimir Davydov <vdavydov@parallels.com> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: David Rientjes <rientjes@google.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 81 additions and 13 deletions Side-by-side Diff
include/linux/cgroup.h
... | ... | @@ -113,6 +113,19 @@ |
113 | 113 | } |
114 | 114 | |
115 | 115 | /** |
116 | + * css_get_many - obtain references on the specified css | |
117 | + * @css: target css | |
118 | + * @n: number of references to get | |
119 | + * | |
120 | + * The caller must already have a reference. | |
121 | + */ | |
122 | +static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) | |
123 | +{ | |
124 | + if (!(css->flags & CSS_NO_REF)) | |
125 | + percpu_ref_get_many(&css->refcnt, n); | |
126 | +} | |
127 | + | |
128 | +/** | |
116 | 129 | * css_tryget - try to obtain a reference on the specified css |
117 | 130 | * @css: target css |
118 | 131 | * |
... | ... | @@ -157,6 +170,19 @@ |
157 | 170 | { |
158 | 171 | if (!(css->flags & CSS_NO_REF)) |
159 | 172 | percpu_ref_put(&css->refcnt); |
173 | +} | |
174 | + | |
175 | +/** | |
176 | + * css_put_many - put css references | |
177 | + * @css: target css | |
178 | + * @n: number of references to put | |
179 | + * | |
180 | + * Put references obtained via css_get() and css_tryget_online(). | |
181 | + */ | |
182 | +static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) | |
183 | +{ | |
184 | + if (!(css->flags & CSS_NO_REF)) | |
185 | + percpu_ref_put_many(&css->refcnt, n); | |
160 | 186 | } |
161 | 187 | |
162 | 188 | /* bits in struct cgroup flags field */ |
include/linux/percpu-refcount.h
... | ... | @@ -147,28 +147,42 @@ |
147 | 147 | } |
148 | 148 | |
149 | 149 | /** |
150 | - * percpu_ref_get - increment a percpu refcount | |
150 | + * percpu_ref_get_many - increment a percpu refcount | |
151 | 151 | * @ref: percpu_ref to get |
152 | + * @nr: number of references to get | |
152 | 153 | * |
153 | - * Analagous to atomic_long_inc(). | |
154 | + * Analogous to atomic_long_add(). | |
154 | 155 | * |
155 | 156 | * This function is safe to call as long as @ref is between init and exit. |
156 | 157 | */ |
157 | -static inline void percpu_ref_get(struct percpu_ref *ref) | |
158 | +static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) | |
158 | 159 | { |
159 | 160 | unsigned long __percpu *percpu_count; |
160 | 161 | |
161 | 162 | rcu_read_lock_sched(); |
162 | 163 | |
163 | 164 | if (__ref_is_percpu(ref, &percpu_count)) |
164 | - this_cpu_inc(*percpu_count); | |
165 | + this_cpu_add(*percpu_count, nr); | |
165 | 166 | else |
166 | - atomic_long_inc(&ref->count); | |
167 | + atomic_long_add(nr, &ref->count); | |
167 | 168 | |
168 | 169 | rcu_read_unlock_sched(); |
169 | 170 | } |
170 | 171 | |
171 | 172 | /** |
173 | + * percpu_ref_get - increment a percpu refcount | |
174 | + * @ref: percpu_ref to get | |
175 | + * | |
176 | + * Analagous to atomic_long_inc(). | |
177 | + * | |
178 | + * This function is safe to call as long as @ref is between init and exit. | |
179 | + */ | |
180 | +static inline void percpu_ref_get(struct percpu_ref *ref) | |
181 | +{ | |
182 | + percpu_ref_get_many(ref, 1); | |
183 | +} | |
184 | + | |
185 | +/** | |
172 | 186 | * percpu_ref_tryget - try to increment a percpu refcount |
173 | 187 | * @ref: percpu_ref to try-get |
174 | 188 | * |
175 | 189 | |
176 | 190 | |
177 | 191 | |
178 | 192 | |
... | ... | @@ -231,26 +245,41 @@ |
231 | 245 | } |
232 | 246 | |
233 | 247 | /** |
234 | - * percpu_ref_put - decrement a percpu refcount | |
248 | + * percpu_ref_put_many - decrement a percpu refcount | |
235 | 249 | * @ref: percpu_ref to put |
250 | + * @nr: number of references to put | |
236 | 251 | * |
237 | 252 | * Decrement the refcount, and if 0, call the release function (which was passed |
238 | 253 | * to percpu_ref_init()) |
239 | 254 | * |
240 | 255 | * This function is safe to call as long as @ref is between init and exit. |
241 | 256 | */ |
242 | -static inline void percpu_ref_put(struct percpu_ref *ref) | |
257 | +static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) | |
243 | 258 | { |
244 | 259 | unsigned long __percpu *percpu_count; |
245 | 260 | |
246 | 261 | rcu_read_lock_sched(); |
247 | 262 | |
248 | 263 | if (__ref_is_percpu(ref, &percpu_count)) |
249 | - this_cpu_dec(*percpu_count); | |
250 | - else if (unlikely(atomic_long_dec_and_test(&ref->count))) | |
264 | + this_cpu_sub(*percpu_count, nr); | |
265 | + else if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) | |
251 | 266 | ref->release(ref); |
252 | 267 | |
253 | 268 | rcu_read_unlock_sched(); |
269 | +} | |
270 | + | |
271 | +/** | |
272 | + * percpu_ref_put - decrement a percpu refcount | |
273 | + * @ref: percpu_ref to put | |
274 | + * | |
275 | + * Decrement the refcount, and if 0, call the release function (which was passed | |
276 | + * to percpu_ref_init()) | |
277 | + * | |
278 | + * This function is safe to call as long as @ref is between init and exit. | |
279 | + */ | |
280 | +static inline void percpu_ref_put(struct percpu_ref *ref) | |
281 | +{ | |
282 | + percpu_ref_put_many(ref, 1); | |
254 | 283 | } |
255 | 284 | |
256 | 285 | /** |
mm/memcontrol.c
... | ... | @@ -2273,6 +2273,7 @@ |
2273 | 2273 | page_counter_uncharge(&old->memory, stock->nr_pages); |
2274 | 2274 | if (do_swap_account) |
2275 | 2275 | page_counter_uncharge(&old->memsw, stock->nr_pages); |
2276 | + css_put_many(&old->css, stock->nr_pages); | |
2276 | 2277 | stock->nr_pages = 0; |
2277 | 2278 | } |
2278 | 2279 | stock->cached = NULL; |
... | ... | @@ -2530,6 +2531,7 @@ |
2530 | 2531 | return -EINTR; |
2531 | 2532 | |
2532 | 2533 | done_restock: |
2534 | + css_get_many(&memcg->css, batch); | |
2533 | 2535 | if (batch > nr_pages) |
2534 | 2536 | refill_stock(memcg, batch - nr_pages); |
2535 | 2537 | done: |
... | ... | @@ -2544,6 +2546,8 @@ |
2544 | 2546 | page_counter_uncharge(&memcg->memory, nr_pages); |
2545 | 2547 | if (do_swap_account) |
2546 | 2548 | page_counter_uncharge(&memcg->memsw, nr_pages); |
2549 | + | |
2550 | + css_put_many(&memcg->css, nr_pages); | |
2547 | 2551 | } |
2548 | 2552 | |
2549 | 2553 | /* |
... | ... | @@ -2739,6 +2743,7 @@ |
2739 | 2743 | page_counter_charge(&memcg->memory, nr_pages); |
2740 | 2744 | if (do_swap_account) |
2741 | 2745 | page_counter_charge(&memcg->memsw, nr_pages); |
2746 | + css_get_many(&memcg->css, nr_pages); | |
2742 | 2747 | ret = 0; |
2743 | 2748 | } else if (ret) |
2744 | 2749 | page_counter_uncharge(&memcg->kmem, nr_pages); |
2745 | 2750 | |
... | ... | @@ -2754,8 +2759,10 @@ |
2754 | 2759 | page_counter_uncharge(&memcg->memsw, nr_pages); |
2755 | 2760 | |
2756 | 2761 | /* Not down to 0 */ |
2757 | - if (page_counter_uncharge(&memcg->kmem, nr_pages)) | |
2762 | + if (page_counter_uncharge(&memcg->kmem, nr_pages)) { | |
2763 | + css_put_many(&memcg->css, nr_pages); | |
2758 | 2764 | return; |
2765 | + } | |
2759 | 2766 | |
2760 | 2767 | /* |
2761 | 2768 | * Releases a reference taken in kmem_cgroup_css_offline in case |
... | ... | @@ -2767,6 +2774,8 @@ |
2767 | 2774 | */ |
2768 | 2775 | if (memcg_kmem_test_and_clear_dead(memcg)) |
2769 | 2776 | css_put(&memcg->css); |
2777 | + | |
2778 | + css_put_many(&memcg->css, nr_pages); | |
2770 | 2779 | } |
2771 | 2780 | |
2772 | 2781 | /* |
2773 | 2782 | |
... | ... | @@ -3394,10 +3403,13 @@ |
3394 | 3403 | ret = mem_cgroup_move_account(page, nr_pages, |
3395 | 3404 | pc, child, parent); |
3396 | 3405 | if (!ret) { |
3406 | + if (!mem_cgroup_is_root(parent)) | |
3407 | + css_get_many(&parent->css, nr_pages); | |
3397 | 3408 | /* Take charge off the local counters */ |
3398 | 3409 | page_counter_cancel(&child->memory, nr_pages); |
3399 | 3410 | if (do_swap_account) |
3400 | 3411 | page_counter_cancel(&child->memsw, nr_pages); |
3412 | + css_put_many(&child->css, nr_pages); | |
3401 | 3413 | } |
3402 | 3414 | |
3403 | 3415 | if (nr_pages > 1) |
... | ... | @@ -5767,7 +5779,6 @@ |
5767 | 5779 | { |
5768 | 5780 | struct mem_cgroup *from = mc.from; |
5769 | 5781 | struct mem_cgroup *to = mc.to; |
5770 | - int i; | |
5771 | 5782 | |
5772 | 5783 | /* we must uncharge all the leftover precharges from mc.to */ |
5773 | 5784 | if (mc.precharge) { |
... | ... | @@ -5795,8 +5806,7 @@ |
5795 | 5806 | if (!mem_cgroup_is_root(mc.to)) |
5796 | 5807 | page_counter_uncharge(&mc.to->memory, mc.moved_swap); |
5797 | 5808 | |
5798 | - for (i = 0; i < mc.moved_swap; i++) | |
5799 | - css_put(&mc.from->css); | |
5809 | + css_put_many(&mc.from->css, mc.moved_swap); | |
5800 | 5810 | |
5801 | 5811 | /* we've already done css_get(mc.to) */ |
5802 | 5812 | mc.moved_swap = 0; |
... | ... | @@ -6343,6 +6353,9 @@ |
6343 | 6353 | __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file); |
6344 | 6354 | memcg_check_events(memcg, dummy_page); |
6345 | 6355 | local_irq_restore(flags); |
6356 | + | |
6357 | + if (!mem_cgroup_is_root(memcg)) | |
6358 | + css_put_many(&memcg->css, max(nr_mem, nr_memsw)); | |
6346 | 6359 | } |
6347 | 6360 | |
6348 | 6361 | static void uncharge_list(struct list_head *page_list) |
-
mentioned in commit 4bdfc1