Commit e8ea14cc6eadfe2ea63e9989e16e62625a2619f8

Authored by Johannes Weiner
Committed by Linus Torvalds
1 parent 5ac8fb31ad

mm: memcontrol: take a css reference for each charged page

Charges currently pin the css indirectly by playing tricks during
css_offline(): user pages stall the offlining process until all of them
have been reparented, whereas kmemcg acquires a keep-alive reference if
outstanding kernel pages are detected at that point.

In preparation for removing all this complexity, make the pinning explicit
and acquire a css reference for every charged page.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 81 additions and 13 deletions
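
As a concrete picture of the rule this patch establishes, here is a
minimal userspace model (not kernel code; the group type and all
function names below are invented for illustration): every charged page
holds one reference on its group, references move in batches through a
single atomic operation, and dropping the base reference at "offline"
cannot release the group while pages are still charged.

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for a css: a refcount plus a release hook. */
struct group {
        atomic_long refcnt;
        void (*release)(struct group *g);
};

static void group_get_many(struct group *g, long n)     /* cf. css_get_many() */
{
        atomic_fetch_add(&g->refcnt, n);
}

static void group_put_many(struct group *g, long n)     /* cf. css_put_many() */
{
        if (atomic_fetch_sub(&g->refcnt, n) == n)       /* count hit zero */
                g->release(g);
}

static void release_fn(struct group *g)
{
        (void)g;
        puts("group released");
}

int main(void)
{
        struct group g = { .release = release_fn };

        atomic_init(&g.refcnt, 1);      /* base reference, dropped at offline */
        group_get_many(&g, 512);        /* charge 512 pages: 512 references */
        group_put_many(&g, 1);          /* "offline": group stays pinned */
        group_put_many(&g, 512);        /* last uncharge releases the group */
        return 0;
}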

include/linux/cgroup.h
@@ -113,6 +113,19 @@
 }
 
 /**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+        if (!(css->flags & CSS_NO_REF))
+                percpu_ref_get_many(&css->refcnt, n);
+}
+
+/**
  * css_tryget - try to obtain a reference on the specified css
  * @css: target css
  *
@@ -157,6 +170,19 @@
 {
         if (!(css->flags & CSS_NO_REF))
                 percpu_ref_put(&css->refcnt);
+}
+
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+        if (!(css->flags & CSS_NO_REF))
+                percpu_ref_put_many(&css->refcnt, n);
 }
 
 /* bits in struct cgroup flags field */
include/linux/percpu-refcount.h
@@ -147,28 +147,42 @@
 }
 
 /**
- * percpu_ref_get - increment a percpu refcount
+ * percpu_ref_get_many - increment a percpu refcount
  * @ref: percpu_ref to get
+ * @nr: number of references to get
  *
- * Analagous to atomic_long_inc().
+ * Analogous to atomic_long_add().
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_get(struct percpu_ref *ref)
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
 {
         unsigned long __percpu *percpu_count;
 
         rcu_read_lock_sched();
 
         if (__ref_is_percpu(ref, &percpu_count))
-                this_cpu_inc(*percpu_count);
+                this_cpu_add(*percpu_count, nr);
         else
-                atomic_long_inc(&ref->count);
+                atomic_long_add(nr, &ref->count);
 
         rcu_read_unlock_sched();
 }
 
 /**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analagous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+        percpu_ref_get_many(ref, 1);
+}
+
+/**
  * percpu_ref_tryget - try to increment a percpu refcount
  * @ref: percpu_ref to try-get
  *
@@ -231,26 +245,41 @@
 }
 
 /**
- * percpu_ref_put - decrement a percpu refcount
+ * percpu_ref_put_many - decrement a percpu refcount
  * @ref: percpu_ref to put
+ * @nr: number of references to put
  *
  * Decrement the refcount, and if 0, call the release function (which was passed
  * to percpu_ref_init())
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_put(struct percpu_ref *ref)
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
 {
         unsigned long __percpu *percpu_count;
 
         rcu_read_lock_sched();
 
         if (__ref_is_percpu(ref, &percpu_count))
-                this_cpu_dec(*percpu_count);
-        else if (unlikely(atomic_long_dec_and_test(&ref->count)))
+                this_cpu_sub(*percpu_count, nr);
+        else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
                 ref->release(ref);
 
         rcu_read_unlock_sched();
+}
+
+/**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+        percpu_ref_put_many(ref, 1);
 }
 
 /**
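
A rough userspace analogue of the two paths inside percpu_ref_get_many()
(toy_ref and every name below is invented; the RCU protection and the
percpu-to-atomic mode switching are elided): on either path a batch of
references costs a single add, where nr separate percpu_ref_get() calls
would cost nr operations.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <threads.h>

struct toy_ref {
        atomic_bool percpu_mode;        /* stands in for __ref_is_percpu() */
        atomic_long count;              /* stands in for ref->count */
};

static thread_local long local_count;   /* stands in for *percpu_count */

static void toy_ref_get_many(struct toy_ref *ref, unsigned long nr)
{
        if (atomic_load(&ref->percpu_mode))
                local_count += nr;                      /* this_cpu_add() */
        else
                atomic_fetch_add(&ref->count, nr);      /* atomic_long_add() */
}

static void toy_ref_get(struct toy_ref *ref)
{
        toy_ref_get_many(ref, 1);       /* same layering as the patch uses */
}

int main(void)
{
        struct toy_ref ref;

        atomic_init(&ref.percpu_mode, true);
        atomic_init(&ref.count, 0);
        toy_ref_get_many(&ref, 512);    /* one add, not 512 increments */
        toy_ref_get(&ref);
        printf("local=%ld shared=%ld\n", local_count,
               (long)atomic_load(&ref.count));
        return 0;
}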
mm/memcontrol.c
@@ -2273,6 +2273,7 @@
                 page_counter_uncharge(&old->memory, stock->nr_pages);
                 if (do_swap_account)
                         page_counter_uncharge(&old->memsw, stock->nr_pages);
+                css_put_many(&old->css, stock->nr_pages);
                 stock->nr_pages = 0;
         }
         stock->cached = NULL;
@@ -2530,6 +2531,7 @@
         return -EINTR;
 
 done_restock:
+        css_get_many(&memcg->css, batch);
         if (batch > nr_pages)
                 refill_stock(memcg, batch - nr_pages);
 done:
@@ -2544,6 +2546,8 @@
         page_counter_uncharge(&memcg->memory, nr_pages);
         if (do_swap_account)
                 page_counter_uncharge(&memcg->memsw, nr_pages);
+
+        css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -2739,6 +2743,7 @@
                 page_counter_charge(&memcg->memory, nr_pages);
                 if (do_swap_account)
                         page_counter_charge(&memcg->memsw, nr_pages);
+                css_get_many(&memcg->css, nr_pages);
                 ret = 0;
         } else if (ret)
                 page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -2754,8 +2759,10 @@
                 page_counter_uncharge(&memcg->memsw, nr_pages);
 
         /* Not down to 0 */
-        if (page_counter_uncharge(&memcg->kmem, nr_pages))
+        if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
+                css_put_many(&memcg->css, nr_pages);
                 return;
+        }
 
         /*
          * Releases a reference taken in kmem_cgroup_css_offline in case
@@ -2767,6 +2774,8 @@
          */
         if (memcg_kmem_test_and_clear_dead(memcg))
                 css_put(&memcg->css);
+
+        css_put_many(&memcg->css, nr_pages);
 }
 
 /*
@@ -3394,10 +3403,13 @@
         ret = mem_cgroup_move_account(page, nr_pages,
                                 pc, child, parent);
         if (!ret) {
+                if (!mem_cgroup_is_root(parent))
+                        css_get_many(&parent->css, nr_pages);
                 /* Take charge off the local counters */
                 page_counter_cancel(&child->memory, nr_pages);
                 if (do_swap_account)
                         page_counter_cancel(&child->memsw, nr_pages);
+                css_put_many(&child->css, nr_pages);
         }
 
         if (nr_pages > 1)
@@ -5767,7 +5779,6 @@
 {
         struct mem_cgroup *from = mc.from;
         struct mem_cgroup *to = mc.to;
-        int i;
 
         /* we must uncharge all the leftover precharges from mc.to */
         if (mc.precharge) {
@@ -5795,8 +5806,7 @@
                 if (!mem_cgroup_is_root(mc.to))
                         page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-                for (i = 0; i < mc.moved_swap; i++)
-                        css_put(&mc.from->css);
+                css_put_many(&mc.from->css, mc.moved_swap);
 
                 /* we've already done css_get(mc.to) */
                 mc.moved_swap = 0;
@@ -6343,6 +6353,9 @@
         __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
         memcg_check_events(memcg, dummy_page);
         local_irq_restore(flags);
+
+        if (!mem_cgroup_is_root(memcg))
+                css_put_many(&memcg->css, max(nr_mem, nr_memsw));
 }
 
 static void uncharge_list(struct list_head *page_list)
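
The memcontrol.c changes can be summed up with a toy model (userspace,
illustrative names; the consume_stock() fast path, the kmem paths and
error handling are all omitted): try_charge() takes css references for
the whole batch up front, refill_stock() parks the surplus references
with the cached pages, and uncharge/drain_stock() return them, so the
outstanding css references always equal the charged plus stocked pages.

#include <assert.h>

#define CHARGE_BATCH 32UL       /* stands in for the memcg charge batch */

static unsigned long css_refs;          /* memcg->css reference count */
static unsigned long stock_pages;       /* stock->nr_pages */

static void charge(unsigned long nr_pages)
{
        unsigned long batch = nr_pages > CHARGE_BATCH ? nr_pages : CHARGE_BATCH;

        css_refs += batch;              /* css_get_many(&memcg->css, batch) */
        if (batch > nr_pages)
                stock_pages += batch - nr_pages;        /* refill_stock() */
}

static void uncharge(unsigned long nr_pages)
{
        css_refs -= nr_pages;           /* css_put_many(&memcg->css, nr_pages) */
}

static void drain_stock(void)
{
        css_refs -= stock_pages;        /* css_put_many(&old->css, stock->nr_pages) */
        stock_pages = 0;
}

int main(void)
{
        charge(1);              /* 32 refs: 1 for the page, 31 for the stock */
        assert(css_refs == 32 && stock_pages == 31);
        uncharge(1);
        drain_stock();
        assert(css_refs == 0);  /* every reference came back */
        return 0;
}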