Blame view
mm/hugetlb_cgroup.c
10.3 KB
2bc64a204 mm/hugetlb: add n... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

/* Per-cgroup state for the hugetlb controller. */
struct hugetlb_cgroup {
	/* Embedded cgroup state; container_of() maps a css back to us. */
	struct cgroup_subsys_state css;
	/*
	 * the counter to account for hugepages from hugetlb.
	 * One counter per supported huge page size, indexed by hstate index.
	 */
	struct res_counter hugepage[HUGE_MAX_HSTATE];
};
/*
 * Encoding of a control file's ->private field: the hstate index is kept
 * in the upper 16 bits, the RES_* attribute (RES_LIMIT, RES_USAGE, ...)
 * in the lower 16 bits.
 */
#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)
/* The root hugetlb cgroup; assigned once, when css_alloc sees no parent. */
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

/* Map a cgroup_subsys_state back to its hugetlb_cgroup (NULL-safe). */
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

/* The hugetlb cgroup @task currently belongs to. */
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}
/* Parent hugetlb cgroup, or NULL when @h_cg has no parent css (the root). */
static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}
3f7985183 hugetlb_cgroup: p... |
54 |
static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) |
2bc64a204 mm/hugetlb: add n... |
55 56 |
{ int idx; |
2bc64a204 mm/hugetlb: add n... |
57 58 59 60 61 62 63 |
for (idx = 0; idx < hugetlb_max_hstate; idx++) { if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) return true; } return false; } |
eb95419b0 cgroup: pass arou... |
64 65 |
static struct cgroup_subsys_state * hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) |
2bc64a204 mm/hugetlb: add n... |
66 |
{ |
eb95419b0 cgroup: pass arou... |
67 68 |
struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); struct hugetlb_cgroup *h_cgroup; |
2bc64a204 mm/hugetlb: add n... |
69 |
int idx; |
2bc64a204 mm/hugetlb: add n... |
70 71 72 73 |
h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); if (!h_cgroup) return ERR_PTR(-ENOMEM); |
eb95419b0 cgroup: pass arou... |
74 |
if (parent_h_cgroup) { |
2bc64a204 mm/hugetlb: add n... |
75 76 77 78 79 80 81 82 83 84 |
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) res_counter_init(&h_cgroup->hugepage[idx], &parent_h_cgroup->hugepage[idx]); } else { root_h_cgroup = h_cgroup; for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) res_counter_init(&h_cgroup->hugepage[idx], NULL); } return &h_cgroup->css; } |
/* Release the hugetlb_cgroup allocated by hugetlb_cgroup_css_alloc(). */
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	kfree(hugetlb_cgroup_from_css(css));
}
/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, So no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	int csize;
	struct res_counter *counter;
	struct res_counter *fail_res;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	page_hcg = hugetlb_cgroup_from_page(page);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	/* Charge size in bytes: one whole compound page. */
	csize = PAGE_SIZE << compound_order(page);
	if (!parent) {
		/*
		 * @h_cg is a first-level cgroup: move the charge to the
		 * root instead.  The _nofail variant is fine because the
		 * root has no limit to exceed.
		 */
		parent = root_h_cgroup;
		/* root has no limit */
		res_counter_charge_nofail(&parent->hugepage[idx],
					  csize, &fail_res);
	}
	counter = &h_cg->hugepage[idx];
	/* Uncharge this counter only; ancestors keep their charge. */
	res_counter_uncharge_until(counter, counter->parent, csize);

	/* Re-tag the page as owned by @parent. */
	set_hugetlb_cgroup(page, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
eb95419b0 cgroup: pass arou... |
136 |
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) |
2bc64a204 mm/hugetlb: add n... |
137 |
{ |
eb95419b0 cgroup: pass arou... |
138 |
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
da1def559 hugetlb/cgroup: a... |
139 140 |
struct hstate *h; struct page *page; |
9d093cb10 hugetlb: do not f... |
141 |
int idx = 0; |
da1def559 hugetlb/cgroup: a... |
142 143 |
do { |
da1def559 hugetlb/cgroup: a... |
144 145 146 |
for_each_hstate(h) { spin_lock(&hugetlb_lock); list_for_each_entry(page, &h->hugepage_activelist, lru) |
3f7985183 hugetlb_cgroup: p... |
147 |
hugetlb_cgroup_move_parent(idx, h_cg, page); |
da1def559 hugetlb/cgroup: a... |
148 149 150 151 152 |
spin_unlock(&hugetlb_lock); idx++; } cond_resched(); |
3f7985183 hugetlb_cgroup: p... |
153 |
} while (hugetlb_cgroup_have_usage(h_cg)); |
2bc64a204 mm/hugetlb: add n... |
154 |
} |
/*
 * Charge @nr_pages pages of hstate @idx against the current task's
 * hugetlb cgroup.
 *
 * *@ptr is set to the cgroup to hand to hugetlb_cgroup_commit_charge();
 * it stays NULL when accounting is skipped (controller disabled or the
 * hstate is below HUGETLB_CGROUP_MIN_ORDER).  Returns 0 on success or
 * the res_counter_charge() error.
 */
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	int ret = 0;
	struct res_counter *fail_res;
	struct hugetlb_cgroup *h_cg = NULL;
	unsigned long csize = nr_pages * PAGE_SIZE;

	if (hugetlb_cgroup_disabled())
		goto done;
	/*
	 * We don't charge any cgroup if the compound page have less
	 * than 3 pages.
	 */
	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	/*
	 * The css may be going offline concurrently; retry until we pin
	 * an online css for the (possibly re-resolved) cgroup.
	 */
	if (!css_tryget_online(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
	css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
/* Should be called with hugetlb_lock held */
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct page *page)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	/* Tag the page with the cgroup the charge was taken against. */
	set_hugetlb_cgroup(page, h_cg);
	return;
}

/*
 * Should be called with hugetlb_lock held
 */
void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
				  struct page *page)
{
	struct hugetlb_cgroup *h_cg;
	unsigned long csize = nr_pages * PAGE_SIZE;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(page);
	/* Pages below HUGETLB_CGROUP_MIN_ORDER carry no cgroup tag. */
	if (unlikely(!h_cg))
		return;
	set_hugetlb_cgroup(page, NULL);
	res_counter_uncharge(&h_cg->hugepage[idx], csize);
	return;
}

/*
 * Uncharge @h_cg directly, with no page involved — presumably the error
 * path when a charge taken via hugetlb_cgroup_charge_cgroup() is never
 * committed to a page; confirm against callers in mm/hugetlb.c.
 */
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	unsigned long csize = nr_pages * PAGE_SIZE;

	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		return;

	res_counter_uncharge(&h_cg->hugepage[idx], csize);
	return;
}
716f479d2 hugetlb_cgroup: c... |
231 232 |
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) |
abb8206cb hugetlb/cgroup: a... |
233 |
{ |
716f479d2 hugetlb_cgroup: c... |
234 |
int idx, name; |
182446d08 cgroup: pass arou... |
235 |
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); |
abb8206cb hugetlb/cgroup: a... |
236 237 238 |
idx = MEMFILE_IDX(cft->private); name = MEMFILE_ATTR(cft->private); |
716f479d2 hugetlb_cgroup: c... |
239 |
return res_counter_read_u64(&h_cg->hugepage[idx], name); |
abb8206cb hugetlb/cgroup: a... |
240 |
} |
451af504d cgroup: replace c... |
241 242 |
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) |
abb8206cb hugetlb/cgroup: a... |
243 244 245 |
{ int idx, name, ret; unsigned long long val; |
451af504d cgroup: replace c... |
246 |
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); |
abb8206cb hugetlb/cgroup: a... |
247 |
|
451af504d cgroup: replace c... |
248 249 250 |
buf = strstrip(buf); idx = MEMFILE_IDX(of_cft(of)->private); name = MEMFILE_ATTR(of_cft(of)->private); |
abb8206cb hugetlb/cgroup: a... |
251 252 253 254 255 256 257 258 259 |
switch (name) { case RES_LIMIT: if (hugetlb_cgroup_is_root(h_cg)) { /* Can't set limit on root */ ret = -EINVAL; break; } /* This function does all necessary parse...reuse it */ |
451af504d cgroup: replace c... |
260 |
ret = res_counter_memparse_write_strategy(buf, &val); |
abb8206cb hugetlb/cgroup: a... |
261 262 |
if (ret) break; |
24d7cd207 mm, hugetlb_cgrou... |
263 |
val = ALIGN(val, 1ULL << huge_page_shift(&hstates[idx])); |
abb8206cb hugetlb/cgroup: a... |
264 265 266 267 268 269 |
ret = res_counter_set_limit(&h_cg->hugepage[idx], val); break; default: ret = -EINVAL; break; } |
451af504d cgroup: replace c... |
270 |
return ret ?: nbytes; |
abb8206cb hugetlb/cgroup: a... |
271 |
} |
6770c64e5 cgroup: replace c... |
272 273 |
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) |
abb8206cb hugetlb/cgroup: a... |
274 275 |
{ int idx, name, ret = 0; |
6770c64e5 cgroup: replace c... |
276 |
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); |
abb8206cb hugetlb/cgroup: a... |
277 |
|
6770c64e5 cgroup: replace c... |
278 279 |
idx = MEMFILE_IDX(of_cft(of)->private); name = MEMFILE_ATTR(of_cft(of)->private); |
abb8206cb hugetlb/cgroup: a... |
280 281 282 283 284 285 286 287 288 289 290 291 |
switch (name) { case RES_MAX_USAGE: res_counter_reset_max(&h_cg->hugepage[idx]); break; case RES_FAILCNT: res_counter_reset_failcnt(&h_cg->hugepage[idx]); break; default: ret = -EINVAL; break; } |
6770c64e5 cgroup: replace c... |
292 |
return ret ?: nbytes; |
abb8206cb hugetlb/cgroup: a... |
293 294 295 296 297 298 299 300 301 302 303 304 |
} static char *mem_fmt(char *buf, int size, unsigned long hsize) { if (hsize >= (1UL << 30)) snprintf(buf, size, "%luGB", hsize >> 30); else if (hsize >= (1UL << 20)) snprintf(buf, size, "%luMB", hsize >> 20); else snprintf(buf, size, "%luKB", hsize >> 10); return buf; } |
7179e7bf4 mm/hugetlb: creat... |
305 |
static void __init __hugetlb_cgroup_file_init(int idx) |
abb8206cb hugetlb/cgroup: a... |
306 307 308 309 310 311 312 313 314 315 316 317 |
{ char buf[32]; struct cftype *cft; struct hstate *h = &hstates[idx]; /* format the size */ mem_fmt(buf, 32, huge_page_size(h)); /* Add the limit file */ cft = &h->cgroup_files[0]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); |
716f479d2 hugetlb_cgroup: c... |
318 |
cft->read_u64 = hugetlb_cgroup_read_u64; |
451af504d cgroup: replace c... |
319 |
cft->write = hugetlb_cgroup_write; |
abb8206cb hugetlb/cgroup: a... |
320 321 322 323 324 |
/* Add the usage file */ cft = &h->cgroup_files[1]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); |
716f479d2 hugetlb_cgroup: c... |
325 |
cft->read_u64 = hugetlb_cgroup_read_u64; |
abb8206cb hugetlb/cgroup: a... |
326 327 328 329 330 |
/* Add the MAX usage file */ cft = &h->cgroup_files[2]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); |
6770c64e5 cgroup: replace c... |
331 |
cft->write = hugetlb_cgroup_reset; |
716f479d2 hugetlb_cgroup: c... |
332 |
cft->read_u64 = hugetlb_cgroup_read_u64; |
abb8206cb hugetlb/cgroup: a... |
333 334 335 336 337 |
/* Add the failcntfile */ cft = &h->cgroup_files[3]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); |
6770c64e5 cgroup: replace c... |
338 |
cft->write = hugetlb_cgroup_reset; |
716f479d2 hugetlb_cgroup: c... |
339 |
cft->read_u64 = hugetlb_cgroup_read_u64; |
abb8206cb hugetlb/cgroup: a... |
340 341 342 343 |
/* NULL terminate the last cft */ cft = &h->cgroup_files[4]; memset(cft, 0, sizeof(*cft)); |
2cf669a58 cgroup: replace c... |
344 345 |
WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, h->cgroup_files)); |
7179e7bf4 mm/hugetlb: creat... |
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 |
} void __init hugetlb_cgroup_file_init(void) { struct hstate *h; for_each_hstate(h) { /* * Add cgroup control files only if the huge page consists * of more than two normal pages. This is because we use * page[2].lru.next for storing cgroup details. */ if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) __hugetlb_cgroup_file_init(hstate_index(h)); } |
abb8206cb hugetlb/cgroup: a... |
361 |
} |
/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
	struct hugetlb_cgroup *h_cg;
	struct hstate *h = page_hstate(oldhpage);

	if (hugetlb_cgroup_disabled())
		return;

	VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
	spin_lock(&hugetlb_lock);
	/* Strip the cgroup tag from the old page... */
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(newhpage, h_cg);
	/* Keep the new page on the active list so css_offline can find it. */
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	return;
}
/*
 * cgroup core entry points for the hugetlb controller: allocate state,
 * drain remaining charges to the parent on offline, and free state.
 */
struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
};