Commit 7717f7ba92de485bce8293419a20ffef130f4286

Authored by Paul Menage
Committed by Linus Torvalds
1 parent fe6934354f

cgroups: add a back-pointer from struct cg_cgroup_link to struct cgroup

Currently the cgroups code makes the assumption that the subsystem
pointers in a struct css_set uniquely identify the hierarchy->cgroup
mappings associated with the css_set; and there's no way to directly
identify the associated set of cgroups other than by indirecting through
the appropriate subsystem state pointers.

This patch removes the need for that assumption by adding a back-pointer
from each struct cg_cgroup_link object to its associated cgroup; this
allows the set of cgroups to be determined by traversing the cg_links
list in the struct css_set.

Signed-off-by: Paul Menage <menage@google.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 199 additions and 49 deletions Side-by-side Diff

... ... @@ -207,6 +207,7 @@
207 207 * cgroup, anchored on cgroup->css_sets
208 208 */
209 209 struct list_head cgrp_link_list;
  210 + struct cgroup *cgrp;
210 211 /*
211 212 * List running through cg_cgroup_links pointing at a
212 213 * single css_set object, anchored on css_set->cg_links
... ... @@ -233,8 +234,11 @@
233 234 static DEFINE_RWLOCK(css_set_lock);
234 235 static int css_set_count;
235 236  
236   -/* hash table for cgroup groups. This improves the performance to
237   - * find an existing css_set */
  237 +/*
  238 + * hash table for cgroup groups. This improves the performance to find
  239 + * an existing css_set. This hash doesn't (currently) take into
  240 + * account cgroups in empty hierarchies.
  241 + */
238 242 #define CSS_SET_HASH_BITS 7
239 243 #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
240 244 static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
... ... @@ -344,6 +348,78 @@
344 348 }
345 349  
346 350 /*
  351 + * compare_css_sets - helper function for find_existing_css_set().
  352 + * @cg: candidate css_set being tested
  353 + * @old_cg: existing css_set for a task
  354 + * @new_cgrp: cgroup that's being entered by the task
  355 + * @template: desired set of css pointers in css_set (pre-calculated)
  356 + *
  357 + * Returns true if "cg" matches "old_cg" except for the hierarchy
  358 + * which "new_cgrp" belongs to, for which it should match "new_cgrp".
  359 + */
  360 +static bool compare_css_sets(struct css_set *cg,
  361 + struct css_set *old_cg,
  362 + struct cgroup *new_cgrp,
  363 + struct cgroup_subsys_state *template[])
  364 +{
  365 + struct list_head *l1, *l2;
  366 +
  367 + if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
  368 + /* Not all subsystems matched */
  369 + return false;
  370 + }
  371 +
  372 + /*
  373 + * Compare cgroup pointers in order to distinguish between
  374 + * different cgroups in hierarchies with no subsystems. We
  375 + * could get by with just this check alone (and skip the
  376 + * memcmp above) but on most setups the memcmp check will
  377 + * avoid the need for this more expensive check on almost all
  378 + * candidates.
  379 + */
  380 +
  381 + l1 = &cg->cg_links;
  382 + l2 = &old_cg->cg_links;
  383 + while (1) {
  384 + struct cg_cgroup_link *cgl1, *cgl2;
  385 + struct cgroup *cg1, *cg2;
  386 +
  387 + l1 = l1->next;
  388 + l2 = l2->next;
  389 + /* See if we reached the end - both lists are equal length. */
  390 + if (l1 == &cg->cg_links) {
  391 + BUG_ON(l2 != &old_cg->cg_links);
  392 + break;
  393 + } else {
  394 + BUG_ON(l2 == &old_cg->cg_links);
  395 + }
  396 + /* Locate the cgroups associated with these links. */
  397 + cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
  398 + cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
  399 + cg1 = cgl1->cgrp;
  400 + cg2 = cgl2->cgrp;
  401 + /* Hierarchies should be linked in the same order. */
  402 + BUG_ON(cg1->root != cg2->root);
  403 +
  404 + /*
  405 + * If this hierarchy is the hierarchy of the cgroup
  406 + * that's changing, then we need to check that this
  407 + * css_set points to the new cgroup; if it's any other
  408 + * hierarchy, then this css_set should point to the
  409 + * same cgroup as the old css_set.
  410 + */
  411 + if (cg1->root == new_cgrp->root) {
  412 + if (cg1 != new_cgrp)
  413 + return false;
  414 + } else {
  415 + if (cg1 != cg2)
  416 + return false;
  417 + }
  418 + }
  419 + return true;
  420 +}
  421 +
  422 +/*
347 423 * find_existing_css_set() is a helper for
348 424 * find_css_set(), and checks to see whether an existing
349 425 * css_set is suitable.
... ... @@ -384,10 +460,11 @@
384 460  
385 461 hhead = css_set_hash(template);
386 462 hlist_for_each_entry(cg, node, hhead, hlist) {
387   - if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
388   - /* All subsystems matched */
389   - return cg;
390   - }
  463 + if (!compare_css_sets(cg, oldcg, cgrp, template))
  464 + continue;
  465 +
  466 + /* This css_set matches what we need */
  467 + return cg;
391 468 }
392 469  
393 470 /* No existing cgroup group matched */
394 471  
... ... @@ -441,8 +518,13 @@
441 518 link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
442 519 cgrp_link_list);
443 520 link->cg = cg;
  521 + link->cgrp = cgrp;
444 522 list_move(&link->cgrp_link_list, &cgrp->css_sets);
445   - list_add(&link->cg_link_list, &cg->cg_links);
  523 + /*
  524 + * Always add links to the tail of the list so that the list
  525 + * is sorted by order of hierarchy creation
  526 + */
  527 + list_add_tail(&link->cg_link_list, &cg->cg_links);
446 528 }
447 529  
448 530 /*
... ... @@ -462,6 +544,7 @@
462 544 struct list_head tmp_cg_links;
463 545  
464 546 struct hlist_head *hhead;
  547 + struct cg_cgroup_link *link;
465 548  
466 549 /* First see if we already have a cgroup group that matches
467 550 * the desired set */
468 551  
469 552  
... ... @@ -497,18 +580,14 @@
497 580 /* Add reference counts and links from the new css_set. */
498 581 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
499 582 struct cgroup *cgrp = res->subsys[i]->cgroup;
500   - struct cgroup_subsys *ss = subsys[i];
501 583 atomic_inc(&cgrp->count);
502   - /*
503   - * We want to add a link once per cgroup, so we
504   - * only do it for the first subsystem in each
505   - * hierarchy
506   - */
507   - if (ss->root->subsys_list.next == &ss->sibling)
508   - link_css_set(&tmp_cg_links, res, cgrp);
509 584 }
510   - if (list_empty(&rootnode.subsys_list))
511   - link_css_set(&tmp_cg_links, res, dummytop);
  585 + list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
  586 + struct cgroup *c = link->cgrp;
  587 + if (c->root == cgrp->root)
  588 + c = cgrp;
  589 + link_css_set(&tmp_cg_links, res, c);
  590 + }
512 591  
513 592 BUG_ON(!list_empty(&tmp_cg_links));
514 593  
... ... @@ -524,6 +603,41 @@
524 603 }
525 604  
526 605 /*
  606 + * Return the cgroup for "task" from the given hierarchy. Must be
  607 + * called with cgroup_mutex held.
  608 + */
  609 +static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  610 + struct cgroupfs_root *root)
  611 +{
  612 + struct css_set *css;
  613 + struct cgroup *res = NULL;
  614 +
  615 + BUG_ON(!mutex_is_locked(&cgroup_mutex));
  616 + read_lock(&css_set_lock);
  617 + /*
  618 + * No need to lock the task - since we hold cgroup_mutex the
  619 + * task can't change groups, so the only thing that can happen
  620 + * is that it exits and its css is set back to init_css_set.
  621 + */
  622 + css = task->cgroups;
  623 + if (css == &init_css_set) {
  624 + res = &root->top_cgroup;
  625 + } else {
  626 + struct cg_cgroup_link *link;
  627 + list_for_each_entry(link, &css->cg_links, cg_link_list) {
  628 + struct cgroup *c = link->cgrp;
  629 + if (c->root == root) {
  630 + res = c;
  631 + break;
  632 + }
  633 + }
  634 + }
  635 + read_unlock(&css_set_lock);
  636 + BUG_ON(!res);
  637 + return res;
  638 +}
  639 +
  640 +/*
527 641 * There is one global cgroup mutex. We also require taking
528 642 * task_lock() when dereferencing a task's cgroup subsys pointers.
529 643 * See "The task_lock() exception", at the end of this comment.
... ... @@ -1361,27 +1475,6 @@
1361 1475 return 0;
1362 1476 }
1363 1477  
1364   -/*
1365   - * Return the first subsystem attached to a cgroup's hierarchy, and
1366   - * its subsystem id.
1367   - */
1368   -
1369   -static void get_first_subsys(const struct cgroup *cgrp,
1370   - struct cgroup_subsys_state **css, int *subsys_id)
1371   -{
1372   - const struct cgroupfs_root *root = cgrp->root;
1373   - const struct cgroup_subsys *test_ss;
1374   - BUG_ON(list_empty(&root->subsys_list));
1375   - test_ss = list_entry(root->subsys_list.next,
1376   - struct cgroup_subsys, sibling);
1377   - if (css) {
1378   - *css = cgrp->subsys[test_ss->subsys_id];
1379   - BUG_ON(!*css);
1380   - }
1381   - if (subsys_id)
1382   - *subsys_id = test_ss->subsys_id;
1383   -}
1384   -
1385 1478 /**
1386 1479 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
1387 1480 * @cgrp: the cgroup the task is attaching to
1388 1481  
1389 1482  
... ... @@ -1398,12 +1491,9 @@
1398 1491 struct css_set *cg;
1399 1492 struct css_set *newcg;
1400 1493 struct cgroupfs_root *root = cgrp->root;
1401   - int subsys_id;
1402 1494  
1403   - get_first_subsys(cgrp, NULL, &subsys_id);
1404   -
1405 1495 /* Nothing to do if the task is already in that cgroup */
1406   - oldcgrp = task_cgroup(tsk, subsys_id);
  1496 + oldcgrp = task_cgroup_from_root(tsk, root);
1407 1497 if (cgrp == oldcgrp)
1408 1498 return 0;
1409 1499  
... ... @@ -1961,7 +2051,7 @@
1961 2051 * the start of a css_set
1962 2052 */
1963 2053 static void cgroup_advance_iter(struct cgroup *cgrp,
1964   - struct cgroup_iter *it)
  2054 + struct cgroup_iter *it)
1965 2055 {
1966 2056 struct list_head *l = it->cg_link;
1967 2057 struct cg_cgroup_link *link;
... ... @@ -2964,6 +3054,7 @@
2964 3054 init_task.cgroups = &init_css_set;
2965 3055  
2966 3056 init_css_set_link.cg = &init_css_set;
  3057 + init_css_set_link.cgrp = dummytop;
2967 3058 list_add(&init_css_set_link.cgrp_link_list,
2968 3059 &rootnode.top_cgroup.css_sets);
2969 3060 list_add(&init_css_set_link.cg_link_list,
... ... @@ -3071,7 +3162,6 @@
3071 3162 for_each_active_root(root) {
3072 3163 struct cgroup_subsys *ss;
3073 3164 struct cgroup *cgrp;
3074   - int subsys_id;
3075 3165 int count = 0;
3076 3166  
3077 3167 seq_printf(m, "%lu:", root->subsys_bits);
... ... @@ -3081,8 +3171,7 @@
3081 3171 seq_printf(m, "%sname=%s", count ? "," : "",
3082 3172 root->name);
3083 3173 seq_putc(m, ':');
3084   - get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
3085   - cgrp = task_cgroup(tsk, subsys_id);
  3174 + cgrp = task_cgroup_from_root(tsk, root);
3086 3175 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
3087 3176 if (retval < 0)
3088 3177 goto out_unlock;
3089 3178  
... ... @@ -3408,13 +3497,11 @@
3408 3497 {
3409 3498 int ret;
3410 3499 struct cgroup *target;
3411   - int subsys_id;
3412 3500  
3413 3501 if (cgrp == dummytop)
3414 3502 return 1;
3415 3503  
3416   - get_first_subsys(cgrp, NULL, &subsys_id);
3417   - target = task_cgroup(task, subsys_id);
  3504 + target = task_cgroup_from_root(task, cgrp->root);
3418 3505 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3419 3506 cgrp = cgrp->parent;
3420 3507 ret = (cgrp == target);
... ... @@ -3824,6 +3911,59 @@
3824 3911 return count;
3825 3912 }
3826 3913  
  3914 +static int current_css_set_cg_links_read(struct cgroup *cont,
  3915 + struct cftype *cft,
  3916 + struct seq_file *seq)
  3917 +{
  3918 + struct cg_cgroup_link *link;
  3919 + struct css_set *cg;
  3920 +
  3921 + read_lock(&css_set_lock);
  3922 + rcu_read_lock();
  3923 + cg = rcu_dereference(current->cgroups);
  3924 + list_for_each_entry(link, &cg->cg_links, cg_link_list) {
  3925 + struct cgroup *c = link->cgrp;
  3926 + const char *name;
  3927 +
  3928 + if (c->dentry)
  3929 + name = c->dentry->d_name.name;
  3930 + else
  3931 + name = "?";
  3932 + seq_printf(seq, "Root %lu group %s\n",
  3933 + c->root->subsys_bits, name);
  3934 + }
  3935 + rcu_read_unlock();
  3936 + read_unlock(&css_set_lock);
  3937 + return 0;
  3938 +}
  3939 +
  3940 +#define MAX_TASKS_SHOWN_PER_CSS 25
  3941 +static int cgroup_css_links_read(struct cgroup *cont,
  3942 + struct cftype *cft,
  3943 + struct seq_file *seq)
  3944 +{
  3945 + struct cg_cgroup_link *link;
  3946 +
  3947 + read_lock(&css_set_lock);
  3948 + list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
  3949 + struct css_set *cg = link->cg;
  3950 + struct task_struct *task;
  3951 + int count = 0;
  3952 + seq_printf(seq, "css_set %p\n", cg);
  3953 + list_for_each_entry(task, &cg->tasks, cg_list) {
  3954 + if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
  3955 + seq_puts(seq, " ...\n");
  3956 + break;
  3957 + } else {
  3958 + seq_printf(seq, " task %d\n",
  3959 + task_pid_vnr(task));
  3960 + }
  3961 + }
  3962 + }
  3963 + read_unlock(&css_set_lock);
  3964 + return 0;
  3965 +}
  3966 +
3827 3967 static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
3828 3968 {
3829 3969 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
... ... @@ -3847,6 +3987,16 @@
3847 3987 {
3848 3988 .name = "current_css_set_refcount",
3849 3989 .read_u64 = current_css_set_refcount_read,
  3990 + },
  3991 +
  3992 + {
  3993 + .name = "current_css_set_cg_links",
  3994 + .read_seq_string = current_css_set_cg_links_read,
  3995 + },
  3996 +
  3997 + {
  3998 + .name = "cgroup_css_links",
  3999 + .read_seq_string = cgroup_css_links_read,
3850 4000 },
3851 4001  
3852 4002 {