Blame view

kernel/cgroup.c 127 KB
ddbcc7e8e   Paul Menage   Task Control Grou...
1
  /*
ddbcc7e8e   Paul Menage   Task Control Grou...
2
3
4
5
6
   *  Generic process-grouping system.
   *
   *  Based originally on the cpuset system, extracted by Paul Menage
   *  Copyright (C) 2006 Google, Inc
   *
0dea11687   Kirill A. Shutemov   cgroup: implement...
7
8
9
10
   *  Notifications support
   *  Copyright (C) 2009 Nokia Corporation
   *  Author: Kirill A. Shutemov
   *
ddbcc7e8e   Paul Menage   Task Control Grou...
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
   *  Copyright notices from the original cpuset code:
   *  --------------------------------------------------
   *  Copyright (C) 2003 BULL SA.
   *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
   *
   *  Portions derived from Patrick Mochel's sysfs code.
   *  sysfs is Copyright (c) 2001-3 Patrick Mochel
   *
   *  2003-10-10 Written by Simon Derr.
   *  2003-10-22 Updates by Stephen Hemminger.
   *  2004 May-July Rework by Paul Jackson.
   *  ---------------------------------------------------
   *
   *  This file is subject to the terms and conditions of the GNU General Public
   *  License.  See the file COPYING in the main directory of the Linux
   *  distribution for more details.
   */
  
  #include <linux/cgroup.h>
c6d57f331   Paul Menage   cgroups: support ...
30
  #include <linux/ctype.h>
ddbcc7e8e   Paul Menage   Task Control Grou...
31
32
33
34
35
36
37
38
  #include <linux/errno.h>
  #include <linux/fs.h>
  #include <linux/kernel.h>
  #include <linux/list.h>
  #include <linux/mm.h>
  #include <linux/mutex.h>
  #include <linux/mount.h>
  #include <linux/pagemap.h>
a424316ca   Paul Menage   Task Control Grou...
39
  #include <linux/proc_fs.h>
ddbcc7e8e   Paul Menage   Task Control Grou...
40
41
  #include <linux/rcupdate.h>
  #include <linux/sched.h>
817929ec2   Paul Menage   Task Control Grou...
42
  #include <linux/backing-dev.h>
ddbcc7e8e   Paul Menage   Task Control Grou...
43
44
45
46
47
  #include <linux/seq_file.h>
  #include <linux/slab.h>
  #include <linux/magic.h>
  #include <linux/spinlock.h>
  #include <linux/string.h>
bbcb81d09   Paul Menage   Task Control Grou...
48
  #include <linux/sort.h>
81a6a5cdd   Paul Menage   Task Control Grou...
49
  #include <linux/kmod.h>
e6a1105ba   Ben Blum   cgroups: subsyste...
50
  #include <linux/module.h>
846c7bb05   Balbir Singh   Add cgroupstats
51
52
  #include <linux/delayacct.h>
  #include <linux/cgroupstats.h>
472b1053f   Li Zefan   cgroups: use a ha...
53
  #include <linux/hash.h>
3f8206d49   Al Viro   [PATCH] get rid o...
54
  #include <linux/namei.h>
337eb00a2   Alessio Igor Bogani   Push BKL down int...
55
  #include <linux/smp_lock.h>
096b7fe01   Li Zefan   cgroups: fix pid ...
56
  #include <linux/pid_namespace.h>
2c6ab6d20   Paul Menage   cgroups: allow cg...
57
  #include <linux/idr.h>
d1d9fd330   Ben Blum   cgroups: use vmal...
58
  #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
0dea11687   Kirill A. Shutemov   cgroup: implement...
59
60
  #include <linux/eventfd.h>
  #include <linux/poll.h>
846c7bb05   Balbir Singh   Add cgroupstats
61

ddbcc7e8e   Paul Menage   Task Control Grou...
62
  #include <asm/atomic.h>
81a6a5cdd   Paul Menage   Task Control Grou...
63
  /* Global mutex taken by cgroup_lock(); must be held to modify cgroups. */
  static DEFINE_MUTEX(cgroup_mutex);
aae8aab40   Ben Blum   cgroups: revamp s...
64
65
66
67
68
69
  /*
   * Generate an array of cgroup subsystem pointers. At boot time, this is
   * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
   * registered after that. The mutable section of this array is protected by
   * cgroup_mutex.
   */
ddbcc7e8e   Paul Menage   Task Control Grou...
70
  #define SUBSYS(_x) &_x ## _subsys,
aae8aab40   Ben Blum   cgroups: revamp s...
71
  static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
ddbcc7e8e   Paul Menage   Task Control Grou...
72
73
  #include <linux/cgroup_subsys.h>
  };
c6d57f331   Paul Menage   cgroups: support ...
74
  #define MAX_CGROUP_ROOT_NAMELEN 64
ddbcc7e8e   Paul Menage   Task Control Grou...
75
76
77
78
79
80
81
82
83
84
85
86
87
  /*
   * A cgroupfs_root represents the root of a cgroup hierarchy,
   * and may be associated with a superblock to form an active
   * hierarchy
   */
  struct cgroupfs_root {
  	struct super_block *sb;
  
  	/*
  	 * The bitmask of subsystems intended to be attached to this
  	 * hierarchy
  	 */
  	unsigned long subsys_bits;
2c6ab6d20   Paul Menage   cgroups: allow cg...
88
89
  	/* Unique id for this hierarchy. */
  	int hierarchy_id;
ddbcc7e8e   Paul Menage   Task Control Grou...
90
91
92
93
94
95
96
97
98
99
100
  	/* The bitmask of subsystems currently attached to this hierarchy */
  	unsigned long actual_subsys_bits;
  
  	/* A list running through the attached subsystems */
  	struct list_head subsys_list;
  
  	/* The root cgroup for this hierarchy */
  	struct cgroup top_cgroup;
  
  	/* Tracks how many cgroups are currently defined in hierarchy.*/
  	int number_of_cgroups;
e5f6a8609   Li Zefan   cgroups: make roo...
101
  	/* A list running through the active hierarchies */
ddbcc7e8e   Paul Menage   Task Control Grou...
102
103
104
105
  	struct list_head root_list;
  
  	/* Hierarchy-specific flags */
  	unsigned long flags;
81a6a5cdd   Paul Menage   Task Control Grou...
106

e788e066c   Paul Menage   cgroup files: mov...
107
  	/* The path to use for release notifications. */
81a6a5cdd   Paul Menage   Task Control Grou...
108
  	char release_agent_path[PATH_MAX];
c6d57f331   Paul Menage   cgroups: support ...
109
110
111
  
  	/* The name for this hierarchy - may be empty */
  	char name[MAX_CGROUP_ROOT_NAMELEN];
ddbcc7e8e   Paul Menage   Task Control Grou...
112
  };
ddbcc7e8e   Paul Menage   Task Control Grou...
113
114
115
116
117
118
  /*
   * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
   * subsystems that are otherwise unattached - it never has more than a
   * single cgroup, and all tasks are part of that cgroup.
   */
  static struct cgroupfs_root rootnode;
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
  /*
   * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
   * cgroup_subsys->use_id != 0.
   */
  #define CSS_ID_MAX	(65535)
  struct css_id {
  	/*
  	 * The css to which this ID points. This pointer is set to a valid
  	 * value after the cgroup is populated. If the cgroup is removed, this
  	 * will be NULL. This pointer is expected to be RCU-safe because
  	 * destroy() is called after synchronize_rcu(). But for safe use,
  	 * css_is_removed() and css_tryget() should be used to avoid races.
  	 */
  	struct cgroup_subsys_state *css;
  	/*
  	 * ID of this css.
  	 */
  	unsigned short id;
  	/*
  	 * Depth in the hierarchy to which this ID belongs.
  	 */
  	unsigned short depth;
  	/*
  	 * ID is freed by RCU. (and lookup routine is RCU safe.)
  	 */
  	struct rcu_head rcu_head;
  	/*
  	 * Hierarchy the CSS ID belongs to: an array of length (depth + 1).
  	 * Declared as a C99 flexible array member rather than a zero-length
  	 * array; it must stay the last member of the structure.
  	 */
  	unsigned short stack[];
  };
0dea11687   Kirill A. Shutemov   cgroup: implement...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
  /*
   * cgroup_event represents events which userspace wants to receive.
   */
  struct cgroup_event {
  	/*
  	 * Cgroup which the event belongs to.
  	 */
  	struct cgroup *cgrp;
  	/*
  	 * Control file with which the event is associated.
  	 */
  	struct cftype *cft;
  	/*
  	 * eventfd to signal userspace about the event.
  	 */
  	struct eventfd_ctx *eventfd;
  	/*
  	 * Each of these is stored in a list by the cgroup.
  	 */
  	struct list_head list;
  	/*
  	 * All fields below are needed to unregister the event when
  	 * userspace closes the eventfd.
  	 */
  	poll_table pt;
  	wait_queue_head_t *wqh;
  	wait_queue_t wait;
  	struct work_struct remove;
  };
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
179

ddbcc7e8e   Paul Menage   Task Control Grou...
180
181
182
  /* The list of hierarchy roots */
  
  static LIST_HEAD(roots);
817929ec2   Paul Menage   Task Control Grou...
183
  static int root_count;
ddbcc7e8e   Paul Menage   Task Control Grou...
184

2c6ab6d20   Paul Menage   cgroups: allow cg...
185
186
187
  /*
   * NOTE(review): the users of these three objects are not visible in this
   * part of the file. Presumably they hand out cgroupfs_root.hierarchy_id
   * values, with hierarchy_id_lock guarding hierarchy_ida and
   * next_hierarchy_id - confirm against the allocation code.
   */
  static DEFINE_IDA(hierarchy_ida);
  static int next_hierarchy_id;
  static DEFINE_SPINLOCK(hierarchy_id_lock);
ddbcc7e8e   Paul Menage   Task Control Grou...
188
189
190
191
  /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
  #define dummytop (&rootnode.top_cgroup)
  
  /* This flag indicates whether tasks in the fork and exit paths should
a043e3b2c   Li Zefan   cgroup: fix comments
192
193
194
   * check for fork/exit handlers to call. This avoids us having to do
   * extra work in the fork/exit path if none of the subsystems need to
   * be called.
ddbcc7e8e   Paul Menage   Task Control Grou...
195
   */
8947f9d5b   Li Zefan   cgroups: annotate...
196
  static int need_forkexit_callback __read_mostly;
ddbcc7e8e   Paul Menage   Task Control Grou...
197

d11c563dd   Paul E. McKenney   sched: Use lockde...
198
199
200
201
202
203
204
205
206
207
208
209
210
  /*
   * cgroup_lock_is_held - report whether cgroup_mutex is held
   *
   * With CONFIG_PROVE_LOCKING the answer comes from lockdep_is_held();
   * otherwise it comes from mutex_is_locked().
   */
  int cgroup_lock_is_held(void)
  {
  #ifdef CONFIG_PROVE_LOCKING
  	return lockdep_is_held(&cgroup_mutex);
  #else /* !CONFIG_PROVE_LOCKING */
  	return mutex_is_locked(&cgroup_mutex);
  #endif /* CONFIG_PROVE_LOCKING */
  }

  EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
ddbcc7e8e   Paul Menage   Task Control Grou...
211
  /* convenient tests for these bits */
bd89aabc6   Paul Menage   Control groups: R...
212
  inline int cgroup_is_removed(const struct cgroup *cgrp)
ddbcc7e8e   Paul Menage   Task Control Grou...
213
  {
bd89aabc6   Paul Menage   Control groups: R...
214
  	return test_bit(CGRP_REMOVED, &cgrp->flags);
ddbcc7e8e   Paul Menage   Task Control Grou...
215
216
217
218
219
220
  }
  
  /*
   * Bits in the struct cgroupfs_root flags field.
   * NOTE(review): the enumerators look like bit numbers rather than masks;
   * confirm against the code that tests cgroupfs_root.flags.
   */
  enum {
  	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
  };
e9685a03c   Adrian Bunk   kernel/cgroup.c: ...
221
  static int cgroup_is_releasable(const struct cgroup *cgrp)
81a6a5cdd   Paul Menage   Task Control Grou...
222
223
  {
  	const int bits =
bd89aabc6   Paul Menage   Control groups: R...
224
225
226
  		(1 << CGRP_RELEASABLE) |
  		(1 << CGRP_NOTIFY_ON_RELEASE);
  	return (cgrp->flags & bits) == bits;
81a6a5cdd   Paul Menage   Task Control Grou...
227
  }
e9685a03c   Adrian Bunk   kernel/cgroup.c: ...
228
  static int notify_on_release(const struct cgroup *cgrp)
81a6a5cdd   Paul Menage   Task Control Grou...
229
  {
bd89aabc6   Paul Menage   Control groups: R...
230
  	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
81a6a5cdd   Paul Menage   Task Control Grou...
231
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
232
233
234
235
236
237
  /*
   * for_each_subsys() allows you to iterate on each subsystem attached to
   * an active hierarchy.
   * @_root: pointer to the cgroupfs_root whose subsys_list is walked
   * @_ss:   struct cgroup_subsys pointer used as the loop cursor
   *
   * @_root is parenthesized so that any pointer-valued expression can be
   * passed, not just a plain identifier.
   */
  #define for_each_subsys(_root, _ss) \
  	list_for_each_entry(_ss, &(_root)->subsys_list, sibling)
e5f6a8609   Li Zefan   cgroups: make roo...
238
239
  /*
   * for_each_active_root() walks the active hierarchies, i.e. every
   * cgroupfs_root linked on the "roots" list through its root_list member.
   */
  #define for_each_active_root(_root) \
  	list_for_each_entry(_root, &roots, root_list)
81a6a5cdd   Paul Menage   Task Control Grou...
241
242
243
244
245
246
  /*
   * The list of cgroups eligible for automatic release. Protected by
   * release_list_lock.
   */
  static LIST_HEAD(release_list);
  static DEFINE_SPINLOCK(release_list_lock);
  /* release_agent_work is a work item whose handler is cgroup_release_agent() */
  static void cgroup_release_agent(struct work_struct *work);
  static DECLARE_WORK(release_agent_work, cgroup_release_agent);
bd89aabc6   Paul Menage   Control groups: R...
247
  static void check_for_release(struct cgroup *cgrp);
81a6a5cdd   Paul Menage   Task Control Grou...
248

817929ec2   Paul Menage   Task Control Grou...
249
250
251
252
253
254
  /* Link structure for associating css_set objects with cgroups */
  struct cg_cgroup_link {
  	/*
  	 * List running through cg_cgroup_links associated with a
  	 * cgroup, anchored on cgroup->css_sets
  	 */
bd89aabc6   Paul Menage   Control groups: R...
255
  	struct list_head cgrp_link_list;
7717f7ba9   Paul Menage   cgroups: add a ba...
256
  	struct cgroup *cgrp;
817929ec2   Paul Menage   Task Control Grou...
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
  	/*
  	 * List running through cg_cgroup_links pointing at a
  	 * single css_set object, anchored on css_set->cg_links
  	 */
  	struct list_head cg_link_list;
  	struct css_set *cg;
  };
  
  /* The default css_set - used by init and its children prior to any
   * hierarchies being mounted. It contains a pointer to the root state
   * for each subsystem. Also used to anchor the list of css_sets. Not
   * reference-counted, to improve performance when child cgroups
   * haven't been created.
   */
  
  static struct css_set init_css_set;
  static struct cg_cgroup_link init_css_set_link;
e6a1105ba   Ben Blum   cgroups: subsyste...
274
275
  static int cgroup_init_idr(struct cgroup_subsys *ss,
  			   struct cgroup_subsys_state *css);
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
276

817929ec2   Paul Menage   Task Control Grou...
277
278
279
280
281
  /*
   * css_set_lock protects the list of css_set objects, and the
   * chain of tasks off each css_set.  Nests outside task->alloc_lock
   * due to cgroup_iter_start().
   */
  static DEFINE_RWLOCK(css_set_lock);
  /*
   * Incremented by find_css_set() and decremented by __put_css_set(), in
   * both cases with css_set_lock held for writing.
   */
  static int css_set_count;
7717f7ba9   Paul Menage   cgroups: add a ba...
282
283
284
285
286
  /*
   * hash table for cgroup groups. This improves the performance to find
   * an existing css_set. This hash doesn't (currently) take into
   * account cgroups in empty hierarchies.
   */
472b1053f   Li Zefan   cgroups: use a ha...
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
  /* The table has 1 << CSS_SET_HASH_BITS (128) buckets; see css_set_hash() */
  #define CSS_SET_HASH_BITS	7
  #define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
  static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
  
  /*
   * css_set_hash - pick the css_set_table bucket for a set of css pointers
   * @css: array of CGROUP_SUBSYS_COUNT subsystem state pointers
   *
   * Returns the address of the hash bucket the array maps to.
   */
  static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
  {
  	unsigned long sum = 0UL;
  	int bucket;
  	int ss;

  	/* Add up the pointer values of all subsystem states */
  	for (ss = 0; ss < CGROUP_SUBSYS_COUNT; ss++)
  		sum += (unsigned long)css[ss];

  	/* Fold the bits above bit 15 down before hashing */
  	sum ^= sum >> 16;

  	bucket = hash_long(sum, CSS_SET_HASH_BITS);

  	return &css_set_table[bucket];
  }
c378369d8   Ben Blum   cgroups: change c...
305
306
307
308
309
  /* RCU callback: free the css_set that embeds @obj as its rcu_head */
  static void free_css_set_rcu(struct rcu_head *obj)
  {
  	kfree(container_of(obj, struct css_set, rcu_head));
  }
817929ec2   Paul Menage   Task Control Grou...
310
311
312
313
  /* We don't maintain the lists running through each css_set to its
   * task until after the first call to cgroup_iter_start(). This
   * reduces the fork()/exit() overhead for people who have cgroups
   * compiled into their kernel but not actually in use */
8947f9d5b   Li Zefan   cgroups: annotate...
314
  static int use_task_css_set_links __read_mostly;
817929ec2   Paul Menage   Task Control Grou...
315

2c6ab6d20   Paul Menage   cgroups: allow cg...
316
  static void __put_css_set(struct css_set *cg, int taskexit)
b4f48b636   Paul Menage   Task Control Grou...
317
  {
71cbb949d   KOSAKI Motohiro   cgroup: list_for_...
318
319
  	struct cg_cgroup_link *link;
  	struct cg_cgroup_link *saved_link;
146aa1bd0   Lai Jiangshan   cgroups: fix prob...
320
321
322
323
324
325
326
327
328
329
330
331
  	/*
  	 * Ensure that the refcount doesn't hit zero while any readers
  	 * can see it. Similar to atomic_dec_and_lock(), but for an
  	 * rwlock
  	 */
  	if (atomic_add_unless(&cg->refcount, -1, 1))
  		return;
  	write_lock(&css_set_lock);
  	if (!atomic_dec_and_test(&cg->refcount)) {
  		write_unlock(&css_set_lock);
  		return;
  	}
81a6a5cdd   Paul Menage   Task Control Grou...
332

2c6ab6d20   Paul Menage   cgroups: allow cg...
333
334
335
336
337
338
339
340
341
  	/* This css_set is dead. unlink it and release cgroup refcounts */
  	hlist_del(&cg->hlist);
  	css_set_count--;
  
  	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
  				 cg_link_list) {
  		struct cgroup *cgrp = link->cgrp;
  		list_del(&link->cg_link_list);
  		list_del(&link->cgrp_link_list);
bd89aabc6   Paul Menage   Control groups: R...
342
343
  		if (atomic_dec_and_test(&cgrp->count) &&
  		    notify_on_release(cgrp)) {
81a6a5cdd   Paul Menage   Task Control Grou...
344
  			if (taskexit)
bd89aabc6   Paul Menage   Control groups: R...
345
346
  				set_bit(CGRP_RELEASABLE, &cgrp->flags);
  			check_for_release(cgrp);
81a6a5cdd   Paul Menage   Task Control Grou...
347
  		}
2c6ab6d20   Paul Menage   cgroups: allow cg...
348
349
  
  		kfree(link);
81a6a5cdd   Paul Menage   Task Control Grou...
350
  	}
2c6ab6d20   Paul Menage   cgroups: allow cg...
351
352
  
  	write_unlock(&css_set_lock);
c378369d8   Ben Blum   cgroups: change c...
353
  	call_rcu(&cg->rcu_head, free_css_set_rcu);
b4f48b636   Paul Menage   Task Control Grou...
354
  }
817929ec2   Paul Menage   Task Control Grou...
355
356
357
358
359
  /*
   * refcounted get/put for css_set objects
   */
  static inline void get_css_set(struct css_set *cg)
  {
146aa1bd0   Lai Jiangshan   cgroups: fix prob...
360
  	atomic_inc(&cg->refcount);
817929ec2   Paul Menage   Task Control Grou...
361
362
363
364
  }
  
  static inline void put_css_set(struct css_set *cg)
  {
  	/* Plain put: taskexit == 0, so CGRP_RELEASABLE is not set here */
  	__put_css_set(cg, 0);
  }
81a6a5cdd   Paul Menage   Task Control Grou...
367
368
  static inline void put_css_set_taskexit(struct css_set *cg)
  {
  	/* taskexit == 1: __put_css_set() may set CGRP_RELEASABLE */
  	__put_css_set(cg, 1);
  }
817929ec2   Paul Menage   Task Control Grou...
371
  /*
7717f7ba9   Paul Menage   cgroups: add a ba...
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
   * compare_css_sets - helper function for find_existing_css_set().
   * @cg: candidate css_set being tested
   * @old_cg: existing css_set for a task
   * @new_cgrp: cgroup that's being entered by the task
   * @template: desired set of css pointers in css_set (pre-calculated)
   *
   * Returns true if "cg" matches "old_cg" except for the hierarchy
   * which "new_cgrp" belongs to, for which it should match "new_cgrp".
   */
  static bool compare_css_sets(struct css_set *cg,
  			     struct css_set *old_cg,
  			     struct cgroup *new_cgrp,
  			     struct cgroup_subsys_state *template[])
  {
  	struct list_head *pos = &cg->cg_links;
  	struct list_head *old_pos = &old_cg->cg_links;

  	/* Cheap filter first: every subsystem state pointer has to match */
  	if (memcmp(template, cg->subsys, sizeof(cg->subsys)) != 0)
  		return false;

  	/*
  	 * Now compare the cgroup pointers themselves, which is what tells
  	 * apart different cgroups in hierarchies with no subsystems. This
  	 * walk alone would be sufficient, but on most setups the memcmp
  	 * above rejects almost all candidates before we get to this more
  	 * expensive check.
  	 */
  	for (;;) {
  		struct cgroup *cand, *prev;

  		pos = pos->next;
  		old_pos = old_pos->next;

  		/* Both lists have the same length, so they must end together */
  		if (pos == &cg->cg_links) {
  			BUG_ON(old_pos != &old_cg->cg_links);
  			break;
  		}
  		BUG_ON(old_pos == &old_cg->cg_links);

  		/* Look up the cgroup behind each of the two links */
  		cand = list_entry(pos, struct cg_cgroup_link,
  				  cg_link_list)->cgrp;
  		prev = list_entry(old_pos, struct cg_cgroup_link,
  				  cg_link_list)->cgrp;

  		/* Hierarchies should be linked in the same order */
  		BUG_ON(cand->root != prev->root);

  		/*
  		 * In the hierarchy that is being changed the candidate has
  		 * to point at the new cgroup; in every other hierarchy it
  		 * has to point at the same cgroup as the old css_set.
  		 */
  		if (cand != (cand->root == new_cgrp->root ? new_cgrp : prev))
  			return false;
  	}

  	return true;
  }
  
  /*
817929ec2   Paul Menage   Task Control Grou...
444
445
   * find_existing_css_set() is a helper for
   * find_css_set(), and checks to see whether an existing
472b1053f   Li Zefan   cgroups: use a ha...
446
   * css_set is suitable.
817929ec2   Paul Menage   Task Control Grou...
447
448
449
450
   *
   * oldcg: the cgroup group that we're using before the cgroup
   * transition
   *
bd89aabc6   Paul Menage   Control groups: R...
451
   * cgrp: the cgroup that we're moving into
817929ec2   Paul Menage   Task Control Grou...
452
453
454
455
   *
   * template: location in which to build the desired set of subsystem
   * state objects for the new cgroup group
   */
817929ec2   Paul Menage   Task Control Grou...
456
457
  static struct css_set *find_existing_css_set(
  	struct css_set *oldcg,
bd89aabc6   Paul Menage   Control groups: R...
458
  	struct cgroup *cgrp,
817929ec2   Paul Menage   Task Control Grou...
459
  	struct cgroup_subsys_state *template[])
b4f48b636   Paul Menage   Task Control Grou...
460
461
  {
  	int i;
bd89aabc6   Paul Menage   Control groups: R...
462
  	struct cgroupfs_root *root = cgrp->root;
472b1053f   Li Zefan   cgroups: use a ha...
463
464
465
  	struct hlist_head *hhead;
  	struct hlist_node *node;
  	struct css_set *cg;
817929ec2   Paul Menage   Task Control Grou...
466

aae8aab40   Ben Blum   cgroups: revamp s...
467
468
469
470
471
  	/*
  	 * Build the set of subsystem state objects that we want to see in the
  	 * new css_set. while subsystems can change globally, the entries here
  	 * won't change, so no need for locking.
  	 */
817929ec2   Paul Menage   Task Control Grou...
472
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
8d53d55d2   Li Zefan   cgroup: fix subsy...
473
  		if (root->subsys_bits & (1UL << i)) {
817929ec2   Paul Menage   Task Control Grou...
474
475
476
  			/* Subsystem is in this hierarchy. So we want
  			 * the subsystem state from the new
  			 * cgroup */
bd89aabc6   Paul Menage   Control groups: R...
477
  			template[i] = cgrp->subsys[i];
817929ec2   Paul Menage   Task Control Grou...
478
479
480
481
482
483
  		} else {
  			/* Subsystem is not in this hierarchy, so we
  			 * don't want to change the subsystem state */
  			template[i] = oldcg->subsys[i];
  		}
  	}
472b1053f   Li Zefan   cgroups: use a ha...
484
485
  	hhead = css_set_hash(template);
  	hlist_for_each_entry(cg, node, hhead, hlist) {
7717f7ba9   Paul Menage   cgroups: add a ba...
486
487
488
489
490
  		if (!compare_css_sets(cg, oldcg, cgrp, template))
  			continue;
  
  		/* This css_set matches what we need */
  		return cg;
472b1053f   Li Zefan   cgroups: use a ha...
491
  	}
817929ec2   Paul Menage   Task Control Grou...
492
493
494
495
  
  	/* No existing cgroup group matched */
  	return NULL;
  }
36553434f   Li Zefan   cgroup: remove du...
496
497
498
499
500
501
502
503
504
505
  static void free_cg_links(struct list_head *tmp)
  {
  	struct cg_cgroup_link *link;
  	struct cg_cgroup_link *saved_link;
  
  	list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
  		list_del(&link->cgrp_link_list);
  		kfree(link);
  	}
  }
817929ec2   Paul Menage   Task Control Grou...
506
507
  /*
   * allocate_cg_links() allocates "count" cg_cgroup_link structures
bd89aabc6   Paul Menage   Control groups: R...
508
   * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
817929ec2   Paul Menage   Task Control Grou...
509
510
   * success or a negative error
   */
817929ec2   Paul Menage   Task Control Grou...
511
512
513
514
515
516
517
518
  static int allocate_cg_links(int count, struct list_head *tmp)
  {
  	struct cg_cgroup_link *link;
  	int i;
  	INIT_LIST_HEAD(tmp);
  	for (i = 0; i < count; i++) {
  		link = kmalloc(sizeof(*link), GFP_KERNEL);
  		if (!link) {
36553434f   Li Zefan   cgroup: remove du...
519
  			free_cg_links(tmp);
817929ec2   Paul Menage   Task Control Grou...
520
521
  			return -ENOMEM;
  		}
bd89aabc6   Paul Menage   Control groups: R...
522
  		list_add(&link->cgrp_link_list, tmp);
817929ec2   Paul Menage   Task Control Grou...
523
524
525
  	}
  	return 0;
  }
c12f65d43   Li Zefan   cgroups: introduc...
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
  /**
   * link_css_set - a helper function to link a css_set to a cgroup
   * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
   * @cg: the css_set to be linked
   * @cgrp: the destination cgroup
   */
  static void link_css_set(struct list_head *tmp_cg_links,
  			 struct css_set *cg, struct cgroup *cgrp)
  {
  	struct cg_cgroup_link *link;
  
  	BUG_ON(list_empty(tmp_cg_links));
  	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
  				cgrp_link_list);
  	link->cg = cg;
7717f7ba9   Paul Menage   cgroups: add a ba...
541
  	link->cgrp = cgrp;
2c6ab6d20   Paul Menage   cgroups: allow cg...
542
  	atomic_inc(&cgrp->count);
c12f65d43   Li Zefan   cgroups: introduc...
543
  	list_move(&link->cgrp_link_list, &cgrp->css_sets);
7717f7ba9   Paul Menage   cgroups: add a ba...
544
545
546
547
548
  	/*
  	 * Always add links to the tail of the list so that the list
  	 * is sorted by order of hierarchy creation
  	 */
  	list_add_tail(&link->cg_link_list, &cg->cg_links);
c12f65d43   Li Zefan   cgroups: introduc...
549
  }
817929ec2   Paul Menage   Task Control Grou...
550
551
552
553
554
555
556
  /*
   * find_css_set() takes an existing cgroup group and a
   * cgroup object, and returns a css_set object that's
   * equivalent to the old group, but with the given cgroup
   * substituted into the appropriate hierarchy. Must be called with
   * cgroup_mutex held
   */
817929ec2   Paul Menage   Task Control Grou...
557
  static struct css_set *find_css_set(
bd89aabc6   Paul Menage   Control groups: R...
558
  	struct css_set *oldcg, struct cgroup *cgrp)
817929ec2   Paul Menage   Task Control Grou...
559
560
561
  {
  	struct css_set *res;
  	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
817929ec2   Paul Menage   Task Control Grou...
562
563
  
  	struct list_head tmp_cg_links;
817929ec2   Paul Menage   Task Control Grou...
564

472b1053f   Li Zefan   cgroups: use a ha...
565
  	struct hlist_head *hhead;
7717f7ba9   Paul Menage   cgroups: add a ba...
566
  	struct cg_cgroup_link *link;
472b1053f   Li Zefan   cgroups: use a ha...
567

817929ec2   Paul Menage   Task Control Grou...
568
569
  	/* First see if we already have a cgroup group that matches
  	 * the desired set */
7e9abd89c   Li Zefan   cgroup: use read ...
570
  	read_lock(&css_set_lock);
bd89aabc6   Paul Menage   Control groups: R...
571
  	res = find_existing_css_set(oldcg, cgrp, template);
817929ec2   Paul Menage   Task Control Grou...
572
573
  	if (res)
  		get_css_set(res);
7e9abd89c   Li Zefan   cgroup: use read ...
574
  	read_unlock(&css_set_lock);
817929ec2   Paul Menage   Task Control Grou...
575
576
577
578
579
580
581
582
583
584
585
586
587
  
  	if (res)
  		return res;
  
  	res = kmalloc(sizeof(*res), GFP_KERNEL);
  	if (!res)
  		return NULL;
  
  	/* Allocate all the cg_cgroup_link objects that we'll need */
  	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
  		kfree(res);
  		return NULL;
  	}
146aa1bd0   Lai Jiangshan   cgroups: fix prob...
588
  	atomic_set(&res->refcount, 1);
817929ec2   Paul Menage   Task Control Grou...
589
590
  	INIT_LIST_HEAD(&res->cg_links);
  	INIT_LIST_HEAD(&res->tasks);
472b1053f   Li Zefan   cgroups: use a ha...
591
  	INIT_HLIST_NODE(&res->hlist);
817929ec2   Paul Menage   Task Control Grou...
592
593
594
595
596
597
598
  
  	/* Copy the set of subsystem state objects generated in
  	 * find_existing_css_set() */
  	memcpy(res->subsys, template, sizeof(res->subsys));
  
  	write_lock(&css_set_lock);
  	/* Add reference counts and links from the new css_set. */
7717f7ba9   Paul Menage   cgroups: add a ba...
599
600
601
602
603
604
  	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
  		struct cgroup *c = link->cgrp;
  		if (c->root == cgrp->root)
  			c = cgrp;
  		link_css_set(&tmp_cg_links, res, c);
  	}
817929ec2   Paul Menage   Task Control Grou...
605
606
  
  	BUG_ON(!list_empty(&tmp_cg_links));
817929ec2   Paul Menage   Task Control Grou...
607
  	css_set_count++;
472b1053f   Li Zefan   cgroups: use a ha...
608
609
610
611
  
  	/* Add this cgroup group to the hash table */
  	hhead = css_set_hash(res->subsys);
  	hlist_add_head(&res->hlist, hhead);
817929ec2   Paul Menage   Task Control Grou...
612
613
614
  	write_unlock(&css_set_lock);
  
  	return res;
b4f48b636   Paul Menage   Task Control Grou...
615
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
616
  /*
7717f7ba9   Paul Menage   cgroups: add a ba...
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
   * Return the cgroup for "task" from the given hierarchy. Must be
   * called with cgroup_mutex held.
   */
  static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  					    struct cgroupfs_root *root)
  {
  	struct css_set *css;
  	struct cgroup *res = NULL;
  
  	BUG_ON(!mutex_is_locked(&cgroup_mutex));
  	read_lock(&css_set_lock);
  	/*
  	 * No need to lock the task - since we hold cgroup_mutex the
  	 * task can't change groups, so the only thing that can happen
  	 * is that it exits and its css is set back to init_css_set.
  	 */
  	css = task->cgroups;
  	if (css == &init_css_set) {
  		res = &root->top_cgroup;
  	} else {
  		struct cg_cgroup_link *link;
  		list_for_each_entry(link, &css->cg_links, cg_link_list) {
  			struct cgroup *c = link->cgrp;
  			if (c->root == root) {
  				res = c;
  				break;
  			}
  		}
  	}
  	read_unlock(&css_set_lock);
  	BUG_ON(!res);
  	return res;
  }
  
  /*
ddbcc7e8e   Paul Menage   Task Control Grou...
652
653
654
655
656
657
658
659
660
   * There is one global cgroup mutex. We also require taking
   * task_lock() when dereferencing a task's cgroup subsys pointers.
   * See "The task_lock() exception", at the end of this comment.
   *
   * A task must hold cgroup_mutex to modify cgroups.
   *
   * Any task can increment and decrement the count field without lock.
   * So in general, code holding cgroup_mutex can't rely on the count
   * field not changing.  However, if the count goes to zero, then only
956db3ca0   Cliff Wickman   hotplug cpu: move...
661
   * cgroup_attach_task() can increment it again.  Because a count of zero
ddbcc7e8e   Paul Menage   Task Control Grou...
662
663
664
665
666
667
668
669
   * means that no tasks are currently attached, therefore there is no
   * way a task attached to that cgroup can fork (the other way to
   * increment the count).  So code holding cgroup_mutex can safely
   * assume that if the count is zero, it will stay zero. Similarly, if
   * a task holds cgroup_mutex on a cgroup with zero count, it
   * knows that the cgroup won't be removed, as cgroup_rmdir()
   * needs that mutex.
   *
ddbcc7e8e   Paul Menage   Task Control Grou...
670
671
672
673
674
   * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
   * (usually) take cgroup_mutex.  These are the two most performance
   * critical pieces of code here.  The exception occurs on cgroup_exit(),
   * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
   * is taken, and if the cgroup count is zero, a usermode call made
a043e3b2c   Li Zefan   cgroup: fix comments
675
676
   * to the release agent with the name of the cgroup (path relative to
   * the root of cgroup file system) as the argument.
ddbcc7e8e   Paul Menage   Task Control Grou...
677
678
679
680
681
682
683
684
685
686
687
   *
   * A cgroup can only be deleted if both its 'count' of using tasks
   * is zero, and its list of 'children' cgroups is empty.  Since all
   * tasks in the system use _some_ cgroup, and since there is always at
   * least one task in the system (init, pid == 1), therefore, top_cgroup
   * always has either children cgroups and/or using tasks.  So we don't
   * need a special hack to ensure that top_cgroup cannot be deleted.
   *
   *	The task_lock() exception
   *
   * The need for this exception arises from the action of
956db3ca0   Cliff Wickman   hotplug cpu: move...
688
   * cgroup_attach_task(), which overwrites one task's cgroup pointer with
a043e3b2c   Li Zefan   cgroup: fix comments
689
   * another.  It does so using cgroup_mutex, however there are
ddbcc7e8e   Paul Menage   Task Control Grou...
690
691
692
   * several performance critical places that need to reference
   * task->cgroup without the expense of grabbing a system global
   * mutex.  Therefore except as noted below, when dereferencing or, as
956db3ca0   Cliff Wickman   hotplug cpu: move...
693
   * in cgroup_attach_task(), modifying a task's cgroup pointer we use
ddbcc7e8e   Paul Menage   Task Control Grou...
694
695
696
697
   * task_lock(), which acts on a spinlock (task->alloc_lock) already in
   * the task_struct routinely used for such matters.
   *
   * P.S.  One more locking exception.  RCU is used to guard the
956db3ca0   Cliff Wickman   hotplug cpu: move...
698
   * update of a task's cgroup pointer by cgroup_attach_task()
ddbcc7e8e   Paul Menage   Task Control Grou...
699
   */
ddbcc7e8e   Paul Menage   Task Control Grou...
700
701
702
703
  /**
   * cgroup_lock - lock out any changes to cgroup structures
   *
   */
ddbcc7e8e   Paul Menage   Task Control Grou...
704
705
706
707
  void cgroup_lock(void)
  {
  	/* Take the global cgroup_mutex; cgroup_unlock() releases it. */
  	mutex_lock(&cgroup_mutex);
  }
67523c48a   Ben Blum   cgroups: blkio su...
708
  EXPORT_SYMBOL_GPL(cgroup_lock);
ddbcc7e8e   Paul Menage   Task Control Grou...
709
710
711
712
713
714
  
  /**
   * cgroup_unlock - release lock on cgroup changes
   *
   * Undo the lock taken in a previous cgroup_lock() call.
   */
ddbcc7e8e   Paul Menage   Task Control Grou...
715
716
717
718
  void cgroup_unlock(void)
  {
  	/* Release the global cgroup_mutex taken by cgroup_lock(). */
  	mutex_unlock(&cgroup_mutex);
  }
67523c48a   Ben Blum   cgroups: blkio su...
719
  EXPORT_SYMBOL_GPL(cgroup_unlock);
ddbcc7e8e   Paul Menage   Task Control Grou...
720
721
722
723
724
725
726
727
728
729
  
  /*
   * A couple of forward declarations required, due to cyclic reference loop:
   * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
   * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
   * -> cgroup_mkdir.
   */
  
  static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
  static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
bd89aabc6   Paul Menage   Control groups: R...
730
  static int cgroup_populate_dir(struct cgroup *cgrp);
6e1d5dcc2   Alexey Dobriyan   const: mark remai...
731
  static const struct inode_operations cgroup_dir_inode_operations;
828c09509   Alexey Dobriyan   const: constify r...
732
  static const struct file_operations proc_cgroupstats_operations;
a424316ca   Paul Menage   Task Control Grou...
733
734
  
  static struct backing_dev_info cgroup_backing_dev_info = {
d993831fa   Jens Axboe   writeback: add na...
735
  	.name		= "cgroup",
e4ad08fe6   Miklos Szeredi   mm: bdi: add sepa...
736
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
a424316ca   Paul Menage   Task Control Grou...
737
  };
ddbcc7e8e   Paul Menage   Task Control Grou...
738

38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
739
740
  static int alloc_css_id(struct cgroup_subsys *ss,
  			struct cgroup *parent, struct cgroup *child);
ddbcc7e8e   Paul Menage   Task Control Grou...
741
742
743
  /*
   * Allocate an inode on @sb and fill in the fields cgroupfs cares about:
   * the requested @mode, ownership taken from the caller's fsuid/fsgid,
   * all three timestamps set to the current time, and the mapping pointed
   * at cgroup_backing_dev_info.
   *
   * Returns the new inode, or NULL if new_inode() could not allocate one.
   */
  static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
  {
  	struct inode *inode = new_inode(sb);

  	if (!inode)
  		return NULL;

  	inode->i_mode = mode;
  	inode->i_uid = current_fsuid();
  	inode->i_gid = current_fsgid();
  	/* One CURRENT_TIME sample shared by all three stamps. */
  	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
  	inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;

  	return inode;
  }
4fca88c87   KAMEZAWA Hiroyuki   memory cgroup enh...
754
755
756
757
  /*
   * Call subsys's pre_destroy handler.
   * This is called before css refcnt check.
   */
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
758
  static int cgroup_call_pre_destroy(struct cgroup *cgrp)
4fca88c87   KAMEZAWA Hiroyuki   memory cgroup enh...
759
760
  {
  	struct cgroup_subsys *ss;
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
761
  	int ret = 0;
4fca88c87   KAMEZAWA Hiroyuki   memory cgroup enh...
762
  	for_each_subsys(cgrp->root, ss)
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
763
764
765
  		if (ss->pre_destroy) {
  			ret = ss->pre_destroy(ss, cgrp);
  			if (ret)
4ab78683c   Kirill A. Shutemov   cgroups: fix race...
766
  				break;
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
767
  		}
0dea11687   Kirill A. Shutemov   cgroup: implement...
768

ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
769
  	return ret;
4fca88c87   KAMEZAWA Hiroyuki   memory cgroup enh...
770
  }
a47295e6b   Paul Menage   cgroups: make cgr...
771
772
773
774
775
776
  /* RCU callback queued by cgroup_diput() via call_rcu(); frees the cgroup. */
  static void free_cgroup_rcu(struct rcu_head *obj)
  {
  	/* @obj is the rcu_head embedded in the cgroup being freed. */
  	struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
  
  	kfree(cgrp);
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
777
778
779
780
  static void cgroup_diput(struct dentry *dentry, struct inode *inode)
  {
  	/* is dentry a directory ? if so, kfree() associated cgroup */
  	if (S_ISDIR(inode->i_mode)) {
bd89aabc6   Paul Menage   Control groups: R...
781
  		struct cgroup *cgrp = dentry->d_fsdata;
8dc4f3e17   Paul Menage   cgroups: move cgr...
782
  		struct cgroup_subsys *ss;
bd89aabc6   Paul Menage   Control groups: R...
783
  		BUG_ON(!(cgroup_is_removed(cgrp)));
81a6a5cdd   Paul Menage   Task Control Grou...
784
785
786
787
788
789
790
  		/* It's possible for external users to be holding css
  		 * reference counts on a cgroup; css_put() needs to
  		 * be able to access the cgroup after decrementing
  		 * the reference count in order to know if it needs to
  		 * queue the cgroup to be handled by the release
  		 * agent */
  		synchronize_rcu();
8dc4f3e17   Paul Menage   cgroups: move cgr...
791
792
793
794
795
  
  		mutex_lock(&cgroup_mutex);
  		/*
  		 * Release the subsystem state objects.
  		 */
75139b827   Li Zefan   cgroups: remove s...
796
797
  		for_each_subsys(cgrp->root, ss)
  			ss->destroy(ss, cgrp);
8dc4f3e17   Paul Menage   cgroups: move cgr...
798
799
800
  
  		cgrp->root->number_of_cgroups--;
  		mutex_unlock(&cgroup_mutex);
a47295e6b   Paul Menage   cgroups: make cgr...
801
802
803
804
  		/*
  		 * Drop the active superblock reference that we took when we
  		 * created the cgroup
  		 */
8dc4f3e17   Paul Menage   cgroups: move cgr...
805
  		deactivate_super(cgrp->root->sb);
72a8cb30d   Ben Blum   cgroups: ensure c...
806
807
808
809
810
  		/*
  		 * if we're getting rid of the cgroup, refcount should ensure
  		 * that there are no pidlists left.
  		 */
  		BUG_ON(!list_empty(&cgrp->pidlists));
a47295e6b   Paul Menage   cgroups: make cgr...
811
  		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
ddbcc7e8e   Paul Menage   Task Control Grou...
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
  	}
  	iput(inode);
  }
  
  /*
   * Remove directory dentry @d from its parent using d_delete() followed
   * by simple_rmdir().
   */
  static void remove_dir(struct dentry *d)
  {
  	/* Hold a reference on the parent until the removal is finished. */
  	struct dentry *parent = dget(d->d_parent);
  
  	d_delete(d);
  	simple_rmdir(parent->d_inode, d);
  	dput(parent);
  }
  
  static void cgroup_clear_directory(struct dentry *dentry)
  {
  	struct list_head *node;
  
  	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
  	spin_lock(&dcache_lock);
  	node = dentry->d_subdirs.next;
  	while (node != &dentry->d_subdirs) {
  		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
  		list_del_init(node);
  		if (d->d_inode) {
  			/* This should never be called on a cgroup
  			 * directory with child cgroups */
  			BUG_ON(d->d_inode->i_mode & S_IFDIR);
  			d = dget_locked(d);
  			spin_unlock(&dcache_lock);
  			d_delete(d);
  			simple_unlink(dentry->d_inode, d);
  			dput(d);
  			spin_lock(&dcache_lock);
  		}
  		node = dentry->d_subdirs.next;
  	}
  	spin_unlock(&dcache_lock);
  }
  
  /*
   * NOTE : the dentry must have been dget()'ed
   */
  static void cgroup_d_remove_dir(struct dentry *dentry)
  {
  	/* Unlink the files inside the directory first. */
  	cgroup_clear_directory(dentry);
  
  	/* Detach the directory from its parent's child list ... */
  	spin_lock(&dcache_lock);
  	list_del_init(&dentry->d_u.d_child);
  	spin_unlock(&dcache_lock);
  	/* ... then remove the directory itself. */
  	remove_dir(dentry);
  }
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
863
864
865
866
867
868
  /*
   * A queue for tasks waiting in rmdir() on a cgroup. A task will sleep when
   * cgroup->count == 0 && list_empty(&cgroup->children) && a subsystem still
   * holds a reference to css->refcnt. In general, this refcnt is expected to
   * go down to zero soon.
   *
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
869
   * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
870
871
   */
  DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
872
  static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
873
  {
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
874
  	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
875
876
  		wake_up_all(&cgroup_rmdir_waitq);
  }
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
877
878
879
880
881
882
883
884
885
886
  /*
   * Take a reference on @css.  Paired with
   * cgroup_release_and_wakeup_rmdir(), which drops it again.
   */
  void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
  {
  	css_get(css);
  }
  
  /*
   * Wake any rmdir() waiter on @css's cgroup, then drop the reference on
   * @css.
   */
  void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
  {
  	/* Wake first: css->cgroup is read while our reference is still held. */
  	cgroup_wakeup_rmdir_waiter(css->cgroup);
  	css_put(css);
  }
aae8aab40   Ben Blum   cgroups: revamp s...
887
  /*
cf5d5941f   Ben Blum   cgroups: subsyste...
888
889
890
   * Call with cgroup_mutex held. Drops reference counts on modules, including
   * any duplicate ones that parse_cgroupfs_options took. If this function
   * returns an error, no reference counts are touched.
aae8aab40   Ben Blum   cgroups: revamp s...
891
   */
ddbcc7e8e   Paul Menage   Task Control Grou...
892
893
894
895
  static int rebind_subsystems(struct cgroupfs_root *root,
  			      unsigned long final_bits)
  {
  	unsigned long added_bits, removed_bits;
bd89aabc6   Paul Menage   Control groups: R...
896
  	struct cgroup *cgrp = &root->top_cgroup;
ddbcc7e8e   Paul Menage   Task Control Grou...
897
  	int i;
aae8aab40   Ben Blum   cgroups: revamp s...
898
  	BUG_ON(!mutex_is_locked(&cgroup_mutex));
ddbcc7e8e   Paul Menage   Task Control Grou...
899
900
901
902
  	removed_bits = root->actual_subsys_bits & ~final_bits;
  	added_bits = final_bits & ~root->actual_subsys_bits;
  	/* Check that any added subsystems are currently free */
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
8d53d55d2   Li Zefan   cgroup: fix subsy...
903
  		unsigned long bit = 1UL << i;
ddbcc7e8e   Paul Menage   Task Control Grou...
904
905
906
  		struct cgroup_subsys *ss = subsys[i];
  		if (!(bit & added_bits))
  			continue;
aae8aab40   Ben Blum   cgroups: revamp s...
907
908
909
910
911
912
  		/*
  		 * Nobody should tell us to do a subsys that doesn't exist:
  		 * parse_cgroupfs_options should catch that case and refcounts
  		 * ensure that subsystems won't disappear once selected.
  		 */
  		BUG_ON(ss == NULL);
ddbcc7e8e   Paul Menage   Task Control Grou...
913
914
915
916
917
918
919
920
921
922
  		if (ss->root != &rootnode) {
  			/* Subsystem isn't free */
  			return -EBUSY;
  		}
  	}
  
  	/* Currently we don't handle adding/removing subsystems when
  	 * any child cgroups exist. This is theoretically supportable
  	 * but involves complex error handling, so it's being left until
  	 * later */
307257cf4   Paul Menage   cgroups: fix a ra...
923
  	if (root->number_of_cgroups > 1)
ddbcc7e8e   Paul Menage   Task Control Grou...
924
925
926
927
928
929
930
931
  		return -EBUSY;
  
  	/* Process each subsystem */
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  		struct cgroup_subsys *ss = subsys[i];
  		unsigned long bit = 1UL << i;
  		if (bit & added_bits) {
  			/* We're binding this subsystem to this hierarchy */
aae8aab40   Ben Blum   cgroups: revamp s...
932
  			BUG_ON(ss == NULL);
bd89aabc6   Paul Menage   Control groups: R...
933
  			BUG_ON(cgrp->subsys[i]);
ddbcc7e8e   Paul Menage   Task Control Grou...
934
935
  			BUG_ON(!dummytop->subsys[i]);
  			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
999cd8a45   Paul Menage   cgroups: add a pe...
936
  			mutex_lock(&ss->hierarchy_mutex);
bd89aabc6   Paul Menage   Control groups: R...
937
938
  			cgrp->subsys[i] = dummytop->subsys[i];
  			cgrp->subsys[i]->cgroup = cgrp;
33a68ac1c   Li Zefan   cgroups: add inac...
939
  			list_move(&ss->sibling, &root->subsys_list);
b2aa30f7b   Lai Jiangshan   cgroups: don't pu...
940
  			ss->root = root;
ddbcc7e8e   Paul Menage   Task Control Grou...
941
  			if (ss->bind)
bd89aabc6   Paul Menage   Control groups: R...
942
  				ss->bind(ss, cgrp);
999cd8a45   Paul Menage   cgroups: add a pe...
943
  			mutex_unlock(&ss->hierarchy_mutex);
cf5d5941f   Ben Blum   cgroups: subsyste...
944
  			/* refcount was already taken, and we're keeping it */
ddbcc7e8e   Paul Menage   Task Control Grou...
945
946
  		} else if (bit & removed_bits) {
  			/* We're removing this subsystem */
aae8aab40   Ben Blum   cgroups: revamp s...
947
  			BUG_ON(ss == NULL);
bd89aabc6   Paul Menage   Control groups: R...
948
949
  			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
  			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
999cd8a45   Paul Menage   cgroups: add a pe...
950
  			mutex_lock(&ss->hierarchy_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
951
952
953
  			if (ss->bind)
  				ss->bind(ss, dummytop);
  			dummytop->subsys[i]->cgroup = dummytop;
bd89aabc6   Paul Menage   Control groups: R...
954
  			cgrp->subsys[i] = NULL;
b2aa30f7b   Lai Jiangshan   cgroups: don't pu...
955
  			subsys[i]->root = &rootnode;
33a68ac1c   Li Zefan   cgroups: add inac...
956
  			list_move(&ss->sibling, &rootnode.subsys_list);
999cd8a45   Paul Menage   cgroups: add a pe...
957
  			mutex_unlock(&ss->hierarchy_mutex);
cf5d5941f   Ben Blum   cgroups: subsyste...
958
959
  			/* subsystem is now free - drop reference on module */
  			module_put(ss->module);
ddbcc7e8e   Paul Menage   Task Control Grou...
960
961
  		} else if (bit & final_bits) {
  			/* Subsystem state should already exist */
aae8aab40   Ben Blum   cgroups: revamp s...
962
  			BUG_ON(ss == NULL);
bd89aabc6   Paul Menage   Control groups: R...
963
  			BUG_ON(!cgrp->subsys[i]);
cf5d5941f   Ben Blum   cgroups: subsyste...
964
965
966
967
968
969
970
971
  			/*
  			 * a refcount was taken, but we already had one, so
  			 * drop the extra reference.
  			 */
  			module_put(ss->module);
  #ifdef CONFIG_MODULE_UNLOAD
  			BUG_ON(ss->module && !module_refcount(ss->module));
  #endif
ddbcc7e8e   Paul Menage   Task Control Grou...
972
973
  		} else {
  			/* Subsystem state shouldn't exist */
bd89aabc6   Paul Menage   Control groups: R...
974
  			BUG_ON(cgrp->subsys[i]);
ddbcc7e8e   Paul Menage   Task Control Grou...
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
  		}
  	}
  	root->subsys_bits = root->actual_subsys_bits = final_bits;
  	synchronize_rcu();
  
  	return 0;
  }
  
  /*
   * Emit the mount options of the hierarchy behind @vfs into @seq: one
   * ",<subsystem>" per bound subsystem, then ",noprefix",
   * ",release_agent=<path>" and ",name=<name>" when those are set.
   * Takes cgroup_mutex for the duration.  Always returns 0.
   */
  static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
  {
  	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
  	struct cgroup_subsys *ss;

  	mutex_lock(&cgroup_mutex);

  	for_each_subsys(root, ss)
  		seq_printf(seq, ",%s", ss->name);

  	if (test_bit(ROOT_NOPREFIX, &root->flags))
  		seq_puts(seq, ",noprefix");

  	/* Empty strings mean "not configured" for both fields below. */
  	if (root->release_agent_path[0] != '\0')
  		seq_printf(seq, ",release_agent=%s", root->release_agent_path);

  	if (root->name[0] != '\0')
  		seq_printf(seq, ",name=%s", root->name);

  	mutex_unlock(&cgroup_mutex);

  	return 0;
  }
  
  struct cgroup_sb_opts {
  	unsigned long subsys_bits;
  	unsigned long flags;
81a6a5cdd   Paul Menage   Task Control Grou...
1004
  	char *release_agent;
c6d57f331   Paul Menage   cgroups: support ...
1005
  	char *name;
2c6ab6d20   Paul Menage   cgroups: allow cg...
1006
1007
  	/* User explicitly requested empty subsystem */
  	bool none;
c6d57f331   Paul Menage   cgroups: support ...
1008
1009
  
  	struct cgroupfs_root *new_root;
2c6ab6d20   Paul Menage   cgroups: allow cg...
1010

ddbcc7e8e   Paul Menage   Task Control Grou...
1011
  };
aae8aab40   Ben Blum   cgroups: revamp s...
1012
1013
  /*
   * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
cf5d5941f   Ben Blum   cgroups: subsyste...
1014
1015
1016
   * with cgroup_mutex held to protect the subsys[] array. This function takes
   * refcounts on subsystems to be used, unless it returns error, in which case
   * no refcounts are taken.
aae8aab40   Ben Blum   cgroups: revamp s...
1017
   */
cf5d5941f   Ben Blum   cgroups: subsyste...
1018
  static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
ddbcc7e8e   Paul Menage   Task Control Grou...
1019
1020
  {
  	char *token, *o = data ?: "all";
f9ab5b5b0   Li Zefan   cgroups: forbid n...
1021
  	unsigned long mask = (unsigned long)-1;
cf5d5941f   Ben Blum   cgroups: subsyste...
1022
1023
  	int i;
  	bool module_pin_failed = false;
f9ab5b5b0   Li Zefan   cgroups: forbid n...
1024

aae8aab40   Ben Blum   cgroups: revamp s...
1025
  	BUG_ON(!mutex_is_locked(&cgroup_mutex));
f9ab5b5b0   Li Zefan   cgroups: forbid n...
1026
1027
1028
  #ifdef CONFIG_CPUSETS
  	mask = ~(1UL << cpuset_subsys_id);
  #endif
ddbcc7e8e   Paul Menage   Task Control Grou...
1029

c6d57f331   Paul Menage   cgroups: support ...
1030
  	memset(opts, 0, sizeof(*opts));
ddbcc7e8e   Paul Menage   Task Control Grou...
1031
1032
1033
1034
1035
  
  	while ((token = strsep(&o, ",")) != NULL) {
  		if (!*token)
  			return -EINVAL;
  		if (!strcmp(token, "all")) {
8bab8dded   Paul Menage   cgroups: add cgro...
1036
  			/* Add all non-disabled subsystems */
8bab8dded   Paul Menage   cgroups: add cgro...
1037
1038
1039
  			opts->subsys_bits = 0;
  			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  				struct cgroup_subsys *ss = subsys[i];
aae8aab40   Ben Blum   cgroups: revamp s...
1040
1041
  				if (ss == NULL)
  					continue;
8bab8dded   Paul Menage   cgroups: add cgro...
1042
1043
1044
  				if (!ss->disabled)
  					opts->subsys_bits |= 1ul << i;
  			}
2c6ab6d20   Paul Menage   cgroups: allow cg...
1045
1046
1047
  		} else if (!strcmp(token, "none")) {
  			/* Explicitly have no subsystems */
  			opts->none = true;
ddbcc7e8e   Paul Menage   Task Control Grou...
1048
1049
  		} else if (!strcmp(token, "noprefix")) {
  			set_bit(ROOT_NOPREFIX, &opts->flags);
81a6a5cdd   Paul Menage   Task Control Grou...
1050
1051
1052
1053
  		} else if (!strncmp(token, "release_agent=", 14)) {
  			/* Specifying two release agents is forbidden */
  			if (opts->release_agent)
  				return -EINVAL;
c6d57f331   Paul Menage   cgroups: support ...
1054
1055
  			opts->release_agent =
  				kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
81a6a5cdd   Paul Menage   Task Control Grou...
1056
1057
  			if (!opts->release_agent)
  				return -ENOMEM;
c6d57f331   Paul Menage   cgroups: support ...
1058
  		} else if (!strncmp(token, "name=", 5)) {
c6d57f331   Paul Menage   cgroups: support ...
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
  			const char *name = token + 5;
  			/* Can't specify an empty name */
  			if (!strlen(name))
  				return -EINVAL;
  			/* Must match [\w.-]+ */
  			for (i = 0; i < strlen(name); i++) {
  				char c = name[i];
  				if (isalnum(c))
  					continue;
  				if ((c == '.') || (c == '-') || (c == '_'))
  					continue;
  				return -EINVAL;
  			}
  			/* Specifying two names is forbidden */
  			if (opts->name)
  				return -EINVAL;
  			opts->name = kstrndup(name,
  					      MAX_CGROUP_ROOT_NAMELEN,
  					      GFP_KERNEL);
  			if (!opts->name)
  				return -ENOMEM;
ddbcc7e8e   Paul Menage   Task Control Grou...
1080
1081
  		} else {
  			struct cgroup_subsys *ss;
ddbcc7e8e   Paul Menage   Task Control Grou...
1082
1083
  			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  				ss = subsys[i];
aae8aab40   Ben Blum   cgroups: revamp s...
1084
1085
  				if (ss == NULL)
  					continue;
ddbcc7e8e   Paul Menage   Task Control Grou...
1086
  				if (!strcmp(token, ss->name)) {
8bab8dded   Paul Menage   cgroups: add cgro...
1087
1088
  					if (!ss->disabled)
  						set_bit(i, &opts->subsys_bits);
ddbcc7e8e   Paul Menage   Task Control Grou...
1089
1090
1091
1092
1093
1094
1095
  					break;
  				}
  			}
  			if (i == CGROUP_SUBSYS_COUNT)
  				return -ENOENT;
  		}
  	}
2c6ab6d20   Paul Menage   cgroups: allow cg...
1096
  	/* Consistency checks */
f9ab5b5b0   Li Zefan   cgroups: forbid n...
1097
1098
1099
1100
1101
1102
1103
1104
  	/*
  	 * Option noprefix was introduced just for backward compatibility
  	 * with the old cpuset, so we allow noprefix only if mounting just
  	 * the cpuset subsystem.
  	 */
  	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
  	    (opts->subsys_bits & mask))
  		return -EINVAL;
2c6ab6d20   Paul Menage   cgroups: allow cg...
1105
1106
1107
1108
1109
1110
1111
1112
1113
  
  	/* Can't specify "none" and some subsystems */
  	if (opts->subsys_bits && opts->none)
  		return -EINVAL;
  
  	/*
  	 * We either have to specify by name or by subsystems. (So all
  	 * empty hierarchies must have a name).
  	 */
c6d57f331   Paul Menage   cgroups: support ...
1114
  	if (!opts->subsys_bits && !opts->name)
ddbcc7e8e   Paul Menage   Task Control Grou...
1115
  		return -EINVAL;
cf5d5941f   Ben Blum   cgroups: subsyste...
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
  	/*
  	 * Grab references on all the modules we'll need, so the subsystems
  	 * don't dance around before rebind_subsystems attaches them. This may
  	 * take duplicate reference counts on a subsystem that's already used,
  	 * but rebind_subsystems handles this case.
  	 */
  	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
  		unsigned long bit = 1UL << i;
  
  		if (!(bit & opts->subsys_bits))
  			continue;
  		if (!try_module_get(subsys[i]->module)) {
  			module_pin_failed = true;
  			break;
  		}
  	}
  	if (module_pin_failed) {
  		/*
  		 * oops, one of the modules was going away. this means that we
  		 * raced with a module_delete call, and to the user this is
  		 * essentially a "subsystem doesn't exist" case.
  		 */
  		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
  			/* drop refcounts only on the ones we took */
  			unsigned long bit = 1UL << i;
  
  			if (!(bit & opts->subsys_bits))
  				continue;
  			module_put(subsys[i]->module);
  		}
  		return -ENOENT;
  	}
ddbcc7e8e   Paul Menage   Task Control Grou...
1148
1149
  	return 0;
  }
cf5d5941f   Ben Blum   cgroups: subsyste...
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
  /*
   * Drop one module reference for each modular subsystem selected in
   * @subsys_bits (bit i stands for subsys[i]).  Built-in subsystems, i.e.
   * indices below CGROUP_BUILTIN_SUBSYS_COUNT, are not touched.
   */
  static void drop_parsed_module_refcounts(unsigned long subsys_bits)
  {
  	int idx = CGROUP_BUILTIN_SUBSYS_COUNT;

  	while (idx < CGROUP_SUBSYS_COUNT) {
  		if (subsys_bits & (1UL << idx))
  			module_put(subsys[idx]->module);
  		idx++;
  	}
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
1161
1162
1163
1164
  static int cgroup_remount(struct super_block *sb, int *flags, char *data)
  {
  	int ret = 0;
  	struct cgroupfs_root *root = sb->s_fs_info;
bd89aabc6   Paul Menage   Control groups: R...
1165
  	struct cgroup *cgrp = &root->top_cgroup;
ddbcc7e8e   Paul Menage   Task Control Grou...
1166
  	struct cgroup_sb_opts opts;
337eb00a2   Alessio Igor Bogani   Push BKL down int...
1167
  	lock_kernel();
bd89aabc6   Paul Menage   Control groups: R...
1168
  	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
1169
1170
1171
1172
1173
1174
  	mutex_lock(&cgroup_mutex);
  
  	/* See what subsystems are wanted */
  	ret = parse_cgroupfs_options(data, &opts);
  	if (ret)
  		goto out_unlock;
cf5d5941f   Ben Blum   cgroups: subsyste...
1175
1176
1177
  	/* Don't allow flags or name to change at remount */
  	if (opts.flags != root->flags ||
  	    (opts.name && strcmp(opts.name, root->name))) {
c6d57f331   Paul Menage   cgroups: support ...
1178
  		ret = -EINVAL;
cf5d5941f   Ben Blum   cgroups: subsyste...
1179
  		drop_parsed_module_refcounts(opts.subsys_bits);
c6d57f331   Paul Menage   cgroups: support ...
1180
1181
  		goto out_unlock;
  	}
ddbcc7e8e   Paul Menage   Task Control Grou...
1182
  	ret = rebind_subsystems(root, opts.subsys_bits);
cf5d5941f   Ben Blum   cgroups: subsyste...
1183
1184
  	if (ret) {
  		drop_parsed_module_refcounts(opts.subsys_bits);
0670e08bd   Li Zefan   cgroups: don't ch...
1185
  		goto out_unlock;
cf5d5941f   Ben Blum   cgroups: subsyste...
1186
  	}
ddbcc7e8e   Paul Menage   Task Control Grou...
1187
1188
  
  	/* (re)populate subsystem files */
0670e08bd   Li Zefan   cgroups: don't ch...
1189
  	cgroup_populate_dir(cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
1190

81a6a5cdd   Paul Menage   Task Control Grou...
1191
1192
  	if (opts.release_agent)
  		strcpy(root->release_agent_path, opts.release_agent);
ddbcc7e8e   Paul Menage   Task Control Grou...
1193
   out_unlock:
66bdc9cfc   Jesper Juhl   kernel/cgroup.c: ...
1194
  	kfree(opts.release_agent);
c6d57f331   Paul Menage   cgroups: support ...
1195
  	kfree(opts.name);
ddbcc7e8e   Paul Menage   Task Control Grou...
1196
  	mutex_unlock(&cgroup_mutex);
bd89aabc6   Paul Menage   Control groups: R...
1197
  	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
337eb00a2   Alessio Igor Bogani   Push BKL down int...
1198
  	unlock_kernel();
ddbcc7e8e   Paul Menage   Task Control Grou...
1199
1200
  	return ret;
  }
b87221de6   Alexey Dobriyan   const: mark remai...
1201
  static const struct super_operations cgroup_ops = {
ddbcc7e8e   Paul Menage   Task Control Grou...
1202
1203
1204
1205
1206
  	.statfs = simple_statfs,
  	.drop_inode = generic_delete_inode,
  	.show_options = cgroup_show_options,
  	.remount_fs = cgroup_remount,
  };
cc31edcee   Paul Menage   cgroups: convert ...
1207
1208
1209
1210
1211
1212
  /*
   * Initialise the list heads and locks embedded in @cgrp.  Nothing else in
   * the structure is touched.
   */
  static void init_cgroup_housekeeping(struct cgroup *cgrp)
  {
  	/* Locks */
  	mutex_init(&cgrp->pidlist_mutex);
  	spin_lock_init(&cgrp->event_list_lock);

  	/* List heads */
  	INIT_LIST_HEAD(&cgrp->sibling);
  	INIT_LIST_HEAD(&cgrp->children);
  	INIT_LIST_HEAD(&cgrp->css_sets);
  	INIT_LIST_HEAD(&cgrp->release_list);
  	INIT_LIST_HEAD(&cgrp->pidlists);
  	INIT_LIST_HEAD(&cgrp->event_list);
  }
c6d57f331   Paul Menage   cgroups: support ...
1218

ddbcc7e8e   Paul Menage   Task Control Grou...
1219
1220
  static void init_cgroup_root(struct cgroupfs_root *root)
  {
bd89aabc6   Paul Menage   Control groups: R...
1221
  	struct cgroup *cgrp = &root->top_cgroup;
ddbcc7e8e   Paul Menage   Task Control Grou...
1222
1223
1224
  	INIT_LIST_HEAD(&root->subsys_list);
  	INIT_LIST_HEAD(&root->root_list);
  	root->number_of_cgroups = 1;
bd89aabc6   Paul Menage   Control groups: R...
1225
1226
  	cgrp->root = root;
  	cgrp->top_cgroup = cgrp;
cc31edcee   Paul Menage   cgroups: convert ...
1227
  	init_cgroup_housekeeping(cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
1228
  }
2c6ab6d20   Paul Menage   cgroups: allow cg...
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
  /*
   * Allocate a hierarchy ID for @root from hierarchy_ida and store it in
   * root->hierarchy_id.  Allocation starts at next_hierarchy_id and falls
   * back to the lowest free ID when the upper range is exhausted.  The
   * attempt is repeated for as long as the allocator answers -EAGAIN.
   *
   * Returns true on success, false if ida_pre_get() fails.
   */
  static bool init_root_id(struct cgroupfs_root *root)
  {
  	int err;

  	for (;;) {
  		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
  			return false;

  		spin_lock(&hierarchy_id_lock);
  		/* Try to allocate the next unused ID */
  		err = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
  					&root->hierarchy_id);
  		/* Try again starting from 0 */
  		if (err == -ENOSPC)
  			err = ida_get_new(&hierarchy_ida, &root->hierarchy_id);

  		if (err == 0)
  			next_hierarchy_id = root->hierarchy_id + 1;
  		else
  			/* Can only get here if the 31-bit IDR is full ... */
  			BUG_ON(err != -EAGAIN);
  		spin_unlock(&hierarchy_id_lock);

  		if (err == 0)
  			return true;
  	}
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
1253
1254
  static int cgroup_test_super(struct super_block *sb, void *data)
  {
c6d57f331   Paul Menage   cgroups: support ...
1255
  	struct cgroup_sb_opts *opts = data;
ddbcc7e8e   Paul Menage   Task Control Grou...
1256
  	struct cgroupfs_root *root = sb->s_fs_info;
c6d57f331   Paul Menage   cgroups: support ...
1257
1258
1259
  	/* If we asked for a name then it must match */
  	if (opts->name && strcmp(opts->name, root->name))
  		return 0;
ddbcc7e8e   Paul Menage   Task Control Grou...
1260

2c6ab6d20   Paul Menage   cgroups: allow cg...
1261
1262
1263
1264
1265
1266
  	/*
  	 * If we asked for subsystems (or explicitly for no
  	 * subsystems) then they must match
  	 */
  	if ((opts->subsys_bits || opts->none)
  	    && (opts->subsys_bits != root->subsys_bits))
ddbcc7e8e   Paul Menage   Task Control Grou...
1267
1268
1269
1270
  		return 0;
  
  	return 1;
  }
c6d57f331   Paul Menage   cgroups: support ...
1271
1272
1273
  static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
  {
  	struct cgroupfs_root *root;
2c6ab6d20   Paul Menage   cgroups: allow cg...
1274
  	if (!opts->subsys_bits && !opts->none)
c6d57f331   Paul Menage   cgroups: support ...
1275
1276
1277
1278
1279
  		return NULL;
  
  	root = kzalloc(sizeof(*root), GFP_KERNEL);
  	if (!root)
  		return ERR_PTR(-ENOMEM);
2c6ab6d20   Paul Menage   cgroups: allow cg...
1280
1281
1282
1283
  	if (!init_root_id(root)) {
  		kfree(root);
  		return ERR_PTR(-ENOMEM);
  	}
c6d57f331   Paul Menage   cgroups: support ...
1284
  	init_cgroup_root(root);
2c6ab6d20   Paul Menage   cgroups: allow cg...
1285

c6d57f331   Paul Menage   cgroups: support ...
1286
1287
1288
1289
1290
1291
1292
1293
  	root->subsys_bits = opts->subsys_bits;
  	root->flags = opts->flags;
  	if (opts->release_agent)
  		strcpy(root->release_agent_path, opts->release_agent);
  	if (opts->name)
  		strcpy(root->name, opts->name);
  	return root;
  }
2c6ab6d20   Paul Menage   cgroups: allow cg...
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
  /*
   * Give @root's hierarchy ID back to hierarchy_ida and free the root.
   * A NULL @root is ignored; a root with a zero hierarchy ID is a bug.
   */
  static void cgroup_drop_root(struct cgroupfs_root *root)
  {
  	if (root) {
  		BUG_ON(!root->hierarchy_id);

  		spin_lock(&hierarchy_id_lock);
  		ida_remove(&hierarchy_ida, root->hierarchy_id);
  		spin_unlock(&hierarchy_id_lock);

  		kfree(root);
  	}
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
1305
1306
1307
  static int cgroup_set_super(struct super_block *sb, void *data)
  {
  	int ret;
c6d57f331   Paul Menage   cgroups: support ...
1308
1309
1310
1311
1312
  	struct cgroup_sb_opts *opts = data;
  
  	/* If we don't have a new root, we can't set up a new sb */
  	if (!opts->new_root)
  		return -EINVAL;
2c6ab6d20   Paul Menage   cgroups: allow cg...
1313
  	BUG_ON(!opts->subsys_bits && !opts->none);
ddbcc7e8e   Paul Menage   Task Control Grou...
1314
1315
1316
1317
  
  	ret = set_anon_super(sb, NULL);
  	if (ret)
  		return ret;
c6d57f331   Paul Menage   cgroups: support ...
1318
1319
  	sb->s_fs_info = opts->new_root;
  	opts->new_root->sb = sb;
ddbcc7e8e   Paul Menage   Task Control Grou...
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
  
  	sb->s_blocksize = PAGE_CACHE_SIZE;
  	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
  	sb->s_magic = CGROUP_SUPER_MAGIC;
  	sb->s_op = &cgroup_ops;
  
  	return 0;
  }
  
  static int cgroup_get_rootdir(struct super_block *sb)
  {
  	struct inode *inode =
  		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
  	struct dentry *dentry;
  
  	if (!inode)
  		return -ENOMEM;
ddbcc7e8e   Paul Menage   Task Control Grou...
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
  	inode->i_fop = &simple_dir_operations;
  	inode->i_op = &cgroup_dir_inode_operations;
  	/* directories start off with i_nlink == 2 (for "." entry) */
  	inc_nlink(inode);
  	dentry = d_alloc_root(inode);
  	if (!dentry) {
  		iput(inode);
  		return -ENOMEM;
  	}
  	sb->s_root = dentry;
  	return 0;
  }
  
  static int cgroup_get_sb(struct file_system_type *fs_type,
  			 int flags, const char *unused_dev_name,
  			 void *data, struct vfsmount *mnt)
  {
  	struct cgroup_sb_opts opts;
c6d57f331   Paul Menage   cgroups: support ...
1355
  	struct cgroupfs_root *root;
ddbcc7e8e   Paul Menage   Task Control Grou...
1356
1357
  	int ret = 0;
  	struct super_block *sb;
c6d57f331   Paul Menage   cgroups: support ...
1358
  	struct cgroupfs_root *new_root;
ddbcc7e8e   Paul Menage   Task Control Grou...
1359
1360
  
  	/* First find the desired set of subsystems */
aae8aab40   Ben Blum   cgroups: revamp s...
1361
  	mutex_lock(&cgroup_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
1362
  	ret = parse_cgroupfs_options(data, &opts);
aae8aab40   Ben Blum   cgroups: revamp s...
1363
  	mutex_unlock(&cgroup_mutex);
c6d57f331   Paul Menage   cgroups: support ...
1364
1365
  	if (ret)
  		goto out_err;
ddbcc7e8e   Paul Menage   Task Control Grou...
1366

c6d57f331   Paul Menage   cgroups: support ...
1367
1368
1369
1370
1371
1372
1373
  	/*
  	 * Allocate a new cgroup root. We may not need it if we're
  	 * reusing an existing hierarchy.
  	 */
  	new_root = cgroup_root_from_opts(&opts);
  	if (IS_ERR(new_root)) {
  		ret = PTR_ERR(new_root);
cf5d5941f   Ben Blum   cgroups: subsyste...
1374
  		goto drop_modules;
81a6a5cdd   Paul Menage   Task Control Grou...
1375
  	}
c6d57f331   Paul Menage   cgroups: support ...
1376
  	opts.new_root = new_root;
ddbcc7e8e   Paul Menage   Task Control Grou...
1377

c6d57f331   Paul Menage   cgroups: support ...
1378
1379
  	/* Locate an existing or new sb for this hierarchy */
  	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
ddbcc7e8e   Paul Menage   Task Control Grou...
1380
  	if (IS_ERR(sb)) {
c6d57f331   Paul Menage   cgroups: support ...
1381
  		ret = PTR_ERR(sb);
2c6ab6d20   Paul Menage   cgroups: allow cg...
1382
  		cgroup_drop_root(opts.new_root);
cf5d5941f   Ben Blum   cgroups: subsyste...
1383
  		goto drop_modules;
ddbcc7e8e   Paul Menage   Task Control Grou...
1384
  	}
c6d57f331   Paul Menage   cgroups: support ...
1385
1386
1387
1388
1389
  	root = sb->s_fs_info;
  	BUG_ON(!root);
  	if (root == opts.new_root) {
  		/* We used the new root structure, so this is a new hierarchy */
  		struct list_head tmp_cg_links;
c12f65d43   Li Zefan   cgroups: introduc...
1390
  		struct cgroup *root_cgrp = &root->top_cgroup;
817929ec2   Paul Menage   Task Control Grou...
1391
  		struct inode *inode;
c6d57f331   Paul Menage   cgroups: support ...
1392
  		struct cgroupfs_root *existing_root;
28fd5dfc1   Li Zefan   cgroups: remove t...
1393
  		int i;
ddbcc7e8e   Paul Menage   Task Control Grou...
1394
1395
1396
1397
1398
1399
  
  		BUG_ON(sb->s_root != NULL);
  
  		ret = cgroup_get_rootdir(sb);
  		if (ret)
  			goto drop_new_super;
817929ec2   Paul Menage   Task Control Grou...
1400
  		inode = sb->s_root->d_inode;
ddbcc7e8e   Paul Menage   Task Control Grou...
1401

817929ec2   Paul Menage   Task Control Grou...
1402
  		mutex_lock(&inode->i_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
1403
  		mutex_lock(&cgroup_mutex);
c6d57f331   Paul Menage   cgroups: support ...
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
  		if (strlen(root->name)) {
  			/* Check for name clashes with existing mounts */
  			for_each_active_root(existing_root) {
  				if (!strcmp(existing_root->name, root->name)) {
  					ret = -EBUSY;
  					mutex_unlock(&cgroup_mutex);
  					mutex_unlock(&inode->i_mutex);
  					goto drop_new_super;
  				}
  			}
  		}
817929ec2   Paul Menage   Task Control Grou...
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
  		/*
  		 * We're accessing css_set_count without locking
  		 * css_set_lock here, but that's OK - it can only be
  		 * increased by someone holding cgroup_lock, and
  		 * that's us. The worst that can happen is that we
  		 * have some link structures left over
  		 */
  		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
  		if (ret) {
  			mutex_unlock(&cgroup_mutex);
  			mutex_unlock(&inode->i_mutex);
  			goto drop_new_super;
  		}
ddbcc7e8e   Paul Menage   Task Control Grou...
1428
1429
1430
  		ret = rebind_subsystems(root, root->subsys_bits);
  		if (ret == -EBUSY) {
  			mutex_unlock(&cgroup_mutex);
817929ec2   Paul Menage   Task Control Grou...
1431
  			mutex_unlock(&inode->i_mutex);
c6d57f331   Paul Menage   cgroups: support ...
1432
1433
  			free_cg_links(&tmp_cg_links);
  			goto drop_new_super;
ddbcc7e8e   Paul Menage   Task Control Grou...
1434
  		}
cf5d5941f   Ben Blum   cgroups: subsyste...
1435
1436
1437
1438
1439
  		/*
  		 * There must be no failure case after here, since rebinding
  		 * takes care of subsystems' refcounts, which are explicitly
  		 * dropped in the failure exit path.
  		 */
ddbcc7e8e   Paul Menage   Task Control Grou...
1440
1441
1442
1443
1444
  
  		/* EBUSY should be the only error here */
  		BUG_ON(ret);
  
  		list_add(&root->root_list, &roots);
817929ec2   Paul Menage   Task Control Grou...
1445
  		root_count++;
ddbcc7e8e   Paul Menage   Task Control Grou...
1446

c12f65d43   Li Zefan   cgroups: introduc...
1447
  		sb->s_root->d_fsdata = root_cgrp;
ddbcc7e8e   Paul Menage   Task Control Grou...
1448
  		root->top_cgroup.dentry = sb->s_root;
817929ec2   Paul Menage   Task Control Grou...
1449
1450
1451
  		/* Link the top cgroup in this hierarchy into all
  		 * the css_set objects */
  		write_lock(&css_set_lock);
28fd5dfc1   Li Zefan   cgroups: remove t...
1452
1453
1454
  		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
  			struct hlist_head *hhead = &css_set_table[i];
  			struct hlist_node *node;
817929ec2   Paul Menage   Task Control Grou...
1455
  			struct css_set *cg;
28fd5dfc1   Li Zefan   cgroups: remove t...
1456

c12f65d43   Li Zefan   cgroups: introduc...
1457
1458
  			hlist_for_each_entry(cg, node, hhead, hlist)
  				link_css_set(&tmp_cg_links, cg, root_cgrp);
28fd5dfc1   Li Zefan   cgroups: remove t...
1459
  		}
817929ec2   Paul Menage   Task Control Grou...
1460
1461
1462
  		write_unlock(&css_set_lock);
  
  		free_cg_links(&tmp_cg_links);
c12f65d43   Li Zefan   cgroups: introduc...
1463
1464
  		BUG_ON(!list_empty(&root_cgrp->sibling));
  		BUG_ON(!list_empty(&root_cgrp->children));
ddbcc7e8e   Paul Menage   Task Control Grou...
1465
  		BUG_ON(root->number_of_cgroups != 1);
c12f65d43   Li Zefan   cgroups: introduc...
1466
  		cgroup_populate_dir(root_cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
1467
  		mutex_unlock(&cgroup_mutex);
34f77a90f   Xiaotian Feng   cgroups: make unl...
1468
  		mutex_unlock(&inode->i_mutex);
c6d57f331   Paul Menage   cgroups: support ...
1469
1470
1471
1472
1473
  	} else {
  		/*
  		 * We re-used an existing hierarchy - the new root (if
  		 * any) is not needed
  		 */
2c6ab6d20   Paul Menage   cgroups: allow cg...
1474
  		cgroup_drop_root(opts.new_root);
cf5d5941f   Ben Blum   cgroups: subsyste...
1475
1476
  		/* no subsys rebinding, so refcounts don't change */
  		drop_parsed_module_refcounts(opts.subsys_bits);
ddbcc7e8e   Paul Menage   Task Control Grou...
1477
  	}
a3ec947c8   Sukadev Bhattiprolu   vfs: simple_set_m...
1478
  	simple_set_mnt(mnt, sb);
c6d57f331   Paul Menage   cgroups: support ...
1479
1480
  	kfree(opts.release_agent);
  	kfree(opts.name);
a3ec947c8   Sukadev Bhattiprolu   vfs: simple_set_m...
1481
  	return 0;
ddbcc7e8e   Paul Menage   Task Control Grou...
1482
1483
  
   drop_new_super:
6f5bbff9a   Al Viro   Convert obvious p...
1484
  	deactivate_locked_super(sb);
cf5d5941f   Ben Blum   cgroups: subsyste...
1485
1486
   drop_modules:
  	drop_parsed_module_refcounts(opts.subsys_bits);
c6d57f331   Paul Menage   cgroups: support ...
1487
1488
1489
   out_err:
  	kfree(opts.release_agent);
  	kfree(opts.name);
ddbcc7e8e   Paul Menage   Task Control Grou...
1490
1491
1492
1493
1494
  	return ret;
  }
  
  static void cgroup_kill_sb(struct super_block *sb) {
  	struct cgroupfs_root *root = sb->s_fs_info;
bd89aabc6   Paul Menage   Control groups: R...
1495
  	struct cgroup *cgrp = &root->top_cgroup;
ddbcc7e8e   Paul Menage   Task Control Grou...
1496
  	int ret;
71cbb949d   KOSAKI Motohiro   cgroup: list_for_...
1497
1498
  	struct cg_cgroup_link *link;
  	struct cg_cgroup_link *saved_link;
ddbcc7e8e   Paul Menage   Task Control Grou...
1499
1500
1501
1502
  
  	BUG_ON(!root);
  
  	BUG_ON(root->number_of_cgroups != 1);
bd89aabc6   Paul Menage   Control groups: R...
1503
1504
  	BUG_ON(!list_empty(&cgrp->children));
  	BUG_ON(!list_empty(&cgrp->sibling));
ddbcc7e8e   Paul Menage   Task Control Grou...
1505
1506
1507
1508
1509
1510
1511
  
  	mutex_lock(&cgroup_mutex);
  
  	/* Rebind all subsystems back to the default hierarchy */
  	ret = rebind_subsystems(root, 0);
  	/* Shouldn't be able to fail ... */
  	BUG_ON(ret);
817929ec2   Paul Menage   Task Control Grou...
1512
1513
1514
1515
1516
  	/*
  	 * Release all the links from css_sets to this hierarchy's
  	 * root cgroup
  	 */
  	write_lock(&css_set_lock);
71cbb949d   KOSAKI Motohiro   cgroup: list_for_...
1517
1518
1519
  
  	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
  				 cgrp_link_list) {
817929ec2   Paul Menage   Task Control Grou...
1520
  		list_del(&link->cg_link_list);
bd89aabc6   Paul Menage   Control groups: R...
1521
  		list_del(&link->cgrp_link_list);
817929ec2   Paul Menage   Task Control Grou...
1522
1523
1524
  		kfree(link);
  	}
  	write_unlock(&css_set_lock);
839ec5452   Paul Menage   cgroup: fix root_...
1525
1526
1527
1528
  	if (!list_empty(&root->root_list)) {
  		list_del(&root->root_list);
  		root_count--;
  	}
e5f6a8609   Li Zefan   cgroups: make roo...
1529

ddbcc7e8e   Paul Menage   Task Control Grou...
1530
  	mutex_unlock(&cgroup_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
1531
  	kill_litter_super(sb);
2c6ab6d20   Paul Menage   cgroups: allow cg...
1532
  	cgroup_drop_root(root);
ddbcc7e8e   Paul Menage   Task Control Grou...
1533
1534
1535
1536
1537
1538
1539
  }
  
  /*
   * Filesystem type descriptor for "cgroup": superblocks are obtained through
   * cgroup_get_sb() and destroyed through cgroup_kill_sb().
   */
  static struct file_system_type cgroup_fs_type = {
  	.name = "cgroup",
  	.get_sb = cgroup_get_sb,
  	.kill_sb = cgroup_kill_sb,
  };
676db4af0   Greg Kroah-Hartman   cgroupfs: create ...
1540
  /*
   * NOTE(review): not referenced in this part of the file; presumably the
   * kobject backing a cgroup entry in sysfs - confirm at its point of use.
   */
  static struct kobject *cgroup_kobj;
bd89aabc6   Paul Menage   Control groups: R...
1541
  static inline struct cgroup *__d_cgrp(struct dentry *dentry)
ddbcc7e8e   Paul Menage   Task Control Grou...
1542
1543
1544
1545
1546
1547
1548
1549
  {
  	return dentry->d_fsdata;
  }
  
  /*
   * Return the cftype stored in a control-file dentry's d_fsdata.  The caller
   * must pass a dentry whose d_fsdata actually holds a struct cftype.
   */
  static inline struct cftype *__d_cft(struct dentry *dentry)
  {
  	struct cftype *cft = dentry->d_fsdata;
  
  	return cft;
  }
a043e3b2c   Li Zefan   cgroup: fix comments
1550
1551
1552
1553
1554
1555
  /**
   * cgroup_path - generate the path of a cgroup
   * @cgrp: the cgroup in question
   * @buf: the buffer to write the path into
   * @buflen: the length of the buffer
   *
a47295e6b   Paul Menage   cgroups: make cgr...
1556
1557
1558
   * Called with cgroup_mutex held or else with an RCU-protected cgroup
   * reference.  Writes path of cgroup into buf.  Returns 0 on success,
   * -errno on error.
ddbcc7e8e   Paul Menage   Task Control Grou...
1559
   */
bd89aabc6   Paul Menage   Control groups: R...
1560
  int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
ddbcc7e8e   Paul Menage   Task Control Grou...
1561
1562
  {
  	char *start;
9a9686b63   Li Zefan   cgroup: Fix an RC...
1563
1564
1565
  	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
  						      rcu_read_lock_held() ||
  						      cgroup_lock_is_held());
ddbcc7e8e   Paul Menage   Task Control Grou...
1566

a47295e6b   Paul Menage   cgroups: make cgr...
1567
  	if (!dentry || cgrp == dummytop) {
ddbcc7e8e   Paul Menage   Task Control Grou...
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
  		/*
  		 * Inactive subsystems have no dentry for their root
  		 * cgroup
  		 */
  		strcpy(buf, "/");
  		return 0;
  	}
  
  	start = buf + buflen;
  
  	*--start = '\0';
  	for (;;) {
a47295e6b   Paul Menage   cgroups: make cgr...
1580
  		int len = dentry->d_name.len;
9a9686b63   Li Zefan   cgroup: Fix an RC...
1581

ddbcc7e8e   Paul Menage   Task Control Grou...
1582
1583
  		if ((start -= len) < buf)
  			return -ENAMETOOLONG;
9a9686b63   Li Zefan   cgroup: Fix an RC...
1584
  		memcpy(start, dentry->d_name.name, len);
bd89aabc6   Paul Menage   Control groups: R...
1585
1586
  		cgrp = cgrp->parent;
  		if (!cgrp)
ddbcc7e8e   Paul Menage   Task Control Grou...
1587
  			break;
9a9686b63   Li Zefan   cgroup: Fix an RC...
1588
1589
1590
1591
  
  		dentry = rcu_dereference_check(cgrp->dentry,
  					       rcu_read_lock_held() ||
  					       cgroup_lock_is_held());
bd89aabc6   Paul Menage   Control groups: R...
1592
  		if (!cgrp->parent)
ddbcc7e8e   Paul Menage   Task Control Grou...
1593
1594
1595
1596
1597
1598
1599
1600
  			continue;
  		if (--start < buf)
  			return -ENAMETOOLONG;
  		*start = '/';
  	}
  	memmove(buf, start, buf + buflen - start);
  	return 0;
  }
67523c48a   Ben Blum   cgroups: blkio su...
1601
  EXPORT_SYMBOL_GPL(cgroup_path);
ddbcc7e8e   Paul Menage   Task Control Grou...
1602

a043e3b2c   Li Zefan   cgroup: fix comments
1603
1604
1605
1606
  /**
   * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
   * @cgrp: the cgroup the task is attaching to
   * @tsk: the task to be attached
bbcb81d09   Paul Menage   Task Control Grou...
1607
   *
a043e3b2c   Li Zefan   cgroup: fix comments
1608
1609
   * Call holding cgroup_mutex. May take task_lock of
   * the task 'tsk' during call.
bbcb81d09   Paul Menage   Task Control Grou...
1610
   */
956db3ca0   Cliff Wickman   hotplug cpu: move...
1611
  int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
bbcb81d09   Paul Menage   Task Control Grou...
1612
1613
  {
  	int retval = 0;
2468c7234   Daisuke Nishimura   cgroup: introduce...
1614
  	struct cgroup_subsys *ss, *failed_ss = NULL;
bd89aabc6   Paul Menage   Control groups: R...
1615
  	struct cgroup *oldcgrp;
77efecd9e   Lai Jiangshan   cgroups: call fin...
1616
  	struct css_set *cg;
817929ec2   Paul Menage   Task Control Grou...
1617
  	struct css_set *newcg;
bd89aabc6   Paul Menage   Control groups: R...
1618
  	struct cgroupfs_root *root = cgrp->root;
bbcb81d09   Paul Menage   Task Control Grou...
1619
1620
  
  	/* Nothing to do if the task is already in that cgroup */
7717f7ba9   Paul Menage   cgroups: add a ba...
1621
  	oldcgrp = task_cgroup_from_root(tsk, root);
bd89aabc6   Paul Menage   Control groups: R...
1622
  	if (cgrp == oldcgrp)
bbcb81d09   Paul Menage   Task Control Grou...
1623
1624
1625
1626
  		return 0;
  
  	for_each_subsys(root, ss) {
  		if (ss->can_attach) {
be367d099   Ben Blum   cgroups: let ss->...
1627
  			retval = ss->can_attach(ss, cgrp, tsk, false);
2468c7234   Daisuke Nishimura   cgroup: introduce...
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
  			if (retval) {
  				/*
  				 * Remember on which subsystem the can_attach()
  				 * failed, so that we only call cancel_attach()
  				 * against the subsystems whose can_attach()
  				 * succeeded. (See below)
  				 */
  				failed_ss = ss;
  				goto out;
  			}
bbcb81d09   Paul Menage   Task Control Grou...
1638
1639
  		}
  	}
77efecd9e   Lai Jiangshan   cgroups: call fin...
1640
1641
1642
1643
  	task_lock(tsk);
  	cg = tsk->cgroups;
  	get_css_set(cg);
  	task_unlock(tsk);
817929ec2   Paul Menage   Task Control Grou...
1644
1645
1646
1647
  	/*
  	 * Locate or allocate a new css_set for this task,
  	 * based on its final set of cgroups
  	 */
bd89aabc6   Paul Menage   Control groups: R...
1648
  	newcg = find_css_set(cg, cgrp);
77efecd9e   Lai Jiangshan   cgroups: call fin...
1649
  	put_css_set(cg);
2468c7234   Daisuke Nishimura   cgroup: introduce...
1650
1651
1652
1653
  	if (!newcg) {
  		retval = -ENOMEM;
  		goto out;
  	}
817929ec2   Paul Menage   Task Control Grou...
1654

bbcb81d09   Paul Menage   Task Control Grou...
1655
1656
1657
  	task_lock(tsk);
  	if (tsk->flags & PF_EXITING) {
  		task_unlock(tsk);
817929ec2   Paul Menage   Task Control Grou...
1658
  		put_css_set(newcg);
2468c7234   Daisuke Nishimura   cgroup: introduce...
1659
1660
  		retval = -ESRCH;
  		goto out;
bbcb81d09   Paul Menage   Task Control Grou...
1661
  	}
817929ec2   Paul Menage   Task Control Grou...
1662
  	rcu_assign_pointer(tsk->cgroups, newcg);
bbcb81d09   Paul Menage   Task Control Grou...
1663
  	task_unlock(tsk);
817929ec2   Paul Menage   Task Control Grou...
1664
1665
1666
1667
1668
1669
1670
  	/* Update the css_set linked lists if we're using them */
  	write_lock(&css_set_lock);
  	if (!list_empty(&tsk->cg_list)) {
  		list_del(&tsk->cg_list);
  		list_add(&tsk->cg_list, &newcg->tasks);
  	}
  	write_unlock(&css_set_lock);
bbcb81d09   Paul Menage   Task Control Grou...
1671
  	for_each_subsys(root, ss) {
e18f6318e   Paul Jackson   cgroup brace codi...
1672
  		if (ss->attach)
be367d099   Ben Blum   cgroups: let ss->...
1673
  			ss->attach(ss, cgrp, oldcgrp, tsk, false);
bbcb81d09   Paul Menage   Task Control Grou...
1674
  	}
bd89aabc6   Paul Menage   Control groups: R...
1675
  	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
bbcb81d09   Paul Menage   Task Control Grou...
1676
  	synchronize_rcu();
817929ec2   Paul Menage   Task Control Grou...
1677
  	put_css_set(cg);
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
1678
1679
1680
1681
1682
  
  	/*
  	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
  	 * is no longer empty.
  	 */
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
1683
  	cgroup_wakeup_rmdir_waiter(cgrp);
2468c7234   Daisuke Nishimura   cgroup: introduce...
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
  out:
  	if (retval) {
  		for_each_subsys(root, ss) {
  			if (ss == failed_ss)
  				/*
  				 * This subsystem was the one that failed the
  				 * can_attach() check earlier, so we don't need
  				 * to call cancel_attach() against it or any
  				 * remaining subsystems.
  				 */
  				break;
  			if (ss->cancel_attach)
  				ss->cancel_attach(ss, cgrp, tsk, false);
  		}
  	}
  	return retval;
bbcb81d09   Paul Menage   Task Control Grou...
1700
  }
d7926ee38   Sridhar Samudrala   cgroups: Add an A...
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
  /**
   * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
   * @tsk: the task to be attached
   *
   * Takes the cgroup lock and, for every active hierarchy, attaches @tsk to
   * the cgroup that 'current' belongs to in that hierarchy.  Stops at the
   * first failure and returns its error code; returns 0 if every attach
   * succeeded.
   */
  int cgroup_attach_task_current_cg(struct task_struct *tsk)
  {
  	struct cgroupfs_root *root;
  	int retval = 0;
  
  	cgroup_lock();
  	for_each_active_root(root) {
  		struct cgroup *cur_cg = task_cgroup_from_root(current, root);
  
  		retval = cgroup_attach_task(cur_cg, tsk);
  		if (retval)
  			break;
  	}
  	cgroup_unlock();
  
  	return retval;
  }
  EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
bbcb81d09   Paul Menage   Task Control Grou...
1723
  /*
af351026a   Paul Menage   cgroup files: tur...
1724
1725
   * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
   * held. May take task_lock of task
bbcb81d09   Paul Menage   Task Control Grou...
1726
   */
af351026a   Paul Menage   cgroup files: tur...
1727
  static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
bbcb81d09   Paul Menage   Task Control Grou...
1728
  {
bbcb81d09   Paul Menage   Task Control Grou...
1729
  	struct task_struct *tsk;
c69e8d9c0   David Howells   CRED: Use RCU to ...
1730
  	const struct cred *cred = current_cred(), *tcred;
bbcb81d09   Paul Menage   Task Control Grou...
1731
  	int ret;
bbcb81d09   Paul Menage   Task Control Grou...
1732
1733
  	if (pid) {
  		rcu_read_lock();
73507f335   Pavel Emelyanov   Handle pid namesp...
1734
  		tsk = find_task_by_vpid(pid);
bbcb81d09   Paul Menage   Task Control Grou...
1735
1736
1737
1738
  		if (!tsk || tsk->flags & PF_EXITING) {
  			rcu_read_unlock();
  			return -ESRCH;
  		}
bbcb81d09   Paul Menage   Task Control Grou...
1739

c69e8d9c0   David Howells   CRED: Use RCU to ...
1740
1741
1742
1743
1744
  		tcred = __task_cred(tsk);
  		if (cred->euid &&
  		    cred->euid != tcred->uid &&
  		    cred->euid != tcred->suid) {
  			rcu_read_unlock();
bbcb81d09   Paul Menage   Task Control Grou...
1745
1746
  			return -EACCES;
  		}
c69e8d9c0   David Howells   CRED: Use RCU to ...
1747
1748
  		get_task_struct(tsk);
  		rcu_read_unlock();
bbcb81d09   Paul Menage   Task Control Grou...
1749
1750
1751
1752
  	} else {
  		tsk = current;
  		get_task_struct(tsk);
  	}
956db3ca0   Cliff Wickman   hotplug cpu: move...
1753
  	ret = cgroup_attach_task(cgrp, tsk);
bbcb81d09   Paul Menage   Task Control Grou...
1754
1755
1756
  	put_task_struct(tsk);
  	return ret;
  }
af351026a   Paul Menage   cgroup files: tur...
1757
1758
1759
1760
1761
1762
1763
1764
1765
  /*
   * Write handler taking a pid: lock the cgroup (failing with -ENODEV if it
   * has been removed), attach the task identified by @pid, and unlock.
   */
  static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
  {
  	int err = -ENODEV;
  
  	if (cgroup_lock_live_group(cgrp)) {
  		err = attach_task_by_pid(cgrp, pid);
  		cgroup_unlock();
  	}
  	return err;
  }
e788e066c   Paul Menage   cgroup files: mov...
1766
1767
1768
1769
  /**
   * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
   * @cgrp: the cgroup to be checked for liveness
   *
84eea8428   Paul Menage   cgroups: misc cle...
1770
1771
   * On success, returns true; the lock should be later released with
   * cgroup_unlock(). On failure returns false with no lock held.
e788e066c   Paul Menage   cgroup files: mov...
1772
   */
84eea8428   Paul Menage   cgroups: misc cle...
1773
  bool cgroup_lock_live_group(struct cgroup *cgrp)
e788e066c   Paul Menage   cgroup files: mov...
1774
1775
1776
1777
1778
1779
1780
1781
  {
  	mutex_lock(&cgroup_mutex);
  	if (cgroup_is_removed(cgrp)) {
  		mutex_unlock(&cgroup_mutex);
  		return false;
  	}
  	return true;
  }
67523c48a   Ben Blum   cgroups: blkio su...
1782
  EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
e788e066c   Paul Menage   cgroup files: mov...
1783
1784
1785
1786
1787
1788
1789
1790
  
  /*
   * Store a new release agent path for the hierarchy that @cgrp belongs to.
   * Returns -ENODEV if @cgrp has been removed, 0 otherwise.
   */
  static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
  				      const char *buffer)
  {
  	/* the destination must be able to hold any PATH_MAX-sized path */
  	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
  	if (!cgroup_lock_live_group(cgrp))
  		return -ENODEV;
  	/* bound the copy by the destination rather than trusting the caller */
  	strlcpy(cgrp->root->release_agent_path, buffer,
  		sizeof(cgrp->root->release_agent_path));
  	cgroup_unlock();
  	return 0;
  }
  
  /*
   * Emit the hierarchy's release agent path followed by a newline into @seq.
   * Returns -ENODEV if @cgrp has been removed, 0 otherwise.
   */
  static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
  				     struct seq_file *seq)
  {
  	if (!cgroup_lock_live_group(cgrp))
  		return -ENODEV;
  	seq_puts(seq, cgrp->root->release_agent_path);
  	seq_putc(seq, '\n');
  	cgroup_unlock();
  	return 0;
  }
84eea8428   Paul Menage   cgroups: misc cle...
1806
1807
  /*
   * A buffer size big enough for numbers or short strings.  Used for the
   * on-stack buffers of the u64/s64 read and write helpers and for the
   * default local buffer of cgroup_write_string().
   */
  #define CGROUP_LOCAL_BUFFER_SIZE 64
e73d2c61d   Paul Menage   CGroups _s64 file...
1808
  /*
   * Parse a user-supplied integer and pass it to the file's write_u64()
   * handler if it has one, otherwise to write_s64().
   *
   * Returns @nbytes on success, -EINVAL for empty, whitespace-only or
   * non-numeric input, -E2BIG if the input does not fit the local buffer,
   * -EFAULT if the user buffer cannot be read, or the handler's error.
   */
  static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
  				struct file *file,
  				const char __user *userbuf,
  				size_t nbytes, loff_t *unused_ppos)
  {
  	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
  	int retval = 0;
  	char *str;
  	char *end;
  
  	if (!nbytes)
  		return -EINVAL;
  	if (nbytes >= sizeof(buffer))
  		return -E2BIG;
  	if (copy_from_user(buffer, userbuf, nbytes))
  		return -EFAULT;
  
  	buffer[nbytes] = 0;     /* nul-terminate */
  	str = strstrip(buffer);
  	if (cft->write_u64) {
  		u64 val = simple_strtoull(str, &end, 0);
  
  		/* reject trailing garbage and input with no digits at all */
  		if (end == str || *end)
  			return -EINVAL;
  		retval = cft->write_u64(cgrp, cft, val);
  	} else {
  		s64 val = simple_strtoll(str, &end, 0);
  
  		if (end == str || *end)
  			return -EINVAL;
  		retval = cft->write_s64(cgrp, cft, val);
  	}
  	if (!retval)
  		retval = nbytes;
  	return retval;
  }
db3b14978   Paul Menage   cgroup files: add...
1840
1841
1842
1843
1844
  /*
   * Copy a user-supplied string into kernel memory, strip surrounding
   * whitespace and pass it to the file's write_string() handler.
   *
   * The accepted length is bounded by cft->max_write_len, or by the local
   * buffer size when that field is zero.  Inputs too large for the on-stack
   * buffer are staged in a temporary heap buffer.
   *
   * Returns @nbytes on success, -E2BIG if the input is too long, -ENOMEM if
   * the temporary buffer cannot be allocated, -EFAULT if the user buffer
   * cannot be read, or the handler's error.
   */
  static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
  				   struct file *file,
  				   const char __user *userbuf,
  				   size_t nbytes, loff_t *unused_ppos)
  {
  	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
  	int retval = 0;
  	size_t max_bytes = cft->max_write_len;
  	char *buffer = local_buffer;
  
  	if (!max_bytes)
  		max_bytes = sizeof(local_buffer) - 1;
  	if (nbytes >= max_bytes)
  		return -E2BIG;
  	/* Allocate a dynamic buffer if we need one */
  	if (nbytes >= sizeof(local_buffer)) {
  		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
  		if (buffer == NULL)
  			return -ENOMEM;
  	}
  	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
  		retval = -EFAULT;
  		goto out;
  	}
  
  	buffer[nbytes] = 0;     /* nul-terminate */
  	retval = cft->write_string(cgrp, cft, strstrip(buffer));
  	if (!retval)
  		retval = nbytes;
  out:
  	/* only the heap buffer needs freeing */
  	if (buffer != local_buffer)
  		kfree(buffer);
  	return retval;
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
1874
1875
1876
1877
  /*
   * write() entry point for cgroup control files.  Dispatches to the first
   * handler the file's cftype provides, in this order: write, write_u64 or
   * write_s64, write_string, trigger.
   *
   * Returns -ENODEV if the owning cgroup has been removed, -EINVAL if the
   * file has no write handler, otherwise the handler's result (a successful
   * trigger reports @nbytes).
   */
  static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
  						size_t nbytes, loff_t *ppos)
  {
  	struct cftype *cft = __d_cft(file->f_dentry);
  	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  
  	if (cgroup_is_removed(cgrp))
  		return -ENODEV;
  	if (cft->write)
  		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
  	if (cft->write_u64 || cft->write_s64)
  		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
  	if (cft->write_string)
  		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
  	if (cft->trigger) {
  		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
  		return ret ? ret : nbytes;
  	}
  	return -EINVAL;
  }
f4c753b7e   Paul Menage   CGroup API files:...
1894
1895
1896
1897
  /*
   * Read helper for files with a read_u64() handler: format the value as an
   * unsigned decimal followed by a newline and copy it out to user space.
   */
  static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
  			       struct file *file,
  			       char __user *buf, size_t nbytes,
  			       loff_t *ppos)
  {
  	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
  	u64 val = cft->read_u64(cgrp, cft);
  	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
  
  	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
  }
e73d2c61d   Paul Menage   CGroups _s64 file...
1906
1907
1908
1909
1910
  /*
   * Read helper for files with a read_s64() handler: format the value as a
   * signed decimal followed by a newline and copy it out to user space.
   */
  static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
  			       struct file *file,
  			       char __user *buf, size_t nbytes,
  			       loff_t *ppos)
  {
  	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
  	s64 val = cft->read_s64(cgrp, cft);
  	int len = sprintf(tmp, "%lld\n", (long long) val);
  
  	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
1918
1919
1920
1921
  /*
   * read() entry point for cgroup control files.  Dispatches to the first
   * handler the file's cftype provides, in this order: read, read_u64,
   * read_s64.
   *
   * Returns -ENODEV if the owning cgroup has been removed, -EINVAL if the
   * file has none of these handlers, otherwise the handler's result.
   */
  static ssize_t cgroup_file_read(struct file *file, char __user *buf,
  				   size_t nbytes, loff_t *ppos)
  {
  	struct cftype *cft = __d_cft(file->f_dentry);
  	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  
  	if (cgroup_is_removed(cgrp))
  		return -ENODEV;
  
  	if (cft->read)
  		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
  	if (cft->read_u64)
  		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
  	if (cft->read_s64)
  		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
  	return -EINVAL;
  }
917965696   Paul Menage   CGroup API files:...
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
  /*
   * seqfile ops/methods for returning structured data. Currently just
   * supports string->u64 maps, but can be extended in future.
   */
  
  /* State stored as the seq_file's private data for a seqfile-backed file. */
  struct cgroup_seqfile_state {
  	/* control file being read; supplies read_map / read_seq_string */
  	struct cftype *cft;
  	/* cgroup handed to those callbacks */
  	struct cgroup *cgroup;
  };
  
  /*
   * Map "fill" callback: append one "key value" line to the seq_file held in
   * cb->state and return seq_printf()'s result.
   */
  static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
  {
  	struct seq_file *sf = cb->state;
  	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
  }
  
  static int cgroup_seqfile_show(struct seq_file *m, void *arg)
  {
  	struct cgroup_seqfile_state *state = m->private;
  	struct cftype *cft = state->cft;
29486df32   Serge E. Hallyn   cgroups: introduc...
1956
1957
1958
1959
1960
1961
1962
1963
  	if (cft->read_map) {
  		struct cgroup_map_cb cb = {
  			.fill = cgroup_map_add,
  			.state = m,
  		};
  		return cft->read_map(state->cgroup, cft, &cb);
  	}
  	return cft->read_seq_string(state->cgroup, cft, m);
917965696   Paul Menage   CGroup API files:...
1964
  }
96930a636   Adrian Bunk   make cgroup_seqfi...
1965
  static int cgroup_seqfile_release(struct inode *inode, struct file *file)
917965696   Paul Menage   CGroup API files:...
1966
1967
1968
1969
1970
  {
  	struct seq_file *seq = file->private_data;
  	kfree(seq->private);
  	return single_release(inode, file);
  }
828c09509   Alexey Dobriyan   const: constify r...
1971
  static const struct file_operations cgroup_seqfile_operations = {
917965696   Paul Menage   CGroup API files:...
1972
  	.read = seq_read,
e788e066c   Paul Menage   cgroup files: mov...
1973
  	.write = cgroup_file_write,
917965696   Paul Menage   CGroup API files:...
1974
1975
1976
  	.llseek = seq_lseek,
  	.release = cgroup_seqfile_release,
  };
ddbcc7e8e   Paul Menage   Task Control Grou...
1977
1978
1979
1980
1981
1982
1983
1984
  static int cgroup_file_open(struct inode *inode, struct file *file)
  {
  	int err;
  	struct cftype *cft;
  
  	err = generic_file_open(inode, file);
  	if (err)
  		return err;
ddbcc7e8e   Paul Menage   Task Control Grou...
1985
  	cft = __d_cft(file->f_dentry);
75139b827   Li Zefan   cgroups: remove s...
1986

29486df32   Serge E. Hallyn   cgroups: introduc...
1987
  	if (cft->read_map || cft->read_seq_string) {
917965696   Paul Menage   CGroup API files:...
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
  		struct cgroup_seqfile_state *state =
  			kzalloc(sizeof(*state), GFP_USER);
  		if (!state)
  			return -ENOMEM;
  		state->cft = cft;
  		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
  		file->f_op = &cgroup_seqfile_operations;
  		err = single_open(file, cgroup_seqfile_show, state);
  		if (err < 0)
  			kfree(state);
  	} else if (cft->open)
ddbcc7e8e   Paul Menage   Task Control Grou...
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
  		err = cft->open(inode, file);
  	else
  		err = 0;
  
  	return err;
  }
  
  /*
   * ->release for plain cgroup control files: forward to the cftype's
   * ->release hook if it has one, otherwise report success.
   */
  static int cgroup_file_release(struct inode *inode, struct file *file)
  {
  	struct cftype *cft = __d_cft(file->f_dentry);

  	if (!cft->release)
  		return 0;

  	return cft->release(inode, file);
  }
  
  /*
   * cgroup_rename - Only allow simple rename of directories in place.
   */
  static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
  			    struct inode *new_dir, struct dentry *new_dentry)
  {
  	int ret;

  	/* The checks are ordered; the first one that fails picks the errno */
  	if (!S_ISDIR(old_dentry->d_inode->i_mode))
  		ret = -ENOTDIR;		/* only directories may be renamed */
  	else if (new_dentry->d_inode)
  		ret = -EEXIST;		/* target name is already in use */
  	else if (old_dir != new_dir)
  		ret = -EIO;		/* no moving between parents */
  	else
  		ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);

  	return ret;
  }
828c09509   Alexey Dobriyan   const: constify r...
2028
  static const struct file_operations cgroup_file_operations = {
ddbcc7e8e   Paul Menage   Task Control Grou...
2029
2030
2031
2032
2033
2034
  	.read = cgroup_file_read,
  	.write = cgroup_file_write,
  	.llseek = generic_file_llseek,
  	.open = cgroup_file_open,
  	.release = cgroup_file_release,
  };
6e1d5dcc2   Alexey Dobriyan   const: mark remai...
2035
  static const struct inode_operations cgroup_dir_inode_operations = {
ddbcc7e8e   Paul Menage   Task Control Grou...
2036
2037
2038
2039
2040
  	.lookup = simple_lookup,
  	.mkdir = cgroup_mkdir,
  	.rmdir = cgroup_rmdir,
  	.rename = cgroup_rename,
  };
0dea11687   Kirill A. Shutemov   cgroup: implement...
2041
2042
2043
2044
2045
2046
2047
2048
2049
  /*
   * Check if a file is a control file
   */
  static inline struct cftype *__file_cft(struct file *file)
  {
  	/* A control file is identified by its inode's file operations */
  	if (file->f_dentry->d_inode->i_fop == &cgroup_file_operations)
  		return __d_cft(file->f_dentry);

  	return ERR_PTR(-EINVAL);
  }
099fca322   Li Zefan   cgroups: show cor...
2050
  /*
   * Create the inode for a new cgroup directory or control file and attach
   * it to @dentry.
   *
   * @dentry: dentry to instantiate; must not already have an inode
   * @mode:   file type and permission bits for the new inode
   * @sb:     superblock passed to cgroup_new_inode()
   *
   * For a directory the new inode's i_mutex is taken (I_MUTEX_CHILD) and is
   * still held on return. An extra dentry reference is taken on success.
   *
   * Returns 0, -ENOENT for a NULL dentry, -EEXIST if the dentry already has
   * an inode, or -ENOMEM if no inode could be allocated.
   */
  static int cgroup_create_file(struct dentry *dentry, mode_t mode,
  				struct super_block *sb)
  {
  	static const struct dentry_operations cgroup_dops = {
  		.d_iput = cgroup_diput,
  	};
  	struct inode *inode;

  	if (!dentry)
  		return -ENOENT;
  	if (dentry->d_inode)
  		return -EEXIST;

  	inode = cgroup_new_inode(mode, sb);
  	if (!inode)
  		return -ENOMEM;

  	switch (mode & S_IFMT) {
  	case S_IFDIR:
  		inode->i_op = &cgroup_dir_inode_operations;
  		inode->i_fop = &simple_dir_operations;

  		/* start off with i_nlink == 2 (for "." entry) */
  		inc_nlink(inode);

  		/*
  		 * Start with the directory inode held, so that we can
  		 * populate it without racing with another mkdir.
  		 */
  		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
  		break;
  	case S_IFREG:
  		inode->i_size = 0;
  		inode->i_fop = &cgroup_file_operations;
  		break;
  	default:
  		break;
  	}

  	dentry->d_op = &cgroup_dops;
  	d_instantiate(dentry, inode);
  	dget(dentry);	/* Extra count - pin the dentry in core */
  	return 0;
  }
  
  /*
a043e3b2c   Li Zefan   cgroup: fix comments
2089
2090
2091
2092
2093
   * cgroup_create_dir - create a directory for an object.
   * @cgrp: the cgroup we create the directory for. It must have a valid
   *        ->parent field. And we are going to fill its ->dentry field.
   * @dentry: dentry of the new cgroup
   * @mode: mode to set on new directory.
ddbcc7e8e   Paul Menage   Task Control Grou...
2094
   */
bd89aabc6   Paul Menage   Control groups: R...
2095
  static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
099fca322   Li Zefan   cgroups: show cor...
2096
  				mode_t mode)
ddbcc7e8e   Paul Menage   Task Control Grou...
2097
2098
2099
  {
  	struct dentry *parent;
  	int error = 0;
bd89aabc6   Paul Menage   Control groups: R...
2100
2101
  	parent = cgrp->parent->dentry;
  	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
ddbcc7e8e   Paul Menage   Task Control Grou...
2102
  	if (!error) {
bd89aabc6   Paul Menage   Control groups: R...
2103
  		dentry->d_fsdata = cgrp;
ddbcc7e8e   Paul Menage   Task Control Grou...
2104
  		inc_nlink(parent->d_inode);
a47295e6b   Paul Menage   cgroups: make cgr...
2105
  		rcu_assign_pointer(cgrp->dentry, dentry);
ddbcc7e8e   Paul Menage   Task Control Grou...
2106
2107
2108
2109
2110
2111
  		dget(dentry);
  	}
  	dput(dentry);
  
  	return error;
  }
099fca322   Li Zefan   cgroups: show cor...
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
  /**
   * cgroup_file_mode - deduce file mode of a control file
   * @cft: the control file in question
   *
   * returns cft->mode if ->mode is not 0
   * returns S_IRUGO|S_IWUSR if it has both a read and a write handler
   * returns S_IRUGO if it has only a read handler
   * returns S_IWUSR if it has only a write handler
   */
  static mode_t cgroup_file_mode(const struct cftype *cft)
  {
  	int readable, writable;

  	/* An explicit mode always wins over the deduced one */
  	if (cft->mode)
  		return cft->mode;

  	readable = cft->read || cft->read_u64 || cft->read_s64 ||
  		   cft->read_map || cft->read_seq_string;
  	writable = cft->write || cft->write_u64 || cft->write_s64 ||
  		   cft->write_string || cft->trigger;

  	return (readable ? S_IRUGO : 0) | (writable ? S_IWUSR : 0);
  }
bd89aabc6   Paul Menage   Control groups: R...
2138
  int cgroup_add_file(struct cgroup *cgrp,
ddbcc7e8e   Paul Menage   Task Control Grou...
2139
2140
2141
  		       struct cgroup_subsys *subsys,
  		       const struct cftype *cft)
  {
bd89aabc6   Paul Menage   Control groups: R...
2142
  	struct dentry *dir = cgrp->dentry;
ddbcc7e8e   Paul Menage   Task Control Grou...
2143
2144
  	struct dentry *dentry;
  	int error;
099fca322   Li Zefan   cgroups: show cor...
2145
  	mode_t mode;
ddbcc7e8e   Paul Menage   Task Control Grou...
2146
2147
  
  	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
bd89aabc6   Paul Menage   Control groups: R...
2148
  	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
ddbcc7e8e   Paul Menage   Task Control Grou...
2149
2150
2151
2152
2153
2154
2155
  		strcpy(name, subsys->name);
  		strcat(name, ".");
  	}
  	strcat(name, cft->name);
  	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
  	dentry = lookup_one_len(name, dir, strlen(name));
  	if (!IS_ERR(dentry)) {
099fca322   Li Zefan   cgroups: show cor...
2156
2157
  		mode = cgroup_file_mode(cft);
  		error = cgroup_create_file(dentry, mode | S_IFREG,
bd89aabc6   Paul Menage   Control groups: R...
2158
  						cgrp->root->sb);
ddbcc7e8e   Paul Menage   Task Control Grou...
2159
2160
2161
2162
2163
2164
2165
  		if (!error)
  			dentry->d_fsdata = (void *)cft;
  		dput(dentry);
  	} else
  		error = PTR_ERR(dentry);
  	return error;
  }
e6a1105ba   Ben Blum   cgroups: subsyste...
2166
  EXPORT_SYMBOL_GPL(cgroup_add_file);
ddbcc7e8e   Paul Menage   Task Control Grou...
2167

bd89aabc6   Paul Menage   Control groups: R...
2168
  /*
   * cgroup_add_files - create an array of control files in a cgroup
   * @cgrp: cgroup whose directory receives the files
   * @subsys: owning subsystem, passed through to cgroup_add_file()
   * @cft: array of control file descriptions
   * @count: number of entries in @cft
   *
   * Stops at the first failure and returns its error; files created
   * before the failure are not removed here. Returns 0 if all succeed.
   */
  int cgroup_add_files(struct cgroup *cgrp,
  			struct cgroup_subsys *subsys,
  			const struct cftype cft[],
  			int count)
  {
  	int i = 0;

  	while (i < count) {
  		int err = cgroup_add_file(cgrp, subsys, cft + i);

  		if (err)
  			return err;
  		i++;
  	}
  	return 0;
  }
e6a1105ba   Ben Blum   cgroups: subsyste...
2181
  EXPORT_SYMBOL_GPL(cgroup_add_files);
ddbcc7e8e   Paul Menage   Task Control Grou...
2182

a043e3b2c   Li Zefan   cgroup: fix comments
2183
2184
2185
2186
2187
2188
  /**
   * cgroup_task_count - count the number of tasks in a cgroup.
   * @cgrp: the cgroup in question
   *
   * Return the number of tasks in the cgroup.
   */
bd89aabc6   Paul Menage   Control groups: R...
2189
  int cgroup_task_count(const struct cgroup *cgrp)
bbcb81d09   Paul Menage   Task Control Grou...
2190
2191
  {
  	int count = 0;
71cbb949d   KOSAKI Motohiro   cgroup: list_for_...
2192
  	struct cg_cgroup_link *link;
817929ec2   Paul Menage   Task Control Grou...
2193
2194
  
  	read_lock(&css_set_lock);
71cbb949d   KOSAKI Motohiro   cgroup: list_for_...
2195
  	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
146aa1bd0   Lai Jiangshan   cgroups: fix prob...
2196
  		count += atomic_read(&link->cg->refcount);
817929ec2   Paul Menage   Task Control Grou...
2197
2198
  	}
  	read_unlock(&css_set_lock);
bbcb81d09   Paul Menage   Task Control Grou...
2199
2200
2201
2202
  	return count;
  }
  
  /*
817929ec2   Paul Menage   Task Control Grou...
2203
2204
2205
   * Advance a list_head iterator.  The iterator should be positioned at
   * the start of a css_set
   */
bd89aabc6   Paul Menage   Control groups: R...
2206
  static void cgroup_advance_iter(struct cgroup *cgrp,
7717f7ba9   Paul Menage   cgroups: add a ba...
2207
  				struct cgroup_iter *it)
817929ec2   Paul Menage   Task Control Grou...
2208
2209
2210
2211
2212
2213
2214
2215
  {
  	struct list_head *l = it->cg_link;
  	struct cg_cgroup_link *link;
  	struct css_set *cg;
  
  	/* Advance to the next non-empty css_set */
  	do {
  		l = l->next;
bd89aabc6   Paul Menage   Control groups: R...
2216
  		if (l == &cgrp->css_sets) {
817929ec2   Paul Menage   Task Control Grou...
2217
2218
2219
  			it->cg_link = NULL;
  			return;
  		}
bd89aabc6   Paul Menage   Control groups: R...
2220
  		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
817929ec2   Paul Menage   Task Control Grou...
2221
2222
2223
2224
2225
  		cg = link->cg;
  	} while (list_empty(&cg->tasks));
  	it->cg_link = l;
  	it->task = cg->tasks.next;
  }
31a7df01f   Cliff Wickman   cgroups: mechanis...
2226
2227
2228
2229
2230
2231
2232
2233
2234
  /*
   * To reduce the fork() overhead for systems that are not actually
   * using their cgroups capability, we don't maintain the lists running
   * through each css_set to its tasks until we see the list actually
   * used - in other words after the first call to cgroup_iter_start().
   *
   * The tasklist_lock is not held here, as do_each_thread() and
   * while_each_thread() are protected by RCU.
   */
3df91fe30   Adrian Bunk   make cgroup_enabl...
2235
  static void cgroup_enable_task_cg_lists(void)
31a7df01f   Cliff Wickman   cgroups: mechanis...
2236
2237
2238
2239
2240
2241
  {
  	struct task_struct *p, *g;
  	write_lock(&css_set_lock);
  	use_task_css_set_links = 1;
  	do_each_thread(g, p) {
  		task_lock(p);
0e04388f0   Li Zefan   cgroup: fix a rac...
2242
2243
2244
2245
2246
2247
  		/*
  		 * We should check if the process is exiting, otherwise
  		 * it will race with cgroup_exit() in that the list
  		 * entry won't be deleted though the process has exited.
  		 */
  		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
31a7df01f   Cliff Wickman   cgroups: mechanis...
2248
2249
2250
2251
2252
  			list_add(&p->cg_list, &p->cgroups->tasks);
  		task_unlock(p);
  	} while_each_thread(g, p);
  	write_unlock(&css_set_lock);
  }
bd89aabc6   Paul Menage   Control groups: R...
2253
  void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
817929ec2   Paul Menage   Task Control Grou...
2254
2255
2256
2257
2258
2259
  {
  	/*
  	 * The first time anyone tries to iterate across a cgroup,
  	 * we need to enable the list linking each css_set to its
  	 * tasks, and fix up all existing tasks.
  	 */
31a7df01f   Cliff Wickman   cgroups: mechanis...
2260
2261
  	if (!use_task_css_set_links)
  		cgroup_enable_task_cg_lists();
817929ec2   Paul Menage   Task Control Grou...
2262
  	read_lock(&css_set_lock);
bd89aabc6   Paul Menage   Control groups: R...
2263
2264
  	it->cg_link = &cgrp->css_sets;
  	cgroup_advance_iter(cgrp, it);
817929ec2   Paul Menage   Task Control Grou...
2265
  }
bd89aabc6   Paul Menage   Control groups: R...
2266
  struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
817929ec2   Paul Menage   Task Control Grou...
2267
2268
2269
2270
  					struct cgroup_iter *it)
  {
  	struct task_struct *res;
  	struct list_head *l = it->task;
2019f634c   Lai Jiangshan   cgroups: fix cgro...
2271
  	struct cg_cgroup_link *link;
817929ec2   Paul Menage   Task Control Grou...
2272
2273
2274
2275
2276
2277
2278
  
  	/* If the iterator cg is NULL, we have no tasks */
  	if (!it->cg_link)
  		return NULL;
  	res = list_entry(l, struct task_struct, cg_list);
  	/* Advance iterator to find next entry */
  	l = l->next;
2019f634c   Lai Jiangshan   cgroups: fix cgro...
2279
2280
  	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
  	if (l == &link->cg->tasks) {
817929ec2   Paul Menage   Task Control Grou...
2281
2282
  		/* We reached the end of this task list - move on to
  		 * the next cg_cgroup_link */
bd89aabc6   Paul Menage   Control groups: R...
2283
  		cgroup_advance_iter(cgrp, it);
817929ec2   Paul Menage   Task Control Grou...
2284
2285
2286
2287
2288
  	} else {
  		it->task = l;
  	}
  	return res;
  }
bd89aabc6   Paul Menage   Control groups: R...
2289
  void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
817929ec2   Paul Menage   Task Control Grou...
2290
2291
2292
  {
  	read_unlock(&css_set_lock);
  }
31a7df01f   Cliff Wickman   cgroups: mechanis...
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
  /*
   * Return 1 if @t1 started after @time, 0 if it started before. On a tie
   * the task pointers @t1 and @t2 are compared instead.
   */
  static inline int started_after_time(struct task_struct *t1,
  				     struct timespec *time,
  				     struct task_struct *t2)
  {
  	int cmp = timespec_compare(&t1->start_time, time);

  	if (cmp != 0)
  		return cmp > 0;

  	/*
  	 * Same start time: arbitrarily treat the lower pointer value as
  	 * having started first. t2 may have exited by now and is never
  	 * dereferenced here; the address alone is enough to tell two
  	 * (effectively) simultaneous tasks apart.
  	 */
  	return t1 > t2;
  }
  
  /*
   * This function is a callback from heap_insert() and is used to order
   * the heap.
   * In this case we order the heap in descending task start time.
   */
  static inline int started_after(void *p1, void *p2)
  {
  	struct task_struct *later = p1;
  	struct task_struct *earlier = p2;

  	/* Compare against the second task's own start time */
  	return started_after_time(later, &earlier->start_time, earlier);
  }
  
  /**
   * cgroup_scan_tasks - iterate through all the tasks in a cgroup
   * @scan: struct cgroup_scanner containing arguments for the scan
   *
   * Arguments include pointers to callback functions test_task() and
   * process_task().
   * Iterate through all the tasks in a cgroup, calling test_task() for each,
   * and if it returns true, call process_task() for it also.
   * The test_task pointer may be NULL, meaning always true (select all tasks).
   * Effectively duplicates cgroup_iter_{start,next,end}()
   * but does not lock css_set_lock for the call to process_task().
   * The struct cgroup_scanner may be embedded in any structure of the caller's
   * creation.
   * It is guaranteed that process_task() will act on every task that
   * is a member of the cgroup for the duration of this call. This
   * function may or may not call process_task() for tasks that exit
   * or move to a different cgroup during the call, or are forked or
   * move into the cgroup during the call.
   *
   * Note that test_task() may be called with locks held, and may in some
   * situations be called multiple times for the same task, so it should
   * be cheap.
   * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
   * pre-allocated and will be used for heap operations (and its "gt" member will
   * be overwritten), else a temporary heap will be used (allocation of which
   * may cause this function to fail).
   */
  int cgroup_scan_tasks(struct cgroup_scanner *scan)
  {
  	int retval, i;
  	struct cgroup_iter it;
  	struct task_struct *p, *dropped;
  	/* Never dereference latest_task, since it's not refcounted */
  	struct task_struct *latest_task = NULL;
  	struct ptr_heap tmp_heap;
  	struct ptr_heap *heap;
  	struct timespec latest_time = { 0, 0 };
  
  	if (scan->heap) {
  		/* The caller supplied our heap and pre-allocated its memory */
  		heap = scan->heap;
  		heap->gt = &started_after;
  	} else {
  		/* We need to allocate our own heap memory */
  		heap = &tmp_heap;
  		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
  		if (retval)
  			/* cannot allocate the heap */
  			return retval;
  	}
  
   again:
  	/*
  	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
  	 * to determine which are of interest, and using the scanner's
  	 * "process_task" callback to process any of them that need an update.
  	 * Since we don't want to hold any locks during the task updates,
  	 * gather tasks to be processed in a heap structure.
  	 * The heap is sorted by descending task start time.
  	 * If the statically-sized heap fills up, we overflow tasks that
  	 * started later, and in future iterations only consider tasks that
  	 * started after the latest task in the previous pass. This
  	 * guarantees forward progress and that we don't miss any tasks.
  	 */
  	heap->size = 0;
  	cgroup_iter_start(scan->cg, &it);
  	while ((p = cgroup_iter_next(scan->cg, &it))) {
  		/*
  		 * Only affect tasks that qualify per the caller's callback,
  		 * if he provided one
  		 */
  		if (scan->test_task && !scan->test_task(p, scan))
  			continue;
  		/*
  		 * Only process tasks that started after the last task
  		 * we processed
  		 */
  		if (!started_after_time(p, &latest_time, latest_task))
  			continue;
  		dropped = heap_insert(heap, p);
  		if (dropped == NULL) {
  			/*
  			 * The new task was inserted; the heap wasn't
  			 * previously full
  			 */
  			get_task_struct(p);
  		} else if (dropped != p) {
  			/*
  			 * The new task was inserted, and pushed out a
  			 * different task
  			 */
  			get_task_struct(p);
  			put_task_struct(dropped);
  		}
  		/*
  		 * Else the new task was newer than anything already in
  		 * the heap and wasn't inserted
  		 */
  	}
  	cgroup_iter_end(scan->cg, &it);
  
  	if (heap->size) {
  		for (i = 0; i < heap->size; i++) {
4fe91d518   Paul Jackson   cgroup: fix spars...
2430
  			struct task_struct *q = heap->ptrs[i];
31a7df01f   Cliff Wickman   cgroups: mechanis...
2431
  			if (i == 0) {
4fe91d518   Paul Jackson   cgroup: fix spars...
2432
2433
  				latest_time = q->start_time;
  				latest_task = q;
31a7df01f   Cliff Wickman   cgroups: mechanis...
2434
2435
  			}
  			/* Process the task per the caller's callback */
4fe91d518   Paul Jackson   cgroup: fix spars...
2436
2437
  			scan->process_task(q, scan);
  			put_task_struct(q);
31a7df01f   Cliff Wickman   cgroups: mechanis...
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
  		}
  		/*
  		 * If we had to process any tasks at all, scan again
  		 * in case some of them were in the middle of forking
  		 * children that didn't get processed.
  		 * Not the most efficient way to do it, but it avoids
  		 * having to take callback_mutex in the fork path
  		 */
  		goto again;
  	}
  	if (heap == &tmp_heap)
  		heap_free(&tmp_heap);
  	return 0;
  }
817929ec2   Paul Menage   Task Control Grou...
2452
  /*
102a775e3   Ben Blum   cgroups: add a re...
2453
   * Stuff for reading the 'tasks'/'procs' files.
bbcb81d09   Paul Menage   Task Control Grou...
2454
2455
2456
2457
2458
2459
   *
   * Reading this file can return large amounts of data if a cgroup has
   * *lots* of attached tasks. So it may need several calls to read(),
   * but we cannot guarantee that the information we produce is correct
   * unless we produce it entirely atomically.
   *
bbcb81d09   Paul Menage   Task Control Grou...
2460
   */
bbcb81d09   Paul Menage   Task Control Grou...
2461
2462
  
  /*
d1d9fd330   Ben Blum   cgroups: use vmal...
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
   * The following two functions "fix" the issue where there are more pids
   * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
   * TODO: replace with a kernel-wide solution to this problem
   */
  /* true when @c pids occupy more than two pages worth of bytes */
  #define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
  /*
   * Allocate room for @count pids: kmalloc() for lists of up to two pages,
   * vmalloc() beyond that. Returns the allocator's result unchanged.
   */
  static void *pidlist_allocate(int count)
  {
  	if (!PIDLIST_TOO_LARGE(count))
  		return kmalloc(count * sizeof(pid_t), GFP_KERNEL);

  	return vmalloc(count * sizeof(pid_t));
  }
  /* Free a pid list with the release function matching its allocator. */
  static void pidlist_free(void *p)
  {
  	if (!is_vmalloc_addr(p)) {
  		kfree(p);
  		return;
  	}
  	vfree(p);
  }
  /*
   * Resize the pid list @p to hold @newcount entries. Returns the new
   * buffer, or NULL on allocation failure, in which case @p is left
   * untouched and still valid.
   */
  static void *pidlist_resize(void *p, int newcount)
  {
  	void *copy;

  	/* kmalloc()ed lists can be resized in place by krealloc() */
  	if (!is_vmalloc_addr(p))
  		return krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);

  	/* vmalloc()ed lists: allocate a new area, copy, free the old one */
  	copy = vmalloc(newcount * sizeof(pid_t));
  	if (!copy)
  		return NULL;
  	memcpy(copy, p, newcount * sizeof(pid_t));
  	vfree(p);

  	return copy;
  }
  
  /*
102a775e3   Ben Blum   cgroups: add a re...
2499
2500
2501
2502
   * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
   * If the new stripped list is sufficiently smaller and there's enough memory
   * to allocate a new buffer, will let go of the unneeded memory. Returns the
   * number of unique elements.
bbcb81d09   Paul Menage   Task Control Grou...
2503
   */
102a775e3   Ben Blum   cgroups: add a re...
2504
2505
2506
  /* is the size difference enough that we should re-allocate the array? */
  /*
   * True when shrinking from @old to @new entries frees at least one page.
   * Both sides are expressed in bytes and no subtraction is involved, so
   * the unsigned arithmetic cannot wrap around for lists shorter than a
   * page.
   */
  #define PIDLIST_REALLOC_DIFFERENCE(old, new) \
  	((old) * sizeof(pid_t) >= (new) * sizeof(pid_t) + PAGE_SIZE)
  /*
   * Strip duplicates from the sorted pid list *@p, in place.
   *
   * @p:      in/out pointer to the list; updated if the list is moved to
   *          a smaller buffer
   * @length: number of entries in the list
   *
   * Equal pids must be adjacent (i.e. the list is sorted) for them to be
   * detected. Returns the number of unique entries, which now occupy the
   * front of the list.
   */
  static int pidlist_uniq(pid_t **p, int length)
  {
  	pid_t *list = *p;
  	pid_t *newlist;
  	int src, dest = 1;

  	/* zero or one element: trivially unique, nothing to do */
  	if (length == 0 || length == 1)
  		return length;

  	/*
  	 * Element 0 is always kept. src walks the list; dest is both the
  	 * count of unique elements so far and the slot for the next one.
  	 */
  	for (src = 1; src < length; src++) {
  		if (list[src] != list[src - 1])
  			list[dest++] = list[src];
  	}

  	/*
  	 * If enough entries were removed, try to move to a smaller
  	 * buffer to save memory. If that fails due to lack of memory,
  	 * just stay with the buffer we already have.
  	 */
  	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
  		newlist = pidlist_resize(list, dest);
  		if (newlist)
  			*p = newlist;
  	}
  	return dest;
  }
  
  /*
   * sort() comparator for arrays of pid_t, ascending order.
   *
   * Returns a negative, zero or positive value as *a is less than, equal
   * to or greater than *b. Explicit comparisons are used rather than a
   * subtraction so that the result cannot overflow.
   */
  static int cmppid(const void *a, const void *b)
  {
  	const pid_t lhs = *(const pid_t *)a;
  	const pid_t rhs = *(const pid_t *)b;

  	return (lhs > rhs) - (lhs < rhs);
  }
  
  /*
72a8cb30d   Ben Blum   cgroups: ensure c...
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
   * find the appropriate pidlist for our purpose (given procs vs tasks)
   * returns with the lock on that pidlist already held, and takes care
   * of the use count, or returns NULL with no locks held if we're out of
   * memory.
   */
  static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
  						  enum cgroup_filetype type)
  {
  	struct cgroup_pidlist *l;
  	/* don't need task_nsproxy() if we're looking at ourself */
b70cc5fdb   Li Zefan   cgroups: clean up...
2560
  	struct pid_namespace *ns = current->nsproxy->pid_ns;
72a8cb30d   Ben Blum   cgroups: ensure c...
2561
2562
2563
2564
2565
2566
2567
2568
2569
  	/*
  	 * We can't drop the pidlist_mutex before taking the l->mutex in case
  	 * the last ref-holder is trying to remove l from the list at the same
  	 * time. Holding the pidlist_mutex precludes somebody taking whichever
  	 * list we find out from under us - compare release_pid_array().
  	 */
  	mutex_lock(&cgrp->pidlist_mutex);
  	list_for_each_entry(l, &cgrp->pidlists, links) {
  		if (l->key.type == type && l->key.ns == ns) {
72a8cb30d   Ben Blum   cgroups: ensure c...
2570
2571
2572
  			/* make sure l doesn't vanish out from under us */
  			down_write(&l->mutex);
  			mutex_unlock(&cgrp->pidlist_mutex);
72a8cb30d   Ben Blum   cgroups: ensure c...
2573
2574
2575
2576
2577
2578
2579
  			return l;
  		}
  	}
  	/* entry not found; create a new one */
  	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
  	if (!l) {
  		mutex_unlock(&cgrp->pidlist_mutex);
72a8cb30d   Ben Blum   cgroups: ensure c...
2580
2581
2582
2583
2584
  		return l;
  	}
  	init_rwsem(&l->mutex);
  	down_write(&l->mutex);
  	l->key.type = type;
b70cc5fdb   Li Zefan   cgroups: clean up...
2585
  	l->key.ns = get_pid_ns(ns);
72a8cb30d   Ben Blum   cgroups: ensure c...
2586
2587
2588
2589
2590
2591
2592
2593
2594
  	l->use_count = 0; /* don't increment here */
  	l->list = NULL;
  	l->owner = cgrp;
  	list_add(&l->links, &cgrp->pidlists);
  	mutex_unlock(&cgrp->pidlist_mutex);
  	return l;
  }
  
  /*
102a775e3   Ben Blum   cgroups: add a re...
2595
2596
   * Load a cgroup's pidarray with either procs' tgids or tasks' pids
   */
72a8cb30d   Ben Blum   cgroups: ensure c...
2597
2598
  static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
  			      struct cgroup_pidlist **lp)
102a775e3   Ben Blum   cgroups: add a re...
2599
2600
2601
2602
  {
  	pid_t *array;
  	int length;
  	int pid, n = 0; /* used for populating the array */
817929ec2   Paul Menage   Task Control Grou...
2603
2604
  	struct cgroup_iter it;
  	struct task_struct *tsk;
102a775e3   Ben Blum   cgroups: add a re...
2605
2606
2607
2608
2609
2610
2611
2612
2613
  	struct cgroup_pidlist *l;
  
  	/*
  	 * If cgroup gets more users after we read count, we won't have
  	 * enough space - tough.  This race is indistinguishable to the
  	 * caller from the case that the additional cgroup users didn't
  	 * show up until sometime later on.
  	 */
  	length = cgroup_task_count(cgrp);
d1d9fd330   Ben Blum   cgroups: use vmal...
2614
  	array = pidlist_allocate(length);
102a775e3   Ben Blum   cgroups: add a re...
2615
2616
2617
  	if (!array)
  		return -ENOMEM;
  	/* now, populate the array */
bd89aabc6   Paul Menage   Control groups: R...
2618
2619
  	cgroup_iter_start(cgrp, &it);
  	while ((tsk = cgroup_iter_next(cgrp, &it))) {
102a775e3   Ben Blum   cgroups: add a re...
2620
  		if (unlikely(n == length))
817929ec2   Paul Menage   Task Control Grou...
2621
  			break;
102a775e3   Ben Blum   cgroups: add a re...
2622
  		/* get tgid or pid for procs or tasks file respectively */
72a8cb30d   Ben Blum   cgroups: ensure c...
2623
2624
2625
2626
  		if (type == CGROUP_FILE_PROCS)
  			pid = task_tgid_vnr(tsk);
  		else
  			pid = task_pid_vnr(tsk);
102a775e3   Ben Blum   cgroups: add a re...
2627
2628
  		if (pid > 0) /* make sure to only use valid results */
  			array[n++] = pid;
817929ec2   Paul Menage   Task Control Grou...
2629
  	}
bd89aabc6   Paul Menage   Control groups: R...
2630
  	cgroup_iter_end(cgrp, &it);
102a775e3   Ben Blum   cgroups: add a re...
2631
2632
2633
  	length = n;
  	/* now sort & (if procs) strip out duplicates */
  	sort(array, length, sizeof(pid_t), cmppid, NULL);
72a8cb30d   Ben Blum   cgroups: ensure c...
2634
  	if (type == CGROUP_FILE_PROCS)
102a775e3   Ben Blum   cgroups: add a re...
2635
  		length = pidlist_uniq(&array, length);
72a8cb30d   Ben Blum   cgroups: ensure c...
2636
2637
  	l = cgroup_pidlist_find(cgrp, type);
  	if (!l) {
d1d9fd330   Ben Blum   cgroups: use vmal...
2638
  		pidlist_free(array);
72a8cb30d   Ben Blum   cgroups: ensure c...
2639
  		return -ENOMEM;
102a775e3   Ben Blum   cgroups: add a re...
2640
  	}
72a8cb30d   Ben Blum   cgroups: ensure c...
2641
  	/* store array, freeing old if necessary - lock already held */
d1d9fd330   Ben Blum   cgroups: use vmal...
2642
  	pidlist_free(l->list);
102a775e3   Ben Blum   cgroups: add a re...
2643
2644
2645
2646
  	l->list = array;
  	l->length = length;
  	l->use_count++;
  	up_write(&l->mutex);
72a8cb30d   Ben Blum   cgroups: ensure c...
2647
  	*lp = l;
102a775e3   Ben Blum   cgroups: add a re...
2648
  	return 0;
bbcb81d09   Paul Menage   Task Control Grou...
2649
  }
846c7bb05   Balbir Singh   Add cgroupstats
2650
  /**
a043e3b2c   Li Zefan   cgroup: fix comments
2651
   * cgroupstats_build - build and fill cgroupstats
846c7bb05   Balbir Singh   Add cgroupstats
2652
2653
2654
   * @stats: cgroupstats to fill information into
   * @dentry: A dentry entry belonging to the cgroup for which stats have
   * been requested.
a043e3b2c   Li Zefan   cgroup: fix comments
2655
2656
2657
   *
   * Build and fill cgroupstats so that taskstats can export it to user
   * space.
846c7bb05   Balbir Singh   Add cgroupstats
2658
2659
2660
2661
   */
  int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
  {
  	int ret = -EINVAL;
bd89aabc6   Paul Menage   Control groups: R...
2662
  	struct cgroup *cgrp;
846c7bb05   Balbir Singh   Add cgroupstats
2663
2664
  	struct cgroup_iter it;
  	struct task_struct *tsk;
33d283bef   Li Zefan   cgroups: fix a se...
2665

846c7bb05   Balbir Singh   Add cgroupstats
2666
  	/*
33d283bef   Li Zefan   cgroups: fix a se...
2667
2668
  	 * Validate dentry by checking the superblock operations,
  	 * and make sure it's a directory.
846c7bb05   Balbir Singh   Add cgroupstats
2669
  	 */
33d283bef   Li Zefan   cgroups: fix a se...
2670
2671
  	if (dentry->d_sb->s_op != &cgroup_ops ||
  	    !S_ISDIR(dentry->d_inode->i_mode))
846c7bb05   Balbir Singh   Add cgroupstats
2672
2673
2674
  		 goto err;
  
  	ret = 0;
bd89aabc6   Paul Menage   Control groups: R...
2675
  	cgrp = dentry->d_fsdata;
846c7bb05   Balbir Singh   Add cgroupstats
2676

bd89aabc6   Paul Menage   Control groups: R...
2677
2678
  	cgroup_iter_start(cgrp, &it);
  	while ((tsk = cgroup_iter_next(cgrp, &it))) {
846c7bb05   Balbir Singh   Add cgroupstats
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
  		switch (tsk->state) {
  		case TASK_RUNNING:
  			stats->nr_running++;
  			break;
  		case TASK_INTERRUPTIBLE:
  			stats->nr_sleeping++;
  			break;
  		case TASK_UNINTERRUPTIBLE:
  			stats->nr_uninterruptible++;
  			break;
  		case TASK_STOPPED:
  			stats->nr_stopped++;
  			break;
  		default:
  			if (delayacct_is_task_waiting_on_io(tsk))
  				stats->nr_io_wait++;
  			break;
  		}
  	}
bd89aabc6   Paul Menage   Control groups: R...
2698
  	cgroup_iter_end(cgrp, &it);
846c7bb05   Balbir Singh   Add cgroupstats
2699

846c7bb05   Balbir Singh   Add cgroupstats
2700
2701
2702
  err:
  	return ret;
  }
8f3ff2086   Paul Menage   cgroups: revert "...
2703

bbcb81d09   Paul Menage   Task Control Grou...
2704
  /*
102a775e3   Ben Blum   cgroups: add a re...
2705
   * seq_file methods for the tasks/procs files. The seq_file position is the
cc31edcee   Paul Menage   cgroups: convert ...
2706
   * next pid to display; the seq_file iterator is a pointer to the pid
102a775e3   Ben Blum   cgroups: add a re...
2707
   * in the cgroup->l->list array.
bbcb81d09   Paul Menage   Task Control Grou...
2708
   */
cc31edcee   Paul Menage   cgroups: convert ...
2709

102a775e3   Ben Blum   cgroups: add a re...
2710
  static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
  {
  	/*
  	 * *pos is 0 on the first call (or after a seek to the start),
  	 * otherwise it is one more than the last pid shown. For a non-zero
  	 * value, binary-search the array for it; when it is absent we
  	 * settle on the first larger entry, if any.
  	 */
  	struct cgroup_pidlist *l = s->private;
  	int target = *pos;
  	int lo = 0;
  	int *found;

  	/* the read lock is dropped again in cgroup_pidlist_stop() */
  	down_read(&l->mutex);
  	if (target) {
  		int hi = l->length;

  		while (lo < hi) {
  			int mid = (lo + hi) / 2;
  			int cur = l->list[mid];

  			if (cur == target) {
  				lo = mid;
  				break;
  			}
  			if (cur < target)
  				lo = mid + 1;
  			else
  				hi = mid;
  		}
  	}
  	/* Ran off the end of the array: nothing left to show */
  	if (lo >= l->length)
  		return NULL;
  	/* Replace the abstract position with the pid we actually landed on */
  	found = l->list + lo;
  	*pos = *found;
  	return found;
  }
102a775e3   Ben Blum   cgroups: add a re...
2744
  static void cgroup_pidlist_stop(struct seq_file *s, void *v)
cc31edcee   Paul Menage   cgroups: convert ...
2745
  {
102a775e3   Ben Blum   cgroups: add a re...
2746
2747
  	struct cgroup_pidlist *l = s->private;
  	up_read(&l->mutex);
cc31edcee   Paul Menage   cgroups: convert ...
2748
  }
102a775e3   Ben Blum   cgroups: add a re...
2749
  static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
  {
  	struct cgroup_pidlist *l = s->private;
  	pid_t *limit = l->list + l->length;
  	pid_t *next = (pid_t *)v + 1;

  	/* Stepping past the last array slot ends the iteration */
  	if (next >= limit)
  		return NULL;

  	/* Otherwise the new position is the pid we just moved onto */
  	*pos = *next;
  	return next;
  }
102a775e3   Ben Blum   cgroups: add a re...
2766
  /*
   * Emit the pid the iterator currently points at, one per line.
   * @v is the pointer into the pid array handed out by the start/next
   * callbacks. Returns whatever seq_printf() returns.
   */
  static int cgroup_pidlist_show(struct seq_file *s, void *v)
  {
  	return seq_printf(s, "%d\n", *(int *)v);
  }
bbcb81d09   Paul Menage   Task Control Grou...
2771

102a775e3   Ben Blum   cgroups: add a re...
2772
2773
2774
2775
2776
2777
2778
2779
2780
  /*
   * seq_file iteration callbacks for a pidlist. The same table serves both
   * the tasks and the procs file.
   */
  static const struct seq_operations cgroup_pidlist_seq_operations = {
  	.start	= cgroup_pidlist_start,
  	.next	= cgroup_pidlist_next,
  	.show	= cgroup_pidlist_show,
  	.stop	= cgroup_pidlist_stop,
  };
102a775e3   Ben Blum   cgroups: add a re...
2782
  static void cgroup_release_pid_array(struct cgroup_pidlist *l)
cc31edcee   Paul Menage   cgroups: convert ...
2783
  {
72a8cb30d   Ben Blum   cgroups: ensure c...
2784
2785
2786
2787
2788
2789
2790
  	/*
  	 * the case where we're the last user of this particular pidlist will
  	 * have us remove it from the cgroup's list, which entails taking the
  	 * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
  	 * pidlist_mutex, we have to take pidlist_mutex first.
  	 */
  	mutex_lock(&l->owner->pidlist_mutex);
102a775e3   Ben Blum   cgroups: add a re...
2791
2792
2793
  	down_write(&l->mutex);
  	BUG_ON(!l->use_count);
  	if (!--l->use_count) {
72a8cb30d   Ben Blum   cgroups: ensure c...
2794
2795
2796
  		/* we're the last user if refcount is 0; remove and free */
  		list_del(&l->links);
  		mutex_unlock(&l->owner->pidlist_mutex);
d1d9fd330   Ben Blum   cgroups: use vmal...
2797
  		pidlist_free(l->list);
72a8cb30d   Ben Blum   cgroups: ensure c...
2798
2799
2800
2801
  		put_pid_ns(l->key.ns);
  		up_write(&l->mutex);
  		kfree(l);
  		return;
cc31edcee   Paul Menage   cgroups: convert ...
2802
  	}
72a8cb30d   Ben Blum   cgroups: ensure c...
2803
  	mutex_unlock(&l->owner->pidlist_mutex);
102a775e3   Ben Blum   cgroups: add a re...
2804
  	up_write(&l->mutex);
bbcb81d09   Paul Menage   Task Control Grou...
2805
  }
102a775e3   Ben Blum   cgroups: add a re...
2806
  static int cgroup_pidlist_release(struct inode *inode, struct file *file)
cc31edcee   Paul Menage   cgroups: convert ...
2807
  {
102a775e3   Ben Blum   cgroups: add a re...
2808
  	struct cgroup_pidlist *l;
cc31edcee   Paul Menage   cgroups: convert ...
2809
2810
  	if (!(file->f_mode & FMODE_READ))
  		return 0;
102a775e3   Ben Blum   cgroups: add a re...
2811
2812
2813
2814
2815
2816
  	/*
  	 * the seq_file will only be initialized if the file was opened for
  	 * reading; hence we check if it's not null only in that case.
  	 */
  	l = ((struct seq_file *)file->private_data)->private;
  	cgroup_release_pid_array(l);
cc31edcee   Paul Menage   cgroups: convert ...
2817
2818
  	return seq_release(inode, file);
  }
102a775e3   Ben Blum   cgroups: add a re...
2819
  static const struct file_operations cgroup_pidlist_operations = {
cc31edcee   Paul Menage   cgroups: convert ...
2820
2821
2822
  	.read = seq_read,
  	.llseek = seq_lseek,
  	.write = cgroup_file_write,
102a775e3   Ben Blum   cgroups: add a re...
2823
  	.release = cgroup_pidlist_release,
cc31edcee   Paul Menage   cgroups: convert ...
2824
  };
bbcb81d09   Paul Menage   Task Control Grou...
2825
  /*
102a775e3   Ben Blum   cgroups: add a re...
2826
2827
2828
   * The following functions handle opens on a file that displays a pidlist
   * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
   * in the cgroup.
bbcb81d09   Paul Menage   Task Control Grou...
2829
   */
102a775e3   Ben Blum   cgroups: add a re...
2830
  /* helper function for the two below it */
72a8cb30d   Ben Blum   cgroups: ensure c...
2831
  static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
bbcb81d09   Paul Menage   Task Control Grou...
2832
  {
bd89aabc6   Paul Menage   Control groups: R...
2833
  	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
72a8cb30d   Ben Blum   cgroups: ensure c...
2834
  	struct cgroup_pidlist *l;
cc31edcee   Paul Menage   cgroups: convert ...
2835
  	int retval;
bbcb81d09   Paul Menage   Task Control Grou...
2836

cc31edcee   Paul Menage   cgroups: convert ...
2837
  	/* Nothing to do for write-only files */
bbcb81d09   Paul Menage   Task Control Grou...
2838
2839
  	if (!(file->f_mode & FMODE_READ))
  		return 0;
102a775e3   Ben Blum   cgroups: add a re...
2840
  	/* have the array populated */
72a8cb30d   Ben Blum   cgroups: ensure c...
2841
  	retval = pidlist_array_load(cgrp, type, &l);
102a775e3   Ben Blum   cgroups: add a re...
2842
2843
2844
2845
  	if (retval)
  		return retval;
  	/* configure file information */
  	file->f_op = &cgroup_pidlist_operations;
cc31edcee   Paul Menage   cgroups: convert ...
2846

102a775e3   Ben Blum   cgroups: add a re...
2847
  	retval = seq_open(file, &cgroup_pidlist_seq_operations);
cc31edcee   Paul Menage   cgroups: convert ...
2848
  	if (retval) {
102a775e3   Ben Blum   cgroups: add a re...
2849
  		cgroup_release_pid_array(l);
cc31edcee   Paul Menage   cgroups: convert ...
2850
  		return retval;
bbcb81d09   Paul Menage   Task Control Grou...
2851
  	}
102a775e3   Ben Blum   cgroups: add a re...
2852
  	((struct seq_file *)file->private_data)->private = l;
bbcb81d09   Paul Menage   Task Control Grou...
2853
2854
  	return 0;
  }
102a775e3   Ben Blum   cgroups: add a re...
2855
2856
  static int cgroup_tasks_open(struct inode *unused, struct file *file)
  {
72a8cb30d   Ben Blum   cgroups: ensure c...
2857
  	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
102a775e3   Ben Blum   cgroups: add a re...
2858
2859
2860
  }
  static int cgroup_procs_open(struct inode *unused, struct file *file)
  {
72a8cb30d   Ben Blum   cgroups: ensure c...
2861
  	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
102a775e3   Ben Blum   cgroups: add a re...
2862
  }
bbcb81d09   Paul Menage   Task Control Grou...
2863

bd89aabc6   Paul Menage   Control groups: R...
2864
  /* read handler of "notify_on_release": report the cgroup's current setting */
  static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
  					    struct cftype *cft)
  {
  	u64 enabled = notify_on_release(cgrp);

  	return enabled;
  }
6379c1061   Paul Menage   cgroup files: mov...
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
  /*
   * write handler of "notify_on_release": any non-zero @val turns the flag
   * on, zero turns it off. CGRP_RELEASABLE is cleared in both cases.
   * Always returns 0.
   */
  static int cgroup_write_notify_on_release(struct cgroup *cgrp,
  					  struct cftype *cft,
  					  u64 val)
  {
  	clear_bit(CGRP_RELEASABLE, &cgrp->flags);

  	if (!val) {
  		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
  		return 0;
  	}

  	set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
  	return 0;
  }
bbcb81d09   Paul Menage   Task Control Grou...
2880
  /*
0dea11687   Kirill A. Shutemov   cgroup: implement...
2881
2882
2883
2884
2885
2886
2887
2888
2889
   * Unregister event and free resources.
   *
   * Gets called from workqueue.
   */
  static void cgroup_event_remove(struct work_struct *work)
  {
  	struct cgroup_event *event = container_of(work, struct cgroup_event,
  			remove);
  	struct cgroup *cgrp = event->cgrp;
0dea11687   Kirill A. Shutemov   cgroup: implement...
2890
2891
2892
  	event->cft->unregister_event(cgrp, event->cft, event->eventfd);
  
  	eventfd_ctx_put(event->eventfd);
0dea11687   Kirill A. Shutemov   cgroup: implement...
2893
  	kfree(event);
a0a4db548   Kirill A. Shutemov   cgroups: remove e...
2894
  	dput(cgrp->dentry);
0dea11687   Kirill A. Shutemov   cgroup: implement...
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
  }
  
  /*
   * Wait-queue callback attached to the eventfd. Only POLLHUP, i.e. the
   * user closing the eventfd, is acted upon; everything else is ignored.
   *
   * Called with wqh->lock held and interrupts disabled.
   */
  static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
  		int sync, void *key)
  {
  	struct cgroup_event *ev = container_of(wait, struct cgroup_event, wait);
  	struct cgroup *cgrp = ev->cgrp;
  	unsigned long events = (unsigned long)key;

  	if (!(events & POLLHUP))
  		return 0;

  	__remove_wait_queue(ev->wqh, &ev->wait);

  	spin_lock(&cgrp->event_list_lock);
  	list_del(&ev->list);
  	spin_unlock(&cgrp->event_list_lock);

  	/*
  	 * cgroup_event_remove() may sleep and we are in atomic context
  	 * here, so hand the teardown over to a workqueue.
  	 */
  	schedule_work(&ev->remove);

  	return 0;
  }
  
  /*
   * poll_table queueing callback: remember which wait queue head the event
   * was put on and add the event's wait entry to it.
   */
  static void cgroup_event_ptable_queue_proc(struct file *file,
  		wait_queue_head_t *wqh, poll_table *pt)
  {
  	struct cgroup_event *ev = container_of(pt, struct cgroup_event, pt);

  	ev->wqh = wqh;
  	add_wait_queue(wqh, &ev->wait);
  }
  
  /*
   * Parse input and register new cgroup event handler.
   *
   * Input must be in format '<event_fd> <control_fd> <args>'; <args> may be
   * omitted. Interpretation of args is defined by control file
   * implementation.
   *
   * Returns 0 on success (also when the eventfd turns out to be hung up
   * already, in which case nothing stays registered), -EINVAL on malformed
   * input or when the control file does not support events, -ENOMEM,
   * -EBADF, or the error reported by one of the helpers called below.
   */
  static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
  				      const char *buffer)
  {
  	struct cgroup_event *event = NULL;
  	unsigned int efd, cfd;
  	struct file *efile = NULL;
  	struct file *cfile = NULL;
  	char *endp;
  	int ret;

  	efd = simple_strtoul(buffer, &endp, 10);
  	if (*endp != ' ')
  		return -EINVAL;
  	buffer = endp + 1;

  	cfd = simple_strtoul(buffer, &endp, 10);
  	if (*endp == ' ') {
  		/* args follow the separator */
  		buffer = endp + 1;
  	} else if (*endp == '\0') {
  		/*
  		 * No args given. Stay on the terminator so the handler
  		 * sees an empty string; stepping over it would hand out
  		 * a pointer past the end of the input.
  		 */
  		buffer = endp;
  	} else {
  		return -EINVAL;
  	}

  	event = kzalloc(sizeof(*event), GFP_KERNEL);
  	if (!event)
  		return -ENOMEM;
  	event->cgrp = cgrp;
  	INIT_LIST_HEAD(&event->list);
  	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
  	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
  	INIT_WORK(&event->remove, cgroup_event_remove);

  	efile = eventfd_fget(efd);
  	if (IS_ERR(efile)) {
  		ret = PTR_ERR(efile);
  		goto fail;
  	}

  	event->eventfd = eventfd_ctx_fileget(efile);
  	if (IS_ERR(event->eventfd)) {
  		ret = PTR_ERR(event->eventfd);
  		goto fail;
  	}

  	cfile = fget(cfd);
  	if (!cfile) {
  		ret = -EBADF;
  		goto fail;
  	}

  	/* the process need read permission on control file */
  	ret = file_permission(cfile, MAY_READ);
  	if (ret < 0)
  		goto fail;

  	event->cft = __file_cft(cfile);
  	if (IS_ERR(event->cft)) {
  		ret = PTR_ERR(event->cft);
  		goto fail;
  	}

  	/* the control file must implement both halves of the event API */
  	if (!event->cft->register_event || !event->cft->unregister_event) {
  		ret = -EINVAL;
  		goto fail;
  	}

  	ret = event->cft->register_event(cgrp, event->cft,
  			event->eventfd, buffer);
  	if (ret)
  		goto fail;

  	/*
  	 * Hook our wait entry onto the eventfd. If it is hung up already,
  	 * undo the registration and report success.
  	 */
  	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
  		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
  		ret = 0;
  		goto fail;
  	}

  	/*
  	 * Events should be removed after rmdir of cgroup directory, but before
  	 * destroying subsystem state objects. Let's take reference to cgroup
  	 * directory dentry to do that.
  	 */
  	dget(cgrp->dentry);

  	spin_lock(&cgrp->event_list_lock);
  	list_add(&event->list, &cgrp->event_list);
  	spin_unlock(&cgrp->event_list_lock);

  	fput(cfile);
  	fput(efile);

  	return 0;

  fail:
  	/* event is always allocated by the time we can get here */
  	if (cfile)
  		fput(cfile);

  	if (!IS_ERR_OR_NULL(event->eventfd))
  		eventfd_ctx_put(event->eventfd);

  	if (!IS_ERR_OR_NULL(efile))
  		fput(efile);

  	kfree(event);

  	return ret;
  }
  
  /*
bbcb81d09   Paul Menage   Task Control Grou...
3045
3046
   * for the common functions, 'private' gives the type of file
   */
102a775e3   Ben Blum   cgroups: add a re...
3047
3048
  /* for hysterical raisins, we can't put this on the older files */
  #define CGROUP_FILE_GENERIC_PREFIX "cgroup."
81a6a5cdd   Paul Menage   Task Control Grou...
3049
3050
3051
3052
  static struct cftype files[] = {
  	{
  		.name = "tasks",
  		.open = cgroup_tasks_open,
af351026a   Paul Menage   cgroup files: tur...
3053
  		.write_u64 = cgroup_tasks_write,
102a775e3   Ben Blum   cgroups: add a re...
3054
  		.release = cgroup_pidlist_release,
099fca322   Li Zefan   cgroups: show cor...
3055
  		.mode = S_IRUGO | S_IWUSR,
81a6a5cdd   Paul Menage   Task Control Grou...
3056
  	},
102a775e3   Ben Blum   cgroups: add a re...
3057
3058
3059
3060
3061
3062
3063
  	{
  		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
  		.open = cgroup_procs_open,
  		/* .write_u64 = cgroup_procs_write, TODO */
  		.release = cgroup_pidlist_release,
  		.mode = S_IRUGO,
  	},
81a6a5cdd   Paul Menage   Task Control Grou...
3064
3065
  	{
  		.name = "notify_on_release",
f4c753b7e   Paul Menage   CGroup API files:...
3066
  		.read_u64 = cgroup_read_notify_on_release,
6379c1061   Paul Menage   cgroup files: mov...
3067
  		.write_u64 = cgroup_write_notify_on_release,
81a6a5cdd   Paul Menage   Task Control Grou...
3068
  	},
0dea11687   Kirill A. Shutemov   cgroup: implement...
3069
3070
3071
3072
3073
  	{
  		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
  		.write_string = cgroup_write_event_control,
  		.mode = S_IWUGO,
  	},
81a6a5cdd   Paul Menage   Task Control Grou...
3074
3075
3076
3077
  };
  
  static struct cftype cft_release_agent = {
  	.name = "release_agent",
e788e066c   Paul Menage   cgroup files: mov...
3078
3079
3080
  	.read_seq_string = cgroup_release_agent_show,
  	.write_string = cgroup_release_agent_write,
  	.max_write_len = PATH_MAX,
bbcb81d09   Paul Menage   Task Control Grou...
3081
  };
bd89aabc6   Paul Menage   Control groups: R...
3082
  static int cgroup_populate_dir(struct cgroup *cgrp)
ddbcc7e8e   Paul Menage   Task Control Grou...
3083
3084
3085
3086
3087
  {
  	int err;
  	struct cgroup_subsys *ss;
  
  	/* First clear out any existing files */
bd89aabc6   Paul Menage   Control groups: R...
3088
  	cgroup_clear_directory(cgrp->dentry);
ddbcc7e8e   Paul Menage   Task Control Grou...
3089

bd89aabc6   Paul Menage   Control groups: R...
3090
  	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
bbcb81d09   Paul Menage   Task Control Grou...
3091
3092
  	if (err < 0)
  		return err;
bd89aabc6   Paul Menage   Control groups: R...
3093
3094
  	if (cgrp == cgrp->top_cgroup) {
  		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
81a6a5cdd   Paul Menage   Task Control Grou...
3095
3096
  			return err;
  	}
bd89aabc6   Paul Menage   Control groups: R...
3097
3098
  	for_each_subsys(cgrp->root, ss) {
  		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
ddbcc7e8e   Paul Menage   Task Control Grou...
3099
3100
  			return err;
  	}
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
  	/* This cgroup is ready now */
  	for_each_subsys(cgrp->root, ss) {
  		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
  		/*
  		 * Update id->css pointer and make this css visible from
  		 * CSS ID functions. This pointer will be dereferened
  		 * from RCU-read-side without locks.
  		 */
  		if (css->id)
  			rcu_assign_pointer(css->id->css, css);
  	}
ddbcc7e8e   Paul Menage   Task Control Grou...
3112
3113
3114
3115
3116
3117
  
  	return 0;
  }
  
  static void init_cgroup_css(struct cgroup_subsys_state *css,
  			       struct cgroup_subsys *ss,
bd89aabc6   Paul Menage   Control groups: R...
3118
  			       struct cgroup *cgrp)
ddbcc7e8e   Paul Menage   Task Control Grou...
3119
  {
bd89aabc6   Paul Menage   Control groups: R...
3120
  	css->cgroup = cgrp;
e7c5ec919   Paul Menage   cgroups: add css_...
3121
  	atomic_set(&css->refcnt, 1);
ddbcc7e8e   Paul Menage   Task Control Grou...
3122
  	css->flags = 0;
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
3123
  	css->id = NULL;
bd89aabc6   Paul Menage   Control groups: R...
3124
  	if (cgrp == dummytop)
ddbcc7e8e   Paul Menage   Task Control Grou...
3125
  		set_bit(CSS_ROOT, &css->flags);
bd89aabc6   Paul Menage   Control groups: R...
3126
3127
  	BUG_ON(cgrp->subsys[ss->subsys_id]);
  	cgrp->subsys[ss->subsys_id] = css;
ddbcc7e8e   Paul Menage   Task Control Grou...
3128
  }
999cd8a45   Paul Menage   cgroups: add a pe...
3129
3130
3131
3132
  static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
  {
  	/* We need to take each hierarchy_mutex in a consistent order */
  	int i;
aae8aab40   Ben Blum   cgroups: revamp s...
3133
3134
3135
3136
  	/*
  	 * No worry about a race with rebind_subsystems that might mess up the
  	 * locking order, since both parties are under cgroup_mutex.
  	 */
999cd8a45   Paul Menage   cgroups: add a pe...
3137
3138
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  		struct cgroup_subsys *ss = subsys[i];
aae8aab40   Ben Blum   cgroups: revamp s...
3139
3140
  		if (ss == NULL)
  			continue;
999cd8a45   Paul Menage   cgroups: add a pe...
3141
  		if (ss->root == root)
cfebe563b   Li Zefan   cgroups: fix lock...
3142
  			mutex_lock(&ss->hierarchy_mutex);
999cd8a45   Paul Menage   cgroups: add a pe...
3143
3144
3145
3146
3147
3148
3149
3150
3151
  	}
  }
  
  static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
  {
  	int i;
  
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  		struct cgroup_subsys *ss = subsys[i];
aae8aab40   Ben Blum   cgroups: revamp s...
3152
3153
  		if (ss == NULL)
  			continue;
999cd8a45   Paul Menage   cgroups: add a pe...
3154
3155
3156
3157
  		if (ss->root == root)
  			mutex_unlock(&ss->hierarchy_mutex);
  	}
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
3158
  /*
a043e3b2c   Li Zefan   cgroup: fix comments
3159
3160
3161
3162
   * cgroup_create - create a cgroup
   * @parent: cgroup that will be parent of the new cgroup
   * @dentry: dentry of the new cgroup
   * @mode: mode to set on new inode
ddbcc7e8e   Paul Menage   Task Control Grou...
3163
   *
a043e3b2c   Li Zefan   cgroup: fix comments
3164
   * Must be called with the mutex on the parent inode held
ddbcc7e8e   Paul Menage   Task Control Grou...
3165
   */
ddbcc7e8e   Paul Menage   Task Control Grou...
3166
  static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
099fca322   Li Zefan   cgroups: show cor...
3167
  			     mode_t mode)
ddbcc7e8e   Paul Menage   Task Control Grou...
3168
  {
bd89aabc6   Paul Menage   Control groups: R...
3169
  	struct cgroup *cgrp;
ddbcc7e8e   Paul Menage   Task Control Grou...
3170
3171
3172
3173
  	struct cgroupfs_root *root = parent->root;
  	int err = 0;
  	struct cgroup_subsys *ss;
  	struct super_block *sb = root->sb;
bd89aabc6   Paul Menage   Control groups: R...
3174
3175
  	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
  	if (!cgrp)
ddbcc7e8e   Paul Menage   Task Control Grou...
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
  		return -ENOMEM;
  
  	/* Grab a reference on the superblock so the hierarchy doesn't
  	 * get deleted on unmount if there are child cgroups.  This
  	 * can be done outside cgroup_mutex, since the sb can't
  	 * disappear while someone has an open control file on the
  	 * fs */
  	atomic_inc(&sb->s_active);
  
  	mutex_lock(&cgroup_mutex);
cc31edcee   Paul Menage   cgroups: convert ...
3186
  	init_cgroup_housekeeping(cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
3187

bd89aabc6   Paul Menage   Control groups: R...
3188
3189
3190
  	cgrp->parent = parent;
  	cgrp->root = parent->root;
  	cgrp->top_cgroup = parent->top_cgroup;
ddbcc7e8e   Paul Menage   Task Control Grou...
3191

b6abdb0e6   Li Zefan   cgroup: fix defau...
3192
3193
  	if (notify_on_release(parent))
  		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
ddbcc7e8e   Paul Menage   Task Control Grou...
3194
  	for_each_subsys(root, ss) {
bd89aabc6   Paul Menage   Control groups: R...
3195
  		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
4528fd059   Li Zefan   cgroups: fix to r...
3196

ddbcc7e8e   Paul Menage   Task Control Grou...
3197
3198
3199
3200
  		if (IS_ERR(css)) {
  			err = PTR_ERR(css);
  			goto err_destroy;
  		}
bd89aabc6   Paul Menage   Control groups: R...
3201
  		init_cgroup_css(css, ss, cgrp);
4528fd059   Li Zefan   cgroups: fix to r...
3202
3203
3204
  		if (ss->use_id) {
  			err = alloc_css_id(ss, parent, cgrp);
  			if (err)
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
3205
  				goto err_destroy;
4528fd059   Li Zefan   cgroups: fix to r...
3206
  		}
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
3207
  		/* At error, ->destroy() callback has to free assigned ID. */
ddbcc7e8e   Paul Menage   Task Control Grou...
3208
  	}
999cd8a45   Paul Menage   cgroups: add a pe...
3209
  	cgroup_lock_hierarchy(root);
bd89aabc6   Paul Menage   Control groups: R...
3210
  	list_add(&cgrp->sibling, &cgrp->parent->children);
999cd8a45   Paul Menage   cgroups: add a pe...
3211
  	cgroup_unlock_hierarchy(root);
ddbcc7e8e   Paul Menage   Task Control Grou...
3212
  	root->number_of_cgroups++;
bd89aabc6   Paul Menage   Control groups: R...
3213
  	err = cgroup_create_dir(cgrp, dentry, mode);
ddbcc7e8e   Paul Menage   Task Control Grou...
3214
3215
3216
3217
  	if (err < 0)
  		goto err_remove;
  
  	/* The cgroup directory was pre-locked for us */
bd89aabc6   Paul Menage   Control groups: R...
3218
  	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
ddbcc7e8e   Paul Menage   Task Control Grou...
3219

bd89aabc6   Paul Menage   Control groups: R...
3220
  	err = cgroup_populate_dir(cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
3221
3222
3223
  	/* If err < 0, we have a half-filled directory - oh well ;) */
  
  	mutex_unlock(&cgroup_mutex);
bd89aabc6   Paul Menage   Control groups: R...
3224
  	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
3225
3226
3227
3228
  
  	return 0;
  
   err_remove:
baef99a08   KAMEZAWA Hiroyuki   cgroups: use hier...
3229
  	cgroup_lock_hierarchy(root);
bd89aabc6   Paul Menage   Control groups: R...
3230
  	list_del(&cgrp->sibling);
baef99a08   KAMEZAWA Hiroyuki   cgroups: use hier...
3231
  	cgroup_unlock_hierarchy(root);
ddbcc7e8e   Paul Menage   Task Control Grou...
3232
3233
3234
3235
3236
  	root->number_of_cgroups--;
  
   err_destroy:
  
  	for_each_subsys(root, ss) {
bd89aabc6   Paul Menage   Control groups: R...
3237
3238
  		if (cgrp->subsys[ss->subsys_id])
  			ss->destroy(ss, cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
3239
3240
3241
3242
3243
3244
  	}
  
  	mutex_unlock(&cgroup_mutex);
  
  	/* Release the reference count that we took on the superblock */
  	deactivate_super(sb);
bd89aabc6   Paul Menage   Control groups: R...
3245
  	kfree(cgrp);
ddbcc7e8e   Paul Menage   Task Control Grou...
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
  	return err;
  }
  
  /* ->mkdir: create a child of the cgroup that owns the parent directory */
  static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
  {
  	struct dentry *parent_dentry = dentry->d_parent;
  	struct cgroup *parent_cgrp = parent_dentry->d_fsdata;

  	/* inode->i_mutex is already held by the vfs at this point */
  	return cgroup_create(parent_cgrp, dentry, mode | S_IFDIR);
  }
55b6fd016   Li Zefan   cgroup: uninline ...
3256
  static int cgroup_has_css_refs(struct cgroup *cgrp)
81a6a5cdd   Paul Menage   Task Control Grou...
3257
3258
3259
  {
  	/* Check the reference count on each subsystem. Since we
  	 * already established that there are no tasks in the
e7c5ec919   Paul Menage   cgroups: add css_...
3260
  	 * cgroup, if the css refcount is also 1, then there should
81a6a5cdd   Paul Menage   Task Control Grou...
3261
3262
3263
3264
3265
3266
3267
  	 * be no outstanding references, so the subsystem is safe to
  	 * destroy. We scan across all subsystems rather than using
  	 * the per-hierarchy linked list of mounted subsystems since
  	 * we can be called via check_for_release() with no
  	 * synchronization other than RCU, and the subsystem linked
  	 * list isn't RCU-safe */
  	int i;
aae8aab40   Ben Blum   cgroups: revamp s...
3268
3269
3270
3271
3272
  	/*
  	 * We won't need to lock the subsys array, because the subsystems
  	 * we're concerned about aren't going anywhere since our cgroup root
  	 * has a reference on them.
  	 */
81a6a5cdd   Paul Menage   Task Control Grou...
3273
3274
3275
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  		struct cgroup_subsys *ss = subsys[i];
  		struct cgroup_subsys_state *css;
aae8aab40   Ben Blum   cgroups: revamp s...
3276
3277
  		/* Skip subsystems not present or not in this hierarchy */
  		if (ss == NULL || ss->root != cgrp->root)
81a6a5cdd   Paul Menage   Task Control Grou...
3278
  			continue;
bd89aabc6   Paul Menage   Control groups: R...
3279
  		css = cgrp->subsys[ss->subsys_id];
81a6a5cdd   Paul Menage   Task Control Grou...
3280
3281
3282
3283
3284
3285
  		/* When called from check_for_release() it's possible
  		 * that by this point the cgroup has been removed
  		 * and the css deleted. But a false-positive doesn't
  		 * matter, since it can only happen if the cgroup
  		 * has been deleted and hence no longer needs the
  		 * release agent to be called anyway. */
e7c5ec919   Paul Menage   cgroups: add css_...
3286
  		if (css && (atomic_read(&css->refcnt) > 1))
81a6a5cdd   Paul Menage   Task Control Grou...
3287
  			return 1;
81a6a5cdd   Paul Menage   Task Control Grou...
3288
3289
3290
  	}
  	return 0;
  }
e7c5ec919   Paul Menage   cgroups: add css_...
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
  /*
   * Atomically mark all (or else none) of the cgroup's CSS objects as
   * CSS_REMOVED. Return true on success, or false if the cgroup has
   * busy subsystems. Call with cgroup_mutex held
   */

  static int cgroup_clear_css_refs(struct cgroup *cgrp)
  {
  	struct cgroup_subsys *ss;
  	unsigned long flags;
  	bool busy = false;

  	local_irq_save(flags);
  	for_each_subsys(cgrp->root, ss) {
  		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
  		int seen;

  		for (;;) {
  			/* We can only remove a CSS with a refcnt==1 */
  			seen = atomic_read(&css->refcnt);
  			if (seen > 1) {
  				busy = true;
  				goto settle;
  			}
  			BUG_ON(!seen);
  			/*
  			 * Drop the refcnt to 0 while we check other
  			 * subsystems. This will cause any racing
  			 * css_tryget() to spin until we set the
  			 * CSS_REMOVED bits or abort
  			 */
  			if (atomic_cmpxchg(&css->refcnt, seen, 0) == seen)
  				break;
  			cpu_relax();
  		}
  	}
   settle:
  	for_each_subsys(cgrp->root, ss) {
  		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

  		if (!busy) {
  			/* Commit the fact that the CSS is removed */
  			set_bit(CSS_REMOVED, &css->flags);
  		} else if (!atomic_read(&css->refcnt)) {
  			/*
  			 * Aborting: put back the reference we had
  			 * cleared from 1 to 0 above
  			 */
  			atomic_set(&css->refcnt, 1);
  		}
  	}
  	local_irq_restore(flags);
  	return !busy;
  }
ddbcc7e8e   Paul Menage   Task Control Grou...
3343
3344
  static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
  {
bd89aabc6   Paul Menage   Control groups: R...
3345
  	struct cgroup *cgrp = dentry->d_fsdata;
ddbcc7e8e   Paul Menage   Task Control Grou...
3346
3347
  	struct dentry *d;
  	struct cgroup *parent;
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3348
  	DEFINE_WAIT(wait);
4ab78683c   Kirill A. Shutemov   cgroups: fix race...
3349
  	struct cgroup_event *event, *tmp;
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3350
  	int ret;
ddbcc7e8e   Paul Menage   Task Control Grou...
3351
3352
  
  	/* the vfs holds both inode->i_mutex already */
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3353
  again:
ddbcc7e8e   Paul Menage   Task Control Grou...
3354
  	mutex_lock(&cgroup_mutex);
bd89aabc6   Paul Menage   Control groups: R...
3355
  	if (atomic_read(&cgrp->count) != 0) {
ddbcc7e8e   Paul Menage   Task Control Grou...
3356
3357
3358
  		mutex_unlock(&cgroup_mutex);
  		return -EBUSY;
  	}
bd89aabc6   Paul Menage   Control groups: R...
3359
  	if (!list_empty(&cgrp->children)) {
ddbcc7e8e   Paul Menage   Task Control Grou...
3360
3361
3362
  		mutex_unlock(&cgroup_mutex);
  		return -EBUSY;
  	}
3fa59dfbc   KAMEZAWA Hiroyuki   cgroup: fix poten...
3363
  	mutex_unlock(&cgroup_mutex);
a043e3b2c   Li Zefan   cgroup: fix comments
3364

4fca88c87   KAMEZAWA Hiroyuki   memory cgroup enh...
3365
  	/*
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
  	 * In general, subsystem has no css->refcnt after pre_destroy(). But
  	 * in racy cases, subsystem may have to get css->refcnt after
  	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
  	 * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
  	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
  	 * and subsystem's reference count handling. Please see css_get/put
  	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
  	 */
  	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
  
  	/*
a043e3b2c   Li Zefan   cgroup: fix comments
3377
3378
  	 * Call pre_destroy handlers of subsys. Notify subsystems
  	 * that rmdir() request comes.
4fca88c87   KAMEZAWA Hiroyuki   memory cgroup enh...
3379
  	 */
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3380
  	ret = cgroup_call_pre_destroy(cgrp);
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
3381
3382
  	if (ret) {
  		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3383
  		return ret;
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
3384
  	}
ddbcc7e8e   Paul Menage   Task Control Grou...
3385

3fa59dfbc   KAMEZAWA Hiroyuki   cgroup: fix poten...
3386
3387
  	mutex_lock(&cgroup_mutex);
  	parent = cgrp->parent;
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3388
  	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
3389
  		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
ddbcc7e8e   Paul Menage   Task Control Grou...
3390
3391
3392
  		mutex_unlock(&cgroup_mutex);
  		return -EBUSY;
  	}
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3393
  	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3394
3395
  	if (!cgroup_clear_css_refs(cgrp)) {
  		mutex_unlock(&cgroup_mutex);
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
3396
3397
3398
3399
3400
3401
  		/*
  		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
  		 * prepare_to_wait(), we need to check this flag.
  		 */
  		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
  			schedule();
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
3402
3403
3404
3405
3406
3407
3408
3409
3410
  		finish_wait(&cgroup_rmdir_waitq, &wait);
  		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
  		if (signal_pending(current))
  			return -EINTR;
  		goto again;
  	}
  	/* NO css_tryget() can success after here. */
  	finish_wait(&cgroup_rmdir_waitq, &wait);
  	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
ddbcc7e8e   Paul Menage   Task Control Grou...
3411

81a6a5cdd   Paul Menage   Task Control Grou...
3412
  	spin_lock(&release_list_lock);
bd89aabc6   Paul Menage   Control groups: R...
3413
3414
3415
  	set_bit(CGRP_REMOVED, &cgrp->flags);
  	if (!list_empty(&cgrp->release_list))
  		list_del(&cgrp->release_list);
81a6a5cdd   Paul Menage   Task Control Grou...
3416
  	spin_unlock(&release_list_lock);
999cd8a45   Paul Menage   cgroups: add a pe...
3417
3418
3419
  
  	cgroup_lock_hierarchy(cgrp->root);
  	/* delete this cgroup from parent->children */
bd89aabc6   Paul Menage   Control groups: R...
3420
  	list_del(&cgrp->sibling);
999cd8a45   Paul Menage   cgroups: add a pe...
3421
  	cgroup_unlock_hierarchy(cgrp->root);
bd89aabc6   Paul Menage   Control groups: R...
3422
3423
  	spin_lock(&cgrp->dentry->d_lock);
  	d = dget(cgrp->dentry);
ddbcc7e8e   Paul Menage   Task Control Grou...
3424
3425
3426
3427
  	spin_unlock(&d->d_lock);
  
  	cgroup_d_remove_dir(d);
  	dput(d);
ddbcc7e8e   Paul Menage   Task Control Grou...
3428

bd89aabc6   Paul Menage   Control groups: R...
3429
  	set_bit(CGRP_RELEASABLE, &parent->flags);
81a6a5cdd   Paul Menage   Task Control Grou...
3430
  	check_for_release(parent);
4ab78683c   Kirill A. Shutemov   cgroups: fix race...
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
  	/*
  	 * Unregister events and notify userspace.
  	 * Notify userspace about cgroup removing only after rmdir of cgroup
  	 * directory to avoid race between userspace and kernelspace
  	 */
  	spin_lock(&cgrp->event_list_lock);
  	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
  		list_del(&event->list);
  		remove_wait_queue(event->wqh, &event->wait);
  		eventfd_signal(event->eventfd, 1);
  		schedule_work(&event->remove);
  	}
  	spin_unlock(&cgrp->event_list_lock);
ddbcc7e8e   Paul Menage   Task Control Grou...
3444
  	mutex_unlock(&cgroup_mutex);
ddbcc7e8e   Paul Menage   Task Control Grou...
3445
3446
  	return 0;
  }
06a119204   Li Zefan   cgroup: annotate ...
3447
  /*
   * cgroup_init_subsys - boot-time initialisation of one built-in subsystem.
   * @ss: the subsystem to initialise
   *
   * Attaches @ss to rootnode, creates its css for dummytop and publishes
   * that css in init_css_set.  Any failure here is fatal (BUG).
   */
  static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
  {
  	struct cgroup_subsys_state *css;

  	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

  	/* Create the top cgroup state for this subsystem */
  	list_add(&ss->sibling, &rootnode.subsys_list);
  	ss->root = &rootnode;
  	css = ss->create(ss, dummytop);
  	/* We don't handle early failures gracefully */
  	BUG_ON(IS_ERR(css));
  	init_cgroup_css(css, ss, dummytop);

  	/* Update the init_css_set to contain a subsys
  	 * pointer to this state - since the subsystem is
  	 * newly registered, all tasks and hence the
  	 * init_css_set is in the subsystem's top cgroup. */
  	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

  	/* Remember whether any subsystem wants fork/exit callbacks. */
  	need_forkexit_callback |= ss->fork || ss->exit;

  	/* At system boot, before all subsystems have been
  	 * registered, no tasks have been forked, so we don't
  	 * need to invoke fork callbacks here. */
  	BUG_ON(!list_empty(&init_task.tasks));

  	mutex_init(&ss->hierarchy_mutex);
  	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
  	ss->active = 1;

  	/* this function shouldn't be used with modular subsystems, since they
  	 * need to register a subsys_id, among other things */
  	BUG_ON(ss->module);
  }
  
  /**
   * cgroup_load_subsys - load and register a modular subsystem at runtime
   * @ss: the subsystem to load
   *
   * This function should be called in a modular subsystem's initcall. If the
   * subsystem is built as a module, it will be assigned a new subsys_id and set
   * up for use. If the subsystem is built-in anyway, work is delegated to the
   * simpler cgroup_init_subsys.
   *
   * Returns 0 on success, -EINVAL for an invalid name or callback set,
   * -EBUSY when no free subsys[] slot exists, or the error reported by
   * ss->create() / cgroup_init_idr().
   */
  int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
  {
  	int i;
  	struct cgroup_subsys_state *css;

  	/* check name and function validity */
  	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
  	    ss->create == NULL || ss->destroy == NULL)
  		return -EINVAL;

  	/*
  	 * we don't support callbacks in modular subsystems. this check is
  	 * before the ss->module check for consistency; a subsystem that could
  	 * be a module should still have no callbacks even if the user isn't
  	 * compiling it as one.
  	 */
  	if (ss->fork || ss->exit)
  		return -EINVAL;

  	/*
  	 * an optionally modular subsystem is built-in: we want to do nothing,
  	 * since cgroup_init_subsys will have already taken care of it.
  	 */
  	if (ss->module == NULL) {
  		/* a few sanity checks */
  		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
  		BUG_ON(subsys[ss->subsys_id] != ss);
  		return 0;
  	}

  	/*
  	 * need to register a subsys id before anything else - for example,
  	 * init_cgroup_css needs it.
  	 */
  	mutex_lock(&cgroup_mutex);
  	/* find the first empty slot in the array */
  	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
  		if (subsys[i] == NULL)
  			break;
  	}
  	if (i == CGROUP_SUBSYS_COUNT) {
  		/* maximum number of subsystems already registered! */
  		mutex_unlock(&cgroup_mutex);
  		return -EBUSY;
  	}
  	/* assign ourselves the subsys_id */
  	ss->subsys_id = i;
  	subsys[i] = ss;

  	/*
  	 * no ss->create seems to need anything important in the ss struct, so
  	 * this can happen first (i.e. before the rootnode attachment).
  	 */
  	css = ss->create(ss, dummytop);
  	if (IS_ERR(css)) {
  		/* failure case - need to deassign the subsys[] slot. */
  		subsys[i] = NULL;
  		mutex_unlock(&cgroup_mutex);
  		return PTR_ERR(css);
  	}

  	list_add(&ss->sibling, &rootnode.subsys_list);
  	ss->root = &rootnode;

  	/* our new subsystem will be attached to the dummy hierarchy. */
  	init_cgroup_css(css, ss, dummytop);
  	/* init_idr must be after init_cgroup_css because it sets css->id. */
  	if (ss->use_id) {
  		int ret = cgroup_init_idr(ss, css);
  		if (ret) {
  			/*
  			 * Unwind everything done so far.  The subsystem
  			 * must come off rootnode's list, otherwise a stale
  			 * entry is left behind once the module goes away.
  			 * ss->destroy has to run while
  			 * dummytop->subsys[] still points at the css (it
  			 * uses that pointer to find its state), so the
  			 * pointer is cleared only afterwards.
  			 */
  			list_del(&ss->sibling);
  			ss->destroy(ss, dummytop);
  			dummytop->subsys[ss->subsys_id] = NULL;
  			subsys[i] = NULL;
  			mutex_unlock(&cgroup_mutex);
  			return ret;
  		}
  	}

  	/*
  	 * Now we need to entangle the css into the existing css_sets. unlike
  	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
  	 * will need a new pointer to it; done by iterating the css_set_table.
  	 * furthermore, modifying the existing css_sets will corrupt the hash
  	 * table state, so each changed css_set will need its hash recomputed.
  	 * this is all done under the css_set_lock.
  	 */
  	write_lock(&css_set_lock);
  	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
  		struct css_set *cg;
  		struct hlist_node *node, *tmp;
  		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

  		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
  			/* skip entries that we already rehashed */
  			if (cg->subsys[ss->subsys_id])
  				continue;
  			/* remove existing entry */
  			hlist_del(&cg->hlist);
  			/* set new value */
  			cg->subsys[ss->subsys_id] = css;
  			/* recompute hash and restore entry */
  			new_bucket = css_set_hash(cg->subsys);
  			hlist_add_head(&cg->hlist, new_bucket);
  		}
  	}
  	write_unlock(&css_set_lock);

  	mutex_init(&ss->hierarchy_mutex);
  	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
  	ss->active = 1;

  	/* success! */
  	mutex_unlock(&cgroup_mutex);
  	return 0;
  }
  EXPORT_SYMBOL_GPL(cgroup_load_subsys);
ddbcc7e8e   Paul Menage   Task Control Grou...
3605
3606
  
  /**
   * cgroup_unload_subsys - unload a modular subsystem
   * @ss: the subsystem to unload
   *
   * Meant to be called from a modular subsystem's exitcall.  By then the
   * module refcount is 0, so the subsystem is not attached to any
   * hierarchy other than the dummy one.
   */
  void cgroup_unload_subsys(struct cgroup_subsys *ss)
  {
  	struct cg_cgroup_link *cgl;

  	BUG_ON(!ss->module);

  	/*
  	 * The subsystem must not be in use; try_module_get in
  	 * parse_cgroupfs_options should keep it from becoming used while
  	 * it is being torn down.
  	 */
  	BUG_ON(ss->root != &rootnode);

  	mutex_lock(&cgroup_mutex);

  	/* give the subsys_id slot back */
  	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
  	subsys[ss->subsys_id] = NULL;

  	/* drop the subsystem from rootnode's subsystem list */
  	list_del(&ss->sibling);

  	/*
  	 * Detach the css from every css_set linked to dummytop.  Clearing
  	 * the pointer changes the set's hash, so each set is unhashed first
  	 * and re-inserted into the bucket matching its new contents.
  	 */
  	write_lock(&css_set_lock);
  	list_for_each_entry(cgl, &dummytop->css_sets, cgrp_link_list) {
  		struct css_set *cg = cgl->cg;

  		hlist_del(&cg->hlist);
  		BUG_ON(!cg->subsys[ss->subsys_id]);
  		cg->subsys[ss->subsys_id] = NULL;
  		hlist_add_head(&cg->hlist, css_set_hash(cg->subsys));
  	}
  	write_unlock(&css_set_lock);

  	/*
  	 * Free the css first and only then clear dummytop's pointer:
  	 * ss->destroy needs cgrp->subsys[] to locate its state.  This also
  	 * takes care of freeing the css_id.
  	 */
  	ss->destroy(ss, dummytop);
  	dummytop->subsys[ss->subsys_id] = NULL;

  	mutex_unlock(&cgroup_mutex);
  }
  EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
  
  /**
   * cgroup_init_early - cgroup initialization at system boot
   *
   * Initialize cgroups at system boot, and initialize any
   * subsystems that request early init.
   *
   * Always returns 0; inconsistent built-in subsystem descriptors are
   * treated as fatal (BUG).
   */
  int __init cgroup_init_early(void)
  {
  	int i;

  	/* Set up the initial css_set shared by all tasks at boot. */
  	atomic_set(&init_css_set.refcount, 1);
  	INIT_LIST_HEAD(&init_css_set.cg_links);
  	INIT_LIST_HEAD(&init_css_set.tasks);
  	INIT_HLIST_NODE(&init_css_set.hlist);
  	css_set_count = 1;
  	init_cgroup_root(&rootnode);
  	root_count = 1;
  	init_task.cgroups = &init_css_set;

  	/* Link init_css_set and dummytop to each other. */
  	init_css_set_link.cg = &init_css_set;
  	init_css_set_link.cgrp = dummytop;
  	list_add(&init_css_set_link.cgrp_link_list,
  		 &rootnode.top_cgroup.css_sets);
  	list_add(&init_css_set_link.cg_link_list,
  		 &init_css_set.cg_links);

  	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
  		INIT_HLIST_HEAD(&css_set_table[i]);

  	/* at bootup time, we don't worry about modular subsystems */
  	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
  		struct cgroup_subsys *ss = subsys[i];

  		BUG_ON(!ss->name);
  		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
  		BUG_ON(!ss->create);
  		BUG_ON(!ss->destroy);
  		/* A built-in subsystem's id must equal its subsys[] index. */
  		if (ss->subsys_id != i) {
  			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
  			       ss->name, ss->subsys_id);
  			BUG();
  		}

  		if (ss->early_init)
  			cgroup_init_subsys(ss);
  	}
  	return 0;
  }
  
  /**
   * cgroup_init - cgroup initialization
   *
   * Registers the cgroup filesystem and the "cgroups" /proc file, and
   * initializes every built-in subsystem that did not ask for early init.
   *
   * Returns 0 on success or a negative errno; on failure the backing
   * dev info set up at the start is destroyed again.
   */
  int __init cgroup_init(void)
  {
  	int ret;
  	int idx;

  	ret = bdi_init(&cgroup_backing_dev_info);
  	if (ret)
  		return ret;

  	/* at bootup time, we don't worry about modular subsystems */
  	for (idx = 0; idx < CGROUP_BUILTIN_SUBSYS_COUNT; idx++) {
  		struct cgroup_subsys *ss = subsys[idx];

  		if (!ss->early_init)
  			cgroup_init_subsys(ss);
  		if (ss->use_id)
  			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
  	}

  	/* Add init_css_set to the hash table */
  	hlist_add_head(&init_css_set.hlist, css_set_hash(init_css_set.subsys));
  	BUG_ON(!init_root_id(&rootnode));

  	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
  	if (!cgroup_kobj) {
  		ret = -ENOMEM;
  		goto undo_bdi;
  	}

  	ret = register_filesystem(&cgroup_fs_type);
  	if (ret < 0) {
  		kobject_put(cgroup_kobj);
  		goto undo_bdi;
  	}

  	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

  undo_bdi:
  	/* Reached on success as well; only tears down when ret is set. */
  	if (ret)
  		bdi_destroy(&cgroup_backing_dev_info);

  	return ret;
  }
b4f48b636   Paul Menage   Task Control Grou...
3760

a424316ca   Paul Menage   Task Control Grou...
3761
3762
3763
3764
3765
3766
  /*
   * proc_cgroup_show()
   *  - Print task's cgroup paths into seq_file, one line for each hierarchy
   *  - Used for /proc/<pid>/cgroup.
   *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
   *    doesn't really matter if tsk->cgroup changes after we read it,
956db3ca0   Cliff Wickman   hotplug cpu: move...
3767
   *    and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
a424316ca   Paul Menage   Task Control Grou...
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
   *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
   *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
   *    cgroup to top_cgroup.
   */
  
  /* TODO: Use a proper seq_file iterator */
  static int proc_cgroup_show(struct seq_file *m, void *v)
  {
  	struct pid *pid;
  	struct task_struct *tsk;
  	char *buf;
  	int retval;
  	struct cgroupfs_root *root;
  
  	retval = -ENOMEM;
  	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
  	if (!buf)
  		goto out;
  
  	retval = -ESRCH;
  	pid = m->private;
  	tsk = get_pid_task(pid, PIDTYPE_PID);
  	if (!tsk)
  		goto out_free;
  
  	retval = 0;
  
  	mutex_lock(&cgroup_mutex);
e5f6a8609   Li Zefan   cgroups: make roo...
3796
  	for_each_active_root(root) {
a424316ca   Paul Menage   Task Control Grou...
3797
  		struct cgroup_subsys *ss;
bd89aabc6   Paul Menage   Control groups: R...
3798
  		struct cgroup *cgrp;
a424316ca   Paul Menage   Task Control Grou...
3799
  		int count = 0;
2c6ab6d20   Paul Menage   cgroups: allow cg...
3800
  		seq_printf(m, "%d:", root->hierarchy_id);
a424316ca   Paul Menage   Task Control Grou...
3801
3802
  		for_each_subsys(root, ss)
  			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
c6d57f331   Paul Menage   cgroups: support ...
3803
3804
3805
  		if (strlen(root->name))
  			seq_printf(m, "%sname=%s", count ? "," : "",
  				   root->name);
a424316ca   Paul Menage   Task Control Grou...
3806
  		seq_putc(m, ':');
7717f7ba9   Paul Menage   cgroups: add a ba...
3807
  		cgrp = task_cgroup_from_root(tsk, root);
bd89aabc6   Paul Menage   Control groups: R...
3808
  		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
a424316ca   Paul Menage   Task Control Grou...
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
  		if (retval < 0)
  			goto out_unlock;
  		seq_puts(m, buf);
  		seq_putc(m, '
  ');
  	}
  
  out_unlock:
  	mutex_unlock(&cgroup_mutex);
  	put_task_struct(tsk);
  out_free:
  	kfree(buf);
  out:
  	return retval;
  }
  
  /*
   * Open handler for the per-task cgroup proc file: hands the pid stored
   * in the proc inode to proc_cgroup_show() as the seq_file private data.
   */
  static int cgroup_open(struct inode *inode, struct file *file)
  {
  	return single_open(file, proc_cgroup_show, PROC_I(inode)->pid);
  }
828c09509   Alexey Dobriyan   const: constify r...
3830
  const struct file_operations proc_cgroup_operations = {
a424316ca   Paul Menage   Task Control Grou...
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
  	.open		= cgroup_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= single_release,
  };
  
  /* Display information about each subsystem and each hierarchy */
  static int proc_cgroupstats_show(struct seq_file *m, void *v)
  {
  	int i;
a424316ca   Paul Menage   Task Control Grou...
3841

8bab8dded   Paul Menage   cgroups: add cgro...
3842
3843
  	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled
  ");
aae8aab40   Ben Blum   cgroups: revamp s...
3844
3845
3846
3847
3848
  	/*
  	 * ideally we don't want subsystems moving around while we do this.
  	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
  	 * subsys/hierarchy state.
  	 */
a424316ca   Paul Menage   Task Control Grou...
3849
  	mutex_lock(&cgroup_mutex);
a424316ca   Paul Menage   Task Control Grou...
3850
3851
  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
  		struct cgroup_subsys *ss = subsys[i];
aae8aab40   Ben Blum   cgroups: revamp s...
3852
3853
  		if (ss == NULL)
  			continue;
2c6ab6d20   Paul Menage   cgroups: allow cg...
3854
3855
3856
  		seq_printf(m, "%s\t%d\t%d\t%d
  ",
  			   ss->name, ss->root->hierarchy_id,
8bab8dded   Paul Menage   cgroups: add cgro...
3857
  			   ss->root->number_of_cgroups, !ss->disabled);
a424316ca   Paul Menage   Task Control Grou...
3858
3859
3860
3861
3862
3863
3864
  	}
  	mutex_unlock(&cgroup_mutex);
  	return 0;
  }
  
  static int cgroupstats_open(struct inode *inode, struct file *file)
  {
9dce07f1a   Al Viro   NULL noise: fs/*,...
3865
  	return single_open(file, proc_cgroupstats_show, NULL);
a424316ca   Paul Menage   Task Control Grou...
3866
  }
828c09509   Alexey Dobriyan   const: constify r...
3867
  static const struct file_operations proc_cgroupstats_operations = {
a424316ca   Paul Menage   Task Control Grou...
3868
3869
3870
3871
3872
  	.open = cgroupstats_open,
  	.read = seq_read,
  	.llseek = seq_lseek,
  	.release = single_release,
  };
b4f48b636   Paul Menage   Task Control Grou...
3873
3874
  /**
   * cgroup_fork - attach newly forked task to its parent's cgroup.
   * @child: pointer to task_struct of the newly forked child task.
   *
   * Description: A task inherits its parent's cgroup at fork().
   *
   * dup_task_struct() in fork.c already copied the parent's css_set
   * pointer into the child, but that copy was made without RCU or
   * cgroup_mutex protection and so may no longer be valid:
   * cgroup_attach_task() might have changed current->cgroups in the
   * meantime, allowing the previously referenced css_set to be removed
   * and freed.  The pointer is therefore re-read here under task_lock and
   * a reference is taken on it.
   *
   * At the point that cgroup_fork() is called, 'current' is the parent
   * task, and the passed argument 'child' points to the child task.
   */
  void cgroup_fork(struct task_struct *child)
  {
  	struct task_struct *parent = current;

  	task_lock(parent);
  	child->cgroups = parent->cgroups;
  	get_css_set(child->cgroups);
  	task_unlock(parent);

  	/* The child starts out on no css_set task list. */
  	INIT_LIST_HEAD(&child->cg_list);
  }
  
  /**
a043e3b2c   Li Zefan   cgroup: fix comments
3899
3900
3901
3902
3903
3904
   * cgroup_fork_callbacks - run fork callbacks
   * @child: the new task
   *
   * Called on a new task very soon before adding it to the
   * tasklist. No need to take any locks since no-one can
   * be operating on this task.
b4f48b636   Paul Menage   Task Control Grou...
3905
3906
3907
3908
3909
   */
  void cgroup_fork_callbacks(struct task_struct *child)
  {
  	if (need_forkexit_callback) {
  		int i;
aae8aab40   Ben Blum   cgroups: revamp s...
3910
3911
3912
3913
3914
3915
  		/*
  		 * forkexit callbacks are only supported for builtin
  		 * subsystems, and the builtin section of the subsys array is
  		 * immutable, so we don't need to lock the subsys array here.
  		 */
  		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
b4f48b636   Paul Menage   Task Control Grou...
3916
3917
3918
3919
3920
3921
3922
3923
  			struct cgroup_subsys *ss = subsys[i];
  			if (ss->fork)
  				ss->fork(ss, child);
  		}
  	}
  }
  
  /**
a043e3b2c   Li Zefan   cgroup: fix comments
3924
3925
3926
3927
3928
3929
3930
3931
   * cgroup_post_fork - called on a new task after adding it to the task list
   * @child: the task in question
   *
   * Adds the task to the list running through its css_set if necessary.
   * Has to be after the task is visible on the task list in case we race
   * with the first call to cgroup_iter_start() - to guarantee that the
   * new task ends up on its list.
   */
817929ec2   Paul Menage   Task Control Grou...
3932
3933
3934
3935
  void cgroup_post_fork(struct task_struct *child)
  {
  	if (use_task_css_set_links) {
  		write_lock(&css_set_lock);
b12b533fa   Lai Jiangshan   cgroups: add lock...
3936
  		task_lock(child);
817929ec2   Paul Menage   Task Control Grou...
3937
3938
  		if (list_empty(&child->cg_list))
  			list_add(&child->cg_list, &child->cgroups->tasks);
b12b533fa   Lai Jiangshan   cgroups: add lock...
3939
  		task_unlock(child);
817929ec2   Paul Menage   Task Control Grou...
3940
3941
3942
3943
  		write_unlock(&css_set_lock);
  	}
  }
  /**
b4f48b636   Paul Menage   Task Control Grou...
3944
3945
   * cgroup_exit - detach cgroup from exiting task
   * @tsk: pointer to task_struct of exiting process
a043e3b2c   Li Zefan   cgroup: fix comments
3946
   * @run_callback: run exit callbacks?
b4f48b636   Paul Menage   Task Control Grou...
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
   *
   * Description: Detach cgroup from @tsk and release it.
   *
   * Note that cgroups marked notify_on_release force every task in
   * them to take the global cgroup_mutex mutex when exiting.
   * This could impact scaling on very large systems.  Be reluctant to
   * use notify_on_release cgroups where very high task exit scaling
   * is required on large systems.
   *
   * the_top_cgroup_hack:
   *
   *    Set the exiting tasks cgroup to the root cgroup (top_cgroup).
   *
   *    We call cgroup_exit() while the task is still competent to
   *    handle notify_on_release(), then leave the task attached to the
   *    root cgroup in each hierarchy for the remainder of its exit.
   *
   *    To do this properly, we would increment the reference count on
   *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
   *    code we would add a second cgroup function call, to drop that
   *    reference.  This would just create an unnecessary hot spot on
   *    the top_cgroup reference count, to no avail.
   *
   *    Normally, holding a reference to a cgroup without bumping its
   *    count is unsafe.   The cgroup could go away, or someone could
   *    attach us to a different cgroup, decrementing the count on
   *    the first cgroup that we never incremented.  But in this case,
   *    top_cgroup isn't going away, and either task has PF_EXITING set,
956db3ca0   Cliff Wickman   hotplug cpu: move...
3975
3976
   *    which wards off any cgroup_attach_task() attempts, or task is a failed
   *    fork, never visible to cgroup_attach_task.
b4f48b636   Paul Menage   Task Control Grou...
3977
3978
3979
3980
   */
  void cgroup_exit(struct task_struct *tsk, int run_callbacks)
  {
  	int i;
817929ec2   Paul Menage   Task Control Grou...
3981
  	struct css_set *cg;
b4f48b636   Paul Menage   Task Control Grou...
3982
3983
  
  	if (run_callbacks && need_forkexit_callback) {
aae8aab40   Ben Blum   cgroups: revamp s...
3984
3985
3986
3987
3988
  		/*
  		 * modular subsystems can't use callbacks, so no need to lock
  		 * the subsys array
  		 */
  		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
b4f48b636   Paul Menage   Task Control Grou...
3989
3990
3991
3992
3993
  			struct cgroup_subsys *ss = subsys[i];
  			if (ss->exit)
  				ss->exit(ss, tsk);
  		}
  	}
817929ec2   Paul Menage   Task Control Grou...
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
  
  	/*
  	 * Unlink from the css_set task list if necessary.
  	 * Optimistically check cg_list before taking
  	 * css_set_lock
  	 */
  	if (!list_empty(&tsk->cg_list)) {
  		write_lock(&css_set_lock);
  		if (!list_empty(&tsk->cg_list))
  			list_del(&tsk->cg_list);
  		write_unlock(&css_set_lock);
  	}
b4f48b636   Paul Menage   Task Control Grou...
4006
4007
  	/* Reassign the task to the init_css_set. */
  	task_lock(tsk);
817929ec2   Paul Menage   Task Control Grou...
4008
4009
  	cg = tsk->cgroups;
  	tsk->cgroups = &init_css_set;
b4f48b636   Paul Menage   Task Control Grou...
4010
  	task_unlock(tsk);
817929ec2   Paul Menage   Task Control Grou...
4011
  	if (cg)
81a6a5cdd   Paul Menage   Task Control Grou...
4012
  		put_css_set_taskexit(cg);
b4f48b636   Paul Menage   Task Control Grou...
4013
  }
697f41610   Paul Menage   Task Control Grou...
4014
4015
  
  /**
a043e3b2c   Li Zefan   cgroup: fix comments
4016
4017
4018
   * cgroup_clone - clone the cgroup the given subsystem is attached to
   * @tsk: the task to be moved
   * @subsys: the given subsystem
e885dcde7   Serge E. Hallyn   cgroup_clone: use...
4019
   * @nodename: the name for the new cgroup
a043e3b2c   Li Zefan   cgroup: fix comments
4020
4021
4022
4023
   *
   * Duplicate the current cgroup in the hierarchy that the given
   * subsystem is attached to, and move this task into the new
   * child.
697f41610   Paul Menage   Task Control Grou...
4024
   */
e885dcde7   Serge E. Hallyn   cgroup_clone: use...
4025
4026
  int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
  							char *nodename)
697f41610   Paul Menage   Task Control Grou...
4027
4028
4029
  {
  	struct dentry *dentry;
  	int ret = 0;
697f41610   Paul Menage   Task Control Grou...
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
  	struct cgroup *parent, *child;
  	struct inode *inode;
  	struct css_set *cg;
  	struct cgroupfs_root *root;
  	struct cgroup_subsys *ss;
  
  	/* We shouldn't be called by an unregistered subsystem */
  	BUG_ON(!subsys->active);
  
  	/* First figure out what hierarchy and cgroup we're dealing
  	 * with, and pin them so we can drop cgroup_mutex */
  	mutex_lock(&cgroup_mutex);
   again:
  	root = subsys->root;
  	if (root == &rootnode) {
697f41610   Paul Menage   Task Control Grou...
4045
4046
4047
  		mutex_unlock(&cgroup_mutex);
  		return 0;
  	}
697f41610   Paul Menage   Task Control Grou...
4048

697f41610   Paul Menage   Task Control Grou...
4049
  	/* Pin the hierarchy */
1404f0656   Li Zefan   cgroups: fix lock...
4050
  	if (!atomic_inc_not_zero(&root->sb->s_active)) {
7b574b7b0   Li Zefan   cgroups: fix a ra...
4051
4052
4053
4054
  		/* We race with the final deactivate_super() */
  		mutex_unlock(&cgroup_mutex);
  		return 0;
  	}
697f41610   Paul Menage   Task Control Grou...
4055

817929ec2   Paul Menage   Task Control Grou...
4056
  	/* Keep the cgroup alive */
1404f0656   Li Zefan   cgroups: fix lock...
4057
4058
4059
  	task_lock(tsk);
  	parent = task_cgroup(tsk, subsys->subsys_id);
  	cg = tsk->cgroups;
817929ec2   Paul Menage   Task Control Grou...
4060
  	get_css_set(cg);
104cbd553   Lai Jiangshan   cgroups: use task...
4061
  	task_unlock(tsk);
1404f0656   Li Zefan   cgroups: fix lock...
4062

697f41610   Paul Menage   Task Control Grou...
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
  	mutex_unlock(&cgroup_mutex);
  
  	/* Now do the VFS work to create a cgroup */
  	inode = parent->dentry->d_inode;
  
  	/* Hold the parent directory mutex across this operation to
  	 * stop anyone else deleting the new cgroup */
  	mutex_lock(&inode->i_mutex);
  	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
  	if (IS_ERR(dentry)) {
  		printk(KERN_INFO
cfe36bde5   Diego Calleja   Improve cgroup pr...
4074
4075
  		       "cgroup: Couldn't allocate dentry for %s: %ld
  ", nodename,
697f41610   Paul Menage   Task Control Grou...
4076
4077
4078
4079
4080
4081
  		       PTR_ERR(dentry));
  		ret = PTR_ERR(dentry);
  		goto out_release;
  	}
  
  	/* Create the cgroup directory, which also creates the cgroup */
75139b827   Li Zefan   cgroups: remove s...
4082
  	ret = vfs_mkdir(inode, dentry, 0755);
bd89aabc6   Paul Menage   Control groups: R...
4083
  	child = __d_cgrp(dentry);
697f41610   Paul Menage   Task Control Grou...
4084
4085
4086
4087
4088
4089
4090
4091
  	dput(dentry);
  	if (ret) {
  		printk(KERN_INFO
  		       "Failed to create cgroup %s: %d
  ", nodename,
  		       ret);
  		goto out_release;
  	}
697f41610   Paul Menage   Task Control Grou...
4092
4093
4094
4095
4096
4097
4098
4099
  	/* The cgroup now exists. Retake cgroup_mutex and check
  	 * that we're still in the same state that we thought we
  	 * were. */
  	mutex_lock(&cgroup_mutex);
  	if ((root != subsys->root) ||
  	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
  		/* Aargh, we raced ... */
  		mutex_unlock(&inode->i_mutex);
817929ec2   Paul Menage   Task Control Grou...
4100
  		put_css_set(cg);
697f41610   Paul Menage   Task Control Grou...
4101

1404f0656   Li Zefan   cgroups: fix lock...
4102
  		deactivate_super(root->sb);
697f41610   Paul Menage   Task Control Grou...
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
  		/* The cgroup is still accessible in the VFS, but
  		 * we're not going to try to rmdir() it at this
  		 * point. */
  		printk(KERN_INFO
  		       "Race in cgroup_clone() - leaking cgroup %s
  ",
  		       nodename);
  		goto again;
  	}
  
  	/* do any required auto-setup */
  	for_each_subsys(root, ss) {
  		if (ss->post_clone)
  			ss->post_clone(ss, child);
  	}
  
  	/* All seems fine. Finish by moving the task into the new cgroup */
956db3ca0   Cliff Wickman   hotplug cpu: move...
4120
  	ret = cgroup_attach_task(child, tsk);
697f41610   Paul Menage   Task Control Grou...
4121
4122
4123
4124
  	mutex_unlock(&cgroup_mutex);
  
   out_release:
  	mutex_unlock(&inode->i_mutex);
81a6a5cdd   Paul Menage   Task Control Grou...
4125
4126
  
  	mutex_lock(&cgroup_mutex);
817929ec2   Paul Menage   Task Control Grou...
4127
  	put_css_set(cg);
81a6a5cdd   Paul Menage   Task Control Grou...
4128
  	mutex_unlock(&cgroup_mutex);
1404f0656   Li Zefan   cgroups: fix lock...
4129
  	deactivate_super(root->sb);
697f41610   Paul Menage   Task Control Grou...
4130
4131
  	return ret;
  }
a043e3b2c   Li Zefan   cgroup: fix comments
4132
  /**
313e924c0   Grzegorz Nosek   cgroups: relax ns...
4133
   * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
a043e3b2c   Li Zefan   cgroup: fix comments
4134
   * @cgrp: the cgroup in question
313e924c0   Grzegorz Nosek   cgroups: relax ns...
4135
   * @task: the task in question
a043e3b2c   Li Zefan   cgroup: fix comments
4136
   *
313e924c0   Grzegorz Nosek   cgroups: relax ns...
4137
4138
   * See if @cgrp is a descendant of @task's cgroup in the appropriate
   * hierarchy.
697f41610   Paul Menage   Task Control Grou...
4139
4140
4141
4142
4143
4144
   *
   * If we are sending in dummytop, then presumably we are creating
   * the top cgroup in the subsystem.
   *
   * Called only by the ns (nsproxy) cgroup.
   */
313e924c0   Grzegorz Nosek   cgroups: relax ns...
4145
  int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
697f41610   Paul Menage   Task Control Grou...
4146
4147
4148
  {
  	int ret;
  	struct cgroup *target;
697f41610   Paul Menage   Task Control Grou...
4149

bd89aabc6   Paul Menage   Control groups: R...
4150
  	if (cgrp == dummytop)
697f41610   Paul Menage   Task Control Grou...
4151
  		return 1;
7717f7ba9   Paul Menage   cgroups: add a ba...
4152
  	target = task_cgroup_from_root(task, cgrp->root);
bd89aabc6   Paul Menage   Control groups: R...
4153
4154
4155
  	while (cgrp != target && cgrp!= cgrp->top_cgroup)
  		cgrp = cgrp->parent;
  	ret = (cgrp == target);
697f41610   Paul Menage   Task Control Grou...
4156
4157
  	return ret;
  }
81a6a5cdd   Paul Menage   Task Control Grou...
4158

bd89aabc6   Paul Menage   Control groups: R...
4159
  static void check_for_release(struct cgroup *cgrp)
81a6a5cdd   Paul Menage   Task Control Grou...
4160
4161
4162
  {
  	/* All of these checks rely on RCU to keep the cgroup
  	 * structure alive */
bd89aabc6   Paul Menage   Control groups: R...
4163
4164
  	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
  	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
81a6a5cdd   Paul Menage   Task Control Grou...
4165
4166
4167
4168
4169
  		/* Control Group is currently removeable. If it's not
  		 * already queued for a userspace notification, queue
  		 * it now */
  		int need_schedule_work = 0;
  		spin_lock(&release_list_lock);
bd89aabc6   Paul Menage   Control groups: R...
4170
4171
4172
  		if (!cgroup_is_removed(cgrp) &&
  		    list_empty(&cgrp->release_list)) {
  			list_add(&cgrp->release_list, &release_list);
81a6a5cdd   Paul Menage   Task Control Grou...
4173
4174
4175
4176
4177
4178
4179
  			need_schedule_work = 1;
  		}
  		spin_unlock(&release_list_lock);
  		if (need_schedule_work)
  			schedule_work(&release_agent_work);
  	}
  }
d7b9fff71   Daisuke Nishimura   cgroup: introduce...
4180
4181
  /* Caller must verify that the css is not for root cgroup */
  void __css_put(struct cgroup_subsys_state *css, int count)
81a6a5cdd   Paul Menage   Task Control Grou...
4182
  {
bd89aabc6   Paul Menage   Control groups: R...
4183
  	struct cgroup *cgrp = css->cgroup;
3dece8347   KAMEZAWA Hiroyuki   cgroup: catch bad...
4184
  	int val;
81a6a5cdd   Paul Menage   Task Control Grou...
4185
  	rcu_read_lock();
d7b9fff71   Daisuke Nishimura   cgroup: introduce...
4186
  	val = atomic_sub_return(count, &css->refcnt);
3dece8347   KAMEZAWA Hiroyuki   cgroup: catch bad...
4187
  	if (val == 1) {
ec64f5154   KAMEZAWA Hiroyuki   cgroup: fix frequ...
4188
4189
4190
4191
  		if (notify_on_release(cgrp)) {
  			set_bit(CGRP_RELEASABLE, &cgrp->flags);
  			check_for_release(cgrp);
  		}
887032670   KAMEZAWA Hiroyuki   cgroup avoid perm...
4192
  		cgroup_wakeup_rmdir_waiter(cgrp);
81a6a5cdd   Paul Menage   Task Control Grou...
4193
4194
  	}
  	rcu_read_unlock();
3dece8347   KAMEZAWA Hiroyuki   cgroup: catch bad...
4195
  	WARN_ON_ONCE(val < 1);
81a6a5cdd   Paul Menage   Task Control Grou...
4196
  }
67523c48a   Ben Blum   cgroups: blkio su...
4197
  EXPORT_SYMBOL_GPL(__css_put);
81a6a5cdd   Paul Menage   Task Control Grou...
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
  
  /*
   * Notify userspace when a cgroup is released, by running the
   * configured release agent with the name of the cgroup (path
   * relative to the root of cgroup file system) as the argument.
   *
   * Most likely, this user command will try to rmdir this cgroup.
   *
   * This races with the possibility that some other task will be
   * attached to this cgroup before it is removed, or that some other
   * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
   * The presumed 'rmdir' will fail quietly if this cgroup is no longer
   * unused, and this cgroup will be reprieved from its death sentence,
   * to continue to serve a useful existence.  Next time it's released,
   * we will get notified again, if it still has 'notify_on_release' set.
   *
   * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
   * means only wait until the task is successfully execve()'d.  The
   * separate release agent task is forked by call_usermodehelper(),
   * then control in this thread returns here, without waiting for the
   * release agent task.  We don't bother to wait because the caller of
   * this routine has no use for the exit status of the release agent
   * task, so no sense holding our caller up for that.
81a6a5cdd   Paul Menage   Task Control Grou...
4221
   */
81a6a5cdd   Paul Menage   Task Control Grou...
4222
4223
4224
4225
4226
4227
4228
4229
  static void cgroup_release_agent(struct work_struct *work)
  {
  	BUG_ON(work != &release_agent_work);
  	mutex_lock(&cgroup_mutex);
  	spin_lock(&release_list_lock);
  	while (!list_empty(&release_list)) {
  		char *argv[3], *envp[3];
  		int i;
e788e066c   Paul Menage   cgroup files: mov...
4230
  		char *pathbuf = NULL, *agentbuf = NULL;
bd89aabc6   Paul Menage   Control groups: R...
4231
  		struct cgroup *cgrp = list_entry(release_list.next,
81a6a5cdd   Paul Menage   Task Control Grou...
4232
4233
  						    struct cgroup,
  						    release_list);
bd89aabc6   Paul Menage   Control groups: R...
4234
  		list_del_init(&cgrp->release_list);
81a6a5cdd   Paul Menage   Task Control Grou...
4235
4236
  		spin_unlock(&release_list_lock);
  		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
e788e066c   Paul Menage   cgroup files: mov...
4237
4238
4239
4240
4241
4242
4243
  		if (!pathbuf)
  			goto continue_free;
  		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
  			goto continue_free;
  		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
  		if (!agentbuf)
  			goto continue_free;
81a6a5cdd   Paul Menage   Task Control Grou...
4244
4245
  
  		i = 0;
e788e066c   Paul Menage   cgroup files: mov...
4246
4247
  		argv[i++] = agentbuf;
  		argv[i++] = pathbuf;
81a6a5cdd   Paul Menage   Task Control Grou...
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
  		argv[i] = NULL;
  
  		i = 0;
  		/* minimal command environment */
  		envp[i++] = "HOME=/";
  		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
  		envp[i] = NULL;
  
  		/* Drop the lock while we invoke the usermode helper,
  		 * since the exec could involve hitting disk and hence
  		 * be a slow process */
  		mutex_unlock(&cgroup_mutex);
  		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
81a6a5cdd   Paul Menage   Task Control Grou...
4261
  		mutex_lock(&cgroup_mutex);
e788e066c   Paul Menage   cgroup files: mov...
4262
4263
4264
   continue_free:
  		kfree(pathbuf);
  		kfree(agentbuf);
81a6a5cdd   Paul Menage   Task Control Grou...
4265
4266
4267
4268
4269
  		spin_lock(&release_list_lock);
  	}
  	spin_unlock(&release_list_lock);
  	mutex_unlock(&cgroup_mutex);
  }
8bab8dded   Paul Menage   cgroups: add cgro...
4270
4271
4272
4273
4274
4275
4276
4277
4278
  
  /*
   * Handler for the "cgroup_disable=" boot parameter.  @str is a
   * comma-separated list of subsystem names; each name that matches a
   * built-in subsystem gets that subsystem's ->disabled flag set.
   * Empty tokens and unknown names are ignored.  Always returns 1.
   */
  static int __init cgroup_disable(char *str)
  {
  	int i;
  	char *token;

  	while ((token = strsep(&str, ",")) != NULL) {
  		if (!*token)
  			continue;
  		/*
  		 * cgroup_disable, being at boot time, can't know about module
  		 * subsystems, so we don't worry about them.
  		 */
  		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
  			struct cgroup_subsys *ss = subsys[i];

  			if (!strcmp(token, ss->name)) {
  				ss->disabled = 1;
  				printk(KERN_INFO "Disabling %s control group"
  					" subsystem\n", ss->name);
  				break;
  			}
  		}
  	}
  	return 1;
  }
  __setup("cgroup_disable=", cgroup_disable);
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
  
  /*
   * Functions for CSS ID.
   */
  
  /*
   * To get an ID other than 0, this should be called when !cgroup_is_removed().
   */
  unsigned short css_id(struct cgroup_subsys_state *css)
  {
7f0f15464   KAMEZAWA Hiroyuki   memcg: fix css_id...
4308
4309
4310
4311
4312
4313
4314
4315
4316
  	struct css_id *cssid;
  
  	/*
  	 * This css_id() can return correct value when somone has refcnt
  	 * on this or this is under rcu_read_lock(). Once css->id is allocated,
  	 * it's unchanged until freed.
  	 */
  	cssid = rcu_dereference_check(css->id,
  			rcu_read_lock_held() || atomic_read(&css->refcnt));
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4317
4318
4319
4320
4321
  
  	if (cssid)
  		return cssid->id;
  	return 0;
  }
67523c48a   Ben Blum   cgroups: blkio su...
4322
  EXPORT_SYMBOL_GPL(css_id);
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4323
4324
4325
  
  unsigned short css_depth(struct cgroup_subsys_state *css)
  {
7f0f15464   KAMEZAWA Hiroyuki   memcg: fix css_id...
4326
4327
4328
4329
  	struct css_id *cssid;
  
  	cssid = rcu_dereference_check(css->id,
  			rcu_read_lock_held() || atomic_read(&css->refcnt));
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4330
4331
4332
4333
4334
  
  	if (cssid)
  		return cssid->depth;
  	return 0;
  }
67523c48a   Ben Blum   cgroups: blkio su...
4335
  EXPORT_SYMBOL_GPL(css_depth);
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4336

747388d78   KAMEZAWA Hiroyuki   memcg: fix css_is...
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
  /**
   * css_is_ancestor - test whether "root" css is an ancestor of "child"
   * @child: the css to be tested.
   * @root: the css supposed to be an ancestor of the child.
   *
   * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
   * this function reads css->id, it uses rcu_dereference() and rcu_read_lock().
   * But, considering usual usage, the csses should be valid objects after the
   * test. Assuming that the caller will do some action to the child if this
   * returns true, the caller must hold a reference count on "child".
   * If "child" is a valid object and this returns true, "root" is valid, too.
   */
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4349
  bool css_is_ancestor(struct cgroup_subsys_state *child,
0b7f569e4   KAMEZAWA Hiroyuki   memcg: fix OOM ki...
4350
  		    const struct cgroup_subsys_state *root)
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4351
  {
747388d78   KAMEZAWA Hiroyuki   memcg: fix css_is...
4352
4353
4354
  	struct css_id *child_id;
  	struct css_id *root_id;
  	bool ret = true;
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4355

747388d78   KAMEZAWA Hiroyuki   memcg: fix css_is...
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
  	rcu_read_lock();
  	child_id  = rcu_dereference(child->id);
  	root_id = rcu_dereference(root->id);
  	if (!child_id
  	    || !root_id
  	    || (child_id->depth < root_id->depth)
  	    || (child_id->stack[root_id->depth] != root_id->id))
  		ret = false;
  	rcu_read_unlock();
  	return ret;
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
  }
  
  /* RCU callback: free the css_id that embeds @head. */
  static void __free_css_id_cb(struct rcu_head *head)
  {
  	kfree(container_of(head, struct css_id, rcu_head));
  }
  
  void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
  {
  	struct css_id *cssid = css->id;

  	/* Nothing to do when called before the css_id was initialized. */
  	if (cssid == NULL)
  		return;

  	BUG_ON(!ss->use_id);

  	/* Clear both directions of the css <-> id link. */
  	rcu_assign_pointer(cssid->css, NULL);
  	rcu_assign_pointer(css->id, NULL);

  	/* Return the numeric id to the subsystem's idr. */
  	spin_lock(&ss->id_lock);
  	idr_remove(&ss->idr, cssid->id);
  	spin_unlock(&ss->id_lock);

  	/* The structure itself is freed from an RCU callback. */
  	call_rcu(&cssid->rcu_head, __free_css_id_cb);
  }
  EXPORT_SYMBOL_GPL(free_css_id);
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
  
  /*
   * This is called by init or create(), so calls to this function are
   * always serialized (by cgroup_mutex at create()).
   */
  
  static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
  {
  	struct css_id *cssid;
  	int new_id, err, bytes;

  	BUG_ON(!ss->use_id);

  	/* Room for the struct plus (depth + 1) unsigned short entries. */
  	bytes = sizeof(*cssid) + sizeof(unsigned short) * (depth + 1);
  	cssid = kzalloc(bytes, GFP_KERNEL);
  	if (!cssid)
  		return ERR_PTR(-ENOMEM);

  	/* get id */
  	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
  		err = -ENOMEM;
  		goto free_cssid;
  	}

  	spin_lock(&ss->id_lock);
  	/* Don't use 0. allocates an ID of 1-65535 */
  	err = idr_get_new_above(&ss->idr, cssid, 1, &new_id);
  	spin_unlock(&ss->id_lock);

  	/* Any failure from the idr is reported to the caller as -ENOSPC. */
  	if (err) {
  		err = -ENOSPC;
  		goto free_cssid;
  	}

  	/* An id beyond CSS_ID_MAX is handed back and treated as no space. */
  	if (new_id > CSS_ID_MAX) {
  		err = -ENOSPC;
  		spin_lock(&ss->id_lock);
  		idr_remove(&ss->idr, new_id);
  		spin_unlock(&ss->id_lock);
  		goto free_cssid;
  	}

  	cssid->id = new_id;
  	cssid->depth = depth;
  	return cssid;

  free_cssid:
  	kfree(cssid);
  	return ERR_PTR(err);
  }
e6a1105ba   Ben Blum   cgroups: subsyste...
4441
4442
  /*
   * Set up @ss's id lock and idr, then allocate the depth-0 css_id and
   * bind it to @rootcss.  Returns 0 on success or the error carried by
   * get_new_cssid().
   */
  static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
  					    struct cgroup_subsys_state *rootcss)
  {
  	struct css_id *root_id;

  	spin_lock_init(&ss->id_lock);
  	idr_init(&ss->idr);

  	root_id = get_new_cssid(ss, 0);
  	if (IS_ERR(root_id))
  		return PTR_ERR(root_id);

  	/* At depth 0 the stack holds only the root's own id. */
  	root_id->stack[0] = root_id->id;
  	root_id->css = rootcss;
  	rootcss->id = root_id;

  	return 0;
  }
  
  static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
  			struct cgroup *child)
  {
  	int subsys_id, i, depth = 0;
  	struct cgroup_subsys_state *parent_css, *child_css;
fae9c7917   Li Zefan   cgroup: Fix an RC...
4463
  	struct css_id *child_id, *parent_id;
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4464
4465
4466
4467
  
  	subsys_id = ss->subsys_id;
  	parent_css = parent->subsys[subsys_id];
  	child_css = child->subsys[subsys_id];
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4468
  	parent_id = parent_css->id;
94b3dd0f7   Greg Thelen   cgroups: alloc_cs...
4469
  	depth = parent_id->depth + 1;
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
  
  	child_id = get_new_cssid(ss, depth);
  	if (IS_ERR(child_id))
  		return PTR_ERR(child_id);
  
  	for (i = 0; i < depth; i++)
  		child_id->stack[i] = parent_id->stack[i];
  	child_id->stack[depth] = child_id->id;
  	/*
  	 * child_id->css pointer will be set after this cgroup is available
  	 * see cgroup_populate_dir()
  	 */
  	rcu_assign_pointer(child_css->id, child_id);
  
  	return 0;
  }
  
  /**
   * css_lookup - lookup css by id
   * @ss: cgroup subsys to be looked into.
   * @id: the id
   *
   * Returns pointer to cgroup_subsys_state if there is valid one with id.
   * NULL if not. Should be called under rcu_read_lock()
   */
  struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
  {
  	struct css_id *cssid;

  	BUG_ON(!ss->use_id);

  	/* An id with no idr entry yields NULL; otherwise hand back its css. */
  	cssid = idr_find(&ss->idr, id);
  	return unlikely(!cssid) ? NULL : rcu_dereference(cssid->css);
  }
  EXPORT_SYMBOL_GPL(css_lookup);
38460b48d   KAMEZAWA Hiroyuki   cgroup: CSS ID su...
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
  
  /**
   * css_get_next - lookup next cgroup under specified hierarchy.
   * @ss: pointer to subsystem
   * @id: current position of iteration.
   * @root: pointer to css. search tree under this.
   * @foundid: position of found object.
   *
   * Search next css under the specified hierarchy of rootid. Calling under
   * rcu_read_lock() is necessary. Returns NULL if it reaches the end.
   */
  struct cgroup_subsys_state *
  css_get_next(struct cgroup_subsys *ss, int id,
  	     struct cgroup_subsys_state *root, int *foundid)
  {
  	struct cgroup_subsys_state *found = NULL;
  	struct css_id *cand;
  	int pos;
  	int rootid = css_id(root);
  	int depth = css_depth(root);

  	if (!rootid)
  		return NULL;

  	BUG_ON(!ss->use_id);

  	/* Scan ids upward starting from @id. */
  	for (pos = id; ; pos++) {
  		/* idr_get_next() updates pos to the id of the entry it returns. */
  		spin_lock(&ss->id_lock);
  		cand = idr_get_next(&ss->idr, &pos);
  		spin_unlock(&ss->id_lock);

  		if (!cand)
  			break;

  		/* Skip entries that are not underneath @root. */
  		if (cand->depth < depth || cand->stack[depth] != rootid)
  			continue;

  		found = rcu_dereference(cand->css);
  		if (found) {
  			*foundid = pos;
  			break;
  		}
  		/* css pointer is NULL: keep scanning from the next id */
  	}
  	return found;
  }
fe6934354   Paul Menage   cgroups: move the...
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
  #ifdef CONFIG_CGROUP_DEBUG
  /* Allocate a zeroed css for the debug subsystem; -ENOMEM on failure. */
  static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
  						   struct cgroup *cont)
  {
  	struct cgroup_subsys_state *css;

  	css = kzalloc(sizeof(*css), GFP_KERNEL);
  	return css ? css : ERR_PTR(-ENOMEM);
  }
  
  /* Free the debug subsystem's css attached to @cont. */
  static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
  {
  	struct cgroup_subsys_state *css = cont->subsys[debug_subsys_id];

  	kfree(css);
  }
  
  /* Report the current value of @cont's count field. */
  static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
  {
  	u64 refs = atomic_read(&cont->count);

  	return refs;
  }
  
  /* Report cgroup_task_count() for @cont. */
  static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
  {
  	u64 ntasks = cgroup_task_count(cont);

  	return ntasks;
  }
  
  static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
  {
  	return (u64)(unsigned long)current->cgroups;
  }
  
  /* Report the refcount of the reading task's css_set. */
  static u64 current_css_set_refcount_read(struct cgroup *cont,
  					   struct cftype *cft)
  {
  	u64 refs;

  	/* current->cgroups is sampled inside an RCU read-side section. */
  	rcu_read_lock();
  	refs = atomic_read(&current->cgroups->refcount);
  	rcu_read_unlock();

  	return refs;
  }
7717f7ba9   Paul Menage   cgroups: add a ba...
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
  /*
   * Emit one "Root <hierarchy_id> group <name>" line into @seq for each
   * cgroup linked from the reading task's css_set.  A cgroup without a
   * dentry is shown with the name "?".  Always returns 0.
   */
  static int current_css_set_cg_links_read(struct cgroup *cont,
  					 struct cftype *cft,
  					 struct seq_file *seq)
  {
  	struct cg_cgroup_link *link;
  	struct css_set *cg;

  	/* The cg_links list is walked with css_set_lock held for reading. */
  	read_lock(&css_set_lock);
  	rcu_read_lock();
  	cg = rcu_dereference(current->cgroups);
  	list_for_each_entry(link, &cg->cg_links, cg_link_list) {
  		struct cgroup *c = link->cgrp;
  		const char *name;

  		if (c->dentry)
  			name = c->dentry->d_name.name;
  		else
  			name = "?";
  		seq_printf(seq, "Root %d group %s\n",
  			   c->root->hierarchy_id, name);
  	}
  	rcu_read_unlock();
  	read_unlock(&css_set_lock);
  	return 0;
  }
  
  /* Upper bound on the number of tasks listed under each css_set. */
  #define MAX_TASKS_SHOWN_PER_CSS 25

  /*
   * For every css_set linked to @cont, print its address followed by the
   * pids of its tasks.  At most MAX_TASKS_SHOWN_PER_CSS tasks are listed
   * per css_set; if there are more, a "  ..." line is printed instead of
   * the rest.  Always returns 0.
   */
  static int cgroup_css_links_read(struct cgroup *cont,
  				 struct cftype *cft,
  				 struct seq_file *seq)
  {
  	struct cg_cgroup_link *link;

  	read_lock(&css_set_lock);
  	list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
  		struct css_set *cg = link->cg;
  		struct task_struct *task;
  		int count = 0;

  		seq_printf(seq, "css_set %p\n", cg);
  		list_for_each_entry(task, &cg->tasks, cg_list) {
  			/* ">=" so that exactly the limit is shown, not limit + 1 */
  			if (count++ >= MAX_TASKS_SHOWN_PER_CSS) {
  				seq_puts(seq, "  ...\n");
  				break;
  			} else {
  				seq_printf(seq, "  task %d\n",
  					   task_pid_vnr(task));
  			}
  		}
  	}
  	read_unlock(&css_set_lock);
  	return 0;
  }
fe6934354   Paul Menage   cgroups: move the...
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
  /* Report whether CGRP_RELEASABLE is set in @cgrp's flags. */
  static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
  {
  	u64 releasable = test_bit(CGRP_RELEASABLE, &cgrp->flags);

  	return releasable;
  }
  
  static struct cftype debug_files[] =  {
  	{
  		.name = "cgroup_refcount",
  		.read_u64 = cgroup_refcount_read,
  	},
  	{
  		.name = "taskcount",
  		.read_u64 = debug_taskcount_read,
  	},
  
  	{
  		.name = "current_css_set",
  		.read_u64 = current_css_set_read,
  	},
  
  	{
  		.name = "current_css_set_refcount",
  		.read_u64 = current_css_set_refcount_read,
  	},
  
  	{
7717f7ba9   Paul Menage   cgroups: add a ba...
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
  		.name = "current_css_set_cg_links",
  		.read_seq_string = current_css_set_cg_links_read,
  	},
  
  	{
  		.name = "cgroup_css_links",
  		.read_seq_string = cgroup_css_links_read,
  	},
  
  	{
fe6934354   Paul Menage   cgroups: move the...
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
  		.name = "releasable",
  		.read_u64 = releasable_read,
  	},
  };
  
  /* Add every entry of debug_files[] to @cont; returns cgroup_add_files()'s result. */
  static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
  {
  	int nfiles = ARRAY_SIZE(debug_files);

  	return cgroup_add_files(cont, ss, debug_files, nfiles);
  }
  
  /*
   * Descriptor for the "debug" subsystem: gives it its name and id and
   * wires up the debug_create/debug_destroy/debug_populate callbacks.
   * Only built when CONFIG_CGROUP_DEBUG is set.
   */
  struct cgroup_subsys debug_subsys = {
  	.name = "debug",
  	.create = debug_create,
  	.destroy = debug_destroy,
  	.populate = debug_populate,
  	.subsys_id = debug_subsys_id,
  };
  #endif /* CONFIG_CGROUP_DEBUG */