/*
 *  kernel/cpuset.c
 *
 *  Processor and Memory placement constraints for sets of tasks.
 *
 *  Copyright (C) 2003 BULL SA.
 *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
 *  Copyright (C) 2006 Google, Inc
 *
 *  Portions derived from Patrick Mochel's sysfs code.
 *  sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 *  2003-10-10 Written by Simon Derr.
 *  2003-10-22 Updates by Stephen Hemminger.
 *  2004 May-July Rework by Paul Jackson.
 *  2006 Rework by Paul Menage to use generic cgroups
 *  2008 Rework of the scheduler domains and CPU hotplug handling
 *       by Max Krasnyansky
 *
 *  This file is subject to the terms and conditions of the GNU General Public
 *  License.  See the file COPYING in the main directory of the Linux
 *  distribution for more details.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>

#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>

/*
 * Workqueue for cpuset related tasks.
 *
 * Using kevent workqueue may cause deadlock when memory_migrate
 * is set. So we create a separate workqueue thread for cpuset.
 */
static struct workqueue_struct *cpuset_wq;

/*
 * Tracks how many cpusets are currently defined in system.
 * When there is only one cpuset (the root cpuset) we can
 * short circuit some hooks.
 */
int number_of_cpusets __read_mostly;

/* Forward declare cgroup structures */
struct cgroup_subsys cpuset_subsys;
struct cpuset;

/* See "Frequency meter" comments, below. */

struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};

struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */
	cpumask_var_t cpus_allowed;	/* CPUs allowed to tasks in cpuset */
	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */
	struct cpuset *parent;		/* my parent */

	struct fmeter fmeter;		/* memory_pressure filter */

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;

	/* used for walking a cpuset hierarchy */
	struct list_head stack_list;
};

/* Retrieve the cpuset for a cgroup */
static inline struct cpuset *cgroup_cs(struct cgroup *cont)
{
	return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
			    struct cpuset, css);
}

/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task)
{
	return container_of(task_subsys_state(task, cpuset_subsys_id),
			    struct cpuset, css);
}

#ifdef CONFIG_NUMA
static inline bool task_has_mempolicy(struct task_struct *task)
{
	return task->mempolicy;
}
#else
static inline bool task_has_mempolicy(struct task_struct *task)
{
	return false;
}
#endif

/* bits in struct cpuset flags field */
typedef enum {
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* convenient tests for these bits */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_hardwall(const struct cpuset *cs)
{
	return test_bit(CS_MEM_HARDWALL, &cs->flags);
}

static inline int is_sched_load_balance(const struct cpuset *cs)
{
	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}

static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}

static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}

static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

static struct cpuset top_cpuset = {
	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
};

/*
 * There are two global mutexes guarding cpuset structures.  The first
 * is the main control groups cgroup_mutex, accessed via
 * cgroup_lock()/cgroup_unlock().  The second is the cpuset-specific
 * callback_mutex, below. They can nest.  It is ok to first take
 * cgroup_mutex, then nest callback_mutex.  We also require taking
 * task_lock() when dereferencing a task's cpuset pointer.  See "The
 * task_lock() exception", at the end of this comment.
 *
 * A task must hold both mutexes to modify cpusets.  If a task
 * holds cgroup_mutex, then it blocks others wanting that mutex,
 * ensuring that it is the only task able to also acquire callback_mutex
 * and be able to modify cpusets.  It can perform various checks on
 * the cpuset structure first, knowing nothing will change.  It can
 * also allocate memory while just holding cgroup_mutex.  While it is
 * performing these checks, various callback routines can briefly
 * acquire callback_mutex to query cpusets.  Once it is ready to make
 * the changes, it takes callback_mutex, blocking everyone else.
 *
 * Calls to the kernel memory allocator can not be made while holding
 * callback_mutex, as that would risk double tripping on callback_mutex
 * from one of the callbacks into the cpuset code from within
 * __alloc_pages().
 *
 * If a task is only holding callback_mutex, then it has read-only
 * access to cpusets.
 *
 * Now, the task_struct fields mems_allowed and mempolicy may be changed
 * by another task, so we use alloc_lock in the task_struct to protect
 * them.
 *
 * The cpuset_common_file_read() handlers only hold callback_mutex across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
 * Accessing a task's cpuset should be done in accordance with the
 * guidelines for accessing subsystem state in kernel/cgroup.c
 */

static DEFINE_MUTEX(callback_mutex);
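
/*
 * Editor's sketch (not part of the original source): the write-side
 * lock ordering described above, as used later in this file by
 * update_cpumask() and update_nodemask():
 *
 *	cgroup_lock();				- outer mutex, taken first
 *	retval = validate_change(cs, trialcs);	- checks, allocations
 *	mutex_lock(&callback_mutex);		- inner mutex, nested second
 *	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 *	mutex_unlock(&callback_mutex);
 *	cgroup_unlock();
 */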

/*
 * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
 * buffers.  They are statically allocated to prevent using excess stack
 * when calling cpuset_print_task_mems_allowed().
 */
#define CPUSET_NAME_LEN		(128)
#define	CPUSET_NODELIST_LEN	(256)
static char cpuset_name[CPUSET_NAME_LEN];
static char cpuset_nodelist[CPUSET_NODELIST_LEN];
static DEFINE_SPINLOCK(cpuset_buffer_lock);

/*
 * This is ugly, but preserves the userspace API for existing cpuset
 * users. If someone tries to mount the "cpuset" filesystem, we
 * silently switch it to mount "cgroup" instead
 */
static struct dentry *cpuset_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name, void *data)
{
	struct file_system_type *cgroup_fs = get_fs_type("cgroup");
	struct dentry *ret = ERR_PTR(-ENODEV);
	if (cgroup_fs) {
		char mountopts[] =
			"cpuset,noprefix,"
			"release_agent=/sbin/cpuset_release_agent";
		ret = cgroup_fs->mount(cgroup_fs, flags,
					   unused_dev_name, mountopts);
		put_filesystem(cgroup_fs);
	}
	return ret;
}

static struct file_system_type cpuset_fs_type = {
	.name = "cpuset",
	.mount = cpuset_mount,
};
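
/*
 * Editor's note (illustrative, not part of the original source): with
 * the mount options hard-coded above, a legacy mount such as
 *
 *	mount -t cpuset cpuset /dev/cpuset
 *
 * is transparently handled as if the user had run
 *
 *	mount -t cgroup -o cpuset,noprefix,release_agent=/sbin/cpuset_release_agent \
 *		cgroup /dev/cpuset
 */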

/*
 * Return in pmask the portion of a cpuset's cpus_allowed that
 * are online.  If none are online, walk up the cpuset hierarchy
 * until we find one that does have some online cpus.  If we get
 * all the way to the top and still haven't found any online cpus,
 * return cpu_online_map.  Or if passed a NULL cs from an exit'ing
 * task, return cpu_online_map.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_map.
 *
 * Call with callback_mutex held.
 */
static void guarantee_online_cpus(const struct cpuset *cs,
				  struct cpumask *pmask)
{
	while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
		cs = cs->parent;
	if (cs)
		cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
	else
		cpumask_copy(pmask, cpu_online_mask);
	BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
}
  
/*
 * Return in *pmask the portion of a cpuset's mems_allowed that
 * are online, with memory.  If none are online with memory, walk
 * up the cpuset hierarchy until we find one that does have some
 * online mems.  If we get all the way to the top and still haven't
 * found any online mems, return node_states[N_HIGH_MEMORY].
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_states[N_HIGH_MEMORY].
 *
 * Call with callback_mutex held.
 */
  
static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
{
	while (cs && !nodes_intersects(cs->mems_allowed,
					node_states[N_HIGH_MEMORY]))
		cs = cs->parent;
	if (cs)
		nodes_and(*pmask, cs->mems_allowed,
					node_states[N_HIGH_MEMORY]);
	else
		*pmask = node_states[N_HIGH_MEMORY];
	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
}

/*
 * update task's spread flag if cpuset's page/slab spread flag is set
 *
 * Called with callback_mutex/cgroup_mutex held
 */
static void cpuset_update_task_spread_flag(struct cpuset *cs,
					struct task_struct *tsk)
{
	if (is_spread_page(cs))
		tsk->flags |= PF_SPREAD_PAGE;
	else
		tsk->flags &= ~PF_SPREAD_PAGE;
	if (is_spread_slab(cs))
		tsk->flags |= PF_SPREAD_SLAB;
	else
		tsk->flags &= ~PF_SPREAD_SLAB;
}

/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * One cpuset is a subset of another if all its allowed CPUs and
 * Memory Nodes are a subset of the other, and its exclusive flags
 * are only set if the other's are set.  Call holding cgroup_mutex.
 */

static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
	return	cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
		nodes_subset(p->mems_allowed, q->mems_allowed) &&
		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
		is_mem_exclusive(p) <= is_mem_exclusive(q);
}

/**
 * alloc_trial_cpuset - allocate a trial cpuset
 * @cs: the cpuset that the trial cpuset duplicates
 */
static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
{
	struct cpuset *trial;

	trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
	if (!trial)
		return NULL;

	if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
		kfree(trial);
		return NULL;
	}
	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);

	return trial;
}

/**
 * free_trial_cpuset - free the trial cpuset
 * @trial: the trial cpuset to be freed
 */
static void free_trial_cpuset(struct cpuset *trial)
{
	free_cpumask_var(trial->cpus_allowed);
	kfree(trial);
}
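
/*
 * Editor's note (illustrative, not part of the original source): the
 * cpuset file write handlers elsewhere in this file use the trial
 * cpuset as scratch space, roughly:
 *
 *	trialcs = alloc_trial_cpuset(cs);
 *	if (!trialcs)
 *		return -ENOMEM;
 *	retval = update_cpumask(cs, trialcs, buf);	- or update_nodemask()
 *	free_trial_cpuset(trialcs);
 */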

/*
 * validate_change() - Used to validate that any proposed cpuset change
 *		       follows the structural rules for cpusets.
 *
 * If we replaced the flag and mask values of the current cpuset
 * (cur) with those values in the trial cpuset (trial), would
 * our various subset and exclusive rules still be valid?  Presumes
 * cgroup_mutex held.
 *
 * 'cur' is the address of an actual, in-use cpuset.  Operations
 * such as list traversal that depend on the actual address of the
 * cpuset in the list must use cur below, not trial.
 *
 * 'trial' is the address of a bulk structure copy of cur, with
 * perhaps one or more of the fields cpus_allowed, mems_allowed,
 * or flags changed to new, trial values.
 *
 * Return 0 if valid, -errno if not.
 */

static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
{
	struct cgroup *cont;
	struct cpuset *c, *par;

	/* Each of our child cpusets must be a subset of us */
	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
		if (!is_cpuset_subset(cgroup_cs(cont), trial))
			return -EBUSY;
	}

	/* Remaining checks don't apply to root cpuset */
	if (cur == &top_cpuset)
		return 0;

	par = cur->parent;

	/* We must be a subset of our parent cpuset */
	if (!is_cpuset_subset(trial, par))
		return -EACCES;

	/*
	 * If either I or some sibling (!= me) is exclusive, we can't
	 * overlap
	 */
	list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
		c = cgroup_cs(cont);
		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
		    c != cur &&
		    cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
			return -EINVAL;
		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
		    c != cur &&
		    nodes_intersects(trial->mems_allowed, c->mems_allowed))
			return -EINVAL;
	}

	/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
	if (cgroup_task_count(cur->css.cgroup)) {
		if (cpumask_empty(trial->cpus_allowed) ||
		    nodes_empty(trial->mems_allowed)) {
			return -ENOSPC;
		}
	}

	return 0;
}
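
/*
 * Editor's example (not part of the original source): with a parent
 * owning cpus 0-3 and a cpu_exclusive child A holding cpus 0-1, a trial
 * value giving sibling B cpus 1-2 is rejected above with -EINVAL, since
 * B would overlap the exclusive sibling A.  Likewise, shrinking a
 * cpuset that still contains tasks down to an empty cpumask or nodemask
 * is rejected with -ENOSPC.
 */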

#ifdef CONFIG_SMP
/*
 * Helper routine for generate_sched_domains().
 * Do cpusets a, b have overlapping cpus_allowed masks?
 */
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
	return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
}

static void
update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
{
	if (dattr->relax_domain_level < c->relax_domain_level)
		dattr->relax_domain_level = c->relax_domain_level;
	return;
}

static void
update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
{
	LIST_HEAD(q);

	list_add(&c->stack_list, &q);
	while (!list_empty(&q)) {
		struct cpuset *cp;
		struct cgroup *cont;
		struct cpuset *child;

		cp = list_first_entry(&q, struct cpuset, stack_list);
		list_del(q.next);

		if (cpumask_empty(cp->cpus_allowed))
			continue;

		if (is_sched_load_balance(cp))
			update_domain_attr(dattr, cp);

		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
			child = cgroup_cs(cont);
			list_add_tail(&child->stack_list, &q);
		}
	}
}

/*
 * generate_sched_domains()
 *
 * This function builds a partial partition of the system's CPUs.
 * A 'partial partition' is a set of non-overlapping subsets whose
 * union is a subset of that set.
 * The output of this function needs to be passed to kernel/sched.c
 * partition_sched_domains() routine, which will rebuild the scheduler's
 * load balancing domains (sched domains) as specified by that partial
 * partition.
 *
 * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
 * for a background explanation of this.
 *
 * Does not return errors, on the theory that the callers of this
 * routine would rather not worry about failures to rebuild sched
 * domains when operating in the severe memory shortage situations
 * that could cause allocation failures below.
 *
 * Must be called with cgroup_lock held.
 *
 * The three key local variables below are:
 *    q  - a linked-list queue of cpuset pointers, used to implement a
 *	   top-down scan of all cpusets.  This scan loads a pointer
 *	   to each cpuset marked is_sched_load_balance into the
 *	   array 'csa'.  For our purposes, rebuilding the scheduler's
 *	   sched domains, we can ignore !is_sched_load_balance cpusets.
 *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
 *	   that need to be load balanced, for convenient iterative
 *	   access by the subsequent code that finds the best partition,
 *	   i.e. the set of domains (subsets) of CPUs such that the
 *	   cpus_allowed of every cpuset marked is_sched_load_balance
 *	   is a subset of one of these domains, while there are as
 *	   many such domains as possible, each as small as possible.
 * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
 *	   the kernel/sched.c routine partition_sched_domains() in a
 *	   convenient format, that can be easily compared to the prior
 *	   value to determine what partition elements (sched domains)
 *	   were changed (added or removed.)
 *
 * Finding the best partition (set of domains):
 *	The triple nested loops below over i, j, k scan over the
 *	load balanced cpusets (using the array of cpuset pointers in
 *	csa[]) looking for pairs of cpusets that have overlapping
 *	cpus_allowed, but which don't have the same 'pn' partition
 *	number, and puts them in the same partition number.  It keeps
 *	looping on the 'restart' label until it can no longer find
 *	any such pairs.
 *
 *	The union of the cpus_allowed masks from the set of
 *	all cpusets having the same 'pn' value then form the one
 *	element of the partition (one sched domain) to be passed to
 *	partition_sched_domains().
 */
static int generate_sched_domains(cpumask_var_t **domains,
			struct sched_domain_attr **attributes)
{
	LIST_HEAD(q);		/* queue of cpusets to be scanned */
	struct cpuset *cp;	/* scans q */
	struct cpuset **csa;	/* array of all cpuset ptrs */
	int csn;		/* how many cpuset ptrs in csa so far */
	int i, j, k;		/* indices for partition finding loops */
	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
	struct sched_domain_attr *dattr;  /* attributes for custom domains */
	int ndoms = 0;		/* number of sched domains in result */
	int nslot;		/* next empty doms[] struct cpumask slot */

	doms = NULL;
	dattr = NULL;
	csa = NULL;

	/* Special case for the 99% of systems with one, full, sched domain */
	if (is_sched_load_balance(&top_cpuset)) {
		ndoms = 1;
		doms = alloc_sched_domains(ndoms);
		if (!doms)
			goto done;

		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
		if (dattr) {
			*dattr = SD_ATTR_INIT;
			update_domain_attr_tree(dattr, &top_cpuset);
		}
		cpumask_copy(doms[0], top_cpuset.cpus_allowed);

		goto done;
	}

	csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
	if (!csa)
		goto done;
	csn = 0;

	list_add(&top_cpuset.stack_list, &q);
	while (!list_empty(&q)) {
		struct cgroup *cont;
		struct cpuset *child;   /* scans child cpusets of cp */

		cp = list_first_entry(&q, struct cpuset, stack_list);
		list_del(q.next);

		if (cpumask_empty(cp->cpus_allowed))
			continue;

		/*
		 * All child cpusets contain a subset of the parent's cpus, so
		 * just skip them, and then we call update_domain_attr_tree()
		 * to calc relax_domain_level of the corresponding sched
		 * domain.
		 */
		if (is_sched_load_balance(cp)) {
			csa[csn++] = cp;
			continue;
		}

		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
			child = cgroup_cs(cont);
			list_add_tail(&child->stack_list, &q);
		}
	}

	for (i = 0; i < csn; i++)
		csa[i]->pn = i;
	ndoms = csn;

restart:
	/* Find the best partition (set of sched domains) */
	for (i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		int apn = a->pn;

		for (j = 0; j < csn; j++) {
			struct cpuset *b = csa[j];
			int bpn = b->pn;

			if (apn != bpn && cpusets_overlap(a, b)) {
				for (k = 0; k < csn; k++) {
					struct cpuset *c = csa[k];

					if (c->pn == bpn)
						c->pn = apn;
				}
				ndoms--;	/* one less element */
				goto restart;
			}
		}
	}

	/*
	 * Now we know how many domains to create.
	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
	 */
	doms = alloc_sched_domains(ndoms);
	if (!doms)
		goto done;

	/*
	 * The rest of the code, including the scheduler, can deal with
	 * dattr==NULL case. No need to abort if alloc fails.
	 */
	dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);

	for (nslot = 0, i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		struct cpumask *dp;
		int apn = a->pn;

		if (apn < 0) {
			/* Skip completed partitions */
			continue;
		}

		dp = doms[nslot];

		if (nslot == ndoms) {
			static int warnings = 10;
			if (warnings) {
				printk(KERN_WARNING
				 "rebuild_sched_domains confused:"
				  " nslot %d, ndoms %d, csn %d, i %d,"
				  " apn %d\n",
				  nslot, ndoms, csn, i, apn);
				warnings--;
			}
			continue;
		}

		cpumask_clear(dp);
		if (dattr)
			*(dattr + nslot) = SD_ATTR_INIT;
		for (j = i; j < csn; j++) {
			struct cpuset *b = csa[j];

			if (apn == b->pn) {
				cpumask_or(dp, dp, b->cpus_allowed);
				if (dattr)
					update_domain_attr_tree(dattr + nslot, b);

				/* Done with this partition */
				b->pn = -1;
			}
		}
		nslot++;
	}
	BUG_ON(nslot != ndoms);

done:
	kfree(csa);

	/*
	 * Fallback to the default domain if kmalloc() failed.
	 * See comments in partition_sched_domains().
	 */
	if (doms == NULL)
		ndoms = 1;

	*domains    = doms;
	*attributes = dattr;
	return ndoms;
}
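
/*
 * Editor's example (not part of the original source): with load
 * balancing disabled at the root and two sibling cpusets marked
 * sched_load_balance, A with cpus 0-1 and B with cpus 2-3, no masks
 * overlap, the 'pn' merging loop never fires, and this returns
 * ndoms == 2 with doms[0] and doms[1] holding the two disjoint masks.
 * If B instead held cpus 1-3, the overlap would merge A and B into a
 * single partition and ndoms would collapse to 1.
 */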
  
/*
 * Rebuild scheduler domains.
 *
 * Call with neither cgroup_mutex held nor within get_online_cpus().
 * Takes both cgroup_mutex and get_online_cpus().
 *
 * Cannot be directly called from cpuset code handling changes
 * to the cpuset pseudo-filesystem, because it cannot be called
 * from code that already holds cgroup_mutex.
 */
static void do_rebuild_sched_domains(struct work_struct *unused)
{
	struct sched_domain_attr *attr;
	cpumask_var_t *doms;
	int ndoms;

	get_online_cpus();

	/* Generate domain masks and attrs */
	cgroup_lock();
	ndoms = generate_sched_domains(&doms, &attr);
	cgroup_unlock();

	/* Have scheduler rebuild the domains */
	partition_sched_domains(ndoms, doms, attr);

	put_online_cpus();
}

#else /* !CONFIG_SMP */
static void do_rebuild_sched_domains(struct work_struct *unused)
{
}

static int generate_sched_domains(cpumask_var_t **domains,
			struct sched_domain_attr **attributes)
{
	*domains = NULL;
	return 1;
}
#endif /* CONFIG_SMP */

static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);

/*
 * Rebuild scheduler domains, asynchronously via workqueue.
 *
 * If the flag 'sched_load_balance' of any cpuset with non-empty
 * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
 * which has that flag enabled, or if any cpuset with a non-empty
 * 'cpus' is removed, then call this routine to rebuild the
 * scheduler's dynamic sched domains.
 *
 * The rebuild_sched_domains() and partition_sched_domains()
 * routines must nest cgroup_lock() inside get_online_cpus(),
 * but such cpuset changes as these must nest that locking the
 * other way, holding cgroup_lock() for much of the code.
 *
 * So in order to avoid an ABBA deadlock, the cpuset code handling
 * these user changes delegates the actual sched domain rebuilding
 * to a separate workqueue thread, which ends up processing the
 * above do_rebuild_sched_domains() function.
 */
static void async_rebuild_sched_domains(void)
{
	queue_work(cpuset_wq, &rebuild_sched_domains_work);
}
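
/*
 * Editor's note (not part of the original source), restating the ABBA
 * ordering the workqueue sidesteps:
 *
 *	rebuild path:	get_online_cpus()  ->  cgroup_lock()
 *	cpuset writes:	cgroup_lock()      ->  (would need) get_online_cpus()
 *
 * Deferring the rebuild to cpuset_wq means no single thread ever takes
 * the two locks in the second order.
 */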
  
/*
 * Accomplishes the same scheduler domain rebuild as the above
 * async_rebuild_sched_domains(), however it directly calls the
 * rebuild routine synchronously rather than calling it via an
 * asynchronous work thread.
 *
 * This can only be called from code that is not holding
 * cgroup_mutex (not nested in a cgroup_lock() call.)
 */
void rebuild_sched_domains(void)
{
	do_rebuild_sched_domains(NULL);
}

/**
 * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
 * @tsk: task to test
 * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
 *
 * Call with cgroup_mutex held.  May take callback_mutex during call.
 * Called for each task in a cgroup by cgroup_scan_tasks().
 * Return nonzero if this task's cpus_allowed mask should be changed (in other
 * words, if its mask is not equal to its cpuset's mask).
 */
static int cpuset_test_cpumask(struct task_struct *tsk,
			       struct cgroup_scanner *scan)
{
	return !cpumask_equal(&tsk->cpus_allowed,
			(cgroup_cs(scan->cg))->cpus_allowed);
}

/**
 * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
 * @tsk: task to test
 * @scan: struct cgroup_scanner containing the cgroup of the task
 *
 * Called by cgroup_scan_tasks() for each task in a cgroup whose
 * cpus_allowed mask needs to be changed.
 *
 * We don't need to re-check for the cgroup/cpuset membership, since we're
 * holding cgroup_lock() at this point.
 */
static void cpuset_change_cpumask(struct task_struct *tsk,
				  struct cgroup_scanner *scan)
{
	set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
}
  
/**
 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
 * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
 *
 * Called with cgroup_mutex held
 *
 * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
 * calling callback functions for each.
 *
 * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
 * if @heap != NULL.
 */
static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
{
	struct cgroup_scanner scan;

	scan.cg = cs->css.cgroup;
	scan.test_task = cpuset_test_cpumask;
	scan.process_task = cpuset_change_cpumask;
	scan.heap = heap;
	cgroup_scan_tasks(&scan);
}
  
/**
 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
 * @cs: the cpuset to consider
 * @buf: buffer of cpu numbers written to this cpuset
 */
static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
			  const char *buf)
{
	struct ptr_heap heap;
	int retval;
	int is_load_balanced;

	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
	if (cs == &top_cpuset)
		return -EACCES;

	/*
	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
	 * Since cpulist_parse() fails on an empty mask, we special case
	 * that parsing.  The validate_change() call ensures that cpusets
	 * with tasks have cpus.
	 */
	if (!*buf) {
		cpumask_clear(trialcs->cpus_allowed);
	} else {
		retval = cpulist_parse(buf, trialcs->cpus_allowed);
		if (retval < 0)
			return retval;

		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
			return -EINVAL;
	}
	retval = validate_change(cs, trialcs);
	if (retval < 0)
		return retval;

	/* Nothing to do if the cpus didn't change */
	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
		return 0;

	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
	if (retval)
		return retval;

	is_load_balanced = is_sched_load_balance(trialcs);

	mutex_lock(&callback_mutex);
	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
	mutex_unlock(&callback_mutex);

	/*
	 * Scan tasks in the cpuset, and update the cpumasks of any
	 * that need an update.
	 */
	update_tasks_cpumask(cs, &heap);

	heap_free(&heap);

	if (is_load_balanced)
		async_rebuild_sched_domains();
	return 0;
}
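
/*
 * Editor's note (illustrative, not part of the original source): @buf is
 * parsed by cpulist_parse() above in the usual kernel cpu-list format,
 * e.g. writing "0-2,4" to a cpuset's 'cpus' file (the name used with the
 * noprefix mount shown earlier in this file) requests CPUs 0, 1, 2 and 4;
 * an empty write is accepted only for a cpuset with no tasks.
 */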

/*
 * cpuset_migrate_mm
 *
 *    Migrate memory region from one set of nodes to another.
 *
 *    Temporarily set task's mems_allowed to target nodes of migration,
 *    so that the migration code can allocate pages on these nodes.
 *
 *    Call holding cgroup_mutex, so current's cpuset won't change
 *    during this call, as manage_mutex holds off any cpuset_attach()
 *    calls.  Therefore we don't need to take task_lock around the
 *    call to guarantee_online_mems(), as we know no one is changing
 *    our task's cpuset.
 *
 *    While the mm_struct we are migrating is typically from some
 *    other task, the task_struct mems_allowed that we are hacking
 *    is for our current task, which must allocate new pages for that
 *    migrating memory region.
 */

static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
							const nodemask_t *to)
{
	struct task_struct *tsk = current;

	tsk->mems_allowed = *to;

	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);

	guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
}

/*
 * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
 * @tsk: the task to change
 * @newmems: new nodes that the task will be set to
 *
 * In order to avoid seeing no nodes if the old and new nodes are disjoint,
 * we structure updates as setting all new allowed nodes, then clearing newly
 * disallowed ones.
 */
static void cpuset_change_task_nodemask(struct task_struct *tsk,
					nodemask_t *newmems)
{
	bool need_loop;

repeat:
	/*
	 * Allow tasks that have access to memory reserves because they have
	 * been OOM killed to get memory anywhere.
	 */
	if (unlikely(test_thread_flag(TIF_MEMDIE)))
		return;
	if (current->flags & PF_EXITING) /* Let dying task have memory */
		return;

	task_lock(tsk);
	/*
	 * Determine if a loop is necessary if another thread is doing
	 * get_mems_allowed().  If at least one node remains unchanged and
	 * tsk does not have a mempolicy, then an empty nodemask will not be
	 * possible when mems_allowed is larger than a word.
	 */
	need_loop = task_has_mempolicy(tsk) ||
			!nodes_intersects(*newmems, tsk->mems_allowed);

	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);

	/*
	 * Ensure that ->mems_allowed_change_disable is checked only after
	 * setting all the new allowed nodes.
	 *
	 * The read-side task can see a nodemask containing both new and
	 * old allowed nodes, and if it allocates a page while the cpuset
	 * is clearing the newly disallowed ones, it can still see the new
	 * allowed bits.
	 *
	 * If instead the check came before setting all the new allowed
	 * nodes, then setting the new nodes and clearing the newly
	 * disallowed ones could complete back to back, and the read-side
	 * task might find no node to allocate a page from.
	 */
	smp_mb();

	/*
	 * Allocation of memory is very fast; we needn't sleep while waiting
	 * for the read-side.
	 */
	while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
		task_unlock(tsk);
		if (!task_curr(tsk))
			yield();
		goto repeat;
	}

	/*
	 * Ensure that ->mems_allowed_change_disable is checked before
	 * clearing the newly disallowed nodes.
	 *
	 * If the newly disallowed bits were cleared before the check, the
	 * read-side task might find no node to allocate a page from.
	 */
	smp_mb();

	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
	tsk->mems_allowed = *newmems;
	task_unlock(tsk);
}
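
/*
 * Editor's example (not part of the original source) of the two-step
 * rebind above with disjoint masks: moving a task from node 0 to node 1
 * transitions mems_allowed as {0} -> {0,1} (nodes_or + STEP1) and then
 * {0,1} -> {1} (STEP2), so a concurrent allocator never observes an
 * empty nodemask.
 */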
  
/*
 * Update a task's mems_allowed and rebind its mempolicy and its vmas'
 * mempolicies to the cpuset's new mems_allowed, and migrate pages to new
 * nodes if the memory_migrate flag is set. Called with cgroup_mutex held.
 */
static void cpuset_change_nodemask(struct task_struct *p,
				   struct cgroup_scanner *scan)
{
	struct mm_struct *mm;
	struct cpuset *cs;
	int migrate;
	const nodemask_t *oldmem = scan->data;
	static nodemask_t newmems;	/* protected by cgroup_mutex */

	cs = cgroup_cs(scan->cg);
	guarantee_online_mems(cs, &newmems);

	cpuset_change_task_nodemask(p, &newmems);

	mm = get_task_mm(p);
	if (!mm)
		return;

	migrate = is_memory_migrate(cs);

	mpol_rebind_mm(mm, &cs->mems_allowed);
	if (migrate)
		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
	mmput(mm);
}

static void *cpuset_being_rebound;

/**
 * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
 * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
 * @oldmem: old mems_allowed of cpuset cs
 * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
 *
 * Called with cgroup_mutex held
 * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
 * if @heap != NULL.
 */
static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
				 struct ptr_heap *heap)
{
	struct cgroup_scanner scan;

	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */

	scan.cg = cs->css.cgroup;
	scan.test_task = NULL;
	scan.process_task = cpuset_change_nodemask;
	scan.heap = heap;
	scan.data = (nodemask_t *)oldmem;

	/*
	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
	 * take while holding tasklist_lock.  Forks can happen - the
	 * mpol_dup() cpuset_being_rebound check will catch such forks,
	 * and rebind their vma mempolicies too.  Because we still hold
	 * the global cgroup_mutex, we know that no other rebind effort
	 * will be contending for the global variable cpuset_being_rebound.
	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
	 * is idempotent.  Also migrate pages in each mm to new nodes.
	 */
	cgroup_scan_tasks(&scan);

	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
	cpuset_being_rebound = NULL;
}

/*
 * Handle user request to change the 'mems' memory placement
 * of a cpuset.  Needs to validate the request, update the
 * cpuset's mems_allowed, and for each task in the cpuset,
 * update mems_allowed and rebind task's mempolicy and any vma
 * mempolicies and if the cpuset is marked 'memory_migrate',
 * migrate the task's pages to the new memory.
 *
 * Call with cgroup_mutex held.  May take callback_mutex during call.
 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
 * lock each such task's mm->mmap_sem, scan its vma's and rebind
 * their mempolicies to the cpuset's new mems_allowed.
 */
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
			   const char *buf)
{
	NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
	int retval;
	struct ptr_heap heap;

	if (!oldmem)
		return -ENOMEM;

	/*
	 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
	 * it's read-only
	 */
	if (cs == &top_cpuset) {
		retval = -EACCES;
		goto done;
	}

	/*
	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
	 * Since nodelist_parse() fails on an empty mask, we special case
	 * that parsing.  The validate_change() call ensures that cpusets
	 * with tasks have memory.
	 */
	if (!*buf) {
		nodes_clear(trialcs->mems_allowed);
	} else {
		retval = nodelist_parse(buf, trialcs->mems_allowed);
		if (retval < 0)
			goto done;

		if (!nodes_subset(trialcs->mems_allowed,
				node_states[N_HIGH_MEMORY])) {
			retval = -EINVAL;
			goto done;
		}
	}
	*oldmem = cs->mems_allowed;
	if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
		retval = 0;		/* Too easy - nothing to do */
		goto done;
	}
	retval = validate_change(cs, trialcs);
	if (retval < 0)
		goto done;

	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
	if (retval < 0)
		goto done;

	mutex_lock(&callback_mutex);
	cs->mems_allowed = trialcs->mems_allowed;
	mutex_unlock(&callback_mutex);

	update_tasks_nodemask(cs, oldmem, &heap);

	heap_free(&heap);
done:
	NODEMASK_FREE(oldmem);
	return retval;
}

int current_cpuset_is_being_rebound(void)
{
	return task_cs(current) == cpuset_being_rebound;
}

static int update_relax_domain_level(struct cpuset *cs, s64 val)
{
#ifdef CONFIG_SMP
	if (val < -1 || val >= sched_domain_level_max)
		return -EINVAL;
#endif

	if (val != cs->relax_domain_level) {
		cs->relax_domain_level = val;
		if (!cpumask_empty(cs->cpus_allowed) &&
		    is_sched_load_balance(cs))
			async_rebuild_sched_domains();
	}

	return 0;
}
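
/*
 * Summary for readers (paraphrased from Documentation/cgroups/cpusets.txt;
 * the authoritative description lives there): val == -1 requests the
 * system default, while 0 .. sched_domain_level_max-1 select how wide a
 * range of CPUs the scheduler searches when idle- or wakeup-balancing
 * tasks in this cpuset.
 */
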
  /*
   * cpuset_change_flag - make a task's spread flags the same as its cpuset's
   * @tsk: task to be updated
   * @scan: struct cgroup_scanner containing the cgroup of the task
   *
   * Called by cgroup_scan_tasks() for each task in a cgroup.
   *
   * We don't need to re-check for the cgroup/cpuset membership, since we're
   * holding cgroup_lock() at this point.
   */
  static void cpuset_change_flag(struct task_struct *tsk,
  				struct cgroup_scanner *scan)
  {
  	cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk);
  }
  
  /*
   * update_tasks_flags - update the spread flags of tasks in the cpuset.
 * @cs: the cpuset in which each task's spread flags need to be changed
   * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
   *
   * Called with cgroup_mutex held
   *
   * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
   * calling callback functions for each.
   *
   * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
   * if @heap != NULL.
   */
  static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
  {
  	struct cgroup_scanner scan;
  
  	scan.cg = cs->css.cgroup;
  	scan.test_task = NULL;
  	scan.process_task = cpuset_change_flag;
  	scan.heap = heap;
  	cgroup_scan_tasks(&scan);
  }
  
  /*
 * update_flag - read a 0 or a 1 in a file and update associated flag
 * bit:		the bit to update (see cpuset_flagbits_t)
 * cs:		the cpuset to update
 * turning_on:	whether the flag is being set or cleared
 *
 * Call with cgroup_mutex held.
 */
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
		       int turning_on)
{
	struct cpuset *trialcs;
	int balance_flag_changed;
	int spread_flag_changed;
	struct ptr_heap heap;
	int err;

	trialcs = alloc_trial_cpuset(cs);
	if (!trialcs)
		return -ENOMEM;

	if (turning_on)
		set_bit(bit, &trialcs->flags);
	else
		clear_bit(bit, &trialcs->flags);

	err = validate_change(cs, trialcs);
	if (err < 0)
		goto out;

	err = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
	if (err < 0)
		goto out;

	balance_flag_changed = (is_sched_load_balance(cs) !=
				is_sched_load_balance(trialcs));

	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
			|| (is_spread_page(cs) != is_spread_page(trialcs)));

	mutex_lock(&callback_mutex);
	cs->flags = trialcs->flags;
	mutex_unlock(&callback_mutex);

	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
		async_rebuild_sched_domains();

	if (spread_flag_changed)
		update_tasks_flags(cs, &heap);
	heap_free(&heap);
out:
	free_trial_cpuset(trialcs);
	return err;
}

/*
 * Frequency meter - How fast is some event occurring?
   *
   * These routines manage a digitally filtered, constant time based,
   * event frequency meter.  There are four routines:
   *   fmeter_init() - initialize a frequency meter.
   *   fmeter_markevent() - called each time the event happens.
   *   fmeter_getrate() - returns the recent rate of such events.
   *   fmeter_update() - internal routine used to update fmeter.
   *
   * A common data structure is passed to each of these routines,
   * which is used to keep track of the state required to manage the
   * frequency meter and its digital filter.
   *
   * The filter works on the number of events marked per unit time.
   * The filter is single-pole low-pass recursive (IIR).  The time unit
   * is 1 second.  Arithmetic is done using 32-bit integers scaled to
   * simulate 3 decimal digits of precision (multiplied by 1000).
   *
   * With an FM_COEF of 933, and a time base of 1 second, the filter
   * has a half-life of 10 seconds, meaning that if the events quit
   * happening, then the rate returned from the fmeter_getrate()
   * will be cut in half each 10 seconds, until it converges to zero.
   *
   * It is not worth doing a real infinitely recursive filter.  If more
   * than FM_MAXTICKS ticks have elapsed since the last filter event,
   * just compute FM_MAXTICKS ticks worth, by which point the level
   * will be stable.
   *
   * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
   * arithmetic overflow in the fmeter_update() routine.
   *
   * Given the simple 32 bit integer arithmetic used, this meter works
   * best for reporting rates between one per millisecond (msec) and
   * one per 32 (approx) seconds.  At constant rates faster than one
   * per msec it maxes out at values just under 1,000,000.  At constant
   * rates between one per msec, and one per second it will stabilize
   * to a value N*1000, where N is the rate of events per second.
   * At constant rates between one per second and one per 32 seconds,
   * it will be choppy, moving up on the seconds that have an event,
   * and then decaying until the next event.  At rates slower than
   * about one in 32 seconds, it decays all the way back to zero between
   * each event.
   */
  
  #define FM_COEF 933		/* coefficient for half-life of 10 secs */
  #define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */
  #define FM_MAXCNT 1000000	/* limit cnt to avoid overflow */
  #define FM_SCALE 1000		/* faux fixed point scale */
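
/*
 * Illustrative arithmetic for the constants above: each one-second tick
 * multiplies the level by FM_COEF/FM_SCALE = 0.933, and 0.933^10 is
 * roughly 0.50, which is where the 10 second half-life quoted above
 * comes from.
 */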
  
  /* Initialize a frequency meter */
  static void fmeter_init(struct fmeter *fmp)
  {
  	fmp->cnt = 0;
  	fmp->val = 0;
  	fmp->time = 0;
  	spin_lock_init(&fmp->lock);
  }
  
  /* Internal meter update - process cnt events and update value */
  static void fmeter_update(struct fmeter *fmp)
  {
  	time_t now = get_seconds();
  	time_t ticks = now - fmp->time;
  
  	if (ticks == 0)
  		return;
  
  	ticks = min(FM_MAXTICKS, ticks);
  	while (ticks-- > 0)
  		fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
  	fmp->time = now;
  
  	fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
  	fmp->cnt = 0;
  }
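
/*
 * Illustrative fixed point: at a steady rate of one event per second,
 * each update sees cnt == FM_SCALE, so a stable val satisfies
 * val = (FM_COEF * val)/FM_SCALE + (FM_SCALE - FM_COEF), giving
 * val = 933 + 67 = 1000 -- the N*1000 steady state described above
 * with N == 1.
 */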
  
  /* Process any previous ticks, then bump cnt by one (times scale). */
  static void fmeter_markevent(struct fmeter *fmp)
  {
  	spin_lock(&fmp->lock);
  	fmeter_update(fmp);
  	fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
  	spin_unlock(&fmp->lock);
  }
  
  /* Process any previous ticks, then return current value. */
  static int fmeter_getrate(struct fmeter *fmp)
  {
  	int val;
  
  	spin_lock(&fmp->lock);
  	fmeter_update(fmp);
  	val = fmp->val;
  	spin_unlock(&fmp->lock);
  	return val;
  }
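
/*
 * The fmeter's consumer in this file is the per-cpuset memory_pressure
 * meter: events are marked from the page reclaim path (not shown in this
 * excerpt), and the filtered rate is sampled via fmeter_getrate() when
 * the memory_pressure control file is read (see FILE_MEMORY_PRESSURE in
 * cpuset_read_u64() below).
 */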

/*
 * Protected by cgroup_lock. The nodemasks must be stored globally because
 * dynamically allocating them is not allowed in can_attach, and they must
 * persist until attach.
 */
  static cpumask_var_t cpus_attach;
  static nodemask_t cpuset_attach_nodemask_from;
  static nodemask_t cpuset_attach_nodemask_to;

/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			     struct cgroup_taskset *tset)
{
	struct cpuset *cs = cgroup_cs(cgrp);
	struct task_struct *task;
	int ret;

	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
		return -ENOSPC;

	cgroup_taskset_for_each(task, cgrp, tset) {
		/*
		 * Kthreads bound to specific cpus cannot be moved to a new
		 * cpuset; we cannot change their cpu affinity and
		 * isolating such threads by their set of allowed nodes is
		 * unnecessary.  Thus, cpusets are not applicable for such
		 * threads.  This prevents checking for success of
		 * set_cpus_allowed_ptr() on all attached tasks before
		 * cpus_allowed may be changed.
		 */
		if (task->flags & PF_THREAD_BOUND)
			return -EINVAL;
		ret = security_task_setscheduler(task);
		if (ret)
			return ret;
	}

	/* prepare for attach */
	if (cs == &top_cpuset)
		cpumask_copy(cpus_attach, cpu_possible_mask);
	else
		guarantee_online_cpus(cs, cpus_attach);

	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);

	return 0;
}

static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			  struct cgroup_taskset *tset)
{
	struct mm_struct *mm;
	struct task_struct *task;
	struct task_struct *leader = cgroup_taskset_first(tset);
	struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
	struct cpuset *cs = cgroup_cs(cgrp);
	struct cpuset *oldcs = cgroup_cs(oldcgrp);

	cgroup_taskset_for_each(task, cgrp, tset) {
		/*
		 * can_attach beforehand should guarantee that this doesn't
		 * fail.  TODO: have a better way to handle failure here
		 */
		WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));

		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
		cpuset_update_task_spread_flag(cs, task);
	}

	/*
	 * Change mm, possibly for multiple threads in a threadgroup. This is
	 * expensive and may sleep.
	 */
	cpuset_attach_nodemask_from = oldcs->mems_allowed;
	cpuset_attach_nodemask_to = cs->mems_allowed;
	mm = get_task_mm(leader);
	if (mm) {
		mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
		if (is_memory_migrate(cs))
			cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
					  &cpuset_attach_nodemask_to);
		mmput(mm);
	}
}
  
  /* The various types of files and directories in a cpuset file system */
  
  typedef enum {
	FILE_MEMORY_MIGRATE,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;

static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	cpuset_filetype_t type = cft->private;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
		break;
	case FILE_MEM_EXCLUSIVE:
		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
		break;
	case FILE_MEM_HARDWALL:
		retval = update_flag(CS_MEM_HARDWALL, cs, val);
		break;
	case FILE_SCHED_LOAD_BALANCE:
		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
		break;
	case FILE_MEMORY_MIGRATE:
		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
		break;
	case FILE_MEMORY_PRESSURE_ENABLED:
		cpuset_memory_pressure_enabled = !!val;
		break;
	case FILE_MEMORY_PRESSURE:
		retval = -EACCES;
		break;
	case FILE_SPREAD_PAGE:
		retval = update_flag(CS_SPREAD_PAGE, cs, val);
		break;
	case FILE_SPREAD_SLAB:
		retval = update_flag(CS_SPREAD_SLAB, cs, val);
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	cpuset_filetype_t type = cft->private;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	switch (type) {
	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
		retval = update_relax_domain_level(cs, val);
		break;
	default:
		retval = -EINVAL;
		break;
	}
	cgroup_unlock();
	return retval;
}

/*
 * Common handling for a write to a "cpus" or "mems" file.
 */
static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
				const char *buf)
{
	int retval = 0;
	struct cpuset *cs = cgroup_cs(cgrp);
	struct cpuset *trialcs;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	trialcs = alloc_trial_cpuset(cs);
	if (!trialcs) {
		retval = -ENOMEM;
		goto out;
	}

	switch (cft->private) {
	case FILE_CPULIST:
		retval = update_cpumask(cs, trialcs, buf);
		break;
	case FILE_MEMLIST:
		retval = update_nodemask(cs, trialcs, buf);
		break;
	default:
		retval = -EINVAL;
		break;
	}

	free_trial_cpuset(trialcs);
out:
	cgroup_unlock();
	return retval;
}
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
   * These ascii lists should be read in a single call, by using a user
   * buffer large enough to hold the entire map.  If read in smaller
   * chunks, there is no guarantee of atomicity.  Since the display format
   * used, list of ranges of sequential numbers, is variable length,
   * and since these maps can change value dynamically, one could read
   * gibberish by doing partial reads while a list was changing.
   * A single large read to a buffer that crosses a page boundary is
   * ok, because the result being copied to user land is not recomputed
   * across a page fault.
   */

static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
	size_t count;

	mutex_lock(&callback_mutex);
	count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
	mutex_unlock(&callback_mutex);

	return count;
}

static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
	size_t count;

	mutex_lock(&callback_mutex);
	count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
	mutex_unlock(&callback_mutex);

	return count;
}

static ssize_t cpuset_common_file_read(struct cgroup *cont,
				       struct cftype *cft,
				       struct file *file,
				       char __user *buf,
				       size_t nbytes, loff_t *ppos)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;
	char *page;
	ssize_t retval = 0;
	char *s;

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		return -ENOMEM;

	s = page;

	switch (type) {
	case FILE_CPULIST:
		s += cpuset_sprintf_cpulist(s, cs);
		break;
	case FILE_MEMLIST:
		s += cpuset_sprintf_memlist(s, cs);
		break;
	default:
		retval = -EINVAL;
		goto out;
	}
	*s++ = '\n';

	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
out:
	free_page((unsigned long)page);
	return retval;
}

static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;
	switch (type) {
	case FILE_CPU_EXCLUSIVE:
		return is_cpu_exclusive(cs);
	case FILE_MEM_EXCLUSIVE:
		return is_mem_exclusive(cs);
	case FILE_MEM_HARDWALL:
		return is_mem_hardwall(cs);
	case FILE_SCHED_LOAD_BALANCE:
		return is_sched_load_balance(cs);
	case FILE_MEMORY_MIGRATE:
		return is_memory_migrate(cs);
	case FILE_MEMORY_PRESSURE_ENABLED:
		return cpuset_memory_pressure_enabled;
	case FILE_MEMORY_PRESSURE:
		return fmeter_getrate(&cs->fmeter);
	case FILE_SPREAD_PAGE:
		return is_spread_page(cs);
	case FILE_SPREAD_SLAB:
		return is_spread_slab(cs);
	default:
		BUG();
	}

	/* Unreachable but makes gcc happy */
	return 0;
}

static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
{
	struct cpuset *cs = cgroup_cs(cont);
	cpuset_filetype_t type = cft->private;
	switch (type) {
	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
		return cs->relax_domain_level;
	default:
		BUG();
	}

	/* Unreachable but makes gcc happy */
	return 0;
}

/*
 * for the common functions, 'private' gives the type of file
 */
static struct cftype files[] = {
	{
		.name = "cpus",
		.read = cpuset_common_file_read,
		.write_string = cpuset_write_resmask,
		.max_write_len = (100U + 6 * NR_CPUS),
		.private = FILE_CPULIST,
	},

	{
		.name = "mems",
		.read = cpuset_common_file_read,
		.write_string = cpuset_write_resmask,
		.max_write_len = (100U + 6 * MAX_NUMNODES),
		.private = FILE_MEMLIST,
	},

	{
		.name = "cpu_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_CPU_EXCLUSIVE,
	},

	{
		.name = "mem_exclusive",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_EXCLUSIVE,
	},

	{
		.name = "mem_hardwall",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEM_HARDWALL,
	},

	{
		.name = "sched_load_balance",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SCHED_LOAD_BALANCE,
	},

	{
		.name = "sched_relax_domain_level",
		.read_s64 = cpuset_read_s64,
		.write_s64 = cpuset_write_s64,
		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
	},

	{
		.name = "memory_migrate",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_MIGRATE,
	},

	{
		.name = "memory_pressure",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_PRESSURE,
		.mode = S_IRUGO,
	},

	{
		.name = "memory_spread_page",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SPREAD_PAGE,
	},

	{
		.name = "memory_spread_slab",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_SPREAD_SLAB,
	},
};
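
/*
 * Usage sketch (illustrative; path names depend on where the cgroup
 * filesystem is mounted):
 *
 *	mount -t cgroup -o cpuset cpuset /dev/cpuset
 *	mkdir /dev/cpuset/set0
 *	echo 0-3 > /dev/cpuset/set0/cpuset.cpus
 *	echo 0   > /dev/cpuset/set0/cpuset.mems
 *	echo $$  > /dev/cpuset/set0/tasks
 *
 * Each per-cpuset file above maps to one cftype entry in files[];
 * "tasks" is provided by the cgroup core.
 */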

static struct cftype cft_memory_pressure_enabled = {
	.name = "memory_pressure_enabled",
	.read_u64 = cpuset_read_u64,
	.write_u64 = cpuset_write_u64,
	.private = FILE_MEMORY_PRESSURE_ENABLED,
};

static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
{
	int err;

	err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
	if (err)
		return err;
	/* memory_pressure_enabled is in root cpuset only */
	if (!cont->parent)
		err = cgroup_add_file(cont, ss,
				      &cft_memory_pressure_enabled);
	return err;
}
  
  /*
 * post_clone() is called during cgroup_create() when the
 * clone_children mount argument was specified.  The cgroup
 * cannot yet have any tasks.
 *
 * Currently we refuse to set up the cgroup - thereby
 * refusing the task to be entered, and as a result refusing
 * the sys_unshare() or clone() which initiated it - if any
 * sibling cpusets have exclusive cpus or mem.
 *
 * If this becomes a problem for some users who wish to
 * allow that scenario, then cpuset_post_clone() could be
 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
 * (and likewise for mems) to the new cgroup. Called with cgroup_mutex
 * held.
 */
static void cpuset_post_clone(struct cgroup_subsys *ss,
			      struct cgroup *cgroup)
{
	struct cgroup *parent, *child;
	struct cpuset *cs, *parent_cs;

	parent = cgroup->parent;
	list_for_each_entry(child, &parent->children, sibling) {
		cs = cgroup_cs(child);
		if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
			return;
	}
	cs = cgroup_cs(cgroup);
	parent_cs = cgroup_cs(parent);

	mutex_lock(&callback_mutex);
	cs->mems_allowed = parent_cs->mems_allowed;
	cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
	mutex_unlock(&callback_mutex);
	return;
}
  
  /*
 *	cpuset_create - create a cpuset
 *	ss:	cpuset cgroup subsystem
 *	cont:	control group that the new cpuset will be part of
 */
static struct cgroup_subsys_state *cpuset_create(
	struct cgroup_subsys *ss,
	struct cgroup *cont)
{
	struct cpuset *cs;
	struct cpuset *parent;

	if (!cont->parent) {
		return &top_cpuset.css;
	}
	parent = cgroup_cs(cont->parent);
	cs = kmalloc(sizeof(*cs), GFP_KERNEL);
	if (!cs)
		return ERR_PTR(-ENOMEM);
	if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
		kfree(cs);
		return ERR_PTR(-ENOMEM);
	}

	cs->flags = 0;
	if (is_spread_page(parent))
		set_bit(CS_SPREAD_PAGE, &cs->flags);
	if (is_spread_slab(parent))
		set_bit(CS_SPREAD_SLAB, &cs->flags);
	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
	cpumask_clear(cs->cpus_allowed);
	nodes_clear(cs->mems_allowed);
	fmeter_init(&cs->fmeter);
	cs->relax_domain_level = -1;

	cs->parent = parent;
	number_of_cpusets++;
	return &cs->css;
}

/*
 * If the cpuset being removed has its flag 'sched_load_balance'
 * enabled, then simulate turning sched_load_balance off, which
 * will call async_rebuild_sched_domains().
 */
static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
{
	struct cpuset *cs = cgroup_cs(cont);

	if (is_sched_load_balance(cs))
		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);

	number_of_cpusets--;
	free_cpumask_var(cs->cpus_allowed);
	kfree(cs);
}

struct cgroup_subsys cpuset_subsys = {
	.name = "cpuset",
	.create = cpuset_create,
	.destroy = cpuset_destroy,
	.can_attach = cpuset_can_attach,
	.attach = cpuset_attach,
	.populate = cpuset_populate,
	.post_clone = cpuset_post_clone,
	.subsys_id = cpuset_subsys_id,
	.early_init = 1,
};

/**
 * cpuset_init - initialize cpusets at system boot
 *
 * Description: Initialize top_cpuset and the cpuset internal file system.
 **/

int __init cpuset_init(void)
{
	int err = 0;

	if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
		BUG();
	cpumask_setall(top_cpuset.cpus_allowed);
	nodes_setall(top_cpuset.mems_allowed);

	fmeter_init(&top_cpuset.fmeter);
	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
	top_cpuset.relax_domain_level = -1;

	err = register_filesystem(&cpuset_fs_type);
	if (err < 0)
		return err;

	if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
		BUG();

	number_of_cpusets = 1;
	return 0;
}

/**
 * cpuset_do_move_task - move a given task to another cpuset
 * @tsk: pointer to the task_struct of the task to move
 * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
 *
 * Called by cgroup_scan_tasks() for each task in a cgroup.
 */
static void cpuset_do_move_task(struct task_struct *tsk,
				struct cgroup_scanner *scan)
{
	struct cgroup *new_cgroup = scan->data;

	cgroup_attach_task(new_cgroup, tsk);
}
  
/**
 * move_member_tasks_to_cpuset - move tasks from one cpuset to another
 * @from: cpuset in which the tasks currently reside
 * @to: cpuset to which the tasks will be moved
 *
 * Called with cgroup_mutex held
 * callback_mutex must not be held, as cpuset_attach() will take it.
 *
 * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
 * calling callback functions for each.
 */
static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
{
	struct cgroup_scanner scan;

	scan.cg = from->css.cgroup;
	scan.test_task = NULL; /* select all tasks in cgroup */
	scan.process_task = cpuset_do_move_task;
	scan.heap = NULL;
	scan.data = to->css.cgroup;

	if (cgroup_scan_tasks(&scan))
		printk(KERN_ERR "move_member_tasks_to_cpuset: "
				"cgroup_scan_tasks failed\n");
}

/*
 * If CPU and/or memory hotplug handlers, below, unplug any CPUs
 * or memory nodes, we need to walk over the cpuset hierarchy,
 * removing that CPU or node from all cpusets.  If this removes the
 * last CPU or node from a cpuset, then move the tasks in the empty
 * cpuset to its next-highest non-empty parent.
 *
 * Called with cgroup_mutex held
 * callback_mutex must not be held, as cpuset_attach() will take it.
 */
static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
{
	struct cpuset *parent;

	/*
	 * The cgroup's css_sets list is in use if there are tasks
	 * in the cpuset; the list is empty if there are none;
	 * the cs->css.refcnt seems always 0.
	 */
	if (list_empty(&cs->css.cgroup->css_sets))
		return;

	/*
	 * Find its next-highest non-empty parent (the top cpuset
	 * has online cpus, so it can't be empty).
	 */
	parent = cs->parent;
	while (cpumask_empty(parent->cpus_allowed) ||
			nodes_empty(parent->mems_allowed))
		parent = parent->parent;

	move_member_tasks_to_cpuset(cs, parent);
}
  
/*
 * Walk the specified cpuset subtree and look for empty cpusets.
 * The tasks of such a cpuset must be moved to a parent cpuset.
 *
 * Called with cgroup_mutex held.  We take callback_mutex to modify
 * cpus_allowed and mems_allowed.
 *
 * This walk processes the tree from top to bottom, completing one layer
 * before dropping down to the next.  It always processes a node before
 * any of its children.
 *
 * For now, since we lack memory hot unplug, we'll never see a cpuset
 * that has tasks along with an empty 'mems'.  But if we did see such
 * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
 */
static void scan_for_empty_cpusets(struct cpuset *root)
{
	LIST_HEAD(queue);
	struct cpuset *cp;	/* scans cpusets being updated */
	struct cpuset *child;	/* scans child cpusets of cp */
	struct cgroup *cont;
	static nodemask_t oldmems;	/* protected by cgroup_mutex */

	list_add_tail((struct list_head *)&root->stack_list, &queue);

	while (!list_empty(&queue)) {
		cp = list_first_entry(&queue, struct cpuset, stack_list);
		list_del(queue.next);
		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
			child = cgroup_cs(cont);
			list_add_tail(&child->stack_list, &queue);
		}

		/* Continue past cpusets with all cpus, mems online */
		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
			continue;

		oldmems = cp->mems_allowed;

		/* Remove offline cpus and mems from this cpuset. */
		mutex_lock(&callback_mutex);
		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
			    cpu_active_mask);
		nodes_and(cp->mems_allowed, cp->mems_allowed,
						node_states[N_HIGH_MEMORY]);
		mutex_unlock(&callback_mutex);

		/* Move tasks from the empty cpuset to a parent */
		if (cpumask_empty(cp->cpus_allowed) ||
		     nodes_empty(cp->mems_allowed))
			remove_tasks_in_empty_cpuset(cp);
		else {
			update_tasks_cpumask(cp, NULL);
			update_tasks_nodemask(cp, &oldmems, NULL);
		}
	}
}
  
  /*
 * The top_cpuset tracks what CPUs and Memory Nodes are online,
 * period.  This is necessary in order to make cpusets transparent
 * (of no effect) on systems that are actively using CPU hotplug
 * but making no active use of cpusets.
 *
 * This routine ensures that top_cpuset.cpus_allowed tracks
 * cpu_active_mask on each CPU hotplug (cpuhp) event.
 *
 * Called within get_online_cpus().  Needs to call cgroup_lock()
 * before calling generate_sched_domains().
 */
void cpuset_update_active_cpus(void)
{
	struct sched_domain_attr *attr;
	cpumask_var_t *doms;
	int ndoms;

	cgroup_lock();
	mutex_lock(&callback_mutex);
	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
	mutex_unlock(&callback_mutex);
	scan_for_empty_cpusets(&top_cpuset);
	ndoms = generate_sched_domains(&doms, &attr);
	cgroup_unlock();

	/* Have scheduler rebuild the domains */
	partition_sched_domains(ndoms, doms, attr);
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
 * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
 * See also the previous routine cpuset_update_active_cpus().
 */
static int cpuset_track_online_nodes(struct notifier_block *self,
				unsigned long action, void *arg)
{
	static nodemask_t oldmems;	/* protected by cgroup_mutex */

	cgroup_lock();
	switch (action) {
	case MEM_ONLINE:
		oldmems = top_cpuset.mems_allowed;
		mutex_lock(&callback_mutex);
		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
		mutex_unlock(&callback_mutex);
		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
		break;
	case MEM_OFFLINE:
		/*
		 * needn't update top_cpuset.mems_allowed explicitly because
		 * scan_for_empty_cpusets() will update it.
		 */
		scan_for_empty_cpusets(&top_cpuset);
		break;
	default:
		break;
	}
	cgroup_unlock();

	return NOTIFY_OK;
}
#endif

/**
 * cpuset_init_smp - initialize cpus_allowed
 *
 * Description: Finish top cpuset after cpu, node maps are initialized
 **/

void __init cpuset_init_smp(void)
{
	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];

	hotplug_memory_notifier(cpuset_track_online_nodes, 10);

	cpuset_wq = create_singlethread_workqueue("cpuset");
	BUG_ON(!cpuset_wq);
}
  
  /**
 * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
 *
 * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of cpu_online_map, even if this means going outside the
 * task's cpuset.
 **/

void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
	mutex_lock(&callback_mutex);
	task_lock(tsk);
	guarantee_online_cpus(task_cs(tsk), pmask);
	task_unlock(tsk);
	mutex_unlock(&callback_mutex);
}

int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
	const struct cpuset *cs;
	int cpu;

	rcu_read_lock();
	cs = task_cs(tsk);
	if (cs)
		do_set_cpus_allowed(tsk, cs->cpus_allowed);
	rcu_read_unlock();

	/*
	 * We own tsk->cpus_allowed, nobody can change it under us.
	 *
	 * But we used cs && cs->cpus_allowed lockless and thus can
	 * race with cgroup_attach_task() or update_cpumask() and get
	 * the wrong tsk->cpus_allowed. However, both cases imply the
	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
	 * which takes task_rq_lock().
	 *
	 * If we are called after it dropped the lock we must see all
	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporarily
	 * set any mask even if it is not right from the task_cs() pov;
	 * the pending set_cpus_allowed_ptr() will fix things.
	 */

	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
	if (cpu >= nr_cpu_ids) {
		/*
		 * Either tsk->cpus_allowed is wrong (see above) or it
		 * is actually empty. The latter case is only possible
		 * if we are racing with remove_tasks_in_empty_cpuset().
		 * Like above we can temporarily set any mask and rely on
		 * set_cpus_allowed_ptr() as the synchronization point.
		 */
		do_set_cpus_allowed(tsk, cpu_possible_mask);
		cpu = cpumask_any(cpu_active_mask);
	}

	return cpu;
}
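
/*
 * Illustrative caller sketch (hypothetical helper, simplified from the
 * scheduler's select_fallback_rq() of this era): prefer an active CPU
 * already in the task's mask, and only then let the cpuset layer
 * repair the mask.
 */
#if 0	/* illustrative only */
static int example_select_fallback_cpu(struct task_struct *p)
{
	int dest_cpu;

	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
	if (dest_cpu < nr_cpu_ids)
		return dest_cpu;

	/* No more Mr. Nice Guy: may rewrite p->cpus_allowed. */
	return cpuset_cpus_allowed_fallback(p);
}
#endif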

void cpuset_init_current_mems_allowed(void)
{
	nodes_setall(current->mems_allowed);
}

/**
 * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
 *
 * Description: Returns the nodemask_t mems_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
 * task's cpuset.
 **/

nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
{
	nodemask_t mask;

	mutex_lock(&callback_mutex);
	task_lock(tsk);
	guarantee_online_mems(task_cs(tsk), &mask);
	task_unlock(tsk);
	mutex_unlock(&callback_mutex);

	return mask;
}

/**
 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
 * @nodemask: the nodemask to be checked
 *
 * Are any of the nodes in the nodemask allowed in current->mems_allowed?
 */
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
	return nodes_intersects(*nodemask, current->mems_allowed);
}
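
/*
 * Example usage (hypothetical helper, shown only to illustrate the
 * intended check): reject a user-supplied nodemask, e.g. for a memory
 * policy, that lies entirely outside the caller's cpuset, since no
 * allocation could ever satisfy it.
 */
#if 0	/* illustrative only */
static int example_validate_policy_nodes(nodemask_t *user_nodes)
{
	if (!cpuset_nodemask_valid_mems_allowed(user_nodes))
		return -EINVAL;
	return 0;
}
#endif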

/*
 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
 * mem_hardwall ancestor to the specified cpuset.  Call holding
 * callback_mutex.  If no ancestor is mem_exclusive or mem_hardwall
 * (an unusual configuration), then returns the root cpuset.
 */
static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
{
	while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
		cs = cs->parent;
	return cs;
}

/**
 * cpuset_node_allowed_softwall - Can we allocate on a memory node?
 * @node: is this an allowed node?
 * @gfp_mask: memory allocation flags
 *
 * If we're in interrupt, yes, we can always allocate.  If __GFP_THISNODE is
 * set, yes, we can always allocate.  If node is in our task's mems_allowed,
 * yes.  If it's not a __GFP_HARDWALL request and this node is in the nearest
 * hardwalled cpuset ancestor to this task's cpuset, yes.  If the task has been
 * OOM killed and has access to memory reserves as specified by the TIF_MEMDIE
 * flag, yes.
 * Otherwise, no.
 *
 * If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
 * cpuset_node_allowed_hardwall().  Otherwise, cpuset_node_allowed_softwall()
 * might sleep, and might allow a node from an enclosing cpuset.
 *
 * cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
 * cpusets, and never sleeps.
 *
 * The __GFP_THISNODE placement logic is really handled elsewhere,
 * by forcibly using a zonelist starting at a specified node, and by
 * (in get_page_from_freelist()) refusing to consider the zones for
 * any node on the zonelist except the first.  By the time any such
 * calls get to this routine, we should just shut up and say 'yes'.
 *
 * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
 * and do not allow allocations outside the current task's cpuset
 * unless the task has been OOM killed and is marked TIF_MEMDIE.
 * GFP_KERNEL allocations are not so marked, so can escape to the
 * nearest enclosing hardwalled ancestor cpuset.
 *
 * Scanning up parent cpusets requires callback_mutex.  The
 * __alloc_pages() routine only calls here with the __GFP_HARDWALL bit
 * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
 * current task's mems_allowed came up empty on the first pass over
 * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
 * cpuset are short of memory, might require taking callback_mutex.
 *
 * The first call here from mm/page_alloc:get_page_from_freelist()
 * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
 * so no allocation on a node outside the cpuset is allowed (unless
 * in interrupt, of course).
 *
 * The second pass through get_page_from_freelist() doesn't even call
 * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
 * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
 * in alloc_flags.  That logic and the checks below have the combined
 * effect that:
 *	in_interrupt - any node ok (current task context irrelevant)
 *	GFP_ATOMIC   - any node ok
 *	TIF_MEMDIE   - any node ok
 *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
 *	GFP_USER     - only nodes in current task's mems_allowed ok.
 *
 * Rule:
 *    Don't call cpuset_node_allowed_softwall() if you can't sleep, unless
 *    you pass in the __GFP_HARDWALL flag set in gfp_mask, which disables
 *    the code that might scan up ancestor cpusets and sleep.
 */
int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
	const struct cpuset *cs;	/* current cpuset ancestors */
	int allowed;			/* is allocation on this node allowed? */

	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
		return 1;
	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
	if (node_isset(node, current->mems_allowed))
		return 1;
	/*
	 * Allow tasks that have access to memory reserves because they have
	 * been OOM killed to get memory anywhere.
	 */
	if (unlikely(test_thread_flag(TIF_MEMDIE)))
		return 1;
	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
		return 0;

	if (current->flags & PF_EXITING) /* Let dying task have memory */
		return 1;

	/* Not hardwall and node outside mems_allowed: scan up cpusets */
	mutex_lock(&callback_mutex);

	task_lock(current);
	cs = nearest_hardwall_ancestor(task_cs(current));
	task_unlock(current);

	allowed = node_isset(node, cs->mems_allowed);
	mutex_unlock(&callback_mutex);
	return allowed;
}
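
/*
 * For reference, callers do not normally invoke the function above
 * directly: a fast-path wrapper in include/linux/cpuset.h skips it
 * entirely while no cpusets have been created.  Sketched from memory,
 * not verbatim:
 */
#if 0	/* illustrative only; the real copy lives in include/linux/cpuset.h */
static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
	return number_of_cpusets <= 1 ||
		__cpuset_node_allowed_softwall(node, gfp_mask);
}
#endif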

/*
 * cpuset_node_allowed_hardwall - Can we allocate on a memory node?
 * @node: is this an allowed node?
 * @gfp_mask: memory allocation flags
 *
 * If we're in interrupt, yes, we can always allocate.  If __GFP_THISNODE is
 * set, yes, we can always allocate.  If node is in our task's mems_allowed,
 * yes.  If the task has been OOM killed and has access to memory reserves as
 * specified by the TIF_MEMDIE flag, yes.
 * Otherwise, no.
 *
 * The __GFP_THISNODE placement logic is really handled elsewhere,
 * by forcibly using a zonelist starting at a specified node, and by
 * (in get_page_from_freelist()) refusing to consider the zones for
 * any node on the zonelist except the first.  By the time any such
 * calls get to this routine, we should just shut up and say 'yes'.
 *
 * Unlike the cpuset_node_allowed_softwall() variant, above,
 * this variant requires that the node be in the current task's
 * mems_allowed or that we're in interrupt.  It does not scan up the
 * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
 * It never sleeps.
 */
int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
		return 1;
	if (node_isset(node, current->mems_allowed))
		return 1;
	/*
	 * Allow tasks that have access to memory reserves because they have
	 * been OOM killed to get memory anywhere.
	 */
	if (unlikely(test_thread_flag(TIF_MEMDIE)))
		return 1;
	return 0;
}
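
/*
 * As with the softwall variant, callers go through inline wrappers in
 * include/linux/cpuset.h; the node- and zone-level wrappers look
 * roughly like this (sketched from memory, not verbatim):
 */
#if 0	/* illustrative only */
static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
	return number_of_cpusets <= 1 ||
		__cpuset_node_allowed_hardwall(node, gfp_mask);
}

static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
{
	return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask);
}
#endif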

/**
 * cpuset_unlock - release lock on cpuset changes
 *
 * Undo the lock taken in a previous cpuset_lock() call.
 */

void cpuset_unlock(void)
{
	mutex_unlock(&callback_mutex);
}

/**
 * cpuset_mem_spread_node() - On which node to begin search for a file page
 * cpuset_slab_spread_node() - On which node to begin search for a slab page
 *
 * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
 * tasks in a cpuset with is_spread_page or is_spread_slab set),
 * and if the memory allocation used cpuset_mem_spread_node()
 * to determine on which node to start looking, as it will for
 * certain page cache or slab cache pages such as those used for
 * file system buffers and inode caches, then instead of starting
 * on the local node to look for a free page, rather spread the
 * starting node around the task's mems_allowed nodes.
 *
 * We don't have to worry about the returned node being offline
 * because "it can't happen", and even if it did, it would be ok.
 *
 * The routines calling guarantee_online_mems() are careful to
 * only set nodes in task->mems_allowed that are online.  So it
 * should not be possible for the following code to return an
 * offline node.  But if it did, that would be ok, as this routine
 * is not returning the node where the allocation must be, only
 * the node where the search should start.  The zonelist passed to
 * __alloc_pages() will include all nodes.  If the slab allocator
 * is passed an offline node, it will fall back to the local node.
 * See kmem_cache_alloc_node().
 */
static int cpuset_spread_node(int *rotor)
{
	int node;

	node = next_node(*rotor, current->mems_allowed);
	if (node == MAX_NUMNODES)
		node = first_node(current->mems_allowed);
	*rotor = node;
	return node;
}

int cpuset_mem_spread_node(void)
{
	if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
		current->cpuset_mem_spread_rotor =
			node_random(&current->mems_allowed);

	return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
}

int cpuset_slab_spread_node(void)
{
	if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
		current->cpuset_slab_spread_rotor =
			node_random(&current->mems_allowed);

	return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
}

EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
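
/*
 * Example consumer (a simplified sketch of how the page cache of this
 * era allocates pages when memory spreading is enabled; the helper
 * name is hypothetical):
 */
#if 0	/* illustrative only */
static struct page *example_page_cache_alloc(gfp_t gfp)
{
	if (cpuset_do_page_mem_spread()) {
		int n = cpuset_mem_spread_node();
		return alloc_pages_exact_node(n, gfp, 0);
	}
	return alloc_pages(gfp, 0);
}
#endif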
  
/**
 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
 * @tsk1: pointer to task_struct of some task.
 * @tsk2: pointer to task_struct of some other task.
 *
 * Description: Return true if @tsk1's mems_allowed intersects the
 * mems_allowed of @tsk2.  Used by the OOM killer to determine if
 * one task's memory usage might impact the memory available
 * to the other.
 **/

int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
				   const struct task_struct *tsk2)
{
	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}
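
/*
 * Example usage (hypothetical helper; the OOM killer applies
 * essentially this test when ranking victims, since killing a task
 * whose memory cannot overlap ours frees us nothing):
 */
#if 0	/* illustrative only */
static bool example_victim_may_help(struct task_struct *victim)
{
	return cpuset_mems_allowed_intersects(current, victim);
}
#endif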

/**
 * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
 * @tsk: pointer to task_struct of some task.
 *
 * Description: Prints @tsk's name, cpuset name, and cached copy of its
 * mems_allowed to the kernel log.  Must hold task_lock(tsk) to allow
 * dereferencing task_cs(tsk).
 */
void cpuset_print_task_mems_allowed(struct task_struct *tsk)
{
	struct dentry *dentry;

	dentry = task_cs(tsk)->css.cgroup->dentry;
	spin_lock(&cpuset_buffer_lock);
	snprintf(cpuset_name, CPUSET_NAME_LEN,
		 dentry ? (const char *)dentry->d_name.name : "/");
	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
			   tsk->mems_allowed);
	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
	       tsk->comm, cpuset_name, cpuset_nodelist);
	spin_unlock(&cpuset_buffer_lock);
}
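
/*
 * The printk above produces a log line of the form (values
 * illustrative):
 *
 *	myapp cpuset=/foo mems_allowed=0-3
 */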

/*
 * Collection of memory_pressure is suppressed unless
 * this flag is enabled by writing "1" to the special
 * cpuset file 'memory_pressure_enabled' in the root cpuset.
 */
int cpuset_memory_pressure_enabled __read_mostly;

/**
 * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
 *
 * Keep a running average of the rate of synchronous (direct)
 * page reclaim efforts initiated by tasks in each cpuset.
 *
 * This represents the rate at which some task in the cpuset
 * ran low on memory on all nodes it was allowed to use, and
 * had to enter the kernel's page reclaim code in an effort to
 * create more free memory by tossing clean pages or swapping
 * or writing dirty pages.
 *
 * Display to user space in the per-cpuset read-only file
 * "memory_pressure".  Value displayed is an integer
 * representing the recent rate of entry into the synchronous
 * (direct) page reclaim by any task attached to the cpuset.
 **/

void __cpuset_memory_pressure_bump(void)
{
	task_lock(current);
	fmeter_markevent(&task_cs(current)->fmeter);
	task_unlock(current);
}
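
/*
 * Reclaim-path callers use a wrapper macro from include/linux/cpuset.h
 * so the common, disabled case costs only one global flag test.
 * Roughly (sketched from memory, not verbatim):
 */
#if 0	/* illustrative only */
#define cpuset_memory_pressure_bump()				\
	do {							\
		if (cpuset_memory_pressure_enabled)		\
			__cpuset_memory_pressure_bump();	\
	} while (0)
#endif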

#ifdef CONFIG_PROC_PID_CPUSET
/*
 * proc_cpuset_show()
 *  - Print task's cpuset path into seq_file.
 *  - Used for /proc/<pid>/cpuset.
 *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
 *    doesn't really matter if tsk->cpuset changes after we read it,
 *    and we take cgroup_mutex, keeping cpuset_attach() from changing it
 *    anyway.
 */
static int proc_cpuset_show(struct seq_file *m, void *unused_v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	struct cgroup_subsys_state *css;
	int retval;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = -EINVAL;
	cgroup_lock();
	css = task_subsys_state(tsk, cpuset_subsys_id);
	retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
	if (retval < 0)
		goto out_unlock;
	seq_puts(m, buf);
	seq_putc(m, '\n');
out_unlock:
	cgroup_unlock();
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}

static int cpuset_open(struct inode *inode, struct file *file)
{
	struct pid *pid = PROC_I(inode)->pid;
	return single_open(file, proc_cpuset_show, pid);
}

const struct file_operations proc_cpuset_operations = {
	.open		= cpuset_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif /* CONFIG_PROC_PID_CPUSET */
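
/*
 * Example (illustrative): for a task attached to a cpuset mounted at
 * /dev/cpuset/foo, "cat /proc/<pid>/cpuset" prints "/foo"; a task in
 * the top cpuset prints "/".
 */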

/* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
	seq_printf(m, "Mems_allowed:\t");
	seq_nodemask(m, &task->mems_allowed);
	seq_printf(m, "\n");
	seq_printf(m, "Mems_allowed_list:\t");
	seq_nodemask_list(m, &task->mems_allowed);
	seq_printf(m, "\n");
}
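
/*
 * Example /proc/<pid>/status fragment produced above (illustrative;
 * exact bitmap width depends on MAX_NUMNODES):
 *
 *	Mems_allowed:	00000000,00000003
 *	Mems_allowed_list:	0-1
 */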