kernel/cpuset.c 76.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
  /*
   *  kernel/cpuset.c
   *
   *  Processor and Memory placement constraints for sets of tasks.
   *
   *  Copyright (C) 2003 BULL SA.
029190c51   Paul Jackson   cpuset sched_load...
7
   *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
8793d854e   Paul Menage   Task Control Grou...
8
   *  Copyright (C) 2006 Google, Inc
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
   *
   *  Portions derived from Patrick Mochel's sysfs code.
   *  sysfs is Copyright (c) 2001-3 Patrick Mochel
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
   *
825a46af5   Paul Jackson   [PATCH] cpuset me...
13
   *  2003-10-10 Written by Simon Derr.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
   *  2003-10-22 Updates by Stephen Hemminger.
825a46af5   Paul Jackson   [PATCH] cpuset me...
15
   *  2004 May-July Rework by Paul Jackson.
8793d854e   Paul Menage   Task Control Grou...
16
   *  2006 Rework by Paul Menage to use generic cgroups
cf417141c   Max Krasnyansky   sched, cpuset: re...
17
18
   *  2008 Rework of the scheduler domains and CPU hotplug handling
   *       by Max Krasnyansky
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
23
   *
   *  This file is subject to the terms and conditions of the GNU General Public
   *  License.  See the file COPYING in the main directory of the Linux
   *  distribution for more details.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
28
29
30
31
32
33
34
35
  #include <linux/cpu.h>
  #include <linux/cpumask.h>
  #include <linux/cpuset.h>
  #include <linux/err.h>
  #include <linux/errno.h>
  #include <linux/file.h>
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/interrupt.h>
  #include <linux/kernel.h>
  #include <linux/kmod.h>
  #include <linux/list.h>
68860ec10   Paul Jackson   [PATCH] cpusets: ...
36
  #include <linux/mempolicy.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
  #include <linux/mm.h>
f481891fd   Miao Xie   cpuset: update to...
38
  #include <linux/memory.h>
9984de1a5   Paul Gortmaker   kernel: Map most ...
39
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
41
42
43
  #include <linux/mount.h>
  #include <linux/namei.h>
  #include <linux/pagemap.h>
  #include <linux/proc_fs.h>
6b9c2603c   Paul Jackson   [PATCH] cpuset: u...
44
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
  #include <linux/sched.h>
  #include <linux/seq_file.h>
22fb52dd7   David Quigley   [PATCH] SELinux: ...
47
  #include <linux/security.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
51
52
  #include <linux/spinlock.h>
  #include <linux/stat.h>
  #include <linux/string.h>
  #include <linux/time.h>
d2b436580   Arnd Bergmann   cpuset: Replace a...
53
  #include <linux/time64.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
55
56
57
  #include <linux/backing-dev.h>
  #include <linux/sort.h>
  
  #include <asm/uaccess.h>
60063497a   Arun Sharma   atomic: use <linu...
58
  #include <linux/atomic.h>
3d3f26a7b   Ingo Molnar   [PATCH] kernel/cp...
59
  #include <linux/mutex.h>
956db3ca0   Cliff Wickman   hotplug cpu: move...
60
  #include <linux/cgroup.h>
e44193d39   Li Zefan   cpuset: let hotpl...
61
  #include <linux/wait.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62

002f29062   Vlastimil Babka   cpuset: use stati...
63
  DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
202f72d5d   Paul Jackson   [PATCH] cpuset: n...
64

3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
65
66
67
68
69
  /* See "Frequency meter" comments, below. */
  
  struct fmeter {
  	int cnt;		/* unprocessed events count */
  	int val;		/* most recent output value */
d2b436580   Arnd Bergmann   cpuset: Replace a...
70
  	time64_t time;		/* clock (secs) when val computed */
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
71
72
  	spinlock_t lock;	/* guards read or write of above */
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
  struct cpuset {
8793d854e   Paul Menage   Task Control Grou...
74
  	struct cgroup_subsys_state css;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
75
  	unsigned long flags;		/* "unsigned long" so bitops work */
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
76

7e88291be   Li Zefan   cpuset: make cs->...
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  	/*
  	 * On default hierarchy:
  	 *
  	 * The user-configured masks can only be changed by writing to
  	 * cpuset.cpus and cpuset.mems, and won't be limited by the
  	 * parent masks.
  	 *
	 * The effective masks are the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same as the effective masks.
  	 */
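	/*
	 * For example (illustrative): if a cpuset's configured cpus are 0-7
	 * while its parent's effective_cpus is 0-3, its effective_cpus
	 * becomes 0-3; if the intersection were empty, it would fall back
	 * to the parent's effective mask (default hierarchy only).
	 */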
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
96
97
98
99
100
101
102
	/* user-configured CPUs and Memory Nodes allowed to tasks */
  	cpumask_var_t cpus_allowed;
  	nodemask_t mems_allowed;
  
	/* effective CPUs and Memory Nodes allowed to tasks */
  	cpumask_var_t effective_cpus;
  	nodemask_t effective_mems;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103

33ad801df   Li Zefan   cpuset: record ol...
104
105
106
107
108
109
110
111
112
113
114
  	/*
	 * This is the old set of Memory Nodes that tasks took on.
  	 *
  	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
  	 * - A new cpuset's old_mems_allowed is initialized when some
  	 *   task is moved into it.
  	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
  	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
  	 *   then old_mems_allowed is updated to mems_allowed.
  	 */
  	nodemask_t old_mems_allowed;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
115
  	struct fmeter fmeter;		/* memory_pressure filter */
029190c51   Paul Jackson   cpuset sched_load...
116

452477fa6   Tejun Heo   cpuset: pin down ...
117
118
119
120
121
  	/*
  	 * Tasks are being attached to this cpuset.  Used to prevent
  	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
  	 */
  	int attach_in_progress;
029190c51   Paul Jackson   cpuset sched_load...
122
123
  	/* partition number for rebuild_sched_domains() */
  	int pn;
956db3ca0   Cliff Wickman   hotplug cpu: move...
124

1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
125
126
  	/* for custom sched domain */
  	int relax_domain_level;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127
  };
a7c6d554a   Tejun Heo   cgroup: add/updat...
128
  static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
8793d854e   Paul Menage   Task Control Grou...
129
  {
a7c6d554a   Tejun Heo   cgroup: add/updat...
130
  	return css ? container_of(css, struct cpuset, css) : NULL;
8793d854e   Paul Menage   Task Control Grou...
131
132
133
134
135
  }
  
  /* Retrieve the cpuset for a task */
  static inline struct cpuset *task_cs(struct task_struct *task)
  {
073219e99   Tejun Heo   cgroup: clean up ...
136
  	return css_cs(task_css(task, cpuset_cgrp_id));
8793d854e   Paul Menage   Task Control Grou...
137
  }
8793d854e   Paul Menage   Task Control Grou...
138

c9710d801   Tejun Heo   cpuset: drop "con...
139
  static inline struct cpuset *parent_cs(struct cpuset *cs)
c431069fe   Tejun Heo   cpuset: remove cp...
140
  {
5c9d535b8   Tejun Heo   cgroup: remove cs...
141
  	return css_cs(cs->css.parent);
c431069fe   Tejun Heo   cpuset: remove cp...
142
  }
b246272ec   David Rientjes   cpusets: stall wh...
143
144
145
146
147
148
149
150
151
152
153
  #ifdef CONFIG_NUMA
  static inline bool task_has_mempolicy(struct task_struct *task)
  {
  	return task->mempolicy;
  }
  #else
  static inline bool task_has_mempolicy(struct task_struct *task)
  {
  	return false;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
155
  /* bits in struct cpuset flags field */
  typedef enum {
efeb77b2f   Tejun Heo   cpuset: introduce...
156
  	CS_ONLINE,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
157
158
  	CS_CPU_EXCLUSIVE,
  	CS_MEM_EXCLUSIVE,
786083667   Paul Menage   Cpuset hardwall f...
159
  	CS_MEM_HARDWALL,
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
160
  	CS_MEMORY_MIGRATE,
029190c51   Paul Jackson   cpuset sched_load...
161
  	CS_SCHED_LOAD_BALANCE,
825a46af5   Paul Jackson   [PATCH] cpuset me...
162
163
  	CS_SPREAD_PAGE,
  	CS_SPREAD_SLAB,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
164
165
166
  } cpuset_flagbits_t;
  
  /* convenient tests for these bits */
efeb77b2f   Tejun Heo   cpuset: introduce...
167
168
169
170
  static inline bool is_cpuset_online(const struct cpuset *cs)
  {
  	return test_bit(CS_ONLINE, &cs->flags);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
171
172
  static inline int is_cpu_exclusive(const struct cpuset *cs)
  {
7b5b9ef0e   Paul Jackson   [PATCH] cpuset cl...
173
  	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
174
175
176
177
  }
  
  static inline int is_mem_exclusive(const struct cpuset *cs)
  {
7b5b9ef0e   Paul Jackson   [PATCH] cpuset cl...
178
  	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
  }
786083667   Paul Menage   Cpuset hardwall f...
180
181
182
183
  static inline int is_mem_hardwall(const struct cpuset *cs)
  {
  	return test_bit(CS_MEM_HARDWALL, &cs->flags);
  }
029190c51   Paul Jackson   cpuset sched_load...
184
185
186
187
  static inline int is_sched_load_balance(const struct cpuset *cs)
  {
  	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
  }
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
188
189
  static inline int is_memory_migrate(const struct cpuset *cs)
  {
7b5b9ef0e   Paul Jackson   [PATCH] cpuset cl...
190
  	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
191
  }
825a46af5   Paul Jackson   [PATCH] cpuset me...
192
193
194
195
196
197
198
199
200
  static inline int is_spread_page(const struct cpuset *cs)
  {
  	return test_bit(CS_SPREAD_PAGE, &cs->flags);
  }
  
  static inline int is_spread_slab(const struct cpuset *cs)
  {
  	return test_bit(CS_SPREAD_SLAB, &cs->flags);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
  static struct cpuset top_cpuset = {
efeb77b2f   Tejun Heo   cpuset: introduce...
202
203
  	.flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
  		  (1 << CS_MEM_EXCLUSIVE)),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204
  };
ae8086ce1   Tejun Heo   cpuset: introduce...
205
206
207
  /**
   * cpuset_for_each_child - traverse online children of a cpuset
   * @child_cs: loop cursor pointing to the current child
492eb21b9   Tejun Heo   cgroup: make hier...
208
   * @pos_css: used for iteration
ae8086ce1   Tejun Heo   cpuset: introduce...
209
210
211
212
213
   * @parent_cs: target cpuset to walk children of
   *
   * Walk @child_cs through the online children of @parent_cs.  Must be used
   * with RCU read locked.
   */
492eb21b9   Tejun Heo   cgroup: make hier...
214
215
216
  #define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
  	css_for_each_child((pos_css), &(parent_cs)->css)		\
  		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
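/*
 * Example (illustrative; validate_change() below uses this pattern, where
 * examine() stands in for whatever is done with each online child):
 *
 *	rcu_read_lock();
 *	cpuset_for_each_child(c, css, parent_cs)
 *		examine(c);
 *	rcu_read_unlock();
 */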
ae8086ce1   Tejun Heo   cpuset: introduce...
217

fc560a26a   Tejun Heo   cpuset: replace c...
218
219
220
  /**
   * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
   * @des_cs: loop cursor pointing to the current descendant
492eb21b9   Tejun Heo   cgroup: make hier...
221
   * @pos_css: used for iteration
fc560a26a   Tejun Heo   cpuset: replace c...
222
223
224
 * @root_cs: target cpuset to walk descendants of
   *
   * Walk @des_cs through the online descendants of @root_cs.  Must be used
492eb21b9   Tejun Heo   cgroup: make hier...
225
   * with RCU read locked.  The caller may modify @pos_css by calling
bd8815a6d   Tejun Heo   cgroup: make css_...
226
227
   * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
   * iteration and the first node to be visited.
fc560a26a   Tejun Heo   cpuset: replace c...
228
   */
492eb21b9   Tejun Heo   cgroup: make hier...
229
230
231
  #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
  	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
  		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
fc560a26a   Tejun Heo   cpuset: replace c...
232

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
  /*
8447a0fee   Vladimir Davydov   cpuset: convert c...
234
235
236
237
   * There are two global locks guarding cpuset structures - cpuset_mutex and
   * callback_lock. We also require taking task_lock() when dereferencing a
   * task's cpuset pointer. See "The task_lock() exception", at the end of this
   * comment.
5d21cc2db   Tejun Heo   cpuset: replace c...
238
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
239
   * A task must hold both locks to modify cpusets.  If a task holds
5d21cc2db   Tejun Heo   cpuset: replace c...
240
   * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
8447a0fee   Vladimir Davydov   cpuset: convert c...
241
   * is the only task able to also acquire callback_lock and be able to
5d21cc2db   Tejun Heo   cpuset: replace c...
242
243
244
   * modify cpusets.  It can perform various checks on the cpuset structure
   * first, knowing nothing will change.  It can also allocate memory while
   * just holding cpuset_mutex.  While it is performing these checks, various
8447a0fee   Vladimir Davydov   cpuset: convert c...
245
246
   * callback routines can briefly acquire callback_lock to query cpusets.
   * Once it is ready to make the changes, it takes callback_lock, blocking
5d21cc2db   Tejun Heo   cpuset: replace c...
247
   * everyone else.
053199edf   Paul Jackson   [PATCH] cpusets: ...
248
249
   *
   * Calls to the kernel memory allocator can not be made while holding
8447a0fee   Vladimir Davydov   cpuset: convert c...
250
   * callback_lock, as that would risk double tripping on callback_lock
053199edf   Paul Jackson   [PATCH] cpusets: ...
251
252
253
   * from one of the callbacks into the cpuset code from within
   * __alloc_pages().
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
254
   * If a task is only holding callback_lock, then it has read-only
053199edf   Paul Jackson   [PATCH] cpusets: ...
255
256
   * access to cpusets.
   *
58568d2a8   Miao Xie   cpuset,mm: update...
257
258
259
 * Now, the task_struct fields mems_allowed and mempolicy may be changed
 * by another task, so we use alloc_lock in the task_struct to protect
 * them.
053199edf   Paul Jackson   [PATCH] cpusets: ...
260
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
261
   * The cpuset_common_file_read() handlers only hold callback_lock across
053199edf   Paul Jackson   [PATCH] cpusets: ...
262
263
264
   * small pieces of code, such as when reading out possibly multi-word
   * cpumasks and nodemasks.
   *
2df167a30   Paul Menage   cgroups: update c...
265
266
   * Accessing a task's cpuset should be done in accordance with the
   * guidelines for accessing subsystem state in kernel/cgroup.c
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267
   */
5d21cc2db   Tejun Heo   cpuset: replace c...
268
  static DEFINE_MUTEX(cpuset_mutex);
8447a0fee   Vladimir Davydov   cpuset: convert c...
269
  static DEFINE_SPINLOCK(callback_lock);
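/*
 * Illustrative sketch of the write-side locking pattern described above
 * (this mirrors what update_cpumask() below does; it is not a separate API):
 *
 *	mutex_lock(&cpuset_mutex);
 *	... validate_change(), memory allocations ...
 *	spin_lock_irq(&callback_lock);
 *	... publish the new cpus/mems masks ...
 *	spin_unlock_irq(&callback_lock);
 *	mutex_unlock(&cpuset_mutex);
 */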
4247bdc60   Paul Jackson   [PATCH] cpuset se...
270

e93ad19d0   Tejun Heo   cpuset: make mm m...
271
  static struct workqueue_struct *cpuset_migrate_mm_wq;
cf417141c   Max Krasnyansky   sched, cpuset: re...
272
  /*
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
273
274
275
   * CPU / memory hotplug is handled asynchronously.
   */
  static void cpuset_hotplug_workfn(struct work_struct *work);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
276
  static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
e44193d39   Li Zefan   cpuset: let hotpl...
277
  static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
278
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
279
   * This is ugly, but preserves the userspace API for existing cpuset
8793d854e   Paul Menage   Task Control Grou...
280
   * users. If someone tries to mount the "cpuset" filesystem, we
cf417141c   Max Krasnyansky   sched, cpuset: re...
281
282
   * silently switch it to mount "cgroup" instead
   */
f7e835710   Al Viro   convert cgroup an...
283
284
  static struct dentry *cpuset_mount(struct file_system_type *fs_type,
  			 int flags, const char *unused_dev_name, void *data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285
  {
8793d854e   Paul Menage   Task Control Grou...
286
  	struct file_system_type *cgroup_fs = get_fs_type("cgroup");
f7e835710   Al Viro   convert cgroup an...
287
  	struct dentry *ret = ERR_PTR(-ENODEV);
8793d854e   Paul Menage   Task Control Grou...
288
289
290
291
  	if (cgroup_fs) {
  		char mountopts[] =
  			"cpuset,noprefix,"
  			"release_agent=/sbin/cpuset_release_agent";
f7e835710   Al Viro   convert cgroup an...
292
293
  		ret = cgroup_fs->mount(cgroup_fs, flags,
  					   unused_dev_name, mountopts);
8793d854e   Paul Menage   Task Control Grou...
294
295
296
  		put_filesystem(cgroup_fs);
  	}
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
298
299
300
  }
  
  static struct file_system_type cpuset_fs_type = {
  	.name = "cpuset",
f7e835710   Al Viro   convert cgroup an...
301
  	.mount = cpuset_mount,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
302
  };
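/*
 * In other words (illustrative mount point), "mount -t cpuset none /mnt"
 * behaves roughly like:
 *
 *	mount -t cgroup -o cpuset,noprefix,release_agent=/sbin/cpuset_release_agent none /mnt
 */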
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
303
  /*
300ed6cbb   Li Zefan   cpuset: convert c...
304
 * Return in pmask the portion of a cpuset's cpus_allowed that
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
   * are online.  If none are online, walk up the cpuset hierarchy
28b89b9e6   Joonwoo Park   cpuset: handle ra...
306
   * until we find one that does have some online cpus.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
   *
   * One way or another, we guarantee to return some non-empty subset
5f054e31c   Rusty Russell   documentation: re...
309
   * of cpu_online_mask.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
310
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
311
   * Call with callback_lock or cpuset_mutex held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
   */
c9710d801   Tejun Heo   cpuset: drop "con...
313
  static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
314
  {
28b89b9e6   Joonwoo Park   cpuset: handle ra...
315
  	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
c431069fe   Tejun Heo   cpuset: remove cp...
316
  		cs = parent_cs(cs);
28b89b9e6   Joonwoo Park   cpuset: handle ra...
317
318
319
320
321
322
323
324
325
326
327
328
  		if (unlikely(!cs)) {
  			/*
  			 * The top cpuset doesn't have any online cpu as a
  			 * consequence of a race between cpuset_hotplug_work
  			 * and cpu hotplug notifier.  But we know the top
			 * cpuset's effective_cpus is on its way to be
  			 * identical to cpu_online_mask.
  			 */
  			cpumask_copy(pmask, cpu_online_mask);
  			return;
  		}
  	}
ae1c80238   Li Zefan   cpuset: apply cs-...
329
  	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
332
333
  }
  
  /*
 * Return in *pmask the portion of a cpuset's mems_allowed that
0e1e7c7a7   Christoph Lameter   Memoryless nodes:...
334
335
   * are online, with memory.  If none are online with memory, walk
   * up the cpuset hierarchy until we find one that does have some
40df2deb5   Li Zefan   cpuset: cleanup g...
336
   * online mems.  The top cpuset always has some mems online.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337
338
   *
   * One way or another, we guarantee to return some non-empty subset
38d7bee9d   Lai Jiangshan   cpuset: use N_MEM...
339
   * of node_states[N_MEMORY].
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
341
   * Call with callback_lock or cpuset_mutex held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
342
   */
c9710d801   Tejun Heo   cpuset: drop "con...
343
  static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
  {
ae1c80238   Li Zefan   cpuset: apply cs-...
345
  	while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
c431069fe   Tejun Heo   cpuset: remove cp...
346
  		cs = parent_cs(cs);
ae1c80238   Li Zefan   cpuset: apply cs-...
347
  	nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
348
  }
f3b39d47e   Miao Xie   cpusets: restruct...
349
350
351
  /*
   * update task's spread flag if cpuset's page/slab spread flag is set
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
352
   * Call with callback_lock or cpuset_mutex held.
f3b39d47e   Miao Xie   cpusets: restruct...
353
354
355
356
357
   */
  static void cpuset_update_task_spread_flag(struct cpuset *cs,
  					struct task_struct *tsk)
  {
  	if (is_spread_page(cs))
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
358
  		task_set_spread_page(tsk);
f3b39d47e   Miao Xie   cpusets: restruct...
359
  	else
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
360
  		task_clear_spread_page(tsk);
f3b39d47e   Miao Xie   cpusets: restruct...
361
  	if (is_spread_slab(cs))
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
362
  		task_set_spread_slab(tsk);
f3b39d47e   Miao Xie   cpusets: restruct...
363
  	else
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
364
  		task_clear_spread_slab(tsk);
f3b39d47e   Miao Xie   cpusets: restruct...
365
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
366
367
368
369
370
  /*
   * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
   *
   * One cpuset is a subset of another if all its allowed CPUs and
   * Memory Nodes are a subset of the other, and its exclusive flags
5d21cc2db   Tejun Heo   cpuset: replace c...
371
   * are only set if the other's are set.  Call holding cpuset_mutex.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
372
373
374
375
   */
  
  static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
  {
300ed6cbb   Li Zefan   cpuset: convert c...
376
  	return	cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
377
378
379
380
  		nodes_subset(p->mems_allowed, q->mems_allowed) &&
  		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
  		is_mem_exclusive(p) <= is_mem_exclusive(q);
  }
645fcc9d2   Li Zefan   cpuset: don't all...
381
382
383
384
  /**
   * alloc_trial_cpuset - allocate a trial cpuset
   * @cs: the cpuset that the trial cpuset duplicates
   */
c9710d801   Tejun Heo   cpuset: drop "con...
385
  static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
645fcc9d2   Li Zefan   cpuset: don't all...
386
  {
300ed6cbb   Li Zefan   cpuset: convert c...
387
388
389
390
391
  	struct cpuset *trial;
  
  	trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
  	if (!trial)
  		return NULL;
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
392
393
394
395
  	if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL))
  		goto free_cs;
  	if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
  		goto free_cpus;
300ed6cbb   Li Zefan   cpuset: convert c...
396

e2b9a3d7d   Li Zefan   cpuset: add cs->e...
397
398
  	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
  	cpumask_copy(trial->effective_cpus, cs->effective_cpus);
300ed6cbb   Li Zefan   cpuset: convert c...
399
  	return trial;
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
400
401
402
403
404
405
  
  free_cpus:
  	free_cpumask_var(trial->cpus_allowed);
  free_cs:
  	kfree(trial);
  	return NULL;
645fcc9d2   Li Zefan   cpuset: don't all...
406
407
408
409
410
411
412
413
  }
  
  /**
   * free_trial_cpuset - free the trial cpuset
   * @trial: the trial cpuset to be freed
   */
  static void free_trial_cpuset(struct cpuset *trial)
  {
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
414
  	free_cpumask_var(trial->effective_cpus);
300ed6cbb   Li Zefan   cpuset: convert c...
415
  	free_cpumask_var(trial->cpus_allowed);
645fcc9d2   Li Zefan   cpuset: don't all...
416
417
  	kfree(trial);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418
419
420
421
422
423
424
  /*
   * validate_change() - Used to validate that any proposed cpuset change
   *		       follows the structural rules for cpusets.
   *
   * If we replaced the flag and mask values of the current cpuset
   * (cur) with those values in the trial cpuset (trial), would
   * our various subset and exclusive rules still be valid?  Presumes
5d21cc2db   Tejun Heo   cpuset: replace c...
425
   * cpuset_mutex held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
426
427
428
429
430
431
432
433
434
435
436
   *
   * 'cur' is the address of an actual, in-use cpuset.  Operations
   * such as list traversal that depend on the actual address of the
   * cpuset in the list must use cur below, not trial.
   *
   * 'trial' is the address of bulk structure copy of cur, with
   * perhaps one or more of the fields cpus_allowed, mems_allowed,
   * or flags changed to new, trial values.
   *
   * Return 0 if valid, -errno if not.
   */
c9710d801   Tejun Heo   cpuset: drop "con...
437
  static int validate_change(struct cpuset *cur, struct cpuset *trial)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
438
  {
492eb21b9   Tejun Heo   cgroup: make hier...
439
  	struct cgroup_subsys_state *css;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440
  	struct cpuset *c, *par;
ae8086ce1   Tejun Heo   cpuset: introduce...
441
442
443
  	int ret;
  
  	rcu_read_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
445
  
  	/* Each of our child cpusets must be a subset of us */
ae8086ce1   Tejun Heo   cpuset: introduce...
446
  	ret = -EBUSY;
492eb21b9   Tejun Heo   cgroup: make hier...
447
  	cpuset_for_each_child(c, css, cur)
ae8086ce1   Tejun Heo   cpuset: introduce...
448
449
  		if (!is_cpuset_subset(c, trial))
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
450
451
  
  	/* Remaining checks don't apply to root cpuset */
ae8086ce1   Tejun Heo   cpuset: introduce...
452
  	ret = 0;
696040670   Paul Jackson   [PATCH] cpuset: m...
453
  	if (cur == &top_cpuset)
ae8086ce1   Tejun Heo   cpuset: introduce...
454
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455

c431069fe   Tejun Heo   cpuset: remove cp...
456
  	par = parent_cs(cur);
696040670   Paul Jackson   [PATCH] cpuset: m...
457

7e88291be   Li Zefan   cpuset: make cs->...
458
	/* On legacy hierarchy, we must be a subset of our parent cpuset. */
ae8086ce1   Tejun Heo   cpuset: introduce...
459
  	ret = -EACCES;
9e10a130d   Tejun Heo   cgroup: replace c...
460
461
  	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  	    !is_cpuset_subset(trial, par))
ae8086ce1   Tejun Heo   cpuset: introduce...
462
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
463

2df167a30   Paul Menage   cgroups: update c...
464
465
466
467
  	/*
  	 * If either I or some sibling (!= me) is exclusive, we can't
  	 * overlap
  	 */
ae8086ce1   Tejun Heo   cpuset: introduce...
468
  	ret = -EINVAL;
492eb21b9   Tejun Heo   cgroup: make hier...
469
  	cpuset_for_each_child(c, css, par) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470
471
  		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
  		    c != cur &&
300ed6cbb   Li Zefan   cpuset: convert c...
472
  		    cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
ae8086ce1   Tejun Heo   cpuset: introduce...
473
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
475
476
  		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
  		    c != cur &&
  		    nodes_intersects(trial->mems_allowed, c->mems_allowed))
ae8086ce1   Tejun Heo   cpuset: introduce...
477
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
478
  	}
452477fa6   Tejun Heo   cpuset: pin down ...
479
480
  	/*
  	 * Cpusets with tasks - existing or newly being attached - can't
1c09b195d   Li Zefan   cpuset: fix a reg...
481
  	 * be changed to have empty cpus_allowed or mems_allowed.
452477fa6   Tejun Heo   cpuset: pin down ...
482
  	 */
ae8086ce1   Tejun Heo   cpuset: introduce...
483
  	ret = -ENOSPC;
27bd4dbb8   Tejun Heo   cgroup: replace c...
484
  	if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
1c09b195d   Li Zefan   cpuset: fix a reg...
485
486
487
488
489
490
491
  		if (!cpumask_empty(cur->cpus_allowed) &&
  		    cpumask_empty(trial->cpus_allowed))
  			goto out;
  		if (!nodes_empty(cur->mems_allowed) &&
  		    nodes_empty(trial->mems_allowed))
  			goto out;
  	}
020958b62   Paul Jackson   cpusets: decrusti...
492

f82f80426   Juri Lelli   sched/deadline: E...
493
494
495
496
497
498
499
500
501
  	/*
  	 * We can't shrink if we won't have enough room for SCHED_DEADLINE
  	 * tasks.
  	 */
  	ret = -EBUSY;
  	if (is_cpu_exclusive(cur) &&
  	    !cpuset_cpumask_can_shrink(cur->cpus_allowed,
  				       trial->cpus_allowed))
  		goto out;
ae8086ce1   Tejun Heo   cpuset: introduce...
502
503
504
505
  	ret = 0;
  out:
  	rcu_read_unlock();
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
506
  }
db7f47cf4   Paul Menage   cpusets: allow cp...
507
  #ifdef CONFIG_SMP
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
508
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
509
   * Helper routine for generate_sched_domains().
8b5f1c52d   Li Zefan   cpuset: use effec...
510
   * Do cpusets a, b have overlapping effective cpus_allowed masks?
029190c51   Paul Jackson   cpuset sched_load...
511
   */
029190c51   Paul Jackson   cpuset sched_load...
512
513
  static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
  {
8b5f1c52d   Li Zefan   cpuset: use effec...
514
  	return cpumask_intersects(a->effective_cpus, b->effective_cpus);
029190c51   Paul Jackson   cpuset sched_load...
515
  }
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
516
517
518
  static void
  update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  {
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
519
520
521
522
  	if (dattr->relax_domain_level < c->relax_domain_level)
  		dattr->relax_domain_level = c->relax_domain_level;
  	return;
  }
fc560a26a   Tejun Heo   cpuset: replace c...
523
524
  static void update_domain_attr_tree(struct sched_domain_attr *dattr,
  				    struct cpuset *root_cs)
f5393693e   Lai Jiangshan   cpuset: speed up ...
525
  {
fc560a26a   Tejun Heo   cpuset: replace c...
526
  	struct cpuset *cp;
492eb21b9   Tejun Heo   cgroup: make hier...
527
  	struct cgroup_subsys_state *pos_css;
f5393693e   Lai Jiangshan   cpuset: speed up ...
528

fc560a26a   Tejun Heo   cpuset: replace c...
529
  	rcu_read_lock();
492eb21b9   Tejun Heo   cgroup: make hier...
530
  	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
fc560a26a   Tejun Heo   cpuset: replace c...
531
532
  		/* skip the whole subtree if @cp doesn't have any CPU */
  		if (cpumask_empty(cp->cpus_allowed)) {
492eb21b9   Tejun Heo   cgroup: make hier...
533
  			pos_css = css_rightmost_descendant(pos_css);
f5393693e   Lai Jiangshan   cpuset: speed up ...
534
  			continue;
fc560a26a   Tejun Heo   cpuset: replace c...
535
  		}
f5393693e   Lai Jiangshan   cpuset: speed up ...
536
537
538
  
  		if (is_sched_load_balance(cp))
  			update_domain_attr(dattr, cp);
f5393693e   Lai Jiangshan   cpuset: speed up ...
539
  	}
fc560a26a   Tejun Heo   cpuset: replace c...
540
  	rcu_read_unlock();
f5393693e   Lai Jiangshan   cpuset: speed up ...
541
  }
029190c51   Paul Jackson   cpuset sched_load...
542
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
543
544
545
546
547
   * generate_sched_domains()
   *
 * This function builds a partial partition of the system's CPUs.
   * A 'partial partition' is a set of non-overlapping subsets whose
   * union is a subset of that set.
0a0fca9d8   Viresh Kumar   sched: Rename sch...
548
   * The output of this function needs to be passed to kernel/sched/core.c
cf417141c   Max Krasnyansky   sched, cpuset: re...
549
550
551
   * partition_sched_domains() routine, which will rebuild the scheduler's
   * load balancing domains (sched domains) as specified by that partial
   * partition.
029190c51   Paul Jackson   cpuset sched_load...
552
   *
45ce80fb6   Li Zefan   cgroups: consolid...
553
   * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
029190c51   Paul Jackson   cpuset sched_load...
554
555
556
557
558
559
560
   * for a background explanation of this.
   *
   * Does not return errors, on the theory that the callers of this
   * routine would rather not worry about failures to rebuild sched
   * domains when operating in the severe memory shortage situations
   * that could cause allocation failures below.
   *
5d21cc2db   Tejun Heo   cpuset: replace c...
561
   * Must be called with cpuset_mutex held.
029190c51   Paul Jackson   cpuset sched_load...
562
563
   *
   * The three key local variables below are:
aeed68242   Li Zefan   cpuset: clean up ...
564
   *    q  - a linked-list queue of cpuset pointers, used to implement a
029190c51   Paul Jackson   cpuset sched_load...
565
566
567
568
569
570
571
572
573
574
575
576
   *	   top-down scan of all cpusets.  This scan loads a pointer
   *	   to each cpuset marked is_sched_load_balance into the
 *	   array 'csa'.  For our purposes, rebuilding the scheduler's
   *	   sched domains, we can ignore !is_sched_load_balance cpusets.
   *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
   *	   that need to be load balanced, for convenient iterative
   *	   access by the subsequent code that finds the best partition,
 *	   i.e. the set of domains (subsets) of CPUs such that the
   *	   cpus_allowed of every cpuset marked is_sched_load_balance
   *	   is a subset of one of these domains, while there are as
   *	   many such domains as possible, each as small as possible.
   * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
0a0fca9d8   Viresh Kumar   sched: Rename sch...
577
   *	   the kernel/sched/core.c routine partition_sched_domains() in a
029190c51   Paul Jackson   cpuset sched_load...
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
   *	   convenient format, that can be easily compared to the prior
   *	   value to determine what partition elements (sched domains)
   *	   were changed (added or removed.)
   *
   * Finding the best partition (set of domains):
   *	The triple nested loops below over i, j, k scan over the
   *	load balanced cpusets (using the array of cpuset pointers in
   *	csa[]) looking for pairs of cpusets that have overlapping
   *	cpus_allowed, but which don't have the same 'pn' partition
 *	number, and puts them in the same partition number.  It keeps
   *	looping on the 'restart' label until it can no longer find
   *	any such pairs.
   *
   *	The union of the cpus_allowed masks from the set of
   *	all cpusets having the same 'pn' value then form the one
   *	element of the partition (one sched domain) to be passed to
   *	partition_sched_domains().
   */
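/*
 * Worked example (illustrative): with three load-balanced cpusets whose
 * effective cpus are A = 0-3, B = 2-5 and C = 8-11, A and B overlap and
 * therefore end up sharing one 'pn', yielding the sched domain 0-5, while
 * C forms a second domain 8-11, so ndoms is 2.
 */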
acc3f5d7c   Rusty Russell   cpumask: Partitio...
596
  static int generate_sched_domains(cpumask_var_t **domains,
cf417141c   Max Krasnyansky   sched, cpuset: re...
597
  			struct sched_domain_attr **attributes)
029190c51   Paul Jackson   cpuset sched_load...
598
  {
029190c51   Paul Jackson   cpuset sched_load...
599
600
601
602
  	struct cpuset *cp;	/* scans q */
  	struct cpuset **csa;	/* array of all cpuset ptrs */
  	int csn;		/* how many cpuset ptrs in csa so far */
  	int i, j, k;		/* indices for partition finding loops */
acc3f5d7c   Rusty Russell   cpumask: Partitio...
603
  	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
47b8ea718   Rik van Riel   cpusets, isolcpus...
604
  	cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
605
  	struct sched_domain_attr *dattr;  /* attributes for custom domains */
1583715dd   Ingo Molnar   sched, cpusets: f...
606
  	int ndoms = 0;		/* number of sched domains in result */
6af866af3   Li Zefan   cpuset: remove re...
607
  	int nslot;		/* next empty doms[] struct cpumask slot */
492eb21b9   Tejun Heo   cgroup: make hier...
608
  	struct cgroup_subsys_state *pos_css;
029190c51   Paul Jackson   cpuset sched_load...
609

029190c51   Paul Jackson   cpuset sched_load...
610
  	doms = NULL;
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
611
  	dattr = NULL;
cf417141c   Max Krasnyansky   sched, cpuset: re...
612
  	csa = NULL;
029190c51   Paul Jackson   cpuset sched_load...
613

47b8ea718   Rik van Riel   cpusets, isolcpus...
614
615
616
  	if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
  		goto done;
  	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
029190c51   Paul Jackson   cpuset sched_load...
617
618
  	/* Special case for the 99% of systems with one, full, sched domain */
  	if (is_sched_load_balance(&top_cpuset)) {
acc3f5d7c   Rusty Russell   cpumask: Partitio...
619
620
  		ndoms = 1;
  		doms = alloc_sched_domains(ndoms);
029190c51   Paul Jackson   cpuset sched_load...
621
  		if (!doms)
cf417141c   Max Krasnyansky   sched, cpuset: re...
622
  			goto done;
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
623
624
625
  		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
  		if (dattr) {
  			*dattr = SD_ATTR_INIT;
93a655755   Li Zefan   cpuset: fix wrong...
626
  			update_domain_attr_tree(dattr, &top_cpuset);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
627
  		}
47b8ea718   Rik van Riel   cpusets, isolcpus...
628
629
  		cpumask_and(doms[0], top_cpuset.effective_cpus,
  				     non_isolated_cpus);
cf417141c   Max Krasnyansky   sched, cpuset: re...
630

cf417141c   Max Krasnyansky   sched, cpuset: re...
631
  		goto done;
029190c51   Paul Jackson   cpuset sched_load...
632
  	}
664eeddee   Mel Gorman   mm: page_alloc: u...
633
  	csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL);
029190c51   Paul Jackson   cpuset sched_load...
634
635
636
  	if (!csa)
  		goto done;
  	csn = 0;
fc560a26a   Tejun Heo   cpuset: replace c...
637
  	rcu_read_lock();
492eb21b9   Tejun Heo   cgroup: make hier...
638
  	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
bd8815a6d   Tejun Heo   cgroup: make css_...
639
640
  		if (cp == &top_cpuset)
  			continue;
f5393693e   Lai Jiangshan   cpuset: speed up ...
641
  		/*
fc560a26a   Tejun Heo   cpuset: replace c...
642
643
644
645
646
647
  		 * Continue traversing beyond @cp iff @cp has some CPUs and
  		 * isn't load balancing.  The former is obvious.  The
  		 * latter: All child cpusets contain a subset of the
  		 * parent's cpus, so just skip them, and then we call
  		 * update_domain_attr_tree() to calc relax_domain_level of
  		 * the corresponding sched domain.
f5393693e   Lai Jiangshan   cpuset: speed up ...
648
  		 */
fc560a26a   Tejun Heo   cpuset: replace c...
649
  		if (!cpumask_empty(cp->cpus_allowed) &&
47b8ea718   Rik van Riel   cpusets, isolcpus...
650
651
  		    !(is_sched_load_balance(cp) &&
  		      cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
f5393693e   Lai Jiangshan   cpuset: speed up ...
652
  			continue;
489a5393a   Lai Jiangshan   cpuset: don't pas...
653

fc560a26a   Tejun Heo   cpuset: replace c...
654
655
656
657
  		if (is_sched_load_balance(cp))
  			csa[csn++] = cp;
  
  		/* skip @cp's subtree */
492eb21b9   Tejun Heo   cgroup: make hier...
658
  		pos_css = css_rightmost_descendant(pos_css);
fc560a26a   Tejun Heo   cpuset: replace c...
659
660
  	}
  	rcu_read_unlock();
029190c51   Paul Jackson   cpuset sched_load...
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
  
  	for (i = 0; i < csn; i++)
  		csa[i]->pn = i;
  	ndoms = csn;
  
  restart:
  	/* Find the best partition (set of sched domains) */
  	for (i = 0; i < csn; i++) {
  		struct cpuset *a = csa[i];
  		int apn = a->pn;
  
  		for (j = 0; j < csn; j++) {
  			struct cpuset *b = csa[j];
  			int bpn = b->pn;
  
  			if (apn != bpn && cpusets_overlap(a, b)) {
  				for (k = 0; k < csn; k++) {
  					struct cpuset *c = csa[k];
  
  					if (c->pn == bpn)
  						c->pn = apn;
  				}
  				ndoms--;	/* one less element */
  				goto restart;
  			}
  		}
  	}
cf417141c   Max Krasnyansky   sched, cpuset: re...
688
689
690
691
  	/*
  	 * Now we know how many domains to create.
  	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
  	 */
acc3f5d7c   Rusty Russell   cpumask: Partitio...
692
  	doms = alloc_sched_domains(ndoms);
700018e0a   Li Zefan   cpuset: fix regre...
693
  	if (!doms)
cf417141c   Max Krasnyansky   sched, cpuset: re...
694
  		goto done;
cf417141c   Max Krasnyansky   sched, cpuset: re...
695
696
697
698
699
  
  	/*
  	 * The rest of the code, including the scheduler, can deal with
  	 * dattr==NULL case. No need to abort if alloc fails.
  	 */
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
700
  	dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
029190c51   Paul Jackson   cpuset sched_load...
701
702
703
  
  	for (nslot = 0, i = 0; i < csn; i++) {
  		struct cpuset *a = csa[i];
6af866af3   Li Zefan   cpuset: remove re...
704
  		struct cpumask *dp;
029190c51   Paul Jackson   cpuset sched_load...
705
  		int apn = a->pn;
cf417141c   Max Krasnyansky   sched, cpuset: re...
706
707
708
709
  		if (apn < 0) {
  			/* Skip completed partitions */
  			continue;
  		}
acc3f5d7c   Rusty Russell   cpumask: Partitio...
710
  		dp = doms[nslot];
cf417141c   Max Krasnyansky   sched, cpuset: re...
711
712
713
714
  
  		if (nslot == ndoms) {
  			static int warnings = 10;
  			if (warnings) {
12d3089c1   Fabian Frederick   kernel/cpuset.c: ...
715
716
717
  				pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d
  ",
  					nslot, ndoms, csn, i, apn);
cf417141c   Max Krasnyansky   sched, cpuset: re...
718
  				warnings--;
029190c51   Paul Jackson   cpuset sched_load...
719
  			}
cf417141c   Max Krasnyansky   sched, cpuset: re...
720
721
  			continue;
  		}
029190c51   Paul Jackson   cpuset sched_load...
722

6af866af3   Li Zefan   cpuset: remove re...
723
  		cpumask_clear(dp);
cf417141c   Max Krasnyansky   sched, cpuset: re...
724
725
726
727
728
729
  		if (dattr)
  			*(dattr + nslot) = SD_ATTR_INIT;
  		for (j = i; j < csn; j++) {
  			struct cpuset *b = csa[j];
  
  			if (apn == b->pn) {
8b5f1c52d   Li Zefan   cpuset: use effec...
730
  				cpumask_or(dp, dp, b->effective_cpus);
47b8ea718   Rik van Riel   cpusets, isolcpus...
731
  				cpumask_and(dp, dp, non_isolated_cpus);
cf417141c   Max Krasnyansky   sched, cpuset: re...
732
733
734
735
736
  				if (dattr)
  					update_domain_attr_tree(dattr + nslot, b);
  
  				/* Done with this partition */
  				b->pn = -1;
029190c51   Paul Jackson   cpuset sched_load...
737
  			}
029190c51   Paul Jackson   cpuset sched_load...
738
  		}
cf417141c   Max Krasnyansky   sched, cpuset: re...
739
  		nslot++;
029190c51   Paul Jackson   cpuset sched_load...
740
741
  	}
  	BUG_ON(nslot != ndoms);
cf417141c   Max Krasnyansky   sched, cpuset: re...
742
  done:
47b8ea718   Rik van Riel   cpusets, isolcpus...
743
  	free_cpumask_var(non_isolated_cpus);
cf417141c   Max Krasnyansky   sched, cpuset: re...
744
  	kfree(csa);
700018e0a   Li Zefan   cpuset: fix regre...
745
746
747
748
749
750
  	/*
  	 * Fallback to the default domain if kmalloc() failed.
  	 * See comments in partition_sched_domains().
  	 */
  	if (doms == NULL)
  		ndoms = 1;
cf417141c   Max Krasnyansky   sched, cpuset: re...
751
752
753
754
755
756
757
758
  	*domains    = doms;
  	*attributes = dattr;
  	return ndoms;
  }
  
  /*
   * Rebuild scheduler domains.
   *
699140ba8   Tejun Heo   cpuset: drop asyn...
759
760
761
762
763
   * If the flag 'sched_load_balance' of any cpuset with non-empty
   * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
   * which has that flag enabled, or if any cpuset with a non-empty
   * 'cpus' is removed, then call this routine to rebuild the
   * scheduler's dynamic sched domains.
cf417141c   Max Krasnyansky   sched, cpuset: re...
764
   *
5d21cc2db   Tejun Heo   cpuset: replace c...
765
   * Call with cpuset_mutex held.  Takes get_online_cpus().
cf417141c   Max Krasnyansky   sched, cpuset: re...
766
   */
699140ba8   Tejun Heo   cpuset: drop asyn...
767
  static void rebuild_sched_domains_locked(void)
cf417141c   Max Krasnyansky   sched, cpuset: re...
768
769
  {
  	struct sched_domain_attr *attr;
acc3f5d7c   Rusty Russell   cpumask: Partitio...
770
  	cpumask_var_t *doms;
cf417141c   Max Krasnyansky   sched, cpuset: re...
771
  	int ndoms;
5d21cc2db   Tejun Heo   cpuset: replace c...
772
  	lockdep_assert_held(&cpuset_mutex);
86ef5c9a8   Gautham R Shenoy   cpu-hotplug: repl...
773
  	get_online_cpus();
cf417141c   Max Krasnyansky   sched, cpuset: re...
774

5b16c2a49   Li Zefan   cpuset: fix cpu h...
775
776
777
778
779
  	/*
  	 * We have raced with CPU hotplug. Don't do anything to avoid
  	 * passing doms with offlined cpu to partition_sched_domains().
  	 * Anyways, hotplug work item will rebuild sched domains.
  	 */
8b5f1c52d   Li Zefan   cpuset: use effec...
780
  	if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
5b16c2a49   Li Zefan   cpuset: fix cpu h...
781
  		goto out;
cf417141c   Max Krasnyansky   sched, cpuset: re...
782
  	/* Generate domain masks and attrs */
cf417141c   Max Krasnyansky   sched, cpuset: re...
783
  	ndoms = generate_sched_domains(&doms, &attr);
cf417141c   Max Krasnyansky   sched, cpuset: re...
784
785
786
  
  	/* Have scheduler rebuild the domains */
  	partition_sched_domains(ndoms, doms, attr);
5b16c2a49   Li Zefan   cpuset: fix cpu h...
787
  out:
86ef5c9a8   Gautham R Shenoy   cpu-hotplug: repl...
788
  	put_online_cpus();
cf417141c   Max Krasnyansky   sched, cpuset: re...
789
  }
db7f47cf4   Paul Menage   cpusets: allow cp...
790
  #else /* !CONFIG_SMP */
699140ba8   Tejun Heo   cpuset: drop asyn...
791
  static void rebuild_sched_domains_locked(void)
db7f47cf4   Paul Menage   cpusets: allow cp...
792
793
  {
  }
db7f47cf4   Paul Menage   cpusets: allow cp...
794
  #endif /* CONFIG_SMP */
029190c51   Paul Jackson   cpuset sched_load...
795

cf417141c   Max Krasnyansky   sched, cpuset: re...
796
797
  void rebuild_sched_domains(void)
  {
5d21cc2db   Tejun Heo   cpuset: replace c...
798
  	mutex_lock(&cpuset_mutex);
699140ba8   Tejun Heo   cpuset: drop asyn...
799
  	rebuild_sched_domains_locked();
5d21cc2db   Tejun Heo   cpuset: replace c...
800
  	mutex_unlock(&cpuset_mutex);
029190c51   Paul Jackson   cpuset sched_load...
801
  }
58f4790b7   Cliff Wickman   cpusets: update_c...
802
  /**
0b2f630a2   Miao Xie   cpusets: restruct...
803
804
   * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
   * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
0b2f630a2   Miao Xie   cpusets: restruct...
805
   *
d66393e54   Tejun Heo   cpuset: use css_t...
806
807
808
   * Iterate through each task of @cs updating its cpus_allowed to the
   * effective cpuset's.  As this function is called with cpuset_mutex held,
   * cpuset membership stays stable.
0b2f630a2   Miao Xie   cpusets: restruct...
809
   */
d66393e54   Tejun Heo   cpuset: use css_t...
810
  static void update_tasks_cpumask(struct cpuset *cs)
0b2f630a2   Miao Xie   cpusets: restruct...
811
  {
d66393e54   Tejun Heo   cpuset: use css_t...
812
813
814
815
816
  	struct css_task_iter it;
  	struct task_struct *task;
  
  	css_task_iter_start(&cs->css, &it);
  	while ((task = css_task_iter_next(&it)))
ae1c80238   Li Zefan   cpuset: apply cs-...
817
  		set_cpus_allowed_ptr(task, cs->effective_cpus);
d66393e54   Tejun Heo   cpuset: use css_t...
818
  	css_task_iter_end(&it);
0b2f630a2   Miao Xie   cpusets: restruct...
819
  }
5c5cc6232   Li Zefan   cpuset: allow to ...
820
  /*
734d45130   Li Zefan   cpuset: update cs...
821
822
823
824
825
826
   * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
   * @cs: the cpuset to consider
   * @new_cpus: temp variable for calculating new effective_cpus
   *
 * When the configured cpumask is changed, the effective cpumasks of this cpuset
   * and all its descendants need to be updated.
5c5cc6232   Li Zefan   cpuset: allow to ...
827
   *
734d45130   Li Zefan   cpuset: update cs...
828
 * On legacy hierarchy, effective_cpus will be the same as cpus_allowed.
5c5cc6232   Li Zefan   cpuset: allow to ...
829
830
831
   *
   * Called with cpuset_mutex held
   */
734d45130   Li Zefan   cpuset: update cs...
832
  static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
5c5cc6232   Li Zefan   cpuset: allow to ...
833
834
  {
  	struct cpuset *cp;
492eb21b9   Tejun Heo   cgroup: make hier...
835
  	struct cgroup_subsys_state *pos_css;
8b5f1c52d   Li Zefan   cpuset: use effec...
836
  	bool need_rebuild_sched_domains = false;
5c5cc6232   Li Zefan   cpuset: allow to ...
837
838
  
  	rcu_read_lock();
734d45130   Li Zefan   cpuset: update cs...
839
840
841
842
  	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
  		struct cpuset *parent = parent_cs(cp);
  
  		cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus);
554b0d1c8   Li Zefan   cpuset: inherit a...
843
844
845
846
  		/*
  		 * If it becomes empty, inherit the effective mask of the
  		 * parent, which is guaranteed to have some CPUs.
  		 */
9e10a130d   Tejun Heo   cgroup: replace c...
847
848
  		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  		    cpumask_empty(new_cpus))
554b0d1c8   Li Zefan   cpuset: inherit a...
849
  			cpumask_copy(new_cpus, parent->effective_cpus);
734d45130   Li Zefan   cpuset: update cs...
850
851
852
853
  		/* Skip the whole subtree if the cpumask remains the same. */
  		if (cpumask_equal(new_cpus, cp->effective_cpus)) {
  			pos_css = css_rightmost_descendant(pos_css);
  			continue;
5c5cc6232   Li Zefan   cpuset: allow to ...
854
  		}
734d45130   Li Zefan   cpuset: update cs...
855

ec903c0c8   Tejun Heo   cgroup: rename cs...
856
  		if (!css_tryget_online(&cp->css))
5c5cc6232   Li Zefan   cpuset: allow to ...
857
858
  			continue;
  		rcu_read_unlock();
8447a0fee   Vladimir Davydov   cpuset: convert c...
859
  		spin_lock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
860
  		cpumask_copy(cp->effective_cpus, new_cpus);
8447a0fee   Vladimir Davydov   cpuset: convert c...
861
  		spin_unlock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
862

9e10a130d   Tejun Heo   cgroup: replace c...
863
  		WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
734d45130   Li Zefan   cpuset: update cs...
864
  			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
d66393e54   Tejun Heo   cpuset: use css_t...
865
  		update_tasks_cpumask(cp);
5c5cc6232   Li Zefan   cpuset: allow to ...
866

8b5f1c52d   Li Zefan   cpuset: use effec...
867
868
869
870
871
872
873
  		/*
  		 * If the effective cpumask of any non-empty cpuset is changed,
  		 * we need to rebuild sched domains.
  		 */
  		if (!cpumask_empty(cp->cpus_allowed) &&
  		    is_sched_load_balance(cp))
  			need_rebuild_sched_domains = true;
5c5cc6232   Li Zefan   cpuset: allow to ...
874
875
876
877
  		rcu_read_lock();
  		css_put(&cp->css);
  	}
  	rcu_read_unlock();
8b5f1c52d   Li Zefan   cpuset: use effec...
878
879
880
  
  	if (need_rebuild_sched_domains)
  		rebuild_sched_domains_locked();
5c5cc6232   Li Zefan   cpuset: allow to ...
881
  }
0b2f630a2   Miao Xie   cpusets: restruct...
882
  /**
58f4790b7   Cliff Wickman   cpusets: update_c...
883
884
   * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
   * @cs: the cpuset to consider
fc34ac1dc   Fabian Frederick   kernel/cpuset.c: ...
885
   * @trialcs: trial cpuset
58f4790b7   Cliff Wickman   cpusets: update_c...
886
887
   * @buf: buffer of cpu numbers written to this cpuset
   */
645fcc9d2   Li Zefan   cpuset: don't all...
888
889
  static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
  			  const char *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
  {
58f4790b7   Cliff Wickman   cpusets: update_c...
891
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
892

5f054e31c   Rusty Russell   documentation: re...
893
  	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
894
895
  	if (cs == &top_cpuset)
  		return -EACCES;
6f7f02e78   David Rientjes   cpusets: allow em...
896
  	/*
c8d9c90c7   Paul Jackson   hotplug cpu: move...
897
  	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
020958b62   Paul Jackson   cpusets: decrusti...
898
899
900
  	 * Since cpulist_parse() fails on an empty mask, we special case
  	 * that parsing.  The validate_change() call ensures that cpusets
  	 * with tasks have cpus.
6f7f02e78   David Rientjes   cpusets: allow em...
901
  	 */
020958b62   Paul Jackson   cpusets: decrusti...
902
  	if (!*buf) {
300ed6cbb   Li Zefan   cpuset: convert c...
903
  		cpumask_clear(trialcs->cpus_allowed);
6f7f02e78   David Rientjes   cpusets: allow em...
904
  	} else {
300ed6cbb   Li Zefan   cpuset: convert c...
905
  		retval = cpulist_parse(buf, trialcs->cpus_allowed);
6f7f02e78   David Rientjes   cpusets: allow em...
906
907
  		if (retval < 0)
  			return retval;
37340746a   Lai Jiangshan   cpusets: fix bug ...
908

5d8ba82c3   Li Zefan   cpuset: allow wri...
909
910
  		if (!cpumask_subset(trialcs->cpus_allowed,
  				    top_cpuset.cpus_allowed))
37340746a   Lai Jiangshan   cpusets: fix bug ...
911
  			return -EINVAL;
6f7f02e78   David Rientjes   cpusets: allow em...
912
  	}
029190c51   Paul Jackson   cpuset sched_load...
913

8707d8b8c   Paul Menage   Fix cpusets updat...
914
  	/* Nothing to do if the cpus didn't change */
300ed6cbb   Li Zefan   cpuset: convert c...
915
  	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
8707d8b8c   Paul Menage   Fix cpusets updat...
916
  		return 0;
58f4790b7   Cliff Wickman   cpusets: update_c...
917

a73456f37   Li Zefan   cpuset: re-struct...
918
919
920
  	retval = validate_change(cs, trialcs);
  	if (retval < 0)
  		return retval;
8447a0fee   Vladimir Davydov   cpuset: convert c...
921
  	spin_lock_irq(&callback_lock);
300ed6cbb   Li Zefan   cpuset: convert c...
922
  	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
8447a0fee   Vladimir Davydov   cpuset: convert c...
923
  	spin_unlock_irq(&callback_lock);
029190c51   Paul Jackson   cpuset sched_load...
924

734d45130   Li Zefan   cpuset: update cs...
925
926
  	/* use trialcs->cpus_allowed as a temp variable */
  	update_cpumasks_hier(cs, trialcs->cpus_allowed);
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
927
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
  }
053199edf   Paul Jackson   [PATCH] cpusets: ...
929
  /*
e93ad19d0   Tejun Heo   cpuset: make mm m...
930
931
932
933
934
   * Migrate memory region from one set of nodes to another.  This is
 * performed asynchronously as it can be called from the process migration path
 * while holding locks involved in process management.  All mm migrations are
   * performed in the queued order and can be waited for by flushing
   * cpuset_migrate_mm_wq.
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
935
   */
e93ad19d0   Tejun Heo   cpuset: make mm m...
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
  struct cpuset_migrate_mm_work {
  	struct work_struct	work;
  	struct mm_struct	*mm;
  	nodemask_t		from;
  	nodemask_t		to;
  };
  
  static void cpuset_migrate_mm_workfn(struct work_struct *work)
  {
  	struct cpuset_migrate_mm_work *mwork =
  		container_of(work, struct cpuset_migrate_mm_work, work);
  
  	/* on a wq worker, no need to worry about %current's mems_allowed */
  	do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
  	mmput(mwork->mm);
  	kfree(mwork);
  }
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
953
954
955
  static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
  							const nodemask_t *to)
  {
e93ad19d0   Tejun Heo   cpuset: make mm m...
956
  	struct cpuset_migrate_mm_work *mwork;
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
957

e93ad19d0   Tejun Heo   cpuset: make mm m...
958
959
960
961
962
963
964
965
966
967
968
  	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
  	if (mwork) {
  		mwork->mm = mm;
  		mwork->from = *from;
  		mwork->to = *to;
  		INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
  		queue_work(cpuset_migrate_mm_wq, &mwork->work);
  	} else {
  		mmput(mm);
  	}
  }
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
969

5cf1cacb4   Tejun Heo   cgroup, cpuset: r...
970
  static void cpuset_post_attach(void)
e93ad19d0   Tejun Heo   cpuset: make mm m...
971
972
  {
  	flush_workqueue(cpuset_migrate_mm_wq);
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
973
  }
3b6766fe6   Li Zefan   cpuset: rewrite u...
974
  /*
58568d2a8   Miao Xie   cpuset,mm: update...
975
976
977
978
979
980
981
   * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
   * @tsk: the task to change
   * @newmems: new nodes that the task will be set
   *
   * In order to avoid seeing no nodes if the old and new nodes are disjoint,
   * we structure updates as setting all new allowed nodes, then clearing newly
   * disallowed ones.
58568d2a8   Miao Xie   cpuset,mm: update...
982
983
984
985
   */
  static void cpuset_change_task_nodemask(struct task_struct *tsk,
  					nodemask_t *newmems)
  {
b246272ec   David Rientjes   cpusets: stall wh...
986
  	bool need_loop;
89e8a244b   David Rientjes   cpusets: avoid lo...
987

c0ff7453b   Miao Xie   cpuset,mm: fix no...
988
  	task_lock(tsk);
b246272ec   David Rientjes   cpusets: stall wh...
989
990
  	/*
  	 * Determine if a loop is necessary if another thread is doing
d26914d11   Mel Gorman   mm: optimize put_...
991
  	 * read_mems_allowed_begin().  If at least one node remains unchanged and
b246272ec   David Rientjes   cpusets: stall wh...
992
993
994
995
996
  	 * tsk does not have a mempolicy, then an empty nodemask will not be
  	 * possible when mems_allowed is larger than a word.
  	 */
  	need_loop = task_has_mempolicy(tsk) ||
  			!nodes_intersects(*newmems, tsk->mems_allowed);
c0ff7453b   Miao Xie   cpuset,mm: fix no...
997

0fc0287c9   Peter Zijlstra   cpuset: Fix memor...
998
999
  	if (need_loop) {
  		local_irq_disable();
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1000
  		write_seqcount_begin(&tsk->mems_allowed_seq);
0fc0287c9   Peter Zijlstra   cpuset: Fix memor...
1001
  	}
c0ff7453b   Miao Xie   cpuset,mm: fix no...
1002

cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1003
1004
  	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
  	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
c0ff7453b   Miao Xie   cpuset,mm: fix no...
1005
1006
  
  	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
58568d2a8   Miao Xie   cpuset,mm: update...
1007
  	tsk->mems_allowed = *newmems;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1008

0fc0287c9   Peter Zijlstra   cpuset: Fix memor...
1009
  	if (need_loop) {
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1010
  		write_seqcount_end(&tsk->mems_allowed_seq);
0fc0287c9   Peter Zijlstra   cpuset: Fix memor...
1011
1012
  		local_irq_enable();
  	}
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1013

c0ff7453b   Miao Xie   cpuset,mm: fix no...
1014
  	task_unlock(tsk);
58568d2a8   Miao Xie   cpuset,mm: update...
1015
  }
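  
  /*
   * Worked example (illustration only, not kernel code) of the ordering used
   * above: with an old mask of {0} and a new mask of {1}, and plain bits
   * standing in for nodemask_t,
   *
   *	step 1, nodes_or():        mems_allowed = {0,1}   (superset, never empty)
   *	step 2, final assignment:  mems_allowed = {1}     (the new value)
   *
   * A reader racing between the two steps sees {0,1}, which is harmless.  A
   * direct single assignment on a multi-word nodemask could instead be seen
   * half-updated, i.e. momentarily empty, when old and new are disjoint.
   */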
8793d854e   Paul Menage   Task Control Grou...
1016
  static void *cpuset_being_rebound;
0b2f630a2   Miao Xie   cpusets: restruct...
1017
1018
1019
  /**
   * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
   * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
0b2f630a2   Miao Xie   cpusets: restruct...
1020
   *
d66393e54   Tejun Heo   cpuset: use css_t...
1021
1022
1023
   * Iterate through each task of @cs, updating its mems_allowed to the
   * cpuset's effective nodemask.  As this function is called with cpuset_mutex held,
   * cpuset membership stays stable.
0b2f630a2   Miao Xie   cpusets: restruct...
1024
   */
d66393e54   Tejun Heo   cpuset: use css_t...
1025
  static void update_tasks_nodemask(struct cpuset *cs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026
  {
33ad801df   Li Zefan   cpuset: record ol...
1027
  	static nodemask_t newmems;	/* protected by cpuset_mutex */
d66393e54   Tejun Heo   cpuset: use css_t...
1028
1029
  	struct css_task_iter it;
  	struct task_struct *task;
59dac16fb   Paul Jackson   [PATCH] cpuset: u...
1030

846a16bf0   Lee Schermerhorn   mempolicy: rename...
1031
  	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1032

ae1c80238   Li Zefan   cpuset: apply cs-...
1033
  	guarantee_online_mems(cs, &newmems);
33ad801df   Li Zefan   cpuset: record ol...
1034

4225399a6   Paul Jackson   [PATCH] cpuset: r...
1035
  	/*
3b6766fe6   Li Zefan   cpuset: rewrite u...
1036
1037
1038
1039
  	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
  	 * take while holding tasklist_lock.  Forks can happen - the
  	 * mpol_dup() cpuset_being_rebound check will catch such forks,
  	 * and rebind their vma mempolicies too.  Because we still hold
5d21cc2db   Tejun Heo   cpuset: replace c...
1040
  	 * the global cpuset_mutex, we know that no other rebind effort
3b6766fe6   Li Zefan   cpuset: rewrite u...
1041
  	 * will be contending for the global variable cpuset_being_rebound.
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1042
  	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
04c19fa6f   Paul Jackson   [PATCH] cpuset: m...
1043
  	 * is idempotent.  Also migrate pages in each mm to new nodes.
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1044
  	 */
d66393e54   Tejun Heo   cpuset: use css_t...
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
  	css_task_iter_start(&cs->css, &it);
  	while ((task = css_task_iter_next(&it))) {
  		struct mm_struct *mm;
  		bool migrate;
  
  		cpuset_change_task_nodemask(task, &newmems);
  
  		mm = get_task_mm(task);
  		if (!mm)
  			continue;
  
  		migrate = is_memory_migrate(cs);
  
  		mpol_rebind_mm(mm, &cs->mems_allowed);
  		if (migrate)
  			cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
e93ad19d0   Tejun Heo   cpuset: make mm m...
1061
1062
  		else
  			mmput(mm);
d66393e54   Tejun Heo   cpuset: use css_t...
1063
1064
  	}
  	css_task_iter_end(&it);
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1065

33ad801df   Li Zefan   cpuset: record ol...
1066
1067
1068
1069
1070
  	/*
  	 * All the tasks' nodemasks have been updated, update
  	 * cs->old_mems_allowed.
  	 */
  	cs->old_mems_allowed = newmems;
2df167a30   Paul Menage   cgroups: update c...
1071
  	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
8793d854e   Paul Menage   Task Control Grou...
1072
  	cpuset_being_rebound = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1073
  }
0b2f630a2   Miao Xie   cpusets: restruct...
1074
  /*
734d45130   Li Zefan   cpuset: update cs...
1075
1076
1077
   * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
   * @cs: the cpuset to consider
   * @new_mems: a temp variable for calculating new effective_mems
5c5cc6232   Li Zefan   cpuset: allow to ...
1078
   *
734d45130   Li Zefan   cpuset: update cs...
1079
1080
   * When the configured nodemask is changed, the effective nodemasks of this cpuset
   * and all its descendants need to be updated.
5c5cc6232   Li Zefan   cpuset: allow to ...
1081
   *
734d45130   Li Zefan   cpuset: update cs...
1082
   * On the legacy hierarchy, effective_mems will be the same as mems_allowed.
5c5cc6232   Li Zefan   cpuset: allow to ...
1083
1084
1085
   *
   * Called with cpuset_mutex held
   */
734d45130   Li Zefan   cpuset: update cs...
1086
  static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
5c5cc6232   Li Zefan   cpuset: allow to ...
1087
1088
  {
  	struct cpuset *cp;
492eb21b9   Tejun Heo   cgroup: make hier...
1089
  	struct cgroup_subsys_state *pos_css;
5c5cc6232   Li Zefan   cpuset: allow to ...
1090
1091
  
  	rcu_read_lock();
734d45130   Li Zefan   cpuset: update cs...
1092
1093
1094
1095
  	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
  		struct cpuset *parent = parent_cs(cp);
  
  		nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
554b0d1c8   Li Zefan   cpuset: inherit a...
1096
1097
1098
1099
  		/*
  		 * If it becomes empty, inherit the effective mask of the
  		 * parent, which is guaranteed to have some MEMs.
  		 */
9e10a130d   Tejun Heo   cgroup: replace c...
1100
1101
  		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  		    nodes_empty(*new_mems))
554b0d1c8   Li Zefan   cpuset: inherit a...
1102
  			*new_mems = parent->effective_mems;
734d45130   Li Zefan   cpuset: update cs...
1103
1104
1105
1106
  		/* Skip the whole subtree if the nodemask remains the same. */
  		if (nodes_equal(*new_mems, cp->effective_mems)) {
  			pos_css = css_rightmost_descendant(pos_css);
  			continue;
5c5cc6232   Li Zefan   cpuset: allow to ...
1107
  		}
734d45130   Li Zefan   cpuset: update cs...
1108

ec903c0c8   Tejun Heo   cgroup: rename cs...
1109
  		if (!css_tryget_online(&cp->css))
5c5cc6232   Li Zefan   cpuset: allow to ...
1110
1111
  			continue;
  		rcu_read_unlock();
8447a0fee   Vladimir Davydov   cpuset: convert c...
1112
  		spin_lock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
1113
  		cp->effective_mems = *new_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1114
  		spin_unlock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
1115

9e10a130d   Tejun Heo   cgroup: replace c...
1116
  		WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
a13812683   Li Zefan   cpuset: fix the W...
1117
  			!nodes_equal(cp->mems_allowed, cp->effective_mems));
734d45130   Li Zefan   cpuset: update cs...
1118

d66393e54   Tejun Heo   cpuset: use css_t...
1119
  		update_tasks_nodemask(cp);
5c5cc6232   Li Zefan   cpuset: allow to ...
1120
1121
1122
1123
1124
1125
1126
1127
  
  		rcu_read_lock();
  		css_put(&cp->css);
  	}
  	rcu_read_unlock();
  }
  
  /*
0b2f630a2   Miao Xie   cpusets: restruct...
1128
1129
   * Handle user request to change the 'mems' memory placement
   * of a cpuset.  Needs to validate the request, update the
58568d2a8   Miao Xie   cpuset,mm: update...
1130
1131
1132
1133
   * cpuset's mems_allowed, and for each task in the cpuset,
   * update mems_allowed and rebind the task's mempolicy and any vma
   * mempolicies, and if the cpuset is marked 'memory_migrate',
   * migrate the task's pages to the new memory.
0b2f630a2   Miao Xie   cpusets: restruct...
1134
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
1135
   * Call with cpuset_mutex held. May take callback_lock during call.
0b2f630a2   Miao Xie   cpusets: restruct...
1136
1137
1138
1139
   * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
   * lock each such task's mm->mmap_sem, scan its vmas and rebind
   * their mempolicies to the cpuset's new mems_allowed.
   */
645fcc9d2   Li Zefan   cpuset: don't all...
1140
1141
  static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
  			   const char *buf)
0b2f630a2   Miao Xie   cpusets: restruct...
1142
  {
0b2f630a2   Miao Xie   cpusets: restruct...
1143
1144
1145
  	int retval;
  
  	/*
38d7bee9d   Lai Jiangshan   cpuset: use N_MEM...
1146
  	 * top_cpuset.mems_allowed tracks node_states[N_MEMORY];
0b2f630a2   Miao Xie   cpusets: restruct...
1147
1148
  	 * it's read-only
  	 */
53feb2976   Miao Xie   cpuset: alloc nod...
1149
1150
1151
1152
  	if (cs == &top_cpuset) {
  		retval = -EACCES;
  		goto done;
  	}
0b2f630a2   Miao Xie   cpusets: restruct...
1153

0b2f630a2   Miao Xie   cpusets: restruct...
1154
1155
1156
1157
1158
1159
1160
  	/*
  	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
  	 * Since nodelist_parse() fails on an empty mask, we special case
  	 * that parsing.  The validate_change() call ensures that cpusets
  	 * with tasks have memory.
  	 */
  	if (!*buf) {
645fcc9d2   Li Zefan   cpuset: don't all...
1161
  		nodes_clear(trialcs->mems_allowed);
0b2f630a2   Miao Xie   cpusets: restruct...
1162
  	} else {
645fcc9d2   Li Zefan   cpuset: don't all...
1163
  		retval = nodelist_parse(buf, trialcs->mems_allowed);
0b2f630a2   Miao Xie   cpusets: restruct...
1164
1165
  		if (retval < 0)
  			goto done;
645fcc9d2   Li Zefan   cpuset: don't all...
1166
  		if (!nodes_subset(trialcs->mems_allowed,
5d8ba82c3   Li Zefan   cpuset: allow wri...
1167
1168
  				  top_cpuset.mems_allowed)) {
  			retval = -EINVAL;
53feb2976   Miao Xie   cpuset: alloc nod...
1169
1170
  			goto done;
  		}
0b2f630a2   Miao Xie   cpusets: restruct...
1171
  	}
33ad801df   Li Zefan   cpuset: record ol...
1172
1173
  
  	if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
0b2f630a2   Miao Xie   cpusets: restruct...
1174
1175
1176
  		retval = 0;		/* Too easy - nothing to do */
  		goto done;
  	}
645fcc9d2   Li Zefan   cpuset: don't all...
1177
  	retval = validate_change(cs, trialcs);
0b2f630a2   Miao Xie   cpusets: restruct...
1178
1179
  	if (retval < 0)
  		goto done;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1180
  	spin_lock_irq(&callback_lock);
645fcc9d2   Li Zefan   cpuset: don't all...
1181
  	cs->mems_allowed = trialcs->mems_allowed;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1182
  	spin_unlock_irq(&callback_lock);
0b2f630a2   Miao Xie   cpusets: restruct...
1183

734d45130   Li Zefan   cpuset: update cs...
1184
  	/* use trialcs->mems_allowed as a temp variable */
24ee3cf89   Alban Crequy   cpuset: use trial...
1185
  	update_nodemasks_hier(cs, &trialcs->mems_allowed);
0b2f630a2   Miao Xie   cpusets: restruct...
1186
1187
1188
  done:
  	return retval;
  }
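  
  /*
   * Worked example (illustration only, not kernel code): writing "0-2,4" to
   * the "mems" file is parsed by nodelist_parse() into the node set
   * {0,1,2,4}; writing an empty string is special-cased above to mean
   * "clear mems_allowed", and validate_change() then rejects that request
   * if the cpuset still has tasks attached.
   */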
8793d854e   Paul Menage   Task Control Grou...
1189
1190
  int current_cpuset_is_being_rebound(void)
  {
391acf970   Gu Zheng   cpuset,mempolicy:...
1191
1192
1193
1194
1195
1196
1197
  	int ret;
  
  	rcu_read_lock();
  	ret = task_cs(current) == cpuset_being_rebound;
  	rcu_read_unlock();
  
  	return ret;
8793d854e   Paul Menage   Task Control Grou...
1198
  }
5be7a4792   Paul Menage   Fix cpuset sched_...
1199
  static int update_relax_domain_level(struct cpuset *cs, s64 val)
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1200
  {
db7f47cf4   Paul Menage   cpusets: allow cp...
1201
  #ifdef CONFIG_SMP
60495e776   Peter Zijlstra   sched: Dynamic sc...
1202
  	if (val < -1 || val >= sched_domain_level_max)
30e0e1781   Li Zefan   cpuset: limit the...
1203
  		return -EINVAL;
db7f47cf4   Paul Menage   cpusets: allow cp...
1204
  #endif
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1205
1206
1207
  
  	if (val != cs->relax_domain_level) {
  		cs->relax_domain_level = val;
300ed6cbb   Li Zefan   cpuset: convert c...
1208
1209
  		if (!cpumask_empty(cs->cpus_allowed) &&
  		    is_sched_load_balance(cs))
699140ba8   Tejun Heo   cpuset: drop asyn...
1210
  			rebuild_sched_domains_locked();
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1211
1212
1213
1214
  	}
  
  	return 0;
  }
72ec70299   Tejun Heo   cgroup: make task...
1215
  /**
950592f7b   Miao Xie   cpusets: update t...
1216
1217
   * update_tasks_flags - update the spread flags of tasks in the cpuset.
   * @cs: the cpuset in which each task's spread flags needs to be changed
950592f7b   Miao Xie   cpusets: update t...
1218
   *
d66393e54   Tejun Heo   cpuset: use css_t...
1219
1220
1221
   * Iterate through each task of @cs updating its spread flags.  As this
   * function is called with cpuset_mutex held, cpuset membership stays
   * stable.
950592f7b   Miao Xie   cpusets: update t...
1222
   */
d66393e54   Tejun Heo   cpuset: use css_t...
1223
  static void update_tasks_flags(struct cpuset *cs)
950592f7b   Miao Xie   cpusets: update t...
1224
  {
d66393e54   Tejun Heo   cpuset: use css_t...
1225
1226
1227
1228
1229
1230
1231
  	struct css_task_iter it;
  	struct task_struct *task;
  
  	css_task_iter_start(&cs->css, &it);
  	while ((task = css_task_iter_next(&it)))
  		cpuset_update_task_spread_flag(cs, task);
  	css_task_iter_end(&it);
950592f7b   Miao Xie   cpusets: update t...
1232
1233
1234
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1235
   * update_flag - read a 0 or a 1 in a file and update associated flag
786083667   Paul Menage   Cpuset hardwall f...
1236
1237
1238
   * bit:		the bit to update (see cpuset_flagbits_t)
   * cs:		the cpuset to update
   * turning_on: 	whether the flag is being set or cleared
053199edf   Paul Jackson   [PATCH] cpusets: ...
1239
   *
5d21cc2db   Tejun Heo   cpuset: replace c...
1240
   * Call with cpuset_mutex held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1241
   */
700fe1ab9   Paul Menage   CGroup API files:...
1242
1243
  static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
  		       int turning_on)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1244
  {
645fcc9d2   Li Zefan   cpuset: don't all...
1245
  	struct cpuset *trialcs;
40b6a7623   Rakib Mullick   cpuset.c: remove ...
1246
  	int balance_flag_changed;
950592f7b   Miao Xie   cpusets: update t...
1247
  	int spread_flag_changed;
950592f7b   Miao Xie   cpusets: update t...
1248
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1249

645fcc9d2   Li Zefan   cpuset: don't all...
1250
1251
1252
  	trialcs = alloc_trial_cpuset(cs);
  	if (!trialcs)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1253
  	if (turning_on)
645fcc9d2   Li Zefan   cpuset: don't all...
1254
  		set_bit(bit, &trialcs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
  	else
645fcc9d2   Li Zefan   cpuset: don't all...
1256
  		clear_bit(bit, &trialcs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1257

645fcc9d2   Li Zefan   cpuset: don't all...
1258
  	err = validate_change(cs, trialcs);
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
1259
  	if (err < 0)
645fcc9d2   Li Zefan   cpuset: don't all...
1260
  		goto out;
029190c51   Paul Jackson   cpuset sched_load...
1261

029190c51   Paul Jackson   cpuset sched_load...
1262
  	balance_flag_changed = (is_sched_load_balance(cs) !=
645fcc9d2   Li Zefan   cpuset: don't all...
1263
  				is_sched_load_balance(trialcs));
029190c51   Paul Jackson   cpuset sched_load...
1264

950592f7b   Miao Xie   cpusets: update t...
1265
1266
  	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
  			|| (is_spread_page(cs) != is_spread_page(trialcs)));
8447a0fee   Vladimir Davydov   cpuset: convert c...
1267
  	spin_lock_irq(&callback_lock);
645fcc9d2   Li Zefan   cpuset: don't all...
1268
  	cs->flags = trialcs->flags;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1269
  	spin_unlock_irq(&callback_lock);
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
1270

300ed6cbb   Li Zefan   cpuset: convert c...
1271
  	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
699140ba8   Tejun Heo   cpuset: drop asyn...
1272
  		rebuild_sched_domains_locked();
029190c51   Paul Jackson   cpuset sched_load...
1273

950592f7b   Miao Xie   cpusets: update t...
1274
  	if (spread_flag_changed)
d66393e54   Tejun Heo   cpuset: use css_t...
1275
  		update_tasks_flags(cs);
645fcc9d2   Li Zefan   cpuset: don't all...
1276
1277
1278
  out:
  	free_trial_cpuset(trialcs);
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1279
  }
053199edf   Paul Jackson   [PATCH] cpusets: ...
1280
  /*
80f7228b5   Adrian Bunk   typo fixes: occur...
1281
   * Frequency meter - How fast is some event occurring?
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
   *
   * These routines manage a digitally filtered, constant time based,
   * event frequency meter.  There are four routines:
   *   fmeter_init() - initialize a frequency meter.
   *   fmeter_markevent() - called each time the event happens.
   *   fmeter_getrate() - returns the recent rate of such events.
   *   fmeter_update() - internal routine used to update fmeter.
   *
   * A common data structure is passed to each of these routines,
   * which is used to keep track of the state required to manage the
   * frequency meter and its digital filter.
   *
   * The filter works on the number of events marked per unit time.
   * The filter is single-pole low-pass recursive (IIR).  The time unit
   * is 1 second.  Arithmetic is done using 32-bit integers scaled to
   * simulate 3 decimal digits of precision (multiplied by 1000).
   *
   * With an FM_COEF of 933, and a time base of 1 second, the filter
   * has a half-life of 10 seconds, meaning that if the events quit
   * happening, then the rate returned from the fmeter_getrate()
   * will be cut in half each 10 seconds, until it converges to zero.
   *
   * It is not worth doing a real infinitely recursive filter.  If more
   * than FM_MAXTICKS ticks have elapsed since the last filter event,
   * just compute FM_MAXTICKS ticks worth, by which point the level
   * will be stable.
   *
   * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
   * arithmetic overflow in the fmeter_update() routine.
   *
   * Given the simple 32 bit integer arithmetic used, this meter works
   * best for reporting rates between one per millisecond (msec) and
   * one per 32 (approx) seconds.  At constant rates faster than one
   * per msec it maxes out at values just under 1,000,000.  At constant
   * rates between one per msec, and one per second it will stabilize
   * to a value N*1000, where N is the rate of events per second.
   * At constant rates between one per second and one per 32 seconds,
   * it will be choppy, moving up on the seconds that have an event,
   * and then decaying until the next event.  At rates slower than
   * about one in 32 seconds, it decays all the way back to zero between
   * each event.
   */
  
  #define FM_COEF 933		/* coefficient for half-life of 10 secs */
d2b436580   Arnd Bergmann   cpuset: Replace a...
1326
  #define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
  #define FM_MAXCNT 1000000	/* limit cnt to avoid overflow */
  #define FM_SCALE 1000		/* faux fixed point scale */
  
  /* Initialize a frequency meter */
  static void fmeter_init(struct fmeter *fmp)
  {
  	fmp->cnt = 0;
  	fmp->val = 0;
  	fmp->time = 0;
  	spin_lock_init(&fmp->lock);
  }
  
  /* Internal meter update - process cnt events and update value */
  static void fmeter_update(struct fmeter *fmp)
  {
d2b436580   Arnd Bergmann   cpuset: Replace a...
1342
1343
1344
1345
1346
  	time64_t now;
  	u32 ticks;
  
  	now = ktime_get_seconds();
  	ticks = now - fmp->time;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
  
  	if (ticks == 0)
  		return;
  
  	ticks = min(FM_MAXTICKS, ticks);
  	while (ticks-- > 0)
  		fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
  	fmp->time = now;
  
  	fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
  	fmp->cnt = 0;
  }
  
  /* Process any previous ticks, then bump cnt by one (times scale). */
  static void fmeter_markevent(struct fmeter *fmp)
  {
  	spin_lock(&fmp->lock);
  	fmeter_update(fmp);
  	fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
  	spin_unlock(&fmp->lock);
  }
  
  /* Process any previous ticks, then return current value. */
  static int fmeter_getrate(struct fmeter *fmp)
  {
  	int val;
  
  	spin_lock(&fmp->lock);
  	fmeter_update(fmp);
  	val = fmp->val;
  	spin_unlock(&fmp->lock);
  	return val;
  }
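  
  /*
   * Illustrative sketch, not part of this file: where the 10 second half-life
   * quoted above comes from.  Each elapsed second scales the level by
   * FM_COEF/FM_SCALE = 933/1000, and (0.933)^10 is almost exactly 0.5.  The
   * helper below (hypothetical, defined only for illustration) repeats the
   * same integer decay step fmeter_update() uses; starting from 1000000 and
   * decaying for 10 idle seconds leaves roughly 500000.
   */
  static int fmeter_halflife_demo(int val, int idle_secs)
  {
  	while (idle_secs-- > 0)
  		val = (FM_COEF * val) / FM_SCALE;	/* same step as fmeter_update() */
  	return val;
  }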
57fce0a68   Tejun Heo   cpuset: don't use...
1380
  static struct cpuset *cpuset_attach_old_cs;
5d21cc2db   Tejun Heo   cpuset: replace c...
1381
  /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1382
  static int cpuset_can_attach(struct cgroup_taskset *tset)
f780bdb7c   Ben Blum   cgroups: add per-...
1383
  {
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1384
1385
  	struct cgroup_subsys_state *css;
  	struct cpuset *cs;
bb9d97b6d   Tejun Heo   cgroup: don't use...
1386
1387
  	struct task_struct *task;
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1388

57fce0a68   Tejun Heo   cpuset: don't use...
1389
  	/* used later by cpuset_attach() */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1390
1391
  	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
  	cs = css_cs(css);
57fce0a68   Tejun Heo   cpuset: don't use...
1392

5d21cc2db   Tejun Heo   cpuset: replace c...
1393
  	mutex_lock(&cpuset_mutex);
aa6ec29be   Tejun Heo   cgroup: remove sa...
1394
  	/* allow moving tasks into an empty cpuset if on default hierarchy */
5d21cc2db   Tejun Heo   cpuset: replace c...
1395
  	ret = -ENOSPC;
9e10a130d   Tejun Heo   cgroup: replace c...
1396
  	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
88fa523bf   Li Zefan   cpuset: allow to ...
1397
  	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
5d21cc2db   Tejun Heo   cpuset: replace c...
1398
  		goto out_unlock;
9985b0bab   David Rientjes   sched: prevent bo...
1399

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1400
  	cgroup_taskset_for_each(task, css, tset) {
7f51412a4   Juri Lelli   sched/deadline: F...
1401
1402
  		ret = task_can_attach(task, cs->cpus_allowed);
  		if (ret)
5d21cc2db   Tejun Heo   cpuset: replace c...
1403
1404
1405
1406
  			goto out_unlock;
  		ret = security_task_setscheduler(task);
  		if (ret)
  			goto out_unlock;
bb9d97b6d   Tejun Heo   cgroup: don't use...
1407
  	}
f780bdb7c   Ben Blum   cgroups: add per-...
1408

452477fa6   Tejun Heo   cpuset: pin down ...
1409
1410
1411
1412
1413
  	/*
  	 * Mark attach is in progress.  This makes validate_change() fail
  	 * changes which zero cpus/mems_allowed.
  	 */
  	cs->attach_in_progress++;
5d21cc2db   Tejun Heo   cpuset: replace c...
1414
1415
1416
1417
  	ret = 0;
  out_unlock:
  	mutex_unlock(&cpuset_mutex);
  	return ret;
8793d854e   Paul Menage   Task Control Grou...
1418
  }
f780bdb7c   Ben Blum   cgroups: add per-...
1419

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1420
  static void cpuset_cancel_attach(struct cgroup_taskset *tset)
452477fa6   Tejun Heo   cpuset: pin down ...
1421
  {
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1422
1423
1424
1425
1426
  	struct cgroup_subsys_state *css;
  	struct cpuset *cs;
  
  	cgroup_taskset_first(tset, &css);
  	cs = css_cs(css);
5d21cc2db   Tejun Heo   cpuset: replace c...
1427
  	mutex_lock(&cpuset_mutex);
eb95419b0   Tejun Heo   cgroup: pass arou...
1428
  	css_cs(css)->attach_in_progress--;
5d21cc2db   Tejun Heo   cpuset: replace c...
1429
  	mutex_unlock(&cpuset_mutex);
8793d854e   Paul Menage   Task Control Grou...
1430
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1431

4e4c9a140   Tejun Heo   cpuset: cleanup c...
1432
  /*
5d21cc2db   Tejun Heo   cpuset: replace c...
1433
   * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
4e4c9a140   Tejun Heo   cpuset: cleanup c...
1434
1435
1436
1437
   * but we can't allocate it dynamically there.  Define it global and
   * allocate from cpuset_init().
   */
  static cpumask_var_t cpus_attach;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1438
  static void cpuset_attach(struct cgroup_taskset *tset)
8793d854e   Paul Menage   Task Control Grou...
1439
  {
67bd2c598   Li Zefan   cpuset: remove un...
1440
  	/* static buf protected by cpuset_mutex */
4e4c9a140   Tejun Heo   cpuset: cleanup c...
1441
  	static nodemask_t cpuset_attach_nodemask_to;
bb9d97b6d   Tejun Heo   cgroup: don't use...
1442
  	struct task_struct *task;
4530eddb5   Tejun Heo   cgroup, memcg, cp...
1443
  	struct task_struct *leader;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1444
1445
  	struct cgroup_subsys_state *css;
  	struct cpuset *cs;
57fce0a68   Tejun Heo   cpuset: don't use...
1446
  	struct cpuset *oldcs = cpuset_attach_old_cs;
22fb52dd7   David Quigley   [PATCH] SELinux: ...
1447

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1448
1449
  	cgroup_taskset_first(tset, &css);
  	cs = css_cs(css);
5d21cc2db   Tejun Heo   cpuset: replace c...
1450
  	mutex_lock(&cpuset_mutex);
4e4c9a140   Tejun Heo   cpuset: cleanup c...
1451
1452
1453
1454
  	/* prepare for attach */
  	if (cs == &top_cpuset)
  		cpumask_copy(cpus_attach, cpu_possible_mask);
  	else
ae1c80238   Li Zefan   cpuset: apply cs-...
1455
  		guarantee_online_cpus(cs, cpus_attach);
4e4c9a140   Tejun Heo   cpuset: cleanup c...
1456

ae1c80238   Li Zefan   cpuset: apply cs-...
1457
  	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
4e4c9a140   Tejun Heo   cpuset: cleanup c...
1458

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1459
  	cgroup_taskset_for_each(task, css, tset) {
bb9d97b6d   Tejun Heo   cgroup: don't use...
1460
1461
1462
1463
1464
1465
1466
1467
1468
  		/*
  		 * can_attach beforehand should guarantee that this doesn't
  		 * fail.  TODO: have a better way to handle failure here
  		 */
  		WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
  
  		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
  		cpuset_update_task_spread_flag(cs, task);
  	}
22fb52dd7   David Quigley   [PATCH] SELinux: ...
1469

f780bdb7c   Ben Blum   cgroups: add per-...
1470
  	/*
4530eddb5   Tejun Heo   cgroup, memcg, cp...
1471
1472
  	 * Change mm for all threadgroup leaders. This is expensive and may
  	 * sleep, and should be moved outside the migration path proper.
f780bdb7c   Ben Blum   cgroups: add per-...
1473
  	 */
ae1c80238   Li Zefan   cpuset: apply cs-...
1474
  	cpuset_attach_nodemask_to = cs->effective_mems;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
1475
  	cgroup_taskset_for_each_leader(leader, css, tset) {
3df9ca0a2   Tejun Heo   cpuset: migrate m...
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
  		struct mm_struct *mm = get_task_mm(leader);
  
  		if (mm) {
  			mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
  
  			/*
  			 * old_mems_allowed is the same as mems_allowed
  			 * here, except if this task is being moved
  			 * automatically due to hotplug.  In that case
  			 * @mems_allowed has been updated and is empty, so
  			 * @old_mems_allowed is the right nodemask to
  			 * migrate the mm from.
  			 */
e93ad19d0   Tejun Heo   cpuset: make mm m...
1489
  			if (is_memory_migrate(cs))
3df9ca0a2   Tejun Heo   cpuset: migrate m...
1490
1491
  				cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
  						  &cpuset_attach_nodemask_to);
e93ad19d0   Tejun Heo   cpuset: make mm m...
1492
1493
  			else
  				mmput(mm);
f047cecf2   Li Zefan   cpuset: fix to mi...
1494
  		}
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1495
  	}
452477fa6   Tejun Heo   cpuset: pin down ...
1496

33ad801df   Li Zefan   cpuset: record ol...
1497
  	cs->old_mems_allowed = cpuset_attach_nodemask_to;
02bb58637   Tejun Heo   cpuset: schedule ...
1498

452477fa6   Tejun Heo   cpuset: pin down ...
1499
  	cs->attach_in_progress--;
e44193d39   Li Zefan   cpuset: let hotpl...
1500
1501
  	if (!cs->attach_in_progress)
  		wake_up(&cpuset_attach_wq);
5d21cc2db   Tejun Heo   cpuset: replace c...
1502
1503
  
  	mutex_unlock(&cpuset_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1504
1505
1506
1507
1508
  }
  
  /* The various types of files and directories in a cpuset file system */
  
  typedef enum {
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
1509
  	FILE_MEMORY_MIGRATE,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1510
1511
  	FILE_CPULIST,
  	FILE_MEMLIST,
afd1a8b3e   Li Zefan   cpuset: export ef...
1512
1513
  	FILE_EFFECTIVE_CPULIST,
  	FILE_EFFECTIVE_MEMLIST,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1514
1515
  	FILE_CPU_EXCLUSIVE,
  	FILE_MEM_EXCLUSIVE,
786083667   Paul Menage   Cpuset hardwall f...
1516
  	FILE_MEM_HARDWALL,
029190c51   Paul Jackson   cpuset sched_load...
1517
  	FILE_SCHED_LOAD_BALANCE,
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1518
  	FILE_SCHED_RELAX_DOMAIN_LEVEL,
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1519
1520
  	FILE_MEMORY_PRESSURE_ENABLED,
  	FILE_MEMORY_PRESSURE,
825a46af5   Paul Jackson   [PATCH] cpuset me...
1521
1522
  	FILE_SPREAD_PAGE,
  	FILE_SPREAD_SLAB,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1523
  } cpuset_filetype_t;
182446d08   Tejun Heo   cgroup: pass arou...
1524
1525
  static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
  			    u64 val)
700fe1ab9   Paul Menage   CGroup API files:...
1526
  {
182446d08   Tejun Heo   cgroup: pass arou...
1527
  	struct cpuset *cs = css_cs(css);
700fe1ab9   Paul Menage   CGroup API files:...
1528
  	cpuset_filetype_t type = cft->private;
a903f0865   Li Zefan   cpuset: fix the r...
1529
  	int retval = 0;
700fe1ab9   Paul Menage   CGroup API files:...
1530

5d21cc2db   Tejun Heo   cpuset: replace c...
1531
  	mutex_lock(&cpuset_mutex);
a903f0865   Li Zefan   cpuset: fix the r...
1532
1533
  	if (!is_cpuset_online(cs)) {
  		retval = -ENODEV;
5d21cc2db   Tejun Heo   cpuset: replace c...
1534
  		goto out_unlock;
a903f0865   Li Zefan   cpuset: fix the r...
1535
  	}
700fe1ab9   Paul Menage   CGroup API files:...
1536
1537
  
  	switch (type) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1538
  	case FILE_CPU_EXCLUSIVE:
700fe1ab9   Paul Menage   CGroup API files:...
1539
  		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1540
1541
  		break;
  	case FILE_MEM_EXCLUSIVE:
700fe1ab9   Paul Menage   CGroup API files:...
1542
  		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1543
  		break;
786083667   Paul Menage   Cpuset hardwall f...
1544
1545
1546
  	case FILE_MEM_HARDWALL:
  		retval = update_flag(CS_MEM_HARDWALL, cs, val);
  		break;
029190c51   Paul Jackson   cpuset sched_load...
1547
  	case FILE_SCHED_LOAD_BALANCE:
700fe1ab9   Paul Menage   CGroup API files:...
1548
  		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1549
  		break;
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
1550
  	case FILE_MEMORY_MIGRATE:
700fe1ab9   Paul Menage   CGroup API files:...
1551
  		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
1552
  		break;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1553
  	case FILE_MEMORY_PRESSURE_ENABLED:
700fe1ab9   Paul Menage   CGroup API files:...
1554
  		cpuset_memory_pressure_enabled = !!val;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1555
  		break;
825a46af5   Paul Jackson   [PATCH] cpuset me...
1556
  	case FILE_SPREAD_PAGE:
700fe1ab9   Paul Menage   CGroup API files:...
1557
  		retval = update_flag(CS_SPREAD_PAGE, cs, val);
825a46af5   Paul Jackson   [PATCH] cpuset me...
1558
1559
  		break;
  	case FILE_SPREAD_SLAB:
700fe1ab9   Paul Menage   CGroup API files:...
1560
  		retval = update_flag(CS_SPREAD_SLAB, cs, val);
825a46af5   Paul Jackson   [PATCH] cpuset me...
1561
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1562
1563
  	default:
  		retval = -EINVAL;
700fe1ab9   Paul Menage   CGroup API files:...
1564
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1565
  	}
5d21cc2db   Tejun Heo   cpuset: replace c...
1566
1567
  out_unlock:
  	mutex_unlock(&cpuset_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1568
1569
  	return retval;
  }
182446d08   Tejun Heo   cgroup: pass arou...
1570
1571
  static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
  			    s64 val)
5be7a4792   Paul Menage   Fix cpuset sched_...
1572
  {
182446d08   Tejun Heo   cgroup: pass arou...
1573
  	struct cpuset *cs = css_cs(css);
5be7a4792   Paul Menage   Fix cpuset sched_...
1574
  	cpuset_filetype_t type = cft->private;
5d21cc2db   Tejun Heo   cpuset: replace c...
1575
  	int retval = -ENODEV;
5be7a4792   Paul Menage   Fix cpuset sched_...
1576

5d21cc2db   Tejun Heo   cpuset: replace c...
1577
1578
1579
  	mutex_lock(&cpuset_mutex);
  	if (!is_cpuset_online(cs))
  		goto out_unlock;
e37123953   Paul Menage   cgroup files: rem...
1580

5be7a4792   Paul Menage   Fix cpuset sched_...
1581
1582
1583
1584
1585
1586
1587
1588
  	switch (type) {
  	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
  		retval = update_relax_domain_level(cs, val);
  		break;
  	default:
  		retval = -EINVAL;
  		break;
  	}
5d21cc2db   Tejun Heo   cpuset: replace c...
1589
1590
  out_unlock:
  	mutex_unlock(&cpuset_mutex);
5be7a4792   Paul Menage   Fix cpuset sched_...
1591
1592
  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1593
  /*
e37123953   Paul Menage   cgroup files: rem...
1594
1595
   * Common handling for a write to a "cpus" or "mems" file.
   */
451af504d   Tejun Heo   cgroup: replace c...
1596
1597
  static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
  				    char *buf, size_t nbytes, loff_t off)
e37123953   Paul Menage   cgroup files: rem...
1598
  {
451af504d   Tejun Heo   cgroup: replace c...
1599
  	struct cpuset *cs = css_cs(of_css(of));
645fcc9d2   Li Zefan   cpuset: don't all...
1600
  	struct cpuset *trialcs;
5d21cc2db   Tejun Heo   cpuset: replace c...
1601
  	int retval = -ENODEV;
e37123953   Paul Menage   cgroup files: rem...
1602

451af504d   Tejun Heo   cgroup: replace c...
1603
  	buf = strstrip(buf);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
  	/*
  	 * CPU or memory hotunplug may leave @cs w/o any execution
  	 * resources, in which case the hotplug code asynchronously updates
  	 * configuration and transfers all tasks to the nearest ancestor
  	 * which can execute.
  	 *
  	 * As writes to "cpus" or "mems" may restore @cs's execution
  	 * resources, wait for the previously scheduled operations before
  	 * proceeding, so that we don't end up repeatedly removing tasks added
  	 * after execution capability is restored.
76bb5ab8f   Tejun Heo   cpuset: break ker...
1614
1615
1616
1617
1618
1619
1620
1621
  	 *
  	 * cpuset_hotplug_work calls back into cgroup core via
  	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
  	 * operation like this one can lead to a deadlock through kernfs
  	 * active_ref protection.  Let's break the protection.  Losing the
  	 * protection is okay as we check whether @cs is online after
  	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
  	 * hierarchies.
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
1622
  	 */
76bb5ab8f   Tejun Heo   cpuset: break ker...
1623
1624
  	css_get(&cs->css);
  	kernfs_break_active_protection(of->kn);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
1625
  	flush_work(&cpuset_hotplug_work);
5d21cc2db   Tejun Heo   cpuset: replace c...
1626
1627
1628
  	mutex_lock(&cpuset_mutex);
  	if (!is_cpuset_online(cs))
  		goto out_unlock;
e37123953   Paul Menage   cgroup files: rem...
1629

645fcc9d2   Li Zefan   cpuset: don't all...
1630
  	trialcs = alloc_trial_cpuset(cs);
b75f38d65   Li Zefan   cpuset: add a mis...
1631
1632
  	if (!trialcs) {
  		retval = -ENOMEM;
5d21cc2db   Tejun Heo   cpuset: replace c...
1633
  		goto out_unlock;
b75f38d65   Li Zefan   cpuset: add a mis...
1634
  	}
645fcc9d2   Li Zefan   cpuset: don't all...
1635

451af504d   Tejun Heo   cgroup: replace c...
1636
  	switch (of_cft(of)->private) {
e37123953   Paul Menage   cgroup files: rem...
1637
  	case FILE_CPULIST:
645fcc9d2   Li Zefan   cpuset: don't all...
1638
  		retval = update_cpumask(cs, trialcs, buf);
e37123953   Paul Menage   cgroup files: rem...
1639
1640
  		break;
  	case FILE_MEMLIST:
645fcc9d2   Li Zefan   cpuset: don't all...
1641
  		retval = update_nodemask(cs, trialcs, buf);
e37123953   Paul Menage   cgroup files: rem...
1642
1643
1644
1645
1646
  		break;
  	default:
  		retval = -EINVAL;
  		break;
  	}
645fcc9d2   Li Zefan   cpuset: don't all...
1647
1648
  
  	free_trial_cpuset(trialcs);
5d21cc2db   Tejun Heo   cpuset: replace c...
1649
1650
  out_unlock:
  	mutex_unlock(&cpuset_mutex);
76bb5ab8f   Tejun Heo   cpuset: break ker...
1651
1652
  	kernfs_unbreak_active_protection(of->kn);
  	css_put(&cs->css);
e93ad19d0   Tejun Heo   cpuset: make mm m...
1653
  	flush_workqueue(cpuset_migrate_mm_wq);
451af504d   Tejun Heo   cgroup: replace c...
1654
  	return retval ?: nbytes;
e37123953   Paul Menage   cgroup files: rem...
1655
1656
1657
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1658
1659
1660
1661
1662
1663
   * These ASCII lists should be read in a single call, by using a user
   * buffer large enough to hold the entire map.  If read in smaller
   * chunks, there is no guarantee of atomicity.  Since the display format
   * used, a list of ranges of sequential numbers, is variable length,
   * and since these maps can change dynamically, one could read
   * gibberish by doing partial reads while a list was changing.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1664
   */
2da8ca822   Tejun Heo   cgroup: replace c...
1665
  static int cpuset_common_seq_show(struct seq_file *sf, void *v)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1666
  {
2da8ca822   Tejun Heo   cgroup: replace c...
1667
1668
  	struct cpuset *cs = css_cs(seq_css(sf));
  	cpuset_filetype_t type = seq_cft(sf)->private;
51ffe4117   Tejun Heo   cpuset: convert a...
1669
  	int ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1670

8447a0fee   Vladimir Davydov   cpuset: convert c...
1671
  	spin_lock_irq(&callback_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1672
1673
1674
  
  	switch (type) {
  	case FILE_CPULIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
1675
1676
  		seq_printf(sf, "%*pbl
  ", cpumask_pr_args(cs->cpus_allowed));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1677
1678
  		break;
  	case FILE_MEMLIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
1679
1680
  		seq_printf(sf, "%*pbl
  ", nodemask_pr_args(&cs->mems_allowed));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1681
  		break;
afd1a8b3e   Li Zefan   cpuset: export ef...
1682
  	case FILE_EFFECTIVE_CPULIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
1683
1684
  		seq_printf(sf, "%*pbl
  ", cpumask_pr_args(cs->effective_cpus));
afd1a8b3e   Li Zefan   cpuset: export ef...
1685
1686
  		break;
  	case FILE_EFFECTIVE_MEMLIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
1687
1688
  		seq_printf(sf, "%*pbl
  ", nodemask_pr_args(&cs->effective_mems));
afd1a8b3e   Li Zefan   cpuset: export ef...
1689
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1690
  	default:
51ffe4117   Tejun Heo   cpuset: convert a...
1691
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1692
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1693

8447a0fee   Vladimir Davydov   cpuset: convert c...
1694
  	spin_unlock_irq(&callback_lock);
51ffe4117   Tejun Heo   cpuset: convert a...
1695
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1696
  }
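  
  /*
   * Illustrative user-space sketch, not part of this kernel file: as the
   * comment above cpuset_common_seq_show() asks, fetch the whole list with
   * one sufficiently large read() so a concurrent update cannot tear it.
   * The path below is an assumption for the example (a v1 cpuset hierarchy
   * mounted at /sys/fs/cgroup/cpuset).
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>
  
  int main(void)
  {
  	char buf[4096];				/* big enough for the whole range list */
  	int fd = open("/sys/fs/cgroup/cpuset/cpuset.cpus", O_RDONLY);
  	ssize_t n;
  
  	if (fd < 0)
  		return 1;
  	n = read(fd, buf, sizeof(buf) - 1);	/* one read(), not many small chunks */
  	close(fd);
  	if (n <= 0)
  		return 1;
  	buf[n] = '\0';
  	printf("cpus: %s", buf);
  	return 0;
  }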
182446d08   Tejun Heo   cgroup: pass arou...
1697
  static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
700fe1ab9   Paul Menage   CGroup API files:...
1698
  {
182446d08   Tejun Heo   cgroup: pass arou...
1699
  	struct cpuset *cs = css_cs(css);
700fe1ab9   Paul Menage   CGroup API files:...
1700
1701
1702
1703
1704
1705
  	cpuset_filetype_t type = cft->private;
  	switch (type) {
  	case FILE_CPU_EXCLUSIVE:
  		return is_cpu_exclusive(cs);
  	case FILE_MEM_EXCLUSIVE:
  		return is_mem_exclusive(cs);
786083667   Paul Menage   Cpuset hardwall f...
1706
1707
  	case FILE_MEM_HARDWALL:
  		return is_mem_hardwall(cs);
700fe1ab9   Paul Menage   CGroup API files:...
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
  	case FILE_SCHED_LOAD_BALANCE:
  		return is_sched_load_balance(cs);
  	case FILE_MEMORY_MIGRATE:
  		return is_memory_migrate(cs);
  	case FILE_MEMORY_PRESSURE_ENABLED:
  		return cpuset_memory_pressure_enabled;
  	case FILE_MEMORY_PRESSURE:
  		return fmeter_getrate(&cs->fmeter);
  	case FILE_SPREAD_PAGE:
  		return is_spread_page(cs);
  	case FILE_SPREAD_SLAB:
  		return is_spread_slab(cs);
  	default:
  		BUG();
  	}
cf417141c   Max Krasnyansky   sched, cpuset: re...
1723
1724
1725
  
  	/* Unreachable but makes gcc happy */
  	return 0;
700fe1ab9   Paul Menage   CGroup API files:...
1726
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1727

182446d08   Tejun Heo   cgroup: pass arou...
1728
  static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
5be7a4792   Paul Menage   Fix cpuset sched_...
1729
  {
182446d08   Tejun Heo   cgroup: pass arou...
1730
  	struct cpuset *cs = css_cs(css);
5be7a4792   Paul Menage   Fix cpuset sched_...
1731
1732
1733
1734
1735
1736
1737
  	cpuset_filetype_t type = cft->private;
  	switch (type) {
  	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
  		return cs->relax_domain_level;
  	default:
  		BUG();
  	}
cf417141c   Max Krasnyansky   sched, cpuset: re...
1738
1739
1740
  
  	/* Unreachable but makes gcc happy */
  	return 0;
5be7a4792   Paul Menage   Fix cpuset sched_...
1741
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1742
1743
1744
1745
  
  /*
   * for the common functions, 'private' gives the type of file
   */
addf2c739   Paul Menage   Cpuset hardwall f...
1746
1747
1748
  static struct cftype files[] = {
  	{
  		.name = "cpus",
2da8ca822   Tejun Heo   cgroup: replace c...
1749
  		.seq_show = cpuset_common_seq_show,
451af504d   Tejun Heo   cgroup: replace c...
1750
  		.write = cpuset_write_resmask,
e37123953   Paul Menage   cgroup files: rem...
1751
  		.max_write_len = (100U + 6 * NR_CPUS),
addf2c739   Paul Menage   Cpuset hardwall f...
1752
1753
1754
1755
1756
  		.private = FILE_CPULIST,
  	},
  
  	{
  		.name = "mems",
2da8ca822   Tejun Heo   cgroup: replace c...
1757
  		.seq_show = cpuset_common_seq_show,
451af504d   Tejun Heo   cgroup: replace c...
1758
  		.write = cpuset_write_resmask,
e37123953   Paul Menage   cgroup files: rem...
1759
  		.max_write_len = (100U + 6 * MAX_NUMNODES),
addf2c739   Paul Menage   Cpuset hardwall f...
1760
1761
1762
1763
  		.private = FILE_MEMLIST,
  	},
  
  	{
afd1a8b3e   Li Zefan   cpuset: export ef...
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
  		.name = "effective_cpus",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_EFFECTIVE_CPULIST,
  	},
  
  	{
  		.name = "effective_mems",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_EFFECTIVE_MEMLIST,
  	},
  
  	{
addf2c739   Paul Menage   Cpuset hardwall f...
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
  		.name = "cpu_exclusive",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_CPU_EXCLUSIVE,
  	},
  
  	{
  		.name = "mem_exclusive",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEM_EXCLUSIVE,
  	},
  
  	{
786083667   Paul Menage   Cpuset hardwall f...
1790
1791
1792
1793
1794
1795
1796
  		.name = "mem_hardwall",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEM_HARDWALL,
  	},
  
  	{
addf2c739   Paul Menage   Cpuset hardwall f...
1797
1798
1799
1800
1801
1802
1803
1804
  		.name = "sched_load_balance",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_SCHED_LOAD_BALANCE,
  	},
  
  	{
  		.name = "sched_relax_domain_level",
5be7a4792   Paul Menage   Fix cpuset sched_...
1805
1806
  		.read_s64 = cpuset_read_s64,
  		.write_s64 = cpuset_write_s64,
addf2c739   Paul Menage   Cpuset hardwall f...
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
  		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
  	},
  
  	{
  		.name = "memory_migrate",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEMORY_MIGRATE,
  	},
  
  	{
  		.name = "memory_pressure",
  		.read_u64 = cpuset_read_u64,
addf2c739   Paul Menage   Cpuset hardwall f...
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
  	},
  
  	{
  		.name = "memory_spread_page",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_SPREAD_PAGE,
  	},
  
  	{
  		.name = "memory_spread_slab",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_SPREAD_SLAB,
  	},
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1835

4baf6e332   Tejun Heo   cgroup: convert a...
1836
1837
1838
1839
1840
1841
1842
  	{
  		.name = "memory_pressure_enabled",
  		.flags = CFTYPE_ONLY_ON_ROOT,
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEMORY_PRESSURE_ENABLED,
  	},
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1843

4baf6e332   Tejun Heo   cgroup: convert a...
1844
1845
  	{ }	/* terminate */
  };
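  
  /*
   * Orientation note (assumption, not derived from this file alone): when the
   * cpuset controller is mounted as a cgroup v1 hierarchy, the entries above
   * appear as "cpuset.cpus", "cpuset.mems", "cpuset.memory_migrate" and so
   * on, since cgroup core prefixes legacy_cftypes names with the subsystem
   * name unless the hierarchy is mounted with the "noprefix" option.
   */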
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1846
1847
  
  /*
92fb97487   Tejun Heo   cgroup: rename ->...
1848
   *	cpuset_css_alloc - allocate a cpuset css
c9e5fe66f   Li Zefan   cpuset: rename @c...
1849
   *	cgrp:	control group that the new cpuset will be part of
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1850
   */
eb95419b0   Tejun Heo   cgroup: pass arou...
1851
1852
  static struct cgroup_subsys_state *
  cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1853
  {
c8f699bb5   Tejun Heo   cpuset: introduce...
1854
  	struct cpuset *cs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1855

eb95419b0   Tejun Heo   cgroup: pass arou...
1856
  	if (!parent_css)
8793d854e   Paul Menage   Task Control Grou...
1857
  		return &top_cpuset.css;
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1858

c8f699bb5   Tejun Heo   cpuset: introduce...
1859
  	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1860
  	if (!cs)
8793d854e   Paul Menage   Task Control Grou...
1861
  		return ERR_PTR(-ENOMEM);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
1862
1863
1864
1865
  	if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
  		goto free_cs;
  	if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
  		goto free_cpus;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1866

029190c51   Paul Jackson   cpuset sched_load...
1867
  	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
300ed6cbb   Li Zefan   cpuset: convert c...
1868
  	cpumask_clear(cs->cpus_allowed);
f9a86fcbb   Mike Travis   cpuset: modify cp...
1869
  	nodes_clear(cs->mems_allowed);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
1870
1871
  	cpumask_clear(cs->effective_cpus);
  	nodes_clear(cs->effective_mems);
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1872
  	fmeter_init(&cs->fmeter);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1873
  	cs->relax_domain_level = -1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1874

c8f699bb5   Tejun Heo   cpuset: introduce...
1875
  	return &cs->css;
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
1876
1877
1878
1879
1880
1881
  
  free_cpus:
  	free_cpumask_var(cs->cpus_allowed);
  free_cs:
  	kfree(cs);
  	return ERR_PTR(-ENOMEM);
c8f699bb5   Tejun Heo   cpuset: introduce...
1882
  }
eb95419b0   Tejun Heo   cgroup: pass arou...
1883
  static int cpuset_css_online(struct cgroup_subsys_state *css)
c8f699bb5   Tejun Heo   cpuset: introduce...
1884
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
1885
  	struct cpuset *cs = css_cs(css);
c431069fe   Tejun Heo   cpuset: remove cp...
1886
  	struct cpuset *parent = parent_cs(cs);
ae8086ce1   Tejun Heo   cpuset: introduce...
1887
  	struct cpuset *tmp_cs;
492eb21b9   Tejun Heo   cgroup: make hier...
1888
  	struct cgroup_subsys_state *pos_css;
c8f699bb5   Tejun Heo   cpuset: introduce...
1889
1890
1891
  
  	if (!parent)
  		return 0;
5d21cc2db   Tejun Heo   cpuset: replace c...
1892
  	mutex_lock(&cpuset_mutex);
efeb77b2f   Tejun Heo   cpuset: introduce...
1893
  	set_bit(CS_ONLINE, &cs->flags);
c8f699bb5   Tejun Heo   cpuset: introduce...
1894
1895
1896
1897
  	if (is_spread_page(parent))
  		set_bit(CS_SPREAD_PAGE, &cs->flags);
  	if (is_spread_slab(parent))
  		set_bit(CS_SPREAD_SLAB, &cs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1898

664eeddee   Mel Gorman   mm: page_alloc: u...
1899
  	cpuset_inc();
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1900

8447a0fee   Vladimir Davydov   cpuset: convert c...
1901
  	spin_lock_irq(&callback_lock);
9e10a130d   Tejun Heo   cgroup: replace c...
1902
  	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
1903
1904
1905
  		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
  		cs->effective_mems = parent->effective_mems;
  	}
8447a0fee   Vladimir Davydov   cpuset: convert c...
1906
  	spin_unlock_irq(&callback_lock);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
1907

eb95419b0   Tejun Heo   cgroup: pass arou...
1908
  	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
5d21cc2db   Tejun Heo   cpuset: replace c...
1909
  		goto out_unlock;
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
  
  	/*
  	 * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
  	 * set.  This flag handling is implemented in cgroup core for
  	 * historical reasons - the flag may be specified during mount.
  	 *
  	 * Currently, if any sibling cpusets have exclusive cpus or mem, we
  	 * refuse to clone the configuration - thereby refusing to let the
  	 * task enter, and as a result failing the sys_unshare() or
  	 * clone() which initiated it.  If this becomes a problem for some
  	 * users who wish to allow that scenario, then this could be
  	 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
  	 * (and likewise for mems) to the new cgroup.
  	 */
ae8086ce1   Tejun Heo   cpuset: introduce...
1924
  	rcu_read_lock();
492eb21b9   Tejun Heo   cgroup: make hier...
1925
  	cpuset_for_each_child(tmp_cs, pos_css, parent) {
ae8086ce1   Tejun Heo   cpuset: introduce...
1926
1927
  		if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
  			rcu_read_unlock();
5d21cc2db   Tejun Heo   cpuset: replace c...
1928
  			goto out_unlock;
ae8086ce1   Tejun Heo   cpuset: introduce...
1929
  		}
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1930
  	}
ae8086ce1   Tejun Heo   cpuset: introduce...
1931
  	rcu_read_unlock();
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1932

8447a0fee   Vladimir Davydov   cpuset: convert c...
1933
  	spin_lock_irq(&callback_lock);
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1934
  	cs->mems_allowed = parent->mems_allowed;
790317e1b   Zefan Li   cpuset: initializ...
1935
  	cs->effective_mems = parent->mems_allowed;
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
1936
  	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
790317e1b   Zefan Li   cpuset: initializ...
1937
  	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
cea74465e   Dan Carpenter   cpuset: lock vs u...
1938
  	spin_unlock_irq(&callback_lock);
5d21cc2db   Tejun Heo   cpuset: replace c...
1939
1940
  out_unlock:
  	mutex_unlock(&cpuset_mutex);
c8f699bb5   Tejun Heo   cpuset: introduce...
1941
1942
  	return 0;
  }
0b9e6965a   Zhao Hongjiang   cpuset: relocate ...
1943
1944
1945
1946
1947
  /*
   * If the cpuset being removed has its flag 'sched_load_balance'
   * enabled, then simulate turning sched_load_balance off, which
   * will call rebuild_sched_domains_locked().
   */
eb95419b0   Tejun Heo   cgroup: pass arou...
1948
  static void cpuset_css_offline(struct cgroup_subsys_state *css)
c8f699bb5   Tejun Heo   cpuset: introduce...
1949
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
1950
  	struct cpuset *cs = css_cs(css);
c8f699bb5   Tejun Heo   cpuset: introduce...
1951

5d21cc2db   Tejun Heo   cpuset: replace c...
1952
  	mutex_lock(&cpuset_mutex);
c8f699bb5   Tejun Heo   cpuset: introduce...
1953
1954
1955
  
  	if (is_sched_load_balance(cs))
  		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
664eeddee   Mel Gorman   mm: page_alloc: u...
1956
  	cpuset_dec();
efeb77b2f   Tejun Heo   cpuset: introduce...
1957
  	clear_bit(CS_ONLINE, &cs->flags);
c8f699bb5   Tejun Heo   cpuset: introduce...
1958

5d21cc2db   Tejun Heo   cpuset: replace c...
1959
  	mutex_unlock(&cpuset_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1960
  }
eb95419b0   Tejun Heo   cgroup: pass arou...
1961
  static void cpuset_css_free(struct cgroup_subsys_state *css)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1962
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
1963
  	struct cpuset *cs = css_cs(css);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1964

e2b9a3d7d   Li Zefan   cpuset: add cs->e...
1965
  	free_cpumask_var(cs->effective_cpus);
300ed6cbb   Li Zefan   cpuset: convert c...
1966
  	free_cpumask_var(cs->cpus_allowed);
8793d854e   Paul Menage   Task Control Grou...
1967
  	kfree(cs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1968
  }
39bd0d15e   Li Zefan   cpuset: initializ...
1969
1970
1971
  static void cpuset_bind(struct cgroup_subsys_state *root_css)
  {
  	mutex_lock(&cpuset_mutex);
8447a0fee   Vladimir Davydov   cpuset: convert c...
1972
  	spin_lock_irq(&callback_lock);
39bd0d15e   Li Zefan   cpuset: initializ...
1973

9e10a130d   Tejun Heo   cgroup: replace c...
1974
  	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
39bd0d15e   Li Zefan   cpuset: initializ...
1975
1976
1977
1978
1979
1980
1981
  		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
  		top_cpuset.mems_allowed = node_possible_map;
  	} else {
  		cpumask_copy(top_cpuset.cpus_allowed,
  			     top_cpuset.effective_cpus);
  		top_cpuset.mems_allowed = top_cpuset.effective_mems;
  	}
8447a0fee   Vladimir Davydov   cpuset: convert c...
1982
  	spin_unlock_irq(&callback_lock);
39bd0d15e   Li Zefan   cpuset: initializ...
1983
1984
  	mutex_unlock(&cpuset_mutex);
  }
06f4e9489   Zefan Li   cpuset: make sure...
1985
1986
1987
1988
1989
  /*
   * Make sure the new task conforms to the current state of its parent,
   * which could have been changed by cpuset just after it inherits the
   * state from the parent and before it sits on the cgroup's task list.
   */
8a15b8174   Wei Yongjun   cpuset: fix non s...
1990
  static void cpuset_fork(struct task_struct *task)
06f4e9489   Zefan Li   cpuset: make sure...
1991
1992
1993
1994
1995
1996
1997
  {
  	if (task_css_is_root(task, cpuset_cgrp_id))
  		return;
  
  	set_cpus_allowed_ptr(task, &current->cpus_allowed);
  	task->mems_allowed = current->mems_allowed;
  }
073219e99   Tejun Heo   cgroup: clean up ...
1998
  struct cgroup_subsys cpuset_cgrp_subsys = {
39bd0d15e   Li Zefan   cpuset: initializ...
1999
2000
2001
2002
2003
2004
2005
  	.css_alloc	= cpuset_css_alloc,
  	.css_online	= cpuset_css_online,
  	.css_offline	= cpuset_css_offline,
  	.css_free	= cpuset_css_free,
  	.can_attach	= cpuset_can_attach,
  	.cancel_attach	= cpuset_cancel_attach,
  	.attach		= cpuset_attach,
5cf1cacb4   Tejun Heo   cgroup, cpuset: r...
2006
  	.post_attach	= cpuset_post_attach,
39bd0d15e   Li Zefan   cpuset: initializ...
2007
  	.bind		= cpuset_bind,
06f4e9489   Zefan Li   cpuset: make sure...
2008
  	.fork		= cpuset_fork,
5577964e6   Tejun Heo   cgroup: rename cg...
2009
  	.legacy_cftypes	= files,
b38e42e96   Tejun Heo   cgroup: convert c...
2010
  	.early_init	= true,
8793d854e   Paul Menage   Task Control Grou...
2011
  };
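  /*
   * Editor's note: the table above is the cpuset controller's actual hook
   * set.  Below is a minimal, hypothetical sketch of the same pattern for
   * an imaginary controller "foo" (not part of this file); a real
   * controller would additionally need a SUBSYS(foo) entry in
   * linux/cgroup_subsys.h, which is assumed here and not shown.
   */
  static struct cgroup_subsys_state *
  foo_css_alloc(struct cgroup_subsys_state *parent_css)
  {
  	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
  
  	return css ? css : ERR_PTR(-ENOMEM);
  }
  
  static void foo_css_free(struct cgroup_subsys_state *css)
  {
  	kfree(css);
  }
  
  struct cgroup_subsys foo_cgrp_subsys = {
  	.css_alloc	= foo_css_alloc,
  	.css_free	= foo_css_free,
  	.early_init	= false,
  };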
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2012
2013
2014
2015
2016
2017
2018
2019
  /**
   * cpuset_init - initialize cpusets at system boot
   *
   * Description: Initialize top_cpuset and the cpuset internal file system.
   **/
  
  int __init cpuset_init(void)
  {
8793d854e   Paul Menage   Task Control Grou...
2020
  	int err = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2021

58568d2a8   Miao Xie   cpuset,mm: update...
2022
2023
  	if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
  		BUG();
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2024
2025
  	if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
  		BUG();
58568d2a8   Miao Xie   cpuset,mm: update...
2026

300ed6cbb   Li Zefan   cpuset: convert c...
2027
  	cpumask_setall(top_cpuset.cpus_allowed);
f9a86fcbb   Mike Travis   cpuset: modify cp...
2028
  	nodes_setall(top_cpuset.mems_allowed);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2029
2030
  	cpumask_setall(top_cpuset.effective_cpus);
  	nodes_setall(top_cpuset.effective_mems);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2031

3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2032
  	fmeter_init(&top_cpuset.fmeter);
029190c51   Paul Jackson   cpuset sched_load...
2033
  	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
2034
  	top_cpuset.relax_domain_level = -1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2035

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2036
2037
  	err = register_filesystem(&cpuset_fs_type);
  	if (err < 0)
8793d854e   Paul Menage   Task Control Grou...
2038
  		return err;
2341d1b65   Li Zefan   cpuset: convert c...
2039
2040
  	if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
  		BUG();
8793d854e   Paul Menage   Task Control Grou...
2041
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2042
  }
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2043
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
2044
   * If CPU and/or memory hotplug handlers, below, unplug any CPUs
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2045
2046
   * or memory nodes, we need to walk over the cpuset hierarchy,
   * removing that CPU or node from all cpusets.  If this removes the
956db3ca0   Cliff Wickman   hotplug cpu: move...
2047
2048
   * last CPU or node from a cpuset, then move the tasks in the empty
   * cpuset to its next-highest non-empty parent.
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2049
   */
956db3ca0   Cliff Wickman   hotplug cpu: move...
2050
2051
2052
  static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
  {
  	struct cpuset *parent;
c8d9c90c7   Paul Jackson   hotplug cpu: move...
2053
  	/*
956db3ca0   Cliff Wickman   hotplug cpu: move...
2054
2055
2056
  	 * Find its next-highest non-empty parent (top cpuset
  	 * has online cpus, so can't be empty).
  	 */
c431069fe   Tejun Heo   cpuset: remove cp...
2057
  	parent = parent_cs(cs);
300ed6cbb   Li Zefan   cpuset: convert c...
2058
  	while (cpumask_empty(parent->cpus_allowed) ||
b45012955   Paul Jackson   hotplug cpu move ...
2059
  			nodes_empty(parent->mems_allowed))
c431069fe   Tejun Heo   cpuset: remove cp...
2060
  		parent = parent_cs(parent);
956db3ca0   Cliff Wickman   hotplug cpu: move...
2061

8cc993452   Tejun Heo   cgroup, cpuset: r...
2062
  	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
12d3089c1   Fabian Frederick   kernel/cpuset.c: ...
2063
  		pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
e61734c55   Tejun Heo   cgroup: remove cg...
2064
2065
2066
  		pr_cont_cgroup_name(cs->css.cgroup);
  		pr_cont("
  ");
8cc993452   Tejun Heo   cgroup, cpuset: r...
2067
  	}
956db3ca0   Cliff Wickman   hotplug cpu: move...
2068
  }
be4c9dd7a   Li Zefan   cpuset: enable on...
2069
2070
2071
2072
  static void
  hotplug_update_tasks_legacy(struct cpuset *cs,
  			    struct cpumask *new_cpus, nodemask_t *new_mems,
  			    bool cpus_updated, bool mems_updated)
390a36aad   Li Zefan   cpuset: refactor ...
2073
2074
  {
  	bool is_empty;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2075
  	spin_lock_irq(&callback_lock);
be4c9dd7a   Li Zefan   cpuset: enable on...
2076
2077
2078
2079
  	cpumask_copy(cs->cpus_allowed, new_cpus);
  	cpumask_copy(cs->effective_cpus, new_cpus);
  	cs->mems_allowed = *new_mems;
  	cs->effective_mems = *new_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2080
  	spin_unlock_irq(&callback_lock);
390a36aad   Li Zefan   cpuset: refactor ...
2081
2082
2083
2084
2085
  
  	/*
  	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
  	 * as the tasks will be migrated to an ancestor.
  	 */
be4c9dd7a   Li Zefan   cpuset: enable on...
2086
  	if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
390a36aad   Li Zefan   cpuset: refactor ...
2087
  		update_tasks_cpumask(cs);
be4c9dd7a   Li Zefan   cpuset: enable on...
2088
  	if (mems_updated && !nodes_empty(cs->mems_allowed))
390a36aad   Li Zefan   cpuset: refactor ...
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
  		update_tasks_nodemask(cs);
  
  	is_empty = cpumask_empty(cs->cpus_allowed) ||
  		   nodes_empty(cs->mems_allowed);
  
  	mutex_unlock(&cpuset_mutex);
  
  	/*
  	 * Move tasks to the nearest ancestor with execution resources.
  	 * This is a full cgroup operation which will also call back into
  	 * cpuset. Should be done outside any lock.
  	 */
  	if (is_empty)
  		remove_tasks_in_empty_cpuset(cs);
  
  	mutex_lock(&cpuset_mutex);
  }
be4c9dd7a   Li Zefan   cpuset: enable on...
2106
2107
2108
2109
  static void
  hotplug_update_tasks(struct cpuset *cs,
  		     struct cpumask *new_cpus, nodemask_t *new_mems,
  		     bool cpus_updated, bool mems_updated)
390a36aad   Li Zefan   cpuset: refactor ...
2110
  {
be4c9dd7a   Li Zefan   cpuset: enable on...
2111
2112
2113
2114
  	if (cpumask_empty(new_cpus))
  		cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
  	if (nodes_empty(*new_mems))
  		*new_mems = parent_cs(cs)->effective_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2115
  	spin_lock_irq(&callback_lock);
be4c9dd7a   Li Zefan   cpuset: enable on...
2116
2117
  	cpumask_copy(cs->effective_cpus, new_cpus);
  	cs->effective_mems = *new_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2118
  	spin_unlock_irq(&callback_lock);
390a36aad   Li Zefan   cpuset: refactor ...
2119

be4c9dd7a   Li Zefan   cpuset: enable on...
2120
  	if (cpus_updated)
390a36aad   Li Zefan   cpuset: refactor ...
2121
  		update_tasks_cpumask(cs);
be4c9dd7a   Li Zefan   cpuset: enable on...
2122
  	if (mems_updated)
390a36aad   Li Zefan   cpuset: refactor ...
2123
2124
  		update_tasks_nodemask(cs);
  }
deb7aa308   Tejun Heo   cpuset: reorganiz...
2125
  /**
388afd854   Li Zefan   cpuset: remove as...
2126
   * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
deb7aa308   Tejun Heo   cpuset: reorganiz...
2127
   * @cs: cpuset in interest
956db3ca0   Cliff Wickman   hotplug cpu: move...
2128
   *
deb7aa308   Tejun Heo   cpuset: reorganiz...
2129
2130
2131
   * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
   * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
   * all its tasks are moved to the nearest ancestor with both resources.
80d1fa646   Srivatsa S. Bhat   cpusets, hotplug:...
2132
   */
388afd854   Li Zefan   cpuset: remove as...
2133
  static void cpuset_hotplug_update_tasks(struct cpuset *cs)
80d1fa646   Srivatsa S. Bhat   cpusets, hotplug:...
2134
  {
be4c9dd7a   Li Zefan   cpuset: enable on...
2135
2136
2137
2138
  	static cpumask_t new_cpus;
  	static nodemask_t new_mems;
  	bool cpus_updated;
  	bool mems_updated;
e44193d39   Li Zefan   cpuset: let hotpl...
2139
2140
  retry:
  	wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
80d1fa646   Srivatsa S. Bhat   cpusets, hotplug:...
2141

5d21cc2db   Tejun Heo   cpuset: replace c...
2142
  	mutex_lock(&cpuset_mutex);
7ddf96b02   Srivatsa S. Bhat   cpusets, hotplug:...
2143

e44193d39   Li Zefan   cpuset: let hotpl...
2144
2145
2146
2147
2148
2149
2150
2151
  	/*
  	 * We have raced with task attaching. We wait until attaching
  	 * is finished, so we won't attach a task to an empty cpuset.
  	 */
  	if (cs->attach_in_progress) {
  		mutex_unlock(&cpuset_mutex);
  		goto retry;
  	}
be4c9dd7a   Li Zefan   cpuset: enable on...
2152
2153
  	cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
  	nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
80d1fa646   Srivatsa S. Bhat   cpusets, hotplug:...
2154

be4c9dd7a   Li Zefan   cpuset: enable on...
2155
2156
  	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
  	mems_updated = !nodes_equal(new_mems, cs->effective_mems);
deb7aa308   Tejun Heo   cpuset: reorganiz...
2157

9e10a130d   Tejun Heo   cgroup: replace c...
2158
  	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
be4c9dd7a   Li Zefan   cpuset: enable on...
2159
2160
  		hotplug_update_tasks(cs, &new_cpus, &new_mems,
  				     cpus_updated, mems_updated);
390a36aad   Li Zefan   cpuset: refactor ...
2161
  	else
be4c9dd7a   Li Zefan   cpuset: enable on...
2162
2163
  		hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
  					    cpus_updated, mems_updated);
8d0339487   Tejun Heo   cpuset: make CPU ...
2164

5d21cc2db   Tejun Heo   cpuset: replace c...
2165
  	mutex_unlock(&cpuset_mutex);
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2166
  }
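  /*
   * Editor's sketch (not part of this file) of the wait-then-recheck
   * retry idiom used above: sleep until the flag looks clear, then
   * re-check it under the lock and retry if we raced.  All names here
   * (struct foo_obj, foo_wq, foo_lock) are hypothetical.
   */
  struct foo_obj {
  	int busy;
  };
  
  static DECLARE_WAIT_QUEUE_HEAD(foo_wq);
  static DEFINE_MUTEX(foo_lock);
  
  static void foo_update(struct foo_obj *obj)
  {
  retry:
  	wait_event(foo_wq, obj->busy == 0);
  
  	mutex_lock(&foo_lock);
  	if (obj->busy) {		/* raced with a new "attacher" */
  		mutex_unlock(&foo_lock);
  		goto retry;
  	}
  	/* ... obj is stable here, do the update ... */
  	mutex_unlock(&foo_lock);
  }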
deb7aa308   Tejun Heo   cpuset: reorganiz...
2167
  /**
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2168
   * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
956db3ca0   Cliff Wickman   hotplug cpu: move...
2169
   *
deb7aa308   Tejun Heo   cpuset: reorganiz...
2170
2171
2172
2173
2174
   * This function is called after either CPU or memory configuration has
   * changed and updates cpuset accordingly.  The top_cpuset is always
   * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
   * order to make cpusets transparent (of no effect) on systems that are
   * actively using CPU hotplug but making no active use of cpusets.
956db3ca0   Cliff Wickman   hotplug cpu: move...
2175
   *
deb7aa308   Tejun Heo   cpuset: reorganiz...
2176
   * Non-root cpusets are only affected by offlining.  If any CPUs or memory
388afd854   Li Zefan   cpuset: remove as...
2177
2178
   * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on
   * all descendants.
956db3ca0   Cliff Wickman   hotplug cpu: move...
2179
   *
deb7aa308   Tejun Heo   cpuset: reorganiz...
2180
2181
   * Note that CPU offlining during suspend is ignored.  We don't modify
   * cpusets across suspend/resume cycles at all.
956db3ca0   Cliff Wickman   hotplug cpu: move...
2182
   */
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2183
  static void cpuset_hotplug_workfn(struct work_struct *work)
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2184
  {
5c5cc6232   Li Zefan   cpuset: allow to ...
2185
2186
  	static cpumask_t new_cpus;
  	static nodemask_t new_mems;
deb7aa308   Tejun Heo   cpuset: reorganiz...
2187
  	bool cpus_updated, mems_updated;
9e10a130d   Tejun Heo   cgroup: replace c...
2188
  	bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2189

5d21cc2db   Tejun Heo   cpuset: replace c...
2190
  	mutex_lock(&cpuset_mutex);
956db3ca0   Cliff Wickman   hotplug cpu: move...
2191

deb7aa308   Tejun Heo   cpuset: reorganiz...
2192
2193
2194
  	/* fetch the available cpus/mems and find out which changed how */
  	cpumask_copy(&new_cpus, cpu_active_mask);
  	new_mems = node_states[N_MEMORY];
7ddf96b02   Srivatsa S. Bhat   cpusets, hotplug:...
2195

7e88291be   Li Zefan   cpuset: make cs->...
2196
2197
  	cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
  	mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
7ddf96b02   Srivatsa S. Bhat   cpusets, hotplug:...
2198

deb7aa308   Tejun Heo   cpuset: reorganiz...
2199
2200
  	/* synchronize cpus_allowed to cpu_active_mask */
  	if (cpus_updated) {
8447a0fee   Vladimir Davydov   cpuset: convert c...
2201
  		spin_lock_irq(&callback_lock);
7e88291be   Li Zefan   cpuset: make cs->...
2202
2203
  		if (!on_dfl)
  			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
1344ab9c2   Li Zefan   cpuset: update cp...
2204
  		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
8447a0fee   Vladimir Davydov   cpuset: convert c...
2205
  		spin_unlock_irq(&callback_lock);
deb7aa308   Tejun Heo   cpuset: reorganiz...
2206
2207
  		/* we don't mess with cpumasks of tasks in top_cpuset */
  	}
b45012955   Paul Jackson   hotplug cpu move ...
2208

deb7aa308   Tejun Heo   cpuset: reorganiz...
2209
2210
  	/* synchronize mems_allowed to N_MEMORY */
  	if (mems_updated) {
8447a0fee   Vladimir Davydov   cpuset: convert c...
2211
  		spin_lock_irq(&callback_lock);
7e88291be   Li Zefan   cpuset: make cs->...
2212
2213
  		if (!on_dfl)
  			top_cpuset.mems_allowed = new_mems;
1344ab9c2   Li Zefan   cpuset: update cp...
2214
  		top_cpuset.effective_mems = new_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2215
  		spin_unlock_irq(&callback_lock);
d66393e54   Tejun Heo   cpuset: use css_t...
2216
  		update_tasks_nodemask(&top_cpuset);
deb7aa308   Tejun Heo   cpuset: reorganiz...
2217
  	}
b45012955   Paul Jackson   hotplug cpu move ...
2218

388afd854   Li Zefan   cpuset: remove as...
2219
  	mutex_unlock(&cpuset_mutex);
5c5cc6232   Li Zefan   cpuset: allow to ...
2220
2221
  	/* if cpus or mems changed, we need to propagate to descendants */
  	if (cpus_updated || mems_updated) {
deb7aa308   Tejun Heo   cpuset: reorganiz...
2222
  		struct cpuset *cs;
492eb21b9   Tejun Heo   cgroup: make hier...
2223
  		struct cgroup_subsys_state *pos_css;
f9b4fb8da   Miao Xie   cpusets: update t...
2224

fc560a26a   Tejun Heo   cpuset: replace c...
2225
  		rcu_read_lock();
492eb21b9   Tejun Heo   cgroup: make hier...
2226
  		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
ec903c0c8   Tejun Heo   cgroup: rename cs...
2227
  			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
388afd854   Li Zefan   cpuset: remove as...
2228
2229
  				continue;
  			rcu_read_unlock();
7ddf96b02   Srivatsa S. Bhat   cpusets, hotplug:...
2230

388afd854   Li Zefan   cpuset: remove as...
2231
  			cpuset_hotplug_update_tasks(cs);
b45012955   Paul Jackson   hotplug cpu move ...
2232

388afd854   Li Zefan   cpuset: remove as...
2233
2234
2235
2236
2237
  			rcu_read_lock();
  			css_put(&cs->css);
  		}
  		rcu_read_unlock();
  	}
8d0339487   Tejun Heo   cpuset: make CPU ...
2238

deb7aa308   Tejun Heo   cpuset: reorganiz...
2239
  	/* rebuild sched domains if cpus_allowed has changed */
e0e80a02e   Li Zhong   cpuset: use rebui...
2240
2241
  	if (cpus_updated)
  		rebuild_sched_domains();
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2242
  }
7ddf96b02   Srivatsa S. Bhat   cpusets, hotplug:...
2243
  void cpuset_update_active_cpus(bool cpu_online)
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
2244
  {
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
  	/*
  	 * We're inside cpu hotplug critical region which usually nests
  	 * inside cgroup synchronization.  Bounce actual hotplug processing
  	 * to a work item to avoid reverse locking order.
  	 *
  	 * We still need to do partition_sched_domains() synchronously;
  	 * otherwise, the scheduler will get confused and put tasks to the
  	 * dead CPU.  Fall back to the default single domain.
  	 * cpuset_hotplug_workfn() will rebuild it as necessary.
  	 */
  	partition_sched_domains(1, NULL, NULL);
  	schedule_work(&cpuset_hotplug_work);
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
2257
  }
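  /*
   * Editor's sketch of the "bounce to a work item" pattern described in
   * the comment above, with hypothetical names (foo_hotplug_workfn,
   * foo_hotplug_work, foo_cpu_callback); assumes <linux/workqueue.h>.
   */
  static void foo_hotplug_workfn(struct work_struct *work)
  {
  	/* the heavy rebuild runs here, outside the hotplug critical region */
  }
  static DECLARE_WORK(foo_hotplug_work, foo_hotplug_workfn);
  
  static void foo_cpu_callback(void)
  {
  	/* called with hotplug locks held: defer the real work */
  	schedule_work(&foo_hotplug_work);
  }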
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
2258

38837fc75   Paul Jackson   [PATCH] cpuset: t...
2259
  /*
38d7bee9d   Lai Jiangshan   cpuset: use N_MEM...
2260
2261
   * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
   * Call this routine anytime after node_states[N_MEMORY] changes.
a1cd2b13f   Srivatsa S. Bhat   cpusets: Remove/u...
2262
   * See cpuset_update_active_cpus() for CPU hotplug handling.
38837fc75   Paul Jackson   [PATCH] cpuset: t...
2263
   */
f481891fd   Miao Xie   cpuset: update to...
2264
2265
  static int cpuset_track_online_nodes(struct notifier_block *self,
  				unsigned long action, void *arg)
38837fc75   Paul Jackson   [PATCH] cpuset: t...
2266
  {
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2267
  	schedule_work(&cpuset_hotplug_work);
f481891fd   Miao Xie   cpuset: update to...
2268
  	return NOTIFY_OK;
38837fc75   Paul Jackson   [PATCH] cpuset: t...
2269
  }
d8f10cb3d   Andrew Morton   kernel/cpuset.c: ...
2270
2271
2272
2273
2274
  
  static struct notifier_block cpuset_track_online_nodes_nb = {
  	.notifier_call = cpuset_track_online_nodes,
  	.priority = 10,		/* ??! */
  };
38837fc75   Paul Jackson   [PATCH] cpuset: t...
2275

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2276
2277
2278
2279
  /**
   * cpuset_init_smp - initialize cpus_allowed
   *
   * Description: Finish top cpuset after cpu, node maps are initialized
d8f10cb3d   Andrew Morton   kernel/cpuset.c: ...
2280
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2281
2282
  void __init cpuset_init_smp(void)
  {
6ad4c1888   Peter Zijlstra   sched: Fix balanc...
2283
  	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
38d7bee9d   Lai Jiangshan   cpuset: use N_MEM...
2284
  	top_cpuset.mems_allowed = node_states[N_MEMORY];
33ad801df   Li Zefan   cpuset: record ol...
2285
  	top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
2286

e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2287
2288
  	cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
  	top_cpuset.effective_mems = node_states[N_MEMORY];
d8f10cb3d   Andrew Morton   kernel/cpuset.c: ...
2289
  	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
e93ad19d0   Tejun Heo   cpuset: make mm m...
2290
2291
2292
  
  	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
  	BUG_ON(!cpuset_migrate_mm_wq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2293
2294
2295
  }
  
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2296
2297
   * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
   * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
6af866af3   Li Zefan   cpuset: remove re...
2298
   * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2299
   *
300ed6cbb   Li Zefan   cpuset: convert c...
2300
   * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2301
   * attached to the specified @tsk.  Guaranteed to return some non-empty
5f054e31c   Rusty Russell   documentation: re...
2302
   * subset of cpu_online_mask, even if this means going outside the
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2303
2304
   * task's cpuset.
   **/
6af866af3   Li Zefan   cpuset: remove re...
2305
  void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2306
  {
8447a0fee   Vladimir Davydov   cpuset: convert c...
2307
2308
2309
  	unsigned long flags;
  
  	spin_lock_irqsave(&callback_lock, flags);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2310
  	rcu_read_lock();
ae1c80238   Li Zefan   cpuset: apply cs-...
2311
  	guarantee_online_cpus(task_cs(tsk), pmask);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2312
  	rcu_read_unlock();
8447a0fee   Vladimir Davydov   cpuset: convert c...
2313
  	spin_unlock_irqrestore(&callback_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2314
  }
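  /*
   * Editor's illustrative sketch: a hypothetical process-context caller
   * (foo_show_allowed_cpus) of cpuset_cpus_allowed() above.
   */
  static void foo_show_allowed_cpus(void)
  {
  	cpumask_var_t mask;
  
  	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
  		return;
  	cpuset_cpus_allowed(current, mask);
  	pr_info("cpuset-allowed CPUs: %*pbl\n", cpumask_pr_args(mask));
  	free_cpumask_var(mask);
  }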
2baab4e90   Peter Zijlstra   sched: Fix select...
2315
  void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
9084bb824   Oleg Nesterov   sched: Make selec...
2316
  {
9084bb824   Oleg Nesterov   sched: Make selec...
2317
  	rcu_read_lock();
ae1c80238   Li Zefan   cpuset: apply cs-...
2318
  	do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
9084bb824   Oleg Nesterov   sched: Make selec...
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
  	rcu_read_unlock();
  
  	/*
  	 * We own tsk->cpus_allowed, nobody can change it under us.
  	 *
  	 * But we used cs && cs->cpus_allowed lockless and thus can
  	 * race with cgroup_attach_task() or update_cpumask() and get
  	 * the wrong tsk->cpus_allowed. However, both cases imply the
  	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
  	 * which takes task_rq_lock().
  	 *
  	 * If we are called after it dropped the lock we must see all
  	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporarily
  	 * set any mask even if it is not right from task_cs() pov,
  	 * the pending set_cpus_allowed_ptr() will fix things.
2baab4e90   Peter Zijlstra   sched: Fix select...
2334
2335
2336
  	 *
  	 * select_fallback_rq() will fix things up and set cpu_possible_mask
  	 * if required.
9084bb824   Oleg Nesterov   sched: Make selec...
2337
  	 */
9084bb824   Oleg Nesterov   sched: Make selec...
2338
  }
8f4ab07f4   Rasmus Villemoes   kernel/cpuset.c: ...
2339
  void __init cpuset_init_current_mems_allowed(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2340
  {
f9a86fcbb   Mike Travis   cpuset: modify cp...
2341
  	nodes_setall(current->mems_allowed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2342
  }
d9fd8a6d4   Randy Dunlap   [PATCH] kernel/cp...
2343
  /**
909d75a3b   Paul Jackson   [PATCH] cpuset: i...
2344
2345
2346
2347
2348
   * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
   * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
   *
   * Description: Returns the nodemask_t mems_allowed of the cpuset
   * attached to the specified @tsk.  Guaranteed to return some non-empty
38d7bee9d   Lai Jiangshan   cpuset: use N_MEM...
2349
   * subset of node_states[N_MEMORY], even if this means going outside the
909d75a3b   Paul Jackson   [PATCH] cpuset: i...
2350
2351
2352
2353
2354
2355
   * task's cpuset.
   **/
  
  nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
  {
  	nodemask_t mask;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2356
  	unsigned long flags;
909d75a3b   Paul Jackson   [PATCH] cpuset: i...
2357

8447a0fee   Vladimir Davydov   cpuset: convert c...
2358
  	spin_lock_irqsave(&callback_lock, flags);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2359
  	rcu_read_lock();
ae1c80238   Li Zefan   cpuset: apply cs-...
2360
  	guarantee_online_mems(task_cs(tsk), &mask);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2361
  	rcu_read_unlock();
8447a0fee   Vladimir Davydov   cpuset: convert c...
2362
  	spin_unlock_irqrestore(&callback_lock, flags);
909d75a3b   Paul Jackson   [PATCH] cpuset: i...
2363
2364
2365
2366
2367
  
  	return mask;
  }
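  /*
   * Editor's illustrative sketch: a hypothetical caller
   * (foo_show_allowed_mems) of cpuset_mems_allowed() above.
   */
  static void foo_show_allowed_mems(void)
  {
  	nodemask_t mems = cpuset_mems_allowed(current);
  
  	pr_info("cpuset-allowed mems: %*pbl\n", nodemask_pr_args(&mems));
  }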
  
  /**
19770b326   Mel Gorman   mm: filter based ...
2368
2369
   * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
   * @nodemask: the nodemask to be checked
d9fd8a6d4   Randy Dunlap   [PATCH] kernel/cp...
2370
   *
19770b326   Mel Gorman   mm: filter based ...
2371
   * Are any of the nodes in the nodemask allowed in current->mems_allowed?
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2372
   */
19770b326   Mel Gorman   mm: filter based ...
2373
  int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2374
  {
19770b326   Mel Gorman   mm: filter based ...
2375
  	return nodes_intersects(*nodemask, current->mems_allowed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2376
  }
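  /*
   * Editor's sketch: a hypothetical validation helper (foo_check_nodes)
   * built on cpuset_nodemask_valid_mems_allowed() above, rejecting a
   * nodemask that does not intersect current->mems_allowed.
   */
  static int foo_check_nodes(nodemask_t *nodes)
  {
  	return cpuset_nodemask_valid_mems_allowed(nodes) ? 0 : -EINVAL;
  }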
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2377
  /*
786083667   Paul Menage   Cpuset hardwall f...
2378
2379
   * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
   * mem_hardwall ancestor to the specified cpuset.  Call holding
8447a0fee   Vladimir Davydov   cpuset: convert c...
2380
   * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
786083667   Paul Menage   Cpuset hardwall f...
2381
   * (an unusual configuration), then returns the root cpuset.
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2382
   */
c9710d801   Tejun Heo   cpuset: drop "con...
2383
  static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2384
  {
c431069fe   Tejun Heo   cpuset: remove cp...
2385
2386
  	while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
  		cs = parent_cs(cs);
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2387
2388
  	return cs;
  }
d9fd8a6d4   Randy Dunlap   [PATCH] kernel/cp...
2389
  /**
344736f29   Vladimir Davydov   cpuset: simplify ...
2390
   * cpuset_node_allowed - Can we allocate on a memory node?
a1bc5a4ee   David Rientjes   cpusets: replace ...
2391
   * @node: is this an allowed node?
02a0e53d8   Paul Jackson   [PATCH] cpuset: r...
2392
   * @gfp_mask: memory allocation flags
d9fd8a6d4   Randy Dunlap   [PATCH] kernel/cp...
2393
   *
6e276d2a5   David Rientjes   kernel, cpuset: r...
2394
2395
2396
2397
   * If we're in interrupt, yes, we can always allocate.  If @node is set in
   * current's mems_allowed, yes.  If it's not a __GFP_HARDWALL request and this
   * node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
   * yes.  If current has access to memory reserves due to TIF_MEMDIE, yes.
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2398
2399
2400
   * Otherwise, no.
   *
   * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
c596d9f32   David Rientjes   cpusets: allow TI...
2401
2402
   * and do not allow allocations outside the current task's cpuset
   * unless the task has been OOM killed and is marked TIF_MEMDIE.
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2403
   * GFP_KERNEL allocations are not so marked, so can escape to the
786083667   Paul Menage   Cpuset hardwall f...
2404
   * nearest enclosing hardwalled ancestor cpuset.
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2405
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
2406
   * Scanning up parent cpusets requires callback_lock.  The
02a0e53d8   Paul Jackson   [PATCH] cpuset: r...
2407
2408
2409
2410
   * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
   * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
   * current task's mems_allowed came up empty on the first pass over
   * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
8447a0fee   Vladimir Davydov   cpuset: convert c...
2411
   * cpuset are short of memory, might require taking the callback_lock.
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2412
   *
36be57ffe   Paul Jackson   [PATCH] cpuset: u...
2413
   * The first call here from mm/page_alloc:get_page_from_freelist()
02a0e53d8   Paul Jackson   [PATCH] cpuset: r...
2414
2415
2416
   * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
   * so no allocation on a node outside the cpuset is allowed (unless
   * in interrupt, of course).
36be57ffe   Paul Jackson   [PATCH] cpuset: u...
2417
2418
2419
2420
2421
2422
   *
   * The second pass through get_page_from_freelist() doesn't even call
   * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
   * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
   * in alloc_flags.  That logic and the checks below have the combined
   * effect that:
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2423
2424
   *	in_interrupt - any node ok (current task context irrelevant)
   *	GFP_ATOMIC   - any node ok
c596d9f32   David Rientjes   cpusets: allow TI...
2425
   *	TIF_MEMDIE   - any node ok
786083667   Paul Menage   Cpuset hardwall f...
2426
   *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2427
   *	GFP_USER     - only nodes in current task's mems_allowed ok.
02a0e53d8   Paul Jackson   [PATCH] cpuset: r...
2428
   */
002f29062   Vlastimil Babka   cpuset: use stati...
2429
  bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2430
  {
c9710d801   Tejun Heo   cpuset: drop "con...
2431
  	struct cpuset *cs;		/* current cpuset ancestors */
29afd49b7   Paul Jackson   [PATCH] cpuset: r...
2432
  	int allowed;			/* is allocation on this node allowed? */
8447a0fee   Vladimir Davydov   cpuset: convert c...
2433
  	unsigned long flags;
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2434

6e276d2a5   David Rientjes   kernel, cpuset: r...
2435
  	if (in_interrupt())
002f29062   Vlastimil Babka   cpuset: use stati...
2436
  		return true;
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2437
  	if (node_isset(node, current->mems_allowed))
002f29062   Vlastimil Babka   cpuset: use stati...
2438
  		return true;
c596d9f32   David Rientjes   cpusets: allow TI...
2439
2440
2441
2442
2443
  	/*
  	 * Allow tasks that have access to memory reserves because they have
  	 * been OOM killed to get memory anywhere.
  	 */
  	if (unlikely(test_thread_flag(TIF_MEMDIE)))
002f29062   Vlastimil Babka   cpuset: use stati...
2444
  		return true;
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2445
  	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
002f29062   Vlastimil Babka   cpuset: use stati...
2446
  		return false;
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2447

5563e7707   Bob Picco   [PATCH] cpuset: f...
2448
  	if (current->flags & PF_EXITING) /* Let dying task have memory */
002f29062   Vlastimil Babka   cpuset: use stati...
2449
  		return true;
5563e7707   Bob Picco   [PATCH] cpuset: f...
2450

9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2451
  	/* Not hardwall and node outside mems_allowed: scan up cpusets */
8447a0fee   Vladimir Davydov   cpuset: convert c...
2452
  	spin_lock_irqsave(&callback_lock, flags);
053199edf   Paul Jackson   [PATCH] cpusets: ...
2453

b8dadcb58   Li Zefan   cpuset: use rcu_r...
2454
  	rcu_read_lock();
786083667   Paul Menage   Cpuset hardwall f...
2455
  	cs = nearest_hardwall_ancestor(task_cs(current));
99afb0fd5   Li Zefan   cpuset: fix a rac...
2456
  	allowed = node_isset(node, cs->mems_allowed);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2457
  	rcu_read_unlock();
053199edf   Paul Jackson   [PATCH] cpusets: ...
2458

8447a0fee   Vladimir Davydov   cpuset: convert c...
2459
  	spin_unlock_irqrestore(&callback_lock, flags);
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
2460
  	return allowed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2461
  }
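  /*
   * Editor's sketch of the hardwall distinction documented above; the
   * helper foo_demo_hardwall() and its pr_info() are hypothetical, only
   * __cpuset_node_allowed() comes from this file.
   */
  static void foo_demo_hardwall(int nid)
  {
  	bool user_ok   = __cpuset_node_allowed(nid, GFP_USER);   /* __GFP_HARDWALL set */
  	bool kernel_ok = __cpuset_node_allowed(nid, GFP_KERNEL); /* may escape to a hardwalled ancestor */
  
  	pr_info("node %d: GFP_USER %d GFP_KERNEL %d\n", nid, user_ok, kernel_ok);
  }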
ef08e3b49   Paul Jackson   [PATCH] cpusets: ...
2462
  /**
6adef3ebe   Jack Steiner   cpusets: new roun...
2463
2464
   * cpuset_mem_spread_node() - On which node to begin search for a file page
   * cpuset_slab_spread_node() - On which node to begin search for a slab page
825a46af5   Paul Jackson   [PATCH] cpuset me...
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
   *
   * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
   * tasks in a cpuset with is_spread_page or is_spread_slab set),
   * and if the memory allocation used cpuset_mem_spread_node()
   * to determine on which node to start looking, as it will for
   * certain page cache or slab cache pages such as used for file
   * system buffers and inode caches, then instead of starting on the
   * local node to look for a free page, rather spread the starting
   * node around the task's mems_allowed nodes.
   *
   * We don't have to worry about the returned node being offline
   * because "it can't happen", and even if it did, it would be ok.
   *
   * The routines calling guarantee_online_mems() are careful to
   * only set nodes in task->mems_allowed that are online.  So it
   * should not be possible for the following code to return an
   * offline node.  But if it did, that would be ok, as this routine
   * is not returning the node where the allocation must be, only
   * the node where the search should start.  The zonelist passed to
   * __alloc_pages() will include all nodes.  If the slab allocator
   * is passed an offline node, it will fall back to the local node.
   * See kmem_cache_alloc_node().
   */
6adef3ebe   Jack Steiner   cpusets: new roun...
2488
  static int cpuset_spread_node(int *rotor)
825a46af5   Paul Jackson   [PATCH] cpuset me...
2489
  {
0edaf86cf   Andrew Morton   include/linux/nod...
2490
  	return *rotor = next_node_in(*rotor, current->mems_allowed);
825a46af5   Paul Jackson   [PATCH] cpuset me...
2491
  }
6adef3ebe   Jack Steiner   cpusets: new roun...
2492
2493
2494
  
  int cpuset_mem_spread_node(void)
  {
778d3b0ff   Michal Hocko   cpusets: randomiz...
2495
2496
2497
  	if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
  		current->cpuset_mem_spread_rotor =
  			node_random(&current->mems_allowed);
6adef3ebe   Jack Steiner   cpusets: new roun...
2498
2499
2500
2501
2502
  	return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
  }
  
  int cpuset_slab_spread_node(void)
  {
778d3b0ff   Michal Hocko   cpusets: randomiz...
2503
2504
2505
  	if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
  		current->cpuset_slab_spread_rotor =
  			node_random(&current->mems_allowed);
6adef3ebe   Jack Steiner   cpusets: new roun...
2506
2507
  	return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
  }
825a46af5   Paul Jackson   [PATCH] cpuset me...
2508
2509
2510
  EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
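  /*
   * Editor's sketch: how a page-cache style allocation could use the
   * rotor above to pick a starting node when the task spreads its pages
   * (PF_SPREAD_PAGE).  foo_alloc_spread_page() is hypothetical;
   * alloc_pages_node() is assumed from <linux/gfp.h>.
   */
  static struct page *foo_alloc_spread_page(gfp_t gfp)
  {
  	int nid = cpuset_mem_spread_node();
  
  	return alloc_pages_node(nid, gfp, 0);
  }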
  
  /**
bbe373f2c   David Rientjes   oom: compare cpus...
2511
2512
2513
2514
2515
2516
2517
2518
   * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
   * @tsk1: pointer to task_struct of some task.
   * @tsk2: pointer to task_struct of some other task.
   *
   * Description: Return true if @tsk1's mems_allowed intersects the
   * mems_allowed of @tsk2.  Used by the OOM killer to determine if
   * one of the task's memory usage might impact the memory available
   * to the other.
ef08e3b49   Paul Jackson   [PATCH] cpusets: ...
2519
   **/
bbe373f2c   David Rientjes   oom: compare cpus...
2520
2521
  int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
  				   const struct task_struct *tsk2)
ef08e3b49   Paul Jackson   [PATCH] cpusets: ...
2522
  {
bbe373f2c   David Rientjes   oom: compare cpus...
2523
  	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
ef08e3b49   Paul Jackson   [PATCH] cpusets: ...
2524
  }
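  /*
   * Editor's sketch: the OOM-killer style check that motivates the
   * helper above; foo_oom_may_kill() is hypothetical.
   */
  static bool foo_oom_may_kill(struct task_struct *victim)
  {
  	/* only kill tasks whose memory could relieve current's pressure */
  	return cpuset_mems_allowed_intersects(current, victim);
  }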
75aa19941   David Rientjes   oom: print trigge...
2525
  /**
da39da3a5   David Rientjes   mm, oom: remove t...
2526
   * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
75aa19941   David Rientjes   oom: print trigge...
2527
   *
da39da3a5   David Rientjes   mm, oom: remove t...
2528
   * Description: Prints current's name, cpuset name, and cached copy of its
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2529
   * mems_allowed to the kernel log.
75aa19941   David Rientjes   oom: print trigge...
2530
   */
da39da3a5   David Rientjes   mm, oom: remove t...
2531
  void cpuset_print_current_mems_allowed(void)
75aa19941   David Rientjes   oom: print trigge...
2532
  {
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2533
  	struct cgroup *cgrp;
75aa19941   David Rientjes   oom: print trigge...
2534

b8dadcb58   Li Zefan   cpuset: use rcu_r...
2535
  	rcu_read_lock();
63f43f55c   Li Zefan   cpuset: fix cpuse...
2536

da39da3a5   David Rientjes   mm, oom: remove t...
2537
2538
  	cgrp = task_cs(current)->css.cgroup;
  	pr_info("%s cpuset=", current->comm);
e61734c55   Tejun Heo   cgroup: remove cg...
2539
  	pr_cont_cgroup_name(cgrp);
da39da3a5   David Rientjes   mm, oom: remove t...
2540
2541
2542
  	pr_cont(" mems_allowed=%*pbl
  ",
  		nodemask_pr_args(&current->mems_allowed));
f440d98f8   Li Zefan   cpuset: use cgrou...
2543

cfb5966be   Li Zefan   cpuset: fix RCU l...
2544
  	rcu_read_unlock();
75aa19941   David Rientjes   oom: print trigge...
2545
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2546
  /*
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2547
2548
2549
2550
   * Collection of memory_pressure is suppressed unless
   * this flag is enabled by writing "1" to the special
   * cpuset file 'memory_pressure_enabled' in the root cpuset.
   */
c5b2aff89   Paul Jackson   [PATCH] cpuset: m...
2551
  int cpuset_memory_pressure_enabled __read_mostly;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
  
  /**
   * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
   *
   * Keep a running average of the rate of synchronous (direct)
   * page reclaim efforts initiated by tasks in each cpuset.
   *
   * This represents the rate at which some task in the cpuset
   * ran low on memory on all nodes it was allowed to use, and
   * had to enter the kernel's page reclaim code in an effort to
   * create more free memory by tossing clean pages or swapping
   * or writing dirty pages.
   *
   * Display to user space in the per-cpuset read-only file
   * "memory_pressure".  Value displayed is an integer
   * representing the recent rate of entry into the synchronous
   * (direct) page reclaim by any task attached to the cpuset.
   **/
  
  void __cpuset_memory_pressure_bump(void)
  {
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2573
  	rcu_read_lock();
8793d854e   Paul Menage   Task Control Grou...
2574
  	fmeter_markevent(&task_cs(current)->fmeter);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
2575
  	rcu_read_unlock();
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2576
  }
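  /*
   * Editor's sketch: a direct-reclaim path would bump the meter only
   * when the root cpuset's knob is enabled.  foo_enter_direct_reclaim()
   * is hypothetical; the flag and the bump helper are defined above.
   */
  static void foo_enter_direct_reclaim(void)
  {
  	if (cpuset_memory_pressure_enabled)
  		__cpuset_memory_pressure_bump();
  }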
8793d854e   Paul Menage   Task Control Grou...
2577
  #ifdef CONFIG_PROC_PID_CPUSET
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2578
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2579
2580
2581
   * proc_cpuset_show()
   *  - Print task's cpuset path into seq_file.
   *  - Used for /proc/<pid>/cpuset.
053199edf   Paul Jackson   [PATCH] cpusets: ...
2582
2583
   *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
   *    doesn't really matter if tsk->cpuset changes after we read it,
5d21cc2db   Tejun Heo   cpuset: replace c...
2584
   *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
2df167a30   Paul Menage   cgroups: update c...
2585
   *    anyway.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2586
   */
52de4779f   Zefan Li   cpuset: simplify ...
2587
2588
  int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
  		     struct pid *pid, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2589
  {
4c737b41d   Tejun Heo   cgroup: make cgro...
2590
  	char *buf;
8793d854e   Paul Menage   Task Control Grou...
2591
  	struct cgroup_subsys_state *css;
99f895518   Eric W. Biederman   [PATCH] proc: don...
2592
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593

99f895518   Eric W. Biederman   [PATCH] proc: don...
2594
  	retval = -ENOMEM;
e61734c55   Tejun Heo   cgroup: remove cg...
2595
  	buf = kmalloc(PATH_MAX, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2596
  	if (!buf)
99f895518   Eric W. Biederman   [PATCH] proc: don...
2597
  		goto out;
a79a908fd   Aditya Kali   cgroup: introduce...
2598
  	css = task_get_css(tsk, cpuset_cgrp_id);
4c737b41d   Tejun Heo   cgroup: make cgro...
2599
2600
  	retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
  				current->nsproxy->cgroup_ns);
a79a908fd   Aditya Kali   cgroup: introduce...
2601
  	css_put(css);
4c737b41d   Tejun Heo   cgroup: make cgro...
2602
  	if (retval >= PATH_MAX)
679a5e3f1   Tejun Heo   cpuset: fix error...
2603
2604
  		retval = -ENAMETOOLONG;
  	if (retval < 0)
52de4779f   Zefan Li   cpuset: simplify ...
2605
  		goto out_free;
4c737b41d   Tejun Heo   cgroup: make cgro...
2606
  	seq_puts(m, buf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2607
2608
  	seq_putc(m, '\n');
e61734c55   Tejun Heo   cgroup: remove cg...
2609
  	retval = 0;
99f895518   Eric W. Biederman   [PATCH] proc: don...
2610
  out_free:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2611
  	kfree(buf);
99f895518   Eric W. Biederman   [PATCH] proc: don...
2612
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2613
2614
  	return retval;
  }
8793d854e   Paul Menage   Task Control Grou...
2615
  #endif /* CONFIG_PROC_PID_CPUSET */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2616

d01d48278   Heiko Carstens   sched: Always sho...
2617
  /* Display task mems_allowed in /proc/<pid>/status file. */
df5f8314c   Eric W. Biederman   proc: seqfile con...
2618
2619
  void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
  {
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
2620
2621
2622
2623
2624
2625
  	seq_printf(m, "Mems_allowed:\t%*pb
  ",
  		   nodemask_pr_args(&task->mems_allowed));
  	seq_printf(m, "Mems_allowed_list:\t%*pbl
  ",
  		   nodemask_pr_args(&task->mems_allowed));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2626
  }