Blame view

kernel/cgroup/cpuset.c 101 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
  /*
   *  kernel/cpuset.c
   *
   *  Processor and Memory placement constraints for sets of tasks.
   *
   *  Copyright (C) 2003 BULL SA.
029190c51   Paul Jackson   cpuset sched_load...
7
   *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
8793d854e   Paul Menage   Task Control Grou...
8
   *  Copyright (C) 2006 Google, Inc
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
   *
   *  Portions derived from Patrick Mochel's sysfs code.
   *  sysfs is Copyright (c) 2001-3 Patrick Mochel
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
   *
825a46af5   Paul Jackson   [PATCH] cpuset me...
13
   *  2003-10-10 Written by Simon Derr.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
   *  2003-10-22 Updates by Stephen Hemminger.
825a46af5   Paul Jackson   [PATCH] cpuset me...
15
   *  2004 May-July Rework by Paul Jackson.
8793d854e   Paul Menage   Task Control Grou...
16
   *  2006 Rework by Paul Menage to use generic cgroups
cf417141c   Max Krasnyansky   sched, cpuset: re...
17
18
   *  2008 Rework of the scheduler domains and CPU hotplug handling
   *       by Max Krasnyansky
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
23
   *
   *  This file is subject to the terms and conditions of the GNU General Public
   *  License.  See the file COPYING in the main directory of the Linux
   *  distribution for more details.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
28
29
30
31
32
33
34
35
  #include <linux/cpu.h>
  #include <linux/cpumask.h>
  #include <linux/cpuset.h>
  #include <linux/err.h>
  #include <linux/errno.h>
  #include <linux/file.h>
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/interrupt.h>
  #include <linux/kernel.h>
  #include <linux/kmod.h>
  #include <linux/list.h>
68860ec10   Paul Jackson   [PATCH] cpusets: ...
36
  #include <linux/mempolicy.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
  #include <linux/mm.h>
f481891fd   Miao Xie   cpuset: update to...
38
  #include <linux/memory.h>
9984de1a5   Paul Gortmaker   kernel: Map most ...
39
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  #include <linux/mount.h>
a18753747   David Howells   cpuset: Use fs_co...
41
  #include <linux/fs_context.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
  #include <linux/namei.h>
  #include <linux/pagemap.h>
  #include <linux/proc_fs.h>
6b9c2603c   Paul Jackson   [PATCH] cpuset: u...
45
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46
  #include <linux/sched.h>
f9a25f776   Mathieu Poirier   cpusets: Rebuild ...
47
  #include <linux/sched/deadline.h>
6e84f3152   Ingo Molnar   sched/headers: Pr...
48
  #include <linux/sched/mm.h>
f719ff9bc   Ingo Molnar   sched/headers: Pr...
49
  #include <linux/sched/task.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50
  #include <linux/seq_file.h>
22fb52dd7   David Quigley   [PATCH] SELinux: ...
51
  #include <linux/security.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
55
56
  #include <linux/spinlock.h>
  #include <linux/stat.h>
  #include <linux/string.h>
  #include <linux/time.h>
d2b436580   Arnd Bergmann   cpuset: Replace a...
57
  #include <linux/time64.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
58
59
  #include <linux/backing-dev.h>
  #include <linux/sort.h>
da99ecf11   Michal Hocko   mm: replace TIF_M...
60
  #include <linux/oom.h>
edb938217   Frederic Weisbecker   sched/isolation: ...
61
  #include <linux/sched/isolation.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
62
  #include <linux/uaccess.h>
60063497a   Arun Sharma   atomic: use <linu...
63
  #include <linux/atomic.h>
3d3f26a7b   Ingo Molnar   [PATCH] kernel/cp...
64
  #include <linux/mutex.h>
956db3ca0   Cliff Wickman   hotplug cpu: move...
65
  #include <linux/cgroup.h>
e44193d39   Li Zefan   cpuset: let hotpl...
66
  #include <linux/wait.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67

4d1ac6a16   Satya Durga Srinivasu Prabhala   ANDROID: sched/cp...
68
  #include <trace/hooks/sched.h>
89affbf5d   Dima Zavin   cpuset: fix a dea...
69
  DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
002f29062   Vlastimil Babka   cpuset: use stati...
70
  DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
202f72d5d   Paul Jackson   [PATCH] cpuset: n...
71

3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
72
73
74
75
76
/*
 * See "Frequency meter" comments, below.  One fmeter is embedded in
 * each cpuset as its "memory_pressure filter" (see struct cpuset).
 */

struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time64_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and won't be limited by the
	 * parent masks.
	 *
	 * The effective masks is the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same with effective masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	/*
	 * ANDROID: CPUs exactly as requested by userspace.  This mask (not
	 * cpus_allowed) is what the subset/overlap checks in
	 * is_cpuset_subset() and validate_change() compare.
	 */
	cpumask_var_t cpus_requested;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * CPUs allocated to child sub-partitions (default hierarchy only)
	 * - CPUs granted by the parent = effective_cpus U subparts_cpus
	 * - effective_cpus and subparts_cpus are mutually exclusive.
	 *
	 * effective_cpus contains only onlined CPUs, but subparts_cpus
	 * may have offlined ones.
	 */
	cpumask_var_t subparts_cpus;

	/*
	 * This is old Memory Nodes tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;

	/* number of CPUs in subparts_cpus */
	int nr_subparts_cpus;

	/* partition root state (see PRS_* below) */
	int partition_root_state;

	/*
	 * Default hierarchy only:
	 * use_parent_ecpus - set if using parent's effective_cpus
	 * child_ecpus_count - # of children with use_parent_ecpus set
	 */
	int use_parent_ecpus;
	int child_ecpus_count;
};
  
/*
 * Partition root states (values of struct cpuset::partition_root_state):
 *
 *   0 - not a partition root
 *
 *   1 - partition root
 *
 *  -1 - invalid partition root
 *       None of the cpus in cpus_allowed can be put into the parent's
 *       subparts_cpus. In this case, the cpuset is not a real partition
 *       root anymore.  However, the CPU_EXCLUSIVE bit will still be set
 *       and the cpuset can be restored back to a partition root if the
 *       parent cpuset can give more CPUs back to this child cpuset.
 */
#define PRS_DISABLED		0
#define PRS_ENABLED		1
#define PRS_ERROR		-1
58b748425   Waiman Long   cpuset: Define da...
178
179
180
181
182
183
184
185
  
/*
 * Temporary cpumasks for working with partitions that are passed among
 * functions to avoid memory allocation in inner functions.
 * Allocated and freed via alloc_cpumasks()/free_cpumasks() below.
 */
struct tmpmasks {
	cpumask_var_t addmask, delmask;	/* For partition root */
	cpumask_var_t new_cpus;		/* For update_cpumasks_hier() */
};
a7c6d554a   Tejun Heo   cgroup: add/updat...
187
  static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
8793d854e   Paul Menage   Task Control Grou...
188
  {
a7c6d554a   Tejun Heo   cgroup: add/updat...
189
  	return css ? container_of(css, struct cpuset, css) : NULL;
8793d854e   Paul Menage   Task Control Grou...
190
191
192
193
194
  }
  
  /* Retrieve the cpuset for a task */
  static inline struct cpuset *task_cs(struct task_struct *task)
  {
073219e99   Tejun Heo   cgroup: clean up ...
195
  	return css_cs(task_css(task, cpuset_cgrp_id));
8793d854e   Paul Menage   Task Control Grou...
196
  }
8793d854e   Paul Menage   Task Control Grou...
197

c9710d801   Tejun Heo   cpuset: drop "con...
198
/* Return the parent cpuset of @cs, or NULL for the top cpuset. */
static inline struct cpuset *parent_cs(struct cpuset *cs)
{
	return css_cs(cs->css.parent);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
/* bits in struct cpuset flags field */
typedef enum {
	CS_ONLINE,		/* set while online; tested by is_cpuset_online() */
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;
  
/* convenient tests for these bits */
/* A cpuset is online iff CS_ONLINE is set and its css is not being destroyed. */
static inline bool is_cpuset_online(struct cpuset *cs)
{
	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219
220
/* Is the CS_CPU_EXCLUSIVE flag set on @cs? */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}
  
/* Is the CS_MEM_EXCLUSIVE flag set on @cs? */
static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}
786083667   Paul Menage   Cpuset hardwall f...
228
229
230
231
/* Is the CS_MEM_HARDWALL flag set on @cs? */
static inline int is_mem_hardwall(const struct cpuset *cs)
{
	return test_bit(CS_MEM_HARDWALL, &cs->flags);
}
029190c51   Paul Jackson   cpuset sched_load...
232
233
234
235
/* Is the CS_SCHED_LOAD_BALANCE flag set on @cs? */
static inline int is_sched_load_balance(const struct cpuset *cs)
{
	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
236
237
/* Is the CS_MEMORY_MIGRATE flag set on @cs? */
static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}
825a46af5   Paul Jackson   [PATCH] cpuset me...
240
241
242
243
244
245
246
247
248
/* Is the CS_SPREAD_PAGE flag set on @cs? */
static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}
  
/* Is the CS_SPREAD_SLAB flag set on @cs? */
static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}
58b748425   Waiman Long   cpuset: Define da...
249
250
/* A cpuset is a partition root iff its state is positive (PRS_ENABLED). */
static inline int is_partition_root(const struct cpuset *cs)
{
	return cs->partition_root_state > 0;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
253
/*
 * The static root of the cpuset hierarchy: marked online, CPU- and
 * memory-exclusive, and an enabled partition root.
 */
static struct cpuset top_cpuset = {
	.flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
		  (1 << CS_MEM_EXCLUSIVE)),
	.partition_root_state = PRS_ENABLED,
};
ae8086ce1   Tejun Heo   cpuset: introduce...
258
259
260
/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.  Children that are not online (see
 * is_cpuset_online()) are silently skipped.
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
	css_for_each_child((pos_css), &(parent_cs)->css)		\
		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
ae8086ce1   Tejun Heo   cpuset: introduce...
270

fc560a26a   Tejun Heo   cpuset: replace c...
271
272
273
/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk ancestor of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
 * iteration and the first node to be visited.  Offline cpusets are
 * silently skipped (see is_cpuset_online()).
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
fc560a26a   Tejun Heo   cpuset: replace c...
285

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
  /*
8447a0fee   Vladimir Davydov   cpuset: convert c...
287
288
289
290
   * There are two global locks guarding cpuset structures - cpuset_mutex and
   * callback_lock. We also require taking task_lock() when dereferencing a
   * task's cpuset pointer. See "The task_lock() exception", at the end of this
   * comment.
5d21cc2db   Tejun Heo   cpuset: replace c...
291
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
292
   * A task must hold both locks to modify cpusets.  If a task holds
5d21cc2db   Tejun Heo   cpuset: replace c...
293
   * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
8447a0fee   Vladimir Davydov   cpuset: convert c...
294
   * is the only task able to also acquire callback_lock and be able to
5d21cc2db   Tejun Heo   cpuset: replace c...
295
296
297
   * modify cpusets.  It can perform various checks on the cpuset structure
   * first, knowing nothing will change.  It can also allocate memory while
   * just holding cpuset_mutex.  While it is performing these checks, various
8447a0fee   Vladimir Davydov   cpuset: convert c...
298
299
   * callback routines can briefly acquire callback_lock to query cpusets.
   * Once it is ready to make the changes, it takes callback_lock, blocking
5d21cc2db   Tejun Heo   cpuset: replace c...
300
   * everyone else.
053199edf   Paul Jackson   [PATCH] cpusets: ...
301
302
   *
   * Calls to the kernel memory allocator can not be made while holding
8447a0fee   Vladimir Davydov   cpuset: convert c...
303
   * callback_lock, as that would risk double tripping on callback_lock
053199edf   Paul Jackson   [PATCH] cpusets: ...
304
305
306
   * from one of the callbacks into the cpuset code from within
   * __alloc_pages().
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
307
   * If a task is only holding callback_lock, then it has read-only
053199edf   Paul Jackson   [PATCH] cpusets: ...
308
309
   * access to cpusets.
   *
58568d2a8   Miao Xie   cpuset,mm: update...
310
311
312
   * Now, the task_struct fields mems_allowed and mempolicy may be changed
   * by other task, we use alloc_lock in the task_struct fields to protect
   * them.
053199edf   Paul Jackson   [PATCH] cpusets: ...
313
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
314
   * The cpuset_common_file_read() handlers only hold callback_lock across
053199edf   Paul Jackson   [PATCH] cpusets: ...
315
316
317
   * small pieces of code, such as when reading out possibly multi-word
   * cpumasks and nodemasks.
   *
2df167a30   Paul Menage   cgroups: update c...
318
319
   * Accessing a task's cpuset should be done in accordance with the
   * guidelines for accessing subsystem state in kernel/cgroup.c
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
320
   */
18695a120   Stephen Dickey   Revert "cgroup/cp...
321
/* Serializes cpuset modifications; see the locking comment above. */
static DEFINE_MUTEX(cpuset_mutex);
/* Briefly held by callback paths to query/update cpusets; see above. */
static DEFINE_SPINLOCK(callback_lock);

/*
 * Workqueue used for mm migration; presumably consumed by
 * cpuset_migrate_mm() — defined outside this chunk, verify there.
 */
static struct workqueue_struct *cpuset_migrate_mm_wq;

/*
 * CPU / memory hotplug is handled asynchronously
 * for hotplug, synchronously for resume_cpus
 */
static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

/*
 * NOTE(review): presumably paired with cpuset::attach_in_progress to let
 * waiters sleep until an attach completes — confirm against attach code.
 */
static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
331
/*
 * Cgroup v2 behavior is used on the "cpus" and "mems" control files when
 * on default hierarchy or when the cpuset_v2_mode flag is set by mounting
 * the v1 cpuset cgroup filesystem with the "cpuset_v2_mode" mount option.
 * With v2 behavior, "cpus" and "mems" are always what the users have
 * requested and won't be changed by hotplug events. Only the effective
 * cpus or mems will be affected.
 *
 * Return: true when v2 semantics apply to "cpus"/"mems".
 */
static inline bool is_in_v2_mode(void)
{
	return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
	      (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
}
  
/*
 * Return in pmask the portion of a cpusets's effective_cpus that
 * are active.  (ANDROID change: checks cpu_active_mask rather than
 * cpu_online_mask.)  If none are active, walk up the cpuset hierarchy
 * until we find one that does have some active cpus.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_active_mask.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
	while (!cpumask_intersects(cs->effective_cpus, cpu_active_mask)) {
		cs = parent_cs(cs);
		if (unlikely(!cs)) {
			/*
			 * The top cpuset doesn't have any online cpu as a
			 * consequence of a race between cpuset_hotplug_work
			 * and cpu hotplug notifier.  But we know the top
			 * cpuset's effective_cpus is on its way to be
			 * identical to cpu_online_mask.
			 */
			cpumask_copy(pmask, cpu_active_mask);
			return;
		}
	}
	cpumask_and(pmask, cs->effective_cpus, cpu_active_mask);
}
  
/*
 * Return in *pmask the portion of a cpusets's effective_mems that
 * are online, with memory.  If none are online with memory, walk
 * up the cpuset hierarchy until we find one that does have some
 * online mems.  The top cpuset always has some mems online.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_states[N_MEMORY].
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{
	while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
		cs = parent_cs(cs);
	nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
}
f3b39d47e   Miao Xie   cpusets: restruct...
391
392
393
/*
 * update task's spread flag if cpuset's page/slab spread flag is set
 *
 * Propagates CS_SPREAD_PAGE/CS_SPREAD_SLAB from @cs to @tsk's
 * per-task spread flags, clearing them when the cpuset flag is clear.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void cpuset_update_task_spread_flag(struct cpuset *cs,
					struct task_struct *tsk)
{
	if (is_spread_page(cs))
		task_set_spread_page(tsk);
	else
		task_clear_spread_page(tsk);
	if (is_spread_slab(cs))
		task_set_spread_slab(tsk);
	else
		task_clear_spread_slab(tsk);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408
409
410
411
412
/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * One cpuset is a subset of another if all its allowed CPUs and
 * Memory Nodes are a subset of the other, and its exclusive flags
 * are only set if the other's are set.  Call holding cpuset_mutex.
 *
 * Note (ANDROID): the CPU comparison is done on cpus_requested,
 * not cpus_allowed.
 */

static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
	return	cpumask_subset(p->cpus_requested, q->cpus_requested) &&
		nodes_subset(p->mems_allowed, q->mems_allowed) &&
		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
		is_mem_exclusive(p) <= is_mem_exclusive(q);
}
645fcc9d2   Li Zefan   cpuset: don't all...
423
/**
 * alloc_cpumasks - allocate cpumasks for a cpuset or a tmpmasks structure
 * @cs:  the cpuset that have cpumasks to be allocated.
 * @tmp: the tmpmasks structure pointer
 * Return: 0 if successful, -ENOMEM otherwise.
 *
 * Only one of the two input arguments should be non-NULL.
 * When @cs is non-NULL a fourth mask, cpus_requested (ANDROID), is
 * allocated as well; on any failure everything allocated so far is freed.
 */
static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
	cpumask_var_t *pmask1, *pmask2, *pmask3;

	if (cs) {
		pmask1 = &cs->cpus_allowed;
		pmask2 = &cs->effective_cpus;
		pmask3 = &cs->subparts_cpus;
	} else {
		pmask1 = &tmp->new_cpus;
		pmask2 = &tmp->addmask;
		pmask3 = &tmp->delmask;
	}

	if (!zalloc_cpumask_var(pmask1, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(pmask2, GFP_KERNEL))
		goto free_one;

	if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
		goto free_two;

	if (cs && !zalloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
		goto free_three;

	return 0;

free_three:
	free_cpumask_var(*pmask3);
free_two:
	free_cpumask_var(*pmask2);
free_one:
	free_cpumask_var(*pmask1);
	return -ENOMEM;
}
  
/**
 * free_cpumasks - free cpumasks in a cpuset and/or a tmpmasks structure
 * @cs:  the cpuset whose cpumasks are to be freed (may be NULL)
 * @tmp: the tmpmasks structure pointer (may be NULL)
 *
 * Counterpart of alloc_cpumasks(); also frees the ANDROID
 * cpus_requested mask for @cs.
 */
static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
	if (cs) {
		free_cpumask_var(cs->cpus_allowed);
		free_cpumask_var(cs->cpus_requested);
		free_cpumask_var(cs->effective_cpus);
		free_cpumask_var(cs->subparts_cpus);
	}
	if (tmp) {
		free_cpumask_var(tmp->new_cpus);
		free_cpumask_var(tmp->addmask);
		free_cpumask_var(tmp->delmask);
	}
}
  
/**
 * alloc_trial_cpuset - allocate a trial cpuset
 * @cs: the cpuset that the trial cpuset duplicates
 *
 * Return: a heap-allocated copy of @cs with freshly allocated cpumasks
 * (cpus_allowed, cpus_requested, effective_cpus) copied from @cs, or
 * NULL on allocation failure.  Free with free_cpuset().
 */
static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
{
	struct cpuset *trial;

	trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
	if (!trial)
		return NULL;

	if (alloc_cpumasks(trial, NULL)) {
		kfree(trial);
		return NULL;
	}

	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
	cpumask_copy(trial->cpus_requested, cs->cpus_requested);
	cpumask_copy(trial->effective_cpus, cs->effective_cpus);
	return trial;
}
  
/**
 * free_cpuset - free the cpuset
 * @cs: the cpuset to be freed
 *
 * Releases the embedded cpumasks first, then the structure itself.
 */
static inline void free_cpuset(struct cpuset *cs)
{
	free_cpumasks(cs, NULL);
	kfree(cs);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
517
518
519
520
521
522
  /*
   * validate_change() - Used to validate that any proposed cpuset change
   *		       follows the structural rules for cpusets.
   *
   * If we replaced the flag and mask values of the current cpuset
   * (cur) with those values in the trial cpuset (trial), would
   * our various subset and exclusive rules still be valid?  Presumes
5d21cc2db   Tejun Heo   cpuset: replace c...
523
   * cpuset_mutex held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
524
525
526
527
528
529
530
531
532
533
534
   *
   * 'cur' is the address of an actual, in-use cpuset.  Operations
   * such as list traversal that depend on the actual address of the
   * cpuset in the list must use cur below, not trial.
   *
   * 'trial' is the address of bulk structure copy of cur, with
   * perhaps one or more of the fields cpus_allowed, mems_allowed,
   * or flags changed to new, trial values.
   *
   * Return 0 if valid, -errno if not.
   */
c9710d801   Tejun Heo   cpuset: drop "con...
535
  static int validate_change(struct cpuset *cur, struct cpuset *trial)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
536
  {
492eb21b9   Tejun Heo   cgroup: make hier...
537
  	struct cgroup_subsys_state *css;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
538
  	struct cpuset *c, *par;
ae8086ce1   Tejun Heo   cpuset: introduce...
539
540
541
  	int ret;
  
  	rcu_read_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542
543
  
  	/* Each of our child cpusets must be a subset of us */
ae8086ce1   Tejun Heo   cpuset: introduce...
544
  	ret = -EBUSY;
492eb21b9   Tejun Heo   cgroup: make hier...
545
  	cpuset_for_each_child(c, css, cur)
ae8086ce1   Tejun Heo   cpuset: introduce...
546
547
  		if (!is_cpuset_subset(c, trial))
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
548
549
  
  	/* Remaining checks don't apply to root cpuset */
ae8086ce1   Tejun Heo   cpuset: introduce...
550
  	ret = 0;
696040670   Paul Jackson   [PATCH] cpuset: m...
551
  	if (cur == &top_cpuset)
ae8086ce1   Tejun Heo   cpuset: introduce...
552
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
553

c431069fe   Tejun Heo   cpuset: remove cp...
554
  	par = parent_cs(cur);
696040670   Paul Jackson   [PATCH] cpuset: m...
555

7e88291be   Li Zefan   cpuset: make cs->...
556
  	/* On legacy hiearchy, we must be a subset of our parent cpuset. */
ae8086ce1   Tejun Heo   cpuset: introduce...
557
  	ret = -EACCES;
b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
558
  	if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
ae8086ce1   Tejun Heo   cpuset: introduce...
559
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
560

2df167a30   Paul Menage   cgroups: update c...
561
562
563
564
  	/*
  	 * If either I or some sibling (!= me) is exclusive, we can't
  	 * overlap
  	 */
ae8086ce1   Tejun Heo   cpuset: introduce...
565
  	ret = -EINVAL;
492eb21b9   Tejun Heo   cgroup: make hier...
566
  	cpuset_for_each_child(c, css, par) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567
568
  		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
  		    c != cur &&
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
569
  		    cpumask_intersects(trial->cpus_requested, c->cpus_requested))
ae8086ce1   Tejun Heo   cpuset: introduce...
570
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
572
573
  		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
  		    c != cur &&
  		    nodes_intersects(trial->mems_allowed, c->mems_allowed))
ae8086ce1   Tejun Heo   cpuset: introduce...
574
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
  	}
452477fa6   Tejun Heo   cpuset: pin down ...
576
577
  	/*
  	 * Cpusets with tasks - existing or newly being attached - can't
1c09b195d   Li Zefan   cpuset: fix a reg...
578
  	 * be changed to have empty cpus_allowed or mems_allowed.
452477fa6   Tejun Heo   cpuset: pin down ...
579
  	 */
ae8086ce1   Tejun Heo   cpuset: introduce...
580
  	ret = -ENOSPC;
27bd4dbb8   Tejun Heo   cgroup: replace c...
581
  	if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
1c09b195d   Li Zefan   cpuset: fix a reg...
582
583
584
585
586
587
588
  		if (!cpumask_empty(cur->cpus_allowed) &&
  		    cpumask_empty(trial->cpus_allowed))
  			goto out;
  		if (!nodes_empty(cur->mems_allowed) &&
  		    nodes_empty(trial->mems_allowed))
  			goto out;
  	}
020958b62   Paul Jackson   cpusets: decrusti...
589

f82f80426   Juri Lelli   sched/deadline: E...
590
591
592
593
594
595
596
597
598
  	/*
  	 * We can't shrink if we won't have enough room for SCHED_DEADLINE
  	 * tasks.
  	 */
  	ret = -EBUSY;
  	if (is_cpu_exclusive(cur) &&
  	    !cpuset_cpumask_can_shrink(cur->cpus_allowed,
  				       trial->cpus_allowed))
  		goto out;
ae8086ce1   Tejun Heo   cpuset: introduce...
599
600
601
602
  	ret = 0;
  out:
  	rcu_read_unlock();
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
603
  }
db7f47cf4   Paul Menage   cpusets: allow cp...
604
  #ifdef CONFIG_SMP
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
605
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
606
   * Helper routine for generate_sched_domains().
8b5f1c52d   Li Zefan   cpuset: use effec...
607
   * Do cpusets a, b have overlapping effective cpus_allowed masks?
029190c51   Paul Jackson   cpuset sched_load...
608
   */
029190c51   Paul Jackson   cpuset sched_load...
609
610
  static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
  {
8b5f1c52d   Li Zefan   cpuset: use effec...
611
  	return cpumask_intersects(a->effective_cpus, b->effective_cpus);
029190c51   Paul Jackson   cpuset sched_load...
612
  }
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
613
614
615
  static void
  update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  {
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
616
617
618
619
  	if (dattr->relax_domain_level < c->relax_domain_level)
  		dattr->relax_domain_level = c->relax_domain_level;
  	return;
  }
fc560a26a   Tejun Heo   cpuset: replace c...
620
621
  static void update_domain_attr_tree(struct sched_domain_attr *dattr,
  				    struct cpuset *root_cs)
f5393693e   Lai Jiangshan   cpuset: speed up ...
622
  {
fc560a26a   Tejun Heo   cpuset: replace c...
623
  	struct cpuset *cp;
492eb21b9   Tejun Heo   cgroup: make hier...
624
  	struct cgroup_subsys_state *pos_css;
f5393693e   Lai Jiangshan   cpuset: speed up ...
625

fc560a26a   Tejun Heo   cpuset: replace c...
626
  	rcu_read_lock();
492eb21b9   Tejun Heo   cgroup: make hier...
627
  	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
fc560a26a   Tejun Heo   cpuset: replace c...
628
629
  		/* skip the whole subtree if @cp doesn't have any CPU */
  		if (cpumask_empty(cp->cpus_allowed)) {
492eb21b9   Tejun Heo   cgroup: make hier...
630
  			pos_css = css_rightmost_descendant(pos_css);
f5393693e   Lai Jiangshan   cpuset: speed up ...
631
  			continue;
fc560a26a   Tejun Heo   cpuset: replace c...
632
  		}
f5393693e   Lai Jiangshan   cpuset: speed up ...
633
634
635
  
  		if (is_sched_load_balance(cp))
  			update_domain_attr(dattr, cp);
f5393693e   Lai Jiangshan   cpuset: speed up ...
636
  	}
fc560a26a   Tejun Heo   cpuset: replace c...
637
  	rcu_read_unlock();
f5393693e   Lai Jiangshan   cpuset: speed up ...
638
  }
be040bea9   Paolo Bonzini   cpuset: Make nr_c...
639
640
641
642
643
644
  /* Must be called with cpuset_mutex held.  */
  static inline int nr_cpusets(void)
  {
  	/* jump label reference count + the top-level cpuset */
  	return static_key_count(&cpusets_enabled_key.key) + 1;
  }
029190c51   Paul Jackson   cpuset sched_load...
645
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
646
647
648
649
650
   * generate_sched_domains()
   *
   * This function builds a partial partition of the systems CPUs
   * A 'partial partition' is a set of non-overlapping subsets whose
   * union is a subset of that set.
0a0fca9d8   Viresh Kumar   sched: Rename sch...
651
   * The output of this function needs to be passed to kernel/sched/core.c
cf417141c   Max Krasnyansky   sched, cpuset: re...
652
653
654
   * partition_sched_domains() routine, which will rebuild the scheduler's
   * load balancing domains (sched domains) as specified by that partial
   * partition.
029190c51   Paul Jackson   cpuset sched_load...
655
   *
da82c92f1   Mauro Carvalho Chehab   docs: cgroup-v1: ...
656
   * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
029190c51   Paul Jackson   cpuset sched_load...
657
658
659
660
661
662
663
   * for a background explanation of this.
   *
   * Does not return errors, on the theory that the callers of this
   * routine would rather not worry about failures to rebuild sched
   * domains when operating in the severe memory shortage situations
   * that could cause allocation failures below.
   *
5d21cc2db   Tejun Heo   cpuset: replace c...
664
   * Must be called with cpuset_mutex held.
029190c51   Paul Jackson   cpuset sched_load...
665
666
   *
   * The three key local variables below are:
b6fbbf31d   Juri Lelli   cgroup/cpuset: Up...
667
668
669
670
   *    cp - cpuset pointer, used (together with pos_css) to perform a
   *	   top-down scan of all cpusets. For our purposes, rebuilding
   *	   the schedulers sched domains, we can ignore !is_sched_load_
   *	   balance cpusets.
029190c51   Paul Jackson   cpuset sched_load...
671
672
673
674
675
676
677
678
   *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
   *	   that need to be load balanced, for convenient iterative
   *	   access by the subsequent code that finds the best partition,
   *	   i.e the set of domains (subsets) of CPUs such that the
   *	   cpus_allowed of every cpuset marked is_sched_load_balance
   *	   is a subset of one of these domains, while there are as
   *	   many such domains as possible, each as small as possible.
   * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
0a0fca9d8   Viresh Kumar   sched: Rename sch...
679
   *	   the kernel/sched/core.c routine partition_sched_domains() in a
029190c51   Paul Jackson   cpuset sched_load...
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
   *	   convenient format, that can be easily compared to the prior
   *	   value to determine what partition elements (sched domains)
   *	   were changed (added or removed.)
   *
   * Finding the best partition (set of domains):
   *	The triple nested loops below over i, j, k scan over the
   *	load balanced cpusets (using the array of cpuset pointers in
   *	csa[]) looking for pairs of cpusets that have overlapping
   *	cpus_allowed, but which don't have the same 'pn' partition
   *	number and gives them in the same partition number.  It keeps
   *	looping on the 'restart' label until it can no longer find
   *	any such pairs.
   *
   *	The union of the cpus_allowed masks from the set of
   *	all cpusets having the same 'pn' value then form the one
   *	element of the partition (one sched domain) to be passed to
   *	partition_sched_domains().
   */
acc3f5d7c   Rusty Russell   cpumask: Partitio...
698
  static int generate_sched_domains(cpumask_var_t **domains,
cf417141c   Max Krasnyansky   sched, cpuset: re...
699
  			struct sched_domain_attr **attributes)
029190c51   Paul Jackson   cpuset sched_load...
700
  {
b6fbbf31d   Juri Lelli   cgroup/cpuset: Up...
701
  	struct cpuset *cp;	/* top-down scan of cpusets */
029190c51   Paul Jackson   cpuset sched_load...
702
703
704
  	struct cpuset **csa;	/* array of all cpuset ptrs */
  	int csn;		/* how many cpuset ptrs in csa so far */
  	int i, j, k;		/* indices for partition finding loops */
acc3f5d7c   Rusty Russell   cpumask: Partitio...
705
  	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
706
  	struct sched_domain_attr *dattr;  /* attributes for custom domains */
1583715dd   Ingo Molnar   sched, cpusets: f...
707
  	int ndoms = 0;		/* number of sched domains in result */
6af866af3   Li Zefan   cpuset: remove re...
708
  	int nslot;		/* next empty doms[] struct cpumask slot */
492eb21b9   Tejun Heo   cgroup: make hier...
709
  	struct cgroup_subsys_state *pos_css;
0ccea8feb   Waiman Long   cpuset: Make gene...
710
  	bool root_load_balance = is_sched_load_balance(&top_cpuset);
029190c51   Paul Jackson   cpuset sched_load...
711

029190c51   Paul Jackson   cpuset sched_load...
712
  	doms = NULL;
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
713
  	dattr = NULL;
cf417141c   Max Krasnyansky   sched, cpuset: re...
714
  	csa = NULL;
029190c51   Paul Jackson   cpuset sched_load...
715
716
  
  	/* Special case for the 99% of systems with one, full, sched domain */
0ccea8feb   Waiman Long   cpuset: Make gene...
717
  	if (root_load_balance && !top_cpuset.nr_subparts_cpus) {
acc3f5d7c   Rusty Russell   cpumask: Partitio...
718
719
  		ndoms = 1;
  		doms = alloc_sched_domains(ndoms);
029190c51   Paul Jackson   cpuset sched_load...
720
  		if (!doms)
cf417141c   Max Krasnyansky   sched, cpuset: re...
721
  			goto done;
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
722
723
724
  		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
  		if (dattr) {
  			*dattr = SD_ATTR_INIT;
93a655755   Li Zefan   cpuset: fix wrong...
725
  			update_domain_attr_tree(dattr, &top_cpuset);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
726
  		}
47b8ea718   Rik van Riel   cpusets, isolcpus...
727
  		cpumask_and(doms[0], top_cpuset.effective_cpus,
edb938217   Frederic Weisbecker   sched/isolation: ...
728
  			    housekeeping_cpumask(HK_FLAG_DOMAIN));
cf417141c   Max Krasnyansky   sched, cpuset: re...
729

cf417141c   Max Krasnyansky   sched, cpuset: re...
730
  		goto done;
029190c51   Paul Jackson   cpuset sched_load...
731
  	}
6da2ec560   Kees Cook   treewide: kmalloc...
732
  	csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL);
029190c51   Paul Jackson   cpuset sched_load...
733
734
735
  	if (!csa)
  		goto done;
  	csn = 0;
fc560a26a   Tejun Heo   cpuset: replace c...
736
  	rcu_read_lock();
0ccea8feb   Waiman Long   cpuset: Make gene...
737
738
  	if (root_load_balance)
  		csa[csn++] = &top_cpuset;
492eb21b9   Tejun Heo   cgroup: make hier...
739
  	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
bd8815a6d   Tejun Heo   cgroup: make css_...
740
741
  		if (cp == &top_cpuset)
  			continue;
f5393693e   Lai Jiangshan   cpuset: speed up ...
742
  		/*
fc560a26a   Tejun Heo   cpuset: replace c...
743
744
745
746
747
748
  		 * Continue traversing beyond @cp iff @cp has some CPUs and
  		 * isn't load balancing.  The former is obvious.  The
  		 * latter: All child cpusets contain a subset of the
  		 * parent's cpus, so just skip them, and then we call
  		 * update_domain_attr_tree() to calc relax_domain_level of
  		 * the corresponding sched domain.
0ccea8feb   Waiman Long   cpuset: Make gene...
749
750
751
  		 *
  		 * If root is load-balancing, we can skip @cp if it
  		 * is a subset of the root's effective_cpus.
f5393693e   Lai Jiangshan   cpuset: speed up ...
752
  		 */
fc560a26a   Tejun Heo   cpuset: replace c...
753
  		if (!cpumask_empty(cp->cpus_allowed) &&
47b8ea718   Rik van Riel   cpusets, isolcpus...
754
  		    !(is_sched_load_balance(cp) &&
edb938217   Frederic Weisbecker   sched/isolation: ...
755
756
  		      cpumask_intersects(cp->cpus_allowed,
  					 housekeeping_cpumask(HK_FLAG_DOMAIN))))
f5393693e   Lai Jiangshan   cpuset: speed up ...
757
  			continue;
489a5393a   Lai Jiangshan   cpuset: don't pas...
758

0ccea8feb   Waiman Long   cpuset: Make gene...
759
760
761
  		if (root_load_balance &&
  		    cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
  			continue;
cd1cb3350   Valentin Schneider   sched/topology: D...
762
763
  		if (is_sched_load_balance(cp) &&
  		    !cpumask_empty(cp->effective_cpus))
fc560a26a   Tejun Heo   cpuset: replace c...
764
  			csa[csn++] = cp;
0ccea8feb   Waiman Long   cpuset: Make gene...
765
766
767
  		/* skip @cp's subtree if not a partition root */
  		if (!is_partition_root(cp))
  			pos_css = css_rightmost_descendant(pos_css);
fc560a26a   Tejun Heo   cpuset: replace c...
768
769
  	}
  	rcu_read_unlock();
029190c51   Paul Jackson   cpuset sched_load...
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
  
  	for (i = 0; i < csn; i++)
  		csa[i]->pn = i;
  	ndoms = csn;
  
  restart:
  	/* Find the best partition (set of sched domains) */
  	for (i = 0; i < csn; i++) {
  		struct cpuset *a = csa[i];
  		int apn = a->pn;
  
  		for (j = 0; j < csn; j++) {
  			struct cpuset *b = csa[j];
  			int bpn = b->pn;
  
  			if (apn != bpn && cpusets_overlap(a, b)) {
  				for (k = 0; k < csn; k++) {
  					struct cpuset *c = csa[k];
  
  					if (c->pn == bpn)
  						c->pn = apn;
  				}
  				ndoms--;	/* one less element */
  				goto restart;
  			}
  		}
  	}
cf417141c   Max Krasnyansky   sched, cpuset: re...
797
798
799
800
  	/*
  	 * Now we know how many domains to create.
  	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
  	 */
acc3f5d7c   Rusty Russell   cpumask: Partitio...
801
  	doms = alloc_sched_domains(ndoms);
700018e0a   Li Zefan   cpuset: fix regre...
802
  	if (!doms)
cf417141c   Max Krasnyansky   sched, cpuset: re...
803
  		goto done;
cf417141c   Max Krasnyansky   sched, cpuset: re...
804
805
806
807
808
  
  	/*
  	 * The rest of the code, including the scheduler, can deal with
  	 * dattr==NULL case. No need to abort if alloc fails.
  	 */
6da2ec560   Kees Cook   treewide: kmalloc...
809
810
  	dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr),
  			      GFP_KERNEL);
029190c51   Paul Jackson   cpuset sched_load...
811
812
813
  
  	for (nslot = 0, i = 0; i < csn; i++) {
  		struct cpuset *a = csa[i];
6af866af3   Li Zefan   cpuset: remove re...
814
  		struct cpumask *dp;
029190c51   Paul Jackson   cpuset sched_load...
815
  		int apn = a->pn;
cf417141c   Max Krasnyansky   sched, cpuset: re...
816
817
818
819
  		if (apn < 0) {
  			/* Skip completed partitions */
  			continue;
  		}
acc3f5d7c   Rusty Russell   cpumask: Partitio...
820
  		dp = doms[nslot];
cf417141c   Max Krasnyansky   sched, cpuset: re...
821
822
823
824
  
  		if (nslot == ndoms) {
  			static int warnings = 10;
  			if (warnings) {
12d3089c1   Fabian Frederick   kernel/cpuset.c: ...
825
826
827
  				pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d
  ",
  					nslot, ndoms, csn, i, apn);
cf417141c   Max Krasnyansky   sched, cpuset: re...
828
  				warnings--;
029190c51   Paul Jackson   cpuset sched_load...
829
  			}
cf417141c   Max Krasnyansky   sched, cpuset: re...
830
831
  			continue;
  		}
029190c51   Paul Jackson   cpuset sched_load...
832

6af866af3   Li Zefan   cpuset: remove re...
833
  		cpumask_clear(dp);
cf417141c   Max Krasnyansky   sched, cpuset: re...
834
835
836
837
838
839
  		if (dattr)
  			*(dattr + nslot) = SD_ATTR_INIT;
  		for (j = i; j < csn; j++) {
  			struct cpuset *b = csa[j];
  
  			if (apn == b->pn) {
8b5f1c52d   Li Zefan   cpuset: use effec...
840
  				cpumask_or(dp, dp, b->effective_cpus);
edb938217   Frederic Weisbecker   sched/isolation: ...
841
  				cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
cf417141c   Max Krasnyansky   sched, cpuset: re...
842
843
844
845
846
  				if (dattr)
  					update_domain_attr_tree(dattr + nslot, b);
  
  				/* Done with this partition */
  				b->pn = -1;
029190c51   Paul Jackson   cpuset sched_load...
847
  			}
029190c51   Paul Jackson   cpuset sched_load...
848
  		}
cf417141c   Max Krasnyansky   sched, cpuset: re...
849
  		nslot++;
029190c51   Paul Jackson   cpuset sched_load...
850
851
  	}
  	BUG_ON(nslot != ndoms);
cf417141c   Max Krasnyansky   sched, cpuset: re...
852
853
  done:
  	kfree(csa);
700018e0a   Li Zefan   cpuset: fix regre...
854
855
856
857
858
859
  	/*
  	 * Fallback to the default domain if kmalloc() failed.
  	 * See comments in partition_sched_domains().
  	 */
  	if (doms == NULL)
  		ndoms = 1;
cf417141c   Max Krasnyansky   sched, cpuset: re...
860
861
862
863
  	*domains    = doms;
  	*attributes = dattr;
  	return ndoms;
  }
f9a25f776   Mathieu Poirier   cpusets: Rebuild ...
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
  static void update_tasks_root_domain(struct cpuset *cs)
  {
  	struct css_task_iter it;
  	struct task_struct *task;
  
  	css_task_iter_start(&cs->css, 0, &it);
  
  	while ((task = css_task_iter_next(&it)))
  		dl_add_task_root_domain(task);
  
  	css_task_iter_end(&it);
  }
  
  static void rebuild_root_domains(void)
  {
  	struct cpuset *cs = NULL;
  	struct cgroup_subsys_state *pos_css;
18695a120   Stephen Dickey   Revert "cgroup/cp...
881
  	lockdep_assert_held(&cpuset_mutex);
f9a25f776   Mathieu Poirier   cpusets: Rebuild ...
882
883
  	lockdep_assert_cpus_held();
  	lockdep_assert_held(&sched_domains_mutex);
f9a25f776   Mathieu Poirier   cpusets: Rebuild ...
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
  	rcu_read_lock();
  
  	/*
  	 * Clear default root domain DL accounting, it will be computed again
  	 * if a task belongs to it.
  	 */
  	dl_clear_root_domain(&def_root_domain);
  
  	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
  
  		if (cpumask_empty(cs->effective_cpus)) {
  			pos_css = css_rightmost_descendant(pos_css);
  			continue;
  		}
  
  		css_get(&cs->css);
  
  		rcu_read_unlock();
  
  		update_tasks_root_domain(cs);
  
  		rcu_read_lock();
  		css_put(&cs->css);
  	}
  	rcu_read_unlock();
  }
  
  static void
  partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
  				    struct sched_domain_attr *dattr_new)
  {
  	mutex_lock(&sched_domains_mutex);
  	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
  	rebuild_root_domains();
  	mutex_unlock(&sched_domains_mutex);
  }
cf417141c   Max Krasnyansky   sched, cpuset: re...
920
921
922
  /*
   * Rebuild scheduler domains.
   *
699140ba8   Tejun Heo   cpuset: drop asyn...
923
924
925
926
927
   * If the flag 'sched_load_balance' of any cpuset with non-empty
   * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
   * which has that flag enabled, or if any cpuset with a non-empty
   * 'cpus' is removed, then call this routine to rebuild the
   * scheduler's dynamic sched domains.
cf417141c   Max Krasnyansky   sched, cpuset: re...
928
   *
5d21cc2db   Tejun Heo   cpuset: replace c...
929
   * Call with cpuset_mutex held.  Takes get_online_cpus().
cf417141c   Max Krasnyansky   sched, cpuset: re...
930
   */
699140ba8   Tejun Heo   cpuset: drop asyn...
931
  static void rebuild_sched_domains_locked(void)
cf417141c   Max Krasnyansky   sched, cpuset: re...
932
  {
771b663fa   Daniel Jordan   cpuset: fix race ...
933
  	struct cgroup_subsys_state *pos_css;
cf417141c   Max Krasnyansky   sched, cpuset: re...
934
  	struct sched_domain_attr *attr;
acc3f5d7c   Rusty Russell   cpumask: Partitio...
935
  	cpumask_var_t *doms;
771b663fa   Daniel Jordan   cpuset: fix race ...
936
  	struct cpuset *cs;
cf417141c   Max Krasnyansky   sched, cpuset: re...
937
  	int ndoms;
18695a120   Stephen Dickey   Revert "cgroup/cp...
938
  	lockdep_assert_held(&cpuset_mutex);
cf417141c   Max Krasnyansky   sched, cpuset: re...
939

5b16c2a49   Li Zefan   cpuset: fix cpu h...
940
  	/*
771b663fa   Daniel Jordan   cpuset: fix race ...
941
  	 * If we have raced with CPU hotplug, return early to avoid
5b16c2a49   Li Zefan   cpuset: fix cpu h...
942
  	 * passing doms with offlined cpu to partition_sched_domains().
771b663fa   Daniel Jordan   cpuset: fix race ...
943
944
945
946
947
  	 * Anyways, cpuset_hotplug_workfn() will rebuild sched domains.
  	 *
  	 * With no CPUs in any subpartitions, top_cpuset's effective CPUs
  	 * should be the same as the active CPUs, so checking only top_cpuset
  	 * is enough to detect racing CPU offlines.
5b16c2a49   Li Zefan   cpuset: fix cpu h...
948
  	 */
0ccea8feb   Waiman Long   cpuset: Make gene...
949
950
  	if (!top_cpuset.nr_subparts_cpus &&
  	    !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
951
  		return;
0ccea8feb   Waiman Long   cpuset: Make gene...
952

771b663fa   Daniel Jordan   cpuset: fix race ...
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
  	/*
  	 * With subpartition CPUs, however, the effective CPUs of a partition
  	 * root should be only a subset of the active CPUs.  Since a CPU in any
  	 * partition root could be offlined, all must be checked.
  	 */
  	if (top_cpuset.nr_subparts_cpus) {
  		rcu_read_lock();
  		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
  			if (!is_partition_root(cs)) {
  				pos_css = css_rightmost_descendant(pos_css);
  				continue;
  			}
  			if (!cpumask_subset(cs->effective_cpus,
  					    cpu_active_mask)) {
  				rcu_read_unlock();
  				return;
  			}
  		}
  		rcu_read_unlock();
  	}
5b16c2a49   Li Zefan   cpuset: fix cpu h...
973

cf417141c   Max Krasnyansky   sched, cpuset: re...
974
  	/* Generate domain masks and attrs */
cf417141c   Max Krasnyansky   sched, cpuset: re...
975
  	ndoms = generate_sched_domains(&doms, &attr);
cf417141c   Max Krasnyansky   sched, cpuset: re...
976
977
  
  	/* Have scheduler rebuild the domains */
f9a25f776   Mathieu Poirier   cpusets: Rebuild ...
978
  	partition_and_rebuild_sched_domains(ndoms, doms, attr);
cf417141c   Max Krasnyansky   sched, cpuset: re...
979
  }
db7f47cf4   Paul Menage   cpusets: allow cp...
980
  #else /* !CONFIG_SMP */
699140ba8   Tejun Heo   cpuset: drop asyn...
981
  static void rebuild_sched_domains_locked(void)
db7f47cf4   Paul Menage   cpusets: allow cp...
982
983
  {
  }
db7f47cf4   Paul Menage   cpusets: allow cp...
984
  #endif /* CONFIG_SMP */
029190c51   Paul Jackson   cpuset sched_load...
985

cf417141c   Max Krasnyansky   sched, cpuset: re...
986
987
  void rebuild_sched_domains(void)
  {
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
988
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
989
  	mutex_lock(&cpuset_mutex);
699140ba8   Tejun Heo   cpuset: drop asyn...
990
  	rebuild_sched_domains_locked();
18695a120   Stephen Dickey   Revert "cgroup/cp...
991
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
992
  	put_online_cpus();
029190c51   Paul Jackson   cpuset sched_load...
993
  }
4d1ac6a16   Satya Durga Srinivasu Prabhala   ANDROID: sched/cp...
994
995
996
997
998
999
1000
1001
1002
1003
1004
  static int update_cpus_allowed(struct cpuset *cs, struct task_struct *p,
  				const struct cpumask *new_mask)
  {
  	int ret = -EINVAL;
  
  	trace_android_rvh_update_cpus_allowed(p, cs->cpus_requested, new_mask, &ret);
  	if (!ret)
  		return ret;
  
  	return set_cpus_allowed_ptr(p, new_mask);
  }
58f4790b7   Cliff Wickman   cpusets: update_c...
1005
  /**
0b2f630a2   Miao Xie   cpusets: restruct...
1006
1007
   * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
   * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
0b2f630a2   Miao Xie   cpusets: restruct...
1008
   *
d66393e54   Tejun Heo   cpuset: use css_t...
1009
1010
1011
   * Iterate through each task of @cs updating its cpus_allowed to the
   * effective cpuset's.  As this function is called with cpuset_mutex held,
   * cpuset membership stays stable.
0b2f630a2   Miao Xie   cpusets: restruct...
1012
   */
d66393e54   Tejun Heo   cpuset: use css_t...
1013
  static void update_tasks_cpumask(struct cpuset *cs)
0b2f630a2   Miao Xie   cpusets: restruct...
1014
  {
d66393e54   Tejun Heo   cpuset: use css_t...
1015
1016
  	struct css_task_iter it;
  	struct task_struct *task;
bc2fb7ed0   Tejun Heo   cgroup: add @flag...
1017
  	css_task_iter_start(&cs->css, 0, &it);
d66393e54   Tejun Heo   cpuset: use css_t...
1018
  	while ((task = css_task_iter_next(&it)))
4d1ac6a16   Satya Durga Srinivasu Prabhala   ANDROID: sched/cp...
1019
  		update_cpus_allowed(cs, task, cs->effective_cpus);
d66393e54   Tejun Heo   cpuset: use css_t...
1020
  	css_task_iter_end(&it);
0b2f630a2   Miao Xie   cpusets: restruct...
1021
  }
ee8dde0cd   Waiman Long   cpuset: Add new v...
1022
1023
1024
1025
1026
1027
1028
  /**
   * compute_effective_cpumask - Compute the effective cpumask of the cpuset
   * @new_cpus: the temp variable for the new effective_cpus mask
   * @cs: the cpuset the need to recompute the new effective_cpus mask
   * @parent: the parent cpuset
   *
   * If the parent has subpartition CPUs, include them in the list of
4b842da27   Waiman Long   cpuset: Make CPU ...
1029
1030
1031
   * allowable CPUs in computing the new effective_cpus mask. Since offlined
   * CPUs are not removed from subparts_cpus, we have to use cpu_active_mask
   * to mask those out.
ee8dde0cd   Waiman Long   cpuset: Add new v...
1032
1033
1034
1035
1036
1037
1038
   */
  static void compute_effective_cpumask(struct cpumask *new_cpus,
  				      struct cpuset *cs, struct cpuset *parent)
  {
  	if (parent->nr_subparts_cpus) {
  		cpumask_or(new_cpus, parent->effective_cpus,
  			   parent->subparts_cpus);
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
1039
  		cpumask_and(new_cpus, new_cpus, cs->cpus_requested);
4b842da27   Waiman Long   cpuset: Make CPU ...
1040
  		cpumask_and(new_cpus, new_cpus, cpu_active_mask);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1041
  	} else {
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
1042
  		cpumask_and(new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
  	}
  }
  
  /*
   * Commands for update_parent_subparts_cpumask
   */
  enum subparts_cmd {
  	partcmd_enable,		/* Enable partition root	 */
  	partcmd_disable,	/* Disable partition root	 */
  	partcmd_update,		/* Update parent's subparts_cpus */
  };
  
  /**
   * update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset
   * @cpuset:  The cpuset that requests change in partition root state
   * @cmd:     Partition root state change command
   * @newmask: Optional new cpumask for partcmd_update
   * @tmp:     Temporary addmask and delmask
   * Return:   0, 1 or an error code
   *
   * For partcmd_enable, the cpuset is being transformed from a non-partition
   * root to a partition root. The cpus_allowed mask of the given cpuset will
   * be put into parent's subparts_cpus and taken away from parent's
   * effective_cpus. The function will return 0 if all the CPUs listed in
   * cpus_allowed can be granted or an error code will be returned.
   *
   * For partcmd_disable, the cpuset is being transofrmed from a partition
   * root back to a non-partition root. any CPUs in cpus_allowed that are in
   * parent's subparts_cpus will be taken away from that cpumask and put back
   * into parent's effective_cpus. 0 should always be returned.
   *
   * For partcmd_update, if the optional newmask is specified, the cpu
   * list is to be changed from cpus_allowed to newmask. Otherwise,
3881b8612   Waiman Long   cpuset: Add an er...
1076
1077
1078
1079
1080
1081
   * cpus_allowed is assumed to remain the same. The cpuset should either
   * be a partition root or an invalid partition root. The partition root
   * state may change if newmask is NULL and none of the requested CPUs can
   * be granted by the parent. The function will return 1 if changes to
   * parent's subparts_cpus and effective_cpus happen or 0 otherwise.
   * Error code should only be returned when newmask is non-NULL.
ee8dde0cd   Waiman Long   cpuset: Add new v...
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
   *
   * The partcmd_enable and partcmd_disable commands are used by
   * update_prstate(). The partcmd_update command is used by
   * update_cpumasks_hier() with newmask NULL and update_cpumask() with
   * newmask set.
   *
   * The checking is more strict when enabling partition root than the
   * other two commands.
   *
   * Because of the implicit cpu exclusive nature of a partition root,
   * cpumask changes that violates the cpu exclusivity rule will not be
   * permitted when checked by validate_change(). The validate_change()
   * function will also prevent any changes to the cpu list if it is not
   * a superset of children's cpu lists.
   */
  static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
  					  struct cpumask *newmask,
  					  struct tmpmasks *tmp)
  {
  	struct cpuset *parent = parent_cs(cpuset);
  	int adding;	/* Moving cpus from effective_cpus to subparts_cpus */
  	int deleting;	/* Moving cpus from subparts_cpus to effective_cpus */
3881b8612   Waiman Long   cpuset: Add an er...
1104
  	bool part_error = false;	/* Partition error? */
ee8dde0cd   Waiman Long   cpuset: Add new v...
1105

18695a120   Stephen Dickey   Revert "cgroup/cp...
1106
  	lockdep_assert_held(&cpuset_mutex);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
  
  	/*
  	 * The parent must be a partition root.
  	 * The new cpumask, if present, or the current cpus_allowed must
  	 * not be empty.
  	 */
  	if (!is_partition_root(parent) ||
  	   (newmask && cpumask_empty(newmask)) ||
  	   (!newmask && cpumask_empty(cpuset->cpus_allowed)))
  		return -EINVAL;
  
  	/*
  	 * Enabling/disabling partition root is not allowed if there are
  	 * online children.
  	 */
  	if ((cmd != partcmd_update) && css_has_online_children(&cpuset->css))
  		return -EBUSY;
  
  	/*
  	 * Enabling partition root is not allowed if not all the CPUs
  	 * can be granted from parent's effective_cpus or at least one
  	 * CPU will be left after that.
  	 */
  	if ((cmd == partcmd_enable) &&
  	   (!cpumask_subset(cpuset->cpus_allowed, parent->effective_cpus) ||
  	     cpumask_equal(cpuset->cpus_allowed, parent->effective_cpus)))
  		return -EINVAL;
  
  	/*
  	 * A cpumask update cannot make parent's effective_cpus become empty.
  	 */
  	adding = deleting = false;
  	if (cmd == partcmd_enable) {
  		cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
  		adding = true;
  	} else if (cmd == partcmd_disable) {
  		deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed,
  				       parent->subparts_cpus);
  	} else if (newmask) {
  		/*
  		 * partcmd_update with newmask:
  		 *
  		 * delmask = cpus_allowed & ~newmask & parent->subparts_cpus
  		 * addmask = newmask & parent->effective_cpus
  		 *		     & ~parent->subparts_cpus
  		 */
  		cpumask_andnot(tmp->delmask, cpuset->cpus_allowed, newmask);
  		deleting = cpumask_and(tmp->delmask, tmp->delmask,
  				       parent->subparts_cpus);
  
  		cpumask_and(tmp->addmask, newmask, parent->effective_cpus);
  		adding = cpumask_andnot(tmp->addmask, tmp->addmask,
  					parent->subparts_cpus);
  		/*
  		 * Return error if the new effective_cpus could become empty.
  		 */
4b842da27   Waiman Long   cpuset: Make CPU ...
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
  		if (adding &&
  		    cpumask_equal(parent->effective_cpus, tmp->addmask)) {
  			if (!deleting)
  				return -EINVAL;
  			/*
  			 * As some of the CPUs in subparts_cpus might have
  			 * been offlined, we need to compute the real delmask
  			 * to confirm that.
  			 */
  			if (!cpumask_and(tmp->addmask, tmp->delmask,
  					 cpu_active_mask))
  				return -EINVAL;
  			cpumask_copy(tmp->addmask, parent->effective_cpus);
  		}
ee8dde0cd   Waiman Long   cpuset: Add new v...
1177
1178
1179
1180
1181
1182
1183
  	} else {
  		/*
  		 * partcmd_update w/o newmask:
  		 *
  		 * addmask = cpus_allowed & parent->effectiveb_cpus
  		 *
  		 * Note that parent's subparts_cpus may have been
3881b8612   Waiman Long   cpuset: Add an er...
1184
1185
  		 * pre-shrunk in case there is a change in the cpu list.
  		 * So no deletion is needed.
ee8dde0cd   Waiman Long   cpuset: Add new v...
1186
1187
1188
  		 */
  		adding = cpumask_and(tmp->addmask, cpuset->cpus_allowed,
  				     parent->effective_cpus);
3881b8612   Waiman Long   cpuset: Add an er...
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
  		part_error = cpumask_equal(tmp->addmask,
  					   parent->effective_cpus);
  	}
  
  	if (cmd == partcmd_update) {
  		int prev_prs = cpuset->partition_root_state;
  
  		/*
  		 * Check for possible transition between PRS_ENABLED
  		 * and PRS_ERROR.
  		 */
  		switch (cpuset->partition_root_state) {
  		case PRS_ENABLED:
  			if (part_error)
  				cpuset->partition_root_state = PRS_ERROR;
  			break;
  		case PRS_ERROR:
  			if (!part_error)
  				cpuset->partition_root_state = PRS_ENABLED;
  			break;
  		}
  		/*
  		 * Set part_error if previously in invalid state.
  		 */
  		part_error = (prev_prs == PRS_ERROR);
  	}
  
  	if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
  		return 0;	/* Nothing need to be done */
  
  	if (cpuset->partition_root_state == PRS_ERROR) {
  		/*
  		 * Remove all its cpus from parent's subparts_cpus.
  		 */
  		adding = false;
  		deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed,
  				       parent->subparts_cpus);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
  	}
  
  	if (!adding && !deleting)
  		return 0;
  
  	/*
  	 * Change the parent's subparts_cpus.
  	 * Newly added CPUs will be removed from effective_cpus and
  	 * newly deleted ones will be added back to effective_cpus.
  	 */
  	spin_lock_irq(&callback_lock);
  	if (adding) {
  		cpumask_or(parent->subparts_cpus,
  			   parent->subparts_cpus, tmp->addmask);
  		cpumask_andnot(parent->effective_cpus,
  			       parent->effective_cpus, tmp->addmask);
  	}
  	if (deleting) {
  		cpumask_andnot(parent->subparts_cpus,
  			       parent->subparts_cpus, tmp->delmask);
4b842da27   Waiman Long   cpuset: Make CPU ...
1246
1247
1248
1249
  		/*
  		 * Some of the CPUs in subparts_cpus might have been offlined.
  		 */
  		cpumask_and(tmp->delmask, tmp->delmask, cpu_active_mask);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1250
1251
1252
1253
1254
1255
1256
1257
1258
  		cpumask_or(parent->effective_cpus,
  			   parent->effective_cpus, tmp->delmask);
  	}
  
  	parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
  	spin_unlock_irq(&callback_lock);
  
  	return cmd == partcmd_update;
  }
5c5cc6232   Li Zefan   cpuset: allow to ...
1259
  /*
734d45130   Li Zefan   cpuset: update cs...
1260
   * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
ee8dde0cd   Waiman Long   cpuset: Add new v...
1261
1262
   * @cs:  the cpuset to consider
   * @tmp: temp variables for calculating effective_cpus & partition setup
734d45130   Li Zefan   cpuset: update cs...
1263
1264
1265
   *
   * When congifured cpumask is changed, the effective cpumasks of this cpuset
   * and all its descendants need to be updated.
5c5cc6232   Li Zefan   cpuset: allow to ...
1266
   *
734d45130   Li Zefan   cpuset: update cs...
1267
   * On legacy hierachy, effective_cpus will be the same with cpu_allowed.
5c5cc6232   Li Zefan   cpuset: allow to ...
1268
1269
1270
   *
   * Called with cpuset_mutex held
   */
ee8dde0cd   Waiman Long   cpuset: Add new v...
1271
  static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
5c5cc6232   Li Zefan   cpuset: allow to ...
1272
1273
  {
  	struct cpuset *cp;
492eb21b9   Tejun Heo   cgroup: make hier...
1274
  	struct cgroup_subsys_state *pos_css;
8b5f1c52d   Li Zefan   cpuset: use effec...
1275
  	bool need_rebuild_sched_domains = false;
5c5cc6232   Li Zefan   cpuset: allow to ...
1276
1277
  
  	rcu_read_lock();
734d45130   Li Zefan   cpuset: update cs...
1278
1279
  	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
  		struct cpuset *parent = parent_cs(cp);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1280
  		compute_effective_cpumask(tmp->new_cpus, cp, parent);
734d45130   Li Zefan   cpuset: update cs...
1281

554b0d1c8   Li Zefan   cpuset: inherit a...
1282
1283
1284
1285
  		/*
  		 * If it becomes empty, inherit the effective mask of the
  		 * parent, which is guaranteed to have some CPUs.
  		 */
4716909cc   Waiman Long   cpuset: Track cpu...
1286
  		if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) {
ee8dde0cd   Waiman Long   cpuset: Add new v...
1287
  			cpumask_copy(tmp->new_cpus, parent->effective_cpus);
4716909cc   Waiman Long   cpuset: Track cpu...
1288
1289
1290
1291
1292
1293
1294
1295
1296
  			if (!cp->use_parent_ecpus) {
  				cp->use_parent_ecpus = true;
  				parent->child_ecpus_count++;
  			}
  		} else if (cp->use_parent_ecpus) {
  			cp->use_parent_ecpus = false;
  			WARN_ON_ONCE(!parent->child_ecpus_count);
  			parent->child_ecpus_count--;
  		}
554b0d1c8   Li Zefan   cpuset: inherit a...
1297

ee8dde0cd   Waiman Long   cpuset: Add new v...
1298
1299
1300
1301
  		/*
  		 * Skip the whole subtree if the cpumask remains the same
  		 * and has no partition root state.
  		 */
3881b8612   Waiman Long   cpuset: Add an er...
1302
  		if (!cp->partition_root_state &&
ee8dde0cd   Waiman Long   cpuset: Add new v...
1303
  		    cpumask_equal(tmp->new_cpus, cp->effective_cpus)) {
734d45130   Li Zefan   cpuset: update cs...
1304
1305
  			pos_css = css_rightmost_descendant(pos_css);
  			continue;
5c5cc6232   Li Zefan   cpuset: allow to ...
1306
  		}
734d45130   Li Zefan   cpuset: update cs...
1307

ee8dde0cd   Waiman Long   cpuset: Add new v...
1308
1309
1310
1311
1312
1313
  		/*
  		 * update_parent_subparts_cpumask() should have been called
  		 * for cs already in update_cpumask(). We should also call
  		 * update_tasks_cpumask() again for tasks in the parent
  		 * cpuset if the parent's subparts_cpus changes.
  		 */
3881b8612   Waiman Long   cpuset: Add an er...
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
  		if ((cp != cs) && cp->partition_root_state) {
  			switch (parent->partition_root_state) {
  			case PRS_DISABLED:
  				/*
  				 * If parent is not a partition root or an
  				 * invalid partition root, clear the state
  				 * state and the CS_CPU_EXCLUSIVE flag.
  				 */
  				WARN_ON_ONCE(cp->partition_root_state
  					     != PRS_ERROR);
  				cp->partition_root_state = 0;
  
  				/*
  				 * clear_bit() is an atomic operation and
  				 * readers aren't interested in the state
  				 * of CS_CPU_EXCLUSIVE anyway. So we can
  				 * just update the flag without holding
  				 * the callback_lock.
  				 */
  				clear_bit(CS_CPU_EXCLUSIVE, &cp->flags);
  				break;
  
  			case PRS_ENABLED:
  				if (update_parent_subparts_cpumask(cp, partcmd_update, NULL, tmp))
  					update_tasks_cpumask(parent);
  				break;
  
  			case PRS_ERROR:
  				/*
  				 * When parent is invalid, it has to be too.
  				 */
  				cp->partition_root_state = PRS_ERROR;
  				if (cp->nr_subparts_cpus) {
  					cp->nr_subparts_cpus = 0;
  					cpumask_clear(cp->subparts_cpus);
  				}
  				break;
  			}
ee8dde0cd   Waiman Long   cpuset: Add new v...
1352
  		}
ec903c0c8   Tejun Heo   cgroup: rename cs...
1353
  		if (!css_tryget_online(&cp->css))
5c5cc6232   Li Zefan   cpuset: allow to ...
1354
1355
  			continue;
  		rcu_read_unlock();
8447a0fee   Vladimir Davydov   cpuset: convert c...
1356
  		spin_lock_irq(&callback_lock);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1357
1358
  
  		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
3881b8612   Waiman Long   cpuset: Add an er...
1359
1360
1361
1362
1363
  		if (cp->nr_subparts_cpus &&
  		   (cp->partition_root_state != PRS_ENABLED)) {
  			cp->nr_subparts_cpus = 0;
  			cpumask_clear(cp->subparts_cpus);
  		} else if (cp->nr_subparts_cpus) {
ee8dde0cd   Waiman Long   cpuset: Add new v...
1364
1365
1366
  			/*
  			 * Make sure that effective_cpus & subparts_cpus
  			 * are mutually exclusive.
3881b8612   Waiman Long   cpuset: Add an er...
1367
1368
1369
1370
1371
  			 *
  			 * In the unlikely event that effective_cpus
  			 * becomes empty. we clear cp->nr_subparts_cpus and
  			 * let its child partition roots to compete for
  			 * CPUs again.
ee8dde0cd   Waiman Long   cpuset: Add new v...
1372
1373
1374
  			 */
  			cpumask_andnot(cp->effective_cpus, cp->effective_cpus,
  				       cp->subparts_cpus);
3881b8612   Waiman Long   cpuset: Add an er...
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
  			if (cpumask_empty(cp->effective_cpus)) {
  				cpumask_copy(cp->effective_cpus, tmp->new_cpus);
  				cpumask_clear(cp->subparts_cpus);
  				cp->nr_subparts_cpus = 0;
  			} else if (!cpumask_subset(cp->subparts_cpus,
  						   tmp->new_cpus)) {
  				cpumask_andnot(cp->subparts_cpus,
  					cp->subparts_cpus, tmp->new_cpus);
  				cp->nr_subparts_cpus
  					= cpumask_weight(cp->subparts_cpus);
  			}
ee8dde0cd   Waiman Long   cpuset: Add new v...
1386
  		}
8447a0fee   Vladimir Davydov   cpuset: convert c...
1387
  		spin_unlock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
1388

b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
1389
  		WARN_ON(!is_in_v2_mode() &&
734d45130   Li Zefan   cpuset: update cs...
1390
  			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
d66393e54   Tejun Heo   cpuset: use css_t...
1391
  		update_tasks_cpumask(cp);
5c5cc6232   Li Zefan   cpuset: allow to ...
1392

8b5f1c52d   Li Zefan   cpuset: use effec...
1393
  		/*
0ccea8feb   Waiman Long   cpuset: Make gene...
1394
1395
1396
1397
  		 * On legacy hierarchy, if the effective cpumask of any non-
  		 * empty cpuset is changed, we need to rebuild sched domains.
  		 * On default hierarchy, the cpuset needs to be a partition
  		 * root as well.
8b5f1c52d   Li Zefan   cpuset: use effec...
1398
1399
  		 */
  		if (!cpumask_empty(cp->cpus_allowed) &&
0ccea8feb   Waiman Long   cpuset: Make gene...
1400
1401
1402
  		    is_sched_load_balance(cp) &&
  		   (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
  		    is_partition_root(cp)))
8b5f1c52d   Li Zefan   cpuset: use effec...
1403
  			need_rebuild_sched_domains = true;
5c5cc6232   Li Zefan   cpuset: allow to ...
1404
1405
1406
1407
  		rcu_read_lock();
  		css_put(&cp->css);
  	}
  	rcu_read_unlock();
8b5f1c52d   Li Zefan   cpuset: use effec...
1408
1409
1410
  
  	if (need_rebuild_sched_domains)
  		rebuild_sched_domains_locked();
5c5cc6232   Li Zefan   cpuset: allow to ...
1411
  }
0b2f630a2   Miao Xie   cpusets: restruct...
1412
  /**
4716909cc   Waiman Long   cpuset: Track cpu...
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
   * update_sibling_cpumasks - Update siblings cpumasks
   * @parent:  Parent cpuset
   * @cs:      Current cpuset
   * @tmp:     Temp variables
   */
  static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
  				    struct tmpmasks *tmp)
  {
  	struct cpuset *sibling;
  	struct cgroup_subsys_state *pos_css;
  
  	/*
  	 * Check all its siblings and call update_cpumasks_hier()
  	 * if their use_parent_ecpus flag is set in order for them
  	 * to use the right effective_cpus value.
  	 */
  	rcu_read_lock();
  	cpuset_for_each_child(sibling, pos_css, parent) {
  		if (sibling == cs)
  			continue;
  		if (!sibling->use_parent_ecpus)
  			continue;
  
  		update_cpumasks_hier(sibling, tmp);
  	}
  	rcu_read_unlock();
  }
  
  /**
58f4790b7   Cliff Wickman   cpusets: update_c...
1442
1443
   * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
   * @cs: the cpuset to consider
fc34ac1dc   Fabian Frederick   kernel/cpuset.c: ...
1444
   * @trialcs: trial cpuset
58f4790b7   Cliff Wickman   cpusets: update_c...
1445
1446
   * @buf: buffer of cpu numbers written to this cpuset
   */
645fcc9d2   Li Zefan   cpuset: don't all...
1447
1448
  static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
  			  const char *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1449
  {
58f4790b7   Cliff Wickman   cpusets: update_c...
1450
  	int retval;
ee8dde0cd   Waiman Long   cpuset: Add new v...
1451
  	struct tmpmasks tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1452

5f054e31c   Rusty Russell   documentation: re...
1453
  	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
1454
1455
  	if (cs == &top_cpuset)
  		return -EACCES;
6f7f02e78   David Rientjes   cpusets: allow em...
1456
  	/*
54a1b2c15   Roman Kiryanov   ANDROID: kernel: ...
1457
  	 * An empty cpus_requested is ok only if the cpuset has no tasks.
020958b62   Paul Jackson   cpusets: decrusti...
1458
1459
1460
  	 * Since cpulist_parse() fails on an empty mask, we special case
  	 * that parsing.  The validate_change() call ensures that cpusets
  	 * with tasks have cpus.
6f7f02e78   David Rientjes   cpusets: allow em...
1461
  	 */
020958b62   Paul Jackson   cpusets: decrusti...
1462
  	if (!*buf) {
54a1b2c15   Roman Kiryanov   ANDROID: kernel: ...
1463
  		cpumask_clear(trialcs->cpus_requested);
6f7f02e78   David Rientjes   cpusets: allow em...
1464
  	} else {
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
1465
  		retval = cpulist_parse(buf, trialcs->cpus_requested);
6f7f02e78   David Rientjes   cpusets: allow em...
1466
1467
  		if (retval < 0)
  			return retval;
54a1b2c15   Roman Kiryanov   ANDROID: kernel: ...
1468
  	}
37340746a   Lai Jiangshan   cpusets: fix bug ...
1469

54a1b2c15   Roman Kiryanov   ANDROID: kernel: ...
1470
1471
  	if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
  		return -EINVAL;
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
1472

54a1b2c15   Roman Kiryanov   ANDROID: kernel: ...
1473
  	cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
029190c51   Paul Jackson   cpuset sched_load...
1474

8707d8b8c   Paul Menage   Fix cpusets updat...
1475
  	/* Nothing to do if the cpus didn't change */
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
1476
  	if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
8707d8b8c   Paul Menage   Fix cpusets updat...
1477
  		return 0;
58f4790b7   Cliff Wickman   cpusets: update_c...
1478

a73456f37   Li Zefan   cpuset: re-struct...
1479
1480
1481
  	retval = validate_change(cs, trialcs);
  	if (retval < 0)
  		return retval;
ee8dde0cd   Waiman Long   cpuset: Add new v...
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
  #ifdef CONFIG_CPUMASK_OFFSTACK
  	/*
  	 * Use the cpumasks in trialcs for tmpmasks when they are pointers
  	 * to allocated cpumasks.
  	 */
  	tmp.addmask  = trialcs->subparts_cpus;
  	tmp.delmask  = trialcs->effective_cpus;
  	tmp.new_cpus = trialcs->cpus_allowed;
  #endif
  
  	if (cs->partition_root_state) {
  		/* Cpumask of a partition root cannot be empty */
  		if (cpumask_empty(trialcs->cpus_allowed))
  			return -EINVAL;
  		if (update_parent_subparts_cpumask(cs, partcmd_update,
  					trialcs->cpus_allowed, &tmp) < 0)
  			return -EINVAL;
  	}
8447a0fee   Vladimir Davydov   cpuset: convert c...
1500
  	spin_lock_irq(&callback_lock);
300ed6cbb   Li Zefan   cpuset: convert c...
1501
  	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
1502
  	cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1503
1504
1505
1506
1507
1508
1509
1510
1511
  
  	/*
  	 * Make sure that subparts_cpus is a subset of cpus_allowed.
  	 */
  	if (cs->nr_subparts_cpus) {
  		cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
  			       cs->cpus_allowed);
  		cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
  	}
8447a0fee   Vladimir Davydov   cpuset: convert c...
1512
  	spin_unlock_irq(&callback_lock);
029190c51   Paul Jackson   cpuset sched_load...
1513

ee8dde0cd   Waiman Long   cpuset: Add new v...
1514
  	update_cpumasks_hier(cs, &tmp);
4716909cc   Waiman Long   cpuset: Track cpu...
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
  
  	if (cs->partition_root_state) {
  		struct cpuset *parent = parent_cs(cs);
  
  		/*
  		 * For partition root, update the cpumasks of sibling
  		 * cpusets if they use parent's effective_cpus.
  		 */
  		if (parent->child_ecpus_count)
  			update_sibling_cpumasks(parent, cs, &tmp);
  	}
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
1526
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1527
  }
053199edf   Paul Jackson   [PATCH] cpusets: ...
1528
  /*
e93ad19d0   Tejun Heo   cpuset: make mm m...
1529
1530
1531
1532
1533
   * Migrate memory region from one set of nodes to another.  This is
   * performed asynchronously as it can be called from process migration path
   * holding locks involved in process management.  All mm migrations are
   * performed in the queued order and can be waited for by flushing
   * cpuset_migrate_mm_wq.
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
1534
   */
e93ad19d0   Tejun Heo   cpuset: make mm m...
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
  struct cpuset_migrate_mm_work {
  	struct work_struct	work;
  	struct mm_struct	*mm;
  	nodemask_t		from;
  	nodemask_t		to;
  };
  
  static void cpuset_migrate_mm_workfn(struct work_struct *work)
  {
  	struct cpuset_migrate_mm_work *mwork =
  		container_of(work, struct cpuset_migrate_mm_work, work);
  
  	/* on a wq worker, no need to worry about %current's mems_allowed */
  	do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
  	mmput(mwork->mm);
  	kfree(mwork);
  }
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
1552
1553
1554
  static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
  							const nodemask_t *to)
  {
e93ad19d0   Tejun Heo   cpuset: make mm m...
1555
  	struct cpuset_migrate_mm_work *mwork;
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
1556

e93ad19d0   Tejun Heo   cpuset: make mm m...
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
  	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
  	if (mwork) {
  		mwork->mm = mm;
  		mwork->from = *from;
  		mwork->to = *to;
  		INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
  		queue_work(cpuset_migrate_mm_wq, &mwork->work);
  	} else {
  		mmput(mm);
  	}
  }
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
1568

5cf1cacb4   Tejun Heo   cgroup, cpuset: r...
1569
  static void cpuset_post_attach(void)
e93ad19d0   Tejun Heo   cpuset: make mm m...
1570
1571
  {
  	flush_workqueue(cpuset_migrate_mm_wq);
e4e364e86   Paul Jackson   [PATCH] cpuset: m...
1572
  }
3b6766fe6   Li Zefan   cpuset: rewrite u...
1573
  /*
58568d2a8   Miao Xie   cpuset,mm: update...
1574
1575
1576
1577
   * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
   * @tsk: the task to change
   * @newmems: new nodes that the task will be set
   *
5f155f27c   Vlastimil Babka   mm, cpuset: alway...
1578
1579
1580
1581
   * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
   * and rebind an eventual tasks' mempolicy. If the task is allocating in
   * parallel, it might temporarily see an empty intersection, which results in
   * a seqlock check and retry before OOM or allocation failure.
58568d2a8   Miao Xie   cpuset,mm: update...
1582
1583
1584
1585
   */
  static void cpuset_change_task_nodemask(struct task_struct *tsk,
  					nodemask_t *newmems)
  {
c0ff7453b   Miao Xie   cpuset,mm: fix no...
1586
  	task_lock(tsk);
c0ff7453b   Miao Xie   cpuset,mm: fix no...
1587

5f155f27c   Vlastimil Babka   mm, cpuset: alway...
1588
1589
  	local_irq_disable();
  	write_seqcount_begin(&tsk->mems_allowed_seq);
c0ff7453b   Miao Xie   cpuset,mm: fix no...
1590

cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1591
  	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
213980c0f   Vlastimil Babka   mm, mempolicy: si...
1592
  	mpol_rebind_task(tsk, newmems);
58568d2a8   Miao Xie   cpuset,mm: update...
1593
  	tsk->mems_allowed = *newmems;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1594

5f155f27c   Vlastimil Babka   mm, cpuset: alway...
1595
1596
  	write_seqcount_end(&tsk->mems_allowed_seq);
  	local_irq_enable();
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1597

c0ff7453b   Miao Xie   cpuset,mm: fix no...
1598
  	task_unlock(tsk);
58568d2a8   Miao Xie   cpuset,mm: update...
1599
  }
8793d854e   Paul Menage   Task Control Grou...
1600
  static void *cpuset_being_rebound;
0b2f630a2   Miao Xie   cpusets: restruct...
1601
1602
1603
  /**
   * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
   * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
0b2f630a2   Miao Xie   cpusets: restruct...
1604
   *
d66393e54   Tejun Heo   cpuset: use css_t...
1605
1606
1607
   * Iterate through each task of @cs updating its mems_allowed to the
   * effective cpuset's.  As this function is called with cpuset_mutex held,
   * cpuset membership stays stable.
0b2f630a2   Miao Xie   cpusets: restruct...
1608
   */
d66393e54   Tejun Heo   cpuset: use css_t...
1609
  static void update_tasks_nodemask(struct cpuset *cs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1610
  {
33ad801df   Li Zefan   cpuset: record ol...
1611
  	static nodemask_t newmems;	/* protected by cpuset_mutex */
d66393e54   Tejun Heo   cpuset: use css_t...
1612
1613
  	struct css_task_iter it;
  	struct task_struct *task;
59dac16fb   Paul Jackson   [PATCH] cpuset: u...
1614

846a16bf0   Lee Schermerhorn   mempolicy: rename...
1615
  	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1616

ae1c80238   Li Zefan   cpuset: apply cs-...
1617
  	guarantee_online_mems(cs, &newmems);
33ad801df   Li Zefan   cpuset: record ol...
1618

4225399a6   Paul Jackson   [PATCH] cpuset: r...
1619
  	/*
c1e8d7c6a   Michel Lespinasse   mmap locking API:...
1620
  	 * The mpol_rebind_mm() call takes mmap_lock, which we couldn't
3b6766fe6   Li Zefan   cpuset: rewrite u...
1621
1622
1623
  	 * take while holding tasklist_lock.  Forks can happen - the
  	 * mpol_dup() cpuset_being_rebound check will catch such forks,
  	 * and rebind their vma mempolicies too.  Because we still hold
5d21cc2db   Tejun Heo   cpuset: replace c...
1624
  	 * the global cpuset_mutex, we know that no other rebind effort
3b6766fe6   Li Zefan   cpuset: rewrite u...
1625
  	 * will be contending for the global variable cpuset_being_rebound.
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1626
  	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
04c19fa6f   Paul Jackson   [PATCH] cpuset: m...
1627
  	 * is idempotent.  Also migrate pages in each mm to new nodes.
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1628
  	 */
bc2fb7ed0   Tejun Heo   cgroup: add @flag...
1629
  	css_task_iter_start(&cs->css, 0, &it);
d66393e54   Tejun Heo   cpuset: use css_t...
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
  	while ((task = css_task_iter_next(&it))) {
  		struct mm_struct *mm;
  		bool migrate;
  
  		cpuset_change_task_nodemask(task, &newmems);
  
  		mm = get_task_mm(task);
  		if (!mm)
  			continue;
  
  		migrate = is_memory_migrate(cs);
  
  		mpol_rebind_mm(mm, &cs->mems_allowed);
  		if (migrate)
  			cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
e93ad19d0   Tejun Heo   cpuset: make mm m...
1645
1646
  		else
  			mmput(mm);
d66393e54   Tejun Heo   cpuset: use css_t...
1647
1648
  	}
  	css_task_iter_end(&it);
4225399a6   Paul Jackson   [PATCH] cpuset: r...
1649

33ad801df   Li Zefan   cpuset: record ol...
1650
1651
1652
1653
1654
  	/*
  	 * All the tasks' nodemasks have been updated, update
  	 * cs->old_mems_allowed.
  	 */
  	cs->old_mems_allowed = newmems;
2df167a30   Paul Menage   cgroups: update c...
1655
  	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
8793d854e   Paul Menage   Task Control Grou...
1656
  	cpuset_being_rebound = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1657
  }
0b2f630a2   Miao Xie   cpusets: restruct...
1658
  /*
734d45130   Li Zefan   cpuset: update cs...
1659
1660
1661
   * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
   * @cs: the cpuset to consider
   * @new_mems: a temp variable for calculating new effective_mems
5c5cc6232   Li Zefan   cpuset: allow to ...
1662
   *
734d45130   Li Zefan   cpuset: update cs...
1663
1664
   * When configured nodemask is changed, the effective nodemasks of this cpuset
   * and all its descendants need to be updated.
5c5cc6232   Li Zefan   cpuset: allow to ...
1665
   *
734d45130   Li Zefan   cpuset: update cs...
1666
   * On legacy hiearchy, effective_mems will be the same with mems_allowed.
5c5cc6232   Li Zefan   cpuset: allow to ...
1667
1668
1669
   *
   * Called with cpuset_mutex held
   */
734d45130   Li Zefan   cpuset: update cs...
1670
  static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
5c5cc6232   Li Zefan   cpuset: allow to ...
1671
1672
  {
  	struct cpuset *cp;
492eb21b9   Tejun Heo   cgroup: make hier...
1673
  	struct cgroup_subsys_state *pos_css;
5c5cc6232   Li Zefan   cpuset: allow to ...
1674
1675
  
  	rcu_read_lock();
734d45130   Li Zefan   cpuset: update cs...
1676
1677
1678
1679
  	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
  		struct cpuset *parent = parent_cs(cp);
  
  		nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
554b0d1c8   Li Zefan   cpuset: inherit a...
1680
1681
1682
1683
  		/*
  		 * If it becomes empty, inherit the effective mask of the
  		 * parent, which is guaranteed to have some MEMs.
  		 */
b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
1684
  		if (is_in_v2_mode() && nodes_empty(*new_mems))
554b0d1c8   Li Zefan   cpuset: inherit a...
1685
  			*new_mems = parent->effective_mems;
734d45130   Li Zefan   cpuset: update cs...
1686
1687
1688
1689
  		/* Skip the whole subtree if the nodemask remains the same. */
  		if (nodes_equal(*new_mems, cp->effective_mems)) {
  			pos_css = css_rightmost_descendant(pos_css);
  			continue;
5c5cc6232   Li Zefan   cpuset: allow to ...
1690
  		}
734d45130   Li Zefan   cpuset: update cs...
1691

ec903c0c8   Tejun Heo   cgroup: rename cs...
1692
  		if (!css_tryget_online(&cp->css))
5c5cc6232   Li Zefan   cpuset: allow to ...
1693
1694
  			continue;
  		rcu_read_unlock();
8447a0fee   Vladimir Davydov   cpuset: convert c...
1695
  		spin_lock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
1696
  		cp->effective_mems = *new_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1697
  		spin_unlock_irq(&callback_lock);
734d45130   Li Zefan   cpuset: update cs...
1698

b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
1699
  		WARN_ON(!is_in_v2_mode() &&
a13812683   Li Zefan   cpuset: fix the W...
1700
  			!nodes_equal(cp->mems_allowed, cp->effective_mems));
734d45130   Li Zefan   cpuset: update cs...
1701

d66393e54   Tejun Heo   cpuset: use css_t...
1702
  		update_tasks_nodemask(cp);
5c5cc6232   Li Zefan   cpuset: allow to ...
1703
1704
1705
1706
1707
1708
1709
1710
  
  		rcu_read_lock();
  		css_put(&cp->css);
  	}
  	rcu_read_unlock();
  }
  
  /*
0b2f630a2   Miao Xie   cpusets: restruct...
1711
1712
   * Handle user request to change the 'mems' memory placement
   * of a cpuset.  Needs to validate the request, update the
58568d2a8   Miao Xie   cpuset,mm: update...
1713
1714
1715
1716
   * cpusets mems_allowed, and for each task in the cpuset,
   * update mems_allowed and rebind task's mempolicy and any vma
   * mempolicies and if the cpuset is marked 'memory_migrate',
   * migrate the tasks pages to the new memory.
0b2f630a2   Miao Xie   cpusets: restruct...
1717
   *
8447a0fee   Vladimir Davydov   cpuset: convert c...
1718
   * Call with cpuset_mutex held. May take callback_lock during call.
0b2f630a2   Miao Xie   cpusets: restruct...
1719
   * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
c1e8d7c6a   Michel Lespinasse   mmap locking API:...
1720
   * lock each such tasks mm->mmap_lock, scan its vma's and rebind
0b2f630a2   Miao Xie   cpusets: restruct...
1721
1722
   * their mempolicies to the cpusets new mems_allowed.
   */
645fcc9d2   Li Zefan   cpuset: don't all...
1723
1724
  static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
  			   const char *buf)
0b2f630a2   Miao Xie   cpusets: restruct...
1725
  {
0b2f630a2   Miao Xie   cpusets: restruct...
1726
1727
1728
  	int retval;
  
  	/*
38d7bee9d   Lai Jiangshan   cpuset: use N_MEM...
1729
  	 * top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
0b2f630a2   Miao Xie   cpusets: restruct...
1730
1731
  	 * it's read-only
  	 */
53feb2976   Miao Xie   cpuset: alloc nod...
1732
1733
1734
1735
  	if (cs == &top_cpuset) {
  		retval = -EACCES;
  		goto done;
  	}
0b2f630a2   Miao Xie   cpusets: restruct...
1736

0b2f630a2   Miao Xie   cpusets: restruct...
1737
1738
1739
1740
1741
1742
1743
  	/*
  	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
  	 * Since nodelist_parse() fails on an empty mask, we special case
  	 * that parsing.  The validate_change() call ensures that cpusets
  	 * with tasks have memory.
  	 */
  	if (!*buf) {
645fcc9d2   Li Zefan   cpuset: don't all...
1744
  		nodes_clear(trialcs->mems_allowed);
0b2f630a2   Miao Xie   cpusets: restruct...
1745
  	} else {
645fcc9d2   Li Zefan   cpuset: don't all...
1746
  		retval = nodelist_parse(buf, trialcs->mems_allowed);
0b2f630a2   Miao Xie   cpusets: restruct...
1747
1748
  		if (retval < 0)
  			goto done;
645fcc9d2   Li Zefan   cpuset: don't all...
1749
  		if (!nodes_subset(trialcs->mems_allowed,
5d8ba82c3   Li Zefan   cpuset: allow wri...
1750
1751
  				  top_cpuset.mems_allowed)) {
  			retval = -EINVAL;
53feb2976   Miao Xie   cpuset: alloc nod...
1752
1753
  			goto done;
  		}
0b2f630a2   Miao Xie   cpusets: restruct...
1754
  	}
33ad801df   Li Zefan   cpuset: record ol...
1755
1756
  
  	if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
0b2f630a2   Miao Xie   cpusets: restruct...
1757
1758
1759
  		retval = 0;		/* Too easy - nothing to do */
  		goto done;
  	}
645fcc9d2   Li Zefan   cpuset: don't all...
1760
  	retval = validate_change(cs, trialcs);
0b2f630a2   Miao Xie   cpusets: restruct...
1761
1762
  	if (retval < 0)
  		goto done;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1763
  	spin_lock_irq(&callback_lock);
645fcc9d2   Li Zefan   cpuset: don't all...
1764
  	cs->mems_allowed = trialcs->mems_allowed;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1765
  	spin_unlock_irq(&callback_lock);
0b2f630a2   Miao Xie   cpusets: restruct...
1766

734d45130   Li Zefan   cpuset: update cs...
1767
  	/* use trialcs->mems_allowed as a temp variable */
24ee3cf89   Alban Crequy   cpuset: use trial...
1768
  	update_nodemasks_hier(cs, &trialcs->mems_allowed);
0b2f630a2   Miao Xie   cpusets: restruct...
1769
1770
1771
  done:
  	return retval;
  }
77ef80c65   Yaowei Bai   kernel/cpuset: cu...
1772
  bool current_cpuset_is_being_rebound(void)
8793d854e   Paul Menage   Task Control Grou...
1773
  {
77ef80c65   Yaowei Bai   kernel/cpuset: cu...
1774
  	bool ret;
391acf970   Gu Zheng   cpuset,mempolicy:...
1775
1776
1777
1778
1779
1780
  
  	rcu_read_lock();
  	ret = task_cs(current) == cpuset_being_rebound;
  	rcu_read_unlock();
  
  	return ret;
8793d854e   Paul Menage   Task Control Grou...
1781
  }
5be7a4792   Paul Menage   Fix cpuset sched_...
1782
  static int update_relax_domain_level(struct cpuset *cs, s64 val)
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1783
  {
db7f47cf4   Paul Menage   cpusets: allow cp...
1784
  #ifdef CONFIG_SMP
60495e776   Peter Zijlstra   sched: Dynamic sc...
1785
  	if (val < -1 || val >= sched_domain_level_max)
30e0e1781   Li Zefan   cpuset: limit the...
1786
  		return -EINVAL;
db7f47cf4   Paul Menage   cpusets: allow cp...
1787
  #endif
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1788
1789
1790
  
  	if (val != cs->relax_domain_level) {
  		cs->relax_domain_level = val;
300ed6cbb   Li Zefan   cpuset: convert c...
1791
1792
  		if (!cpumask_empty(cs->cpus_allowed) &&
  		    is_sched_load_balance(cs))
699140ba8   Tejun Heo   cpuset: drop asyn...
1793
  			rebuild_sched_domains_locked();
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
1794
1795
1796
1797
  	}
  
  	return 0;
  }
72ec70299   Tejun Heo   cgroup: make task...
1798
  /**
950592f7b   Miao Xie   cpusets: update t...
1799
1800
   * update_tasks_flags - update the spread flags of tasks in the cpuset.
   * @cs: the cpuset in which each task's spread flags needs to be changed
950592f7b   Miao Xie   cpusets: update t...
1801
   *
d66393e54   Tejun Heo   cpuset: use css_t...
1802
1803
1804
   * Iterate through each task of @cs updating its spread flags.  As this
   * function is called with cpuset_mutex held, cpuset membership stays
   * stable.
950592f7b   Miao Xie   cpusets: update t...
1805
   */
d66393e54   Tejun Heo   cpuset: use css_t...
1806
  static void update_tasks_flags(struct cpuset *cs)
950592f7b   Miao Xie   cpusets: update t...
1807
  {
d66393e54   Tejun Heo   cpuset: use css_t...
1808
1809
  	struct css_task_iter it;
  	struct task_struct *task;
bc2fb7ed0   Tejun Heo   cgroup: add @flag...
1810
  	css_task_iter_start(&cs->css, 0, &it);
d66393e54   Tejun Heo   cpuset: use css_t...
1811
1812
1813
  	while ((task = css_task_iter_next(&it)))
  		cpuset_update_task_spread_flag(cs, task);
  	css_task_iter_end(&it);
950592f7b   Miao Xie   cpusets: update t...
1814
1815
1816
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1817
   * update_flag - read a 0 or a 1 in a file and update associated flag
786083667   Paul Menage   Cpuset hardwall f...
1818
1819
1820
   * bit:		the bit to update (see cpuset_flagbits_t)
   * cs:		the cpuset to update
   * turning_on: 	whether the flag is being set or cleared
053199edf   Paul Jackson   [PATCH] cpusets: ...
1821
   *
5d21cc2db   Tejun Heo   cpuset: replace c...
1822
   * Call with cpuset_mutex held.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1823
   */
700fe1ab9   Paul Menage   CGroup API files:...
1824
1825
  static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
  		       int turning_on)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1826
  {
645fcc9d2   Li Zefan   cpuset: don't all...
1827
  	struct cpuset *trialcs;
40b6a7623   Rakib Mullick   cpuset.c: remove ...
1828
  	int balance_flag_changed;
950592f7b   Miao Xie   cpusets: update t...
1829
  	int spread_flag_changed;
950592f7b   Miao Xie   cpusets: update t...
1830
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1831

645fcc9d2   Li Zefan   cpuset: don't all...
1832
1833
1834
  	trialcs = alloc_trial_cpuset(cs);
  	if (!trialcs)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1835
  	if (turning_on)
645fcc9d2   Li Zefan   cpuset: don't all...
1836
  		set_bit(bit, &trialcs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1837
  	else
645fcc9d2   Li Zefan   cpuset: don't all...
1838
  		clear_bit(bit, &trialcs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1839

645fcc9d2   Li Zefan   cpuset: don't all...
1840
  	err = validate_change(cs, trialcs);
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
1841
  	if (err < 0)
645fcc9d2   Li Zefan   cpuset: don't all...
1842
  		goto out;
029190c51   Paul Jackson   cpuset sched_load...
1843

029190c51   Paul Jackson   cpuset sched_load...
1844
  	balance_flag_changed = (is_sched_load_balance(cs) !=
645fcc9d2   Li Zefan   cpuset: don't all...
1845
  				is_sched_load_balance(trialcs));
029190c51   Paul Jackson   cpuset sched_load...
1846

950592f7b   Miao Xie   cpusets: update t...
1847
1848
  	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
  			|| (is_spread_page(cs) != is_spread_page(trialcs)));
8447a0fee   Vladimir Davydov   cpuset: convert c...
1849
  	spin_lock_irq(&callback_lock);
645fcc9d2   Li Zefan   cpuset: don't all...
1850
  	cs->flags = trialcs->flags;
8447a0fee   Vladimir Davydov   cpuset: convert c...
1851
  	spin_unlock_irq(&callback_lock);
85d7b9498   Dinakar Guniguntala   [PATCH] Dynamic s...
1852

300ed6cbb   Li Zefan   cpuset: convert c...
1853
  	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
699140ba8   Tejun Heo   cpuset: drop asyn...
1854
  		rebuild_sched_domains_locked();
029190c51   Paul Jackson   cpuset sched_load...
1855

950592f7b   Miao Xie   cpusets: update t...
1856
  	if (spread_flag_changed)
d66393e54   Tejun Heo   cpuset: use css_t...
1857
  		update_tasks_flags(cs);
645fcc9d2   Li Zefan   cpuset: don't all...
1858
  out:
bf92370c0   Waiman Long   cpuset: Simply al...
1859
  	free_cpuset(trialcs);
645fcc9d2   Li Zefan   cpuset: don't all...
1860
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1861
  }
053199edf   Paul Jackson   [PATCH] cpusets: ...
1862
  /*
ee8dde0cd   Waiman Long   cpuset: Add new v...
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
   * update_prstate - update partititon_root_state
   * cs:	the cpuset to update
   * val: 0 - disabled, 1 - enabled
   *
   * Call with cpuset_mutex held.
   */
  static int update_prstate(struct cpuset *cs, int val)
  {
  	int err;
  	struct cpuset *parent = parent_cs(cs);
  	struct tmpmasks tmp;
  
  	if ((val != 0) && (val != 1))
  		return -EINVAL;
  	if (val == cs->partition_root_state)
  		return 0;
  
  	/*
3881b8612   Waiman Long   cpuset: Add an er...
1881
  	 * Cannot force a partial or invalid partition root to a full
ee8dde0cd   Waiman Long   cpuset: Add new v...
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
  	 * partition root.
  	 */
  	if (val && cs->partition_root_state)
  		return -EINVAL;
  
  	if (alloc_cpumasks(NULL, &tmp))
  		return -ENOMEM;
  
  	err = -EINVAL;
  	if (!cs->partition_root_state) {
  		/*
  		 * Turning on partition root requires setting the
  		 * CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
  		 * cannot be NULL.
  		 */
  		if (cpumask_empty(cs->cpus_allowed))
  			goto out;
  
  		err = update_flag(CS_CPU_EXCLUSIVE, cs, 1);
  		if (err)
  			goto out;
  
  		err = update_parent_subparts_cpumask(cs, partcmd_enable,
  						     NULL, &tmp);
  		if (err) {
  			update_flag(CS_CPU_EXCLUSIVE, cs, 0);
  			goto out;
  		}
  		cs->partition_root_state = PRS_ENABLED;
  	} else {
3881b8612   Waiman Long   cpuset: Add an er...
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
  		/*
  		 * Turning off partition root will clear the
  		 * CS_CPU_EXCLUSIVE bit.
  		 */
  		if (cs->partition_root_state == PRS_ERROR) {
  			cs->partition_root_state = 0;
  			update_flag(CS_CPU_EXCLUSIVE, cs, 0);
  			err = 0;
  			goto out;
  		}
ee8dde0cd   Waiman Long   cpuset: Add new v...
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
  		err = update_parent_subparts_cpumask(cs, partcmd_disable,
  						     NULL, &tmp);
  		if (err)
  			goto out;
  
  		cs->partition_root_state = 0;
  
  		/* Turning off CS_CPU_EXCLUSIVE will not return error */
  		update_flag(CS_CPU_EXCLUSIVE, cs, 0);
  	}
  
  	/*
  	 * Update cpumask of parent's tasks except when it is the top
  	 * cpuset as some system daemons cannot be mapped to other CPUs.
  	 */
  	if (parent != &top_cpuset)
  		update_tasks_cpumask(parent);
4716909cc   Waiman Long   cpuset: Track cpu...
1939
1940
  	if (parent->child_ecpus_count)
  		update_sibling_cpumasks(parent, cs, &tmp);
ee8dde0cd   Waiman Long   cpuset: Add new v...
1941
1942
1943
  	rebuild_sched_domains_locked();
  out:
  	free_cpumasks(NULL, &tmp);
645fcc9d2   Li Zefan   cpuset: don't all...
1944
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1945
  }
053199edf   Paul Jackson   [PATCH] cpusets: ...
1946
  /*
80f7228b5   Adrian Bunk   typo fixes: occur...
1947
   * Frequency meter - How fast is some event occurring?
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
   *
   * These routines manage a digitally filtered, constant time based,
   * event frequency meter.  There are four routines:
   *   fmeter_init() - initialize a frequency meter.
   *   fmeter_markevent() - called each time the event happens.
   *   fmeter_getrate() - returns the recent rate of such events.
   *   fmeter_update() - internal routine used to update fmeter.
   *
   * A common data structure is passed to each of these routines,
   * which is used to keep track of the state required to manage the
   * frequency meter and its digital filter.
   *
   * The filter works on the number of events marked per unit time.
   * The filter is single-pole low-pass recursive (IIR).  The time unit
   * is 1 second.  Arithmetic is done using 32-bit integers scaled to
   * simulate 3 decimal digits of precision (multiplied by 1000).
   *
   * With an FM_COEF of 933, and a time base of 1 second, the filter
   * has a half-life of 10 seconds, meaning that if the events quit
   * happening, then the rate returned from the fmeter_getrate()
   * will be cut in half each 10 seconds, until it converges to zero.
   *
   * It is not worth doing a real infinitely recursive filter.  If more
   * than FM_MAXTICKS ticks have elapsed since the last filter event,
   * just compute FM_MAXTICKS ticks worth, by which point the level
   * will be stable.
   *
   * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
   * arithmetic overflow in the fmeter_update() routine.
   *
   * Given the simple 32 bit integer arithmetic used, this meter works
   * best for reporting rates between one per millisecond (msec) and
   * one per 32 (approx) seconds.  At constant rates faster than one
   * per msec it maxes out at values just under 1,000,000.  At constant
   * rates between one per msec, and one per second it will stabilize
   * to a value N*1000, where N is the rate of events per second.
   * At constant rates between one per second and one per 32 seconds,
   * it will be choppy, moving up on the seconds that have an event,
   * and then decaying until the next event.  At rates slower than
   * about one in 32 seconds, it decays all the way back to zero between
   * each event.
   */
  
  #define FM_COEF 933		/* coefficient for half-life of 10 secs */
d2b436580   Arnd Bergmann   cpuset: Replace a...
1992
  #define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
  #define FM_MAXCNT 1000000	/* limit cnt to avoid overflow */
  #define FM_SCALE 1000		/* faux fixed point scale */
  
  /* Initialize a frequency meter */
  static void fmeter_init(struct fmeter *fmp)
  {
  	fmp->cnt = 0;
  	fmp->val = 0;
  	fmp->time = 0;
  	spin_lock_init(&fmp->lock);
  }
  
  /* Internal meter update - process cnt events and update value */
  static void fmeter_update(struct fmeter *fmp)
  {
d2b436580   Arnd Bergmann   cpuset: Replace a...
2008
2009
2010
2011
2012
  	time64_t now;
  	u32 ticks;
  
  	now = ktime_get_seconds();
  	ticks = now - fmp->time;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
  
  	if (ticks == 0)
  		return;
  
  	ticks = min(FM_MAXTICKS, ticks);
  	while (ticks-- > 0)
  		fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
  	fmp->time = now;
  
  	fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
  	fmp->cnt = 0;
  }
  
  /* Process any previous ticks, then bump cnt by one (times scale). */
  static void fmeter_markevent(struct fmeter *fmp)
  {
  	spin_lock(&fmp->lock);
  	fmeter_update(fmp);
  	fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
  	spin_unlock(&fmp->lock);
  }
  
  /* Process any previous ticks, then return current value. */
  static int fmeter_getrate(struct fmeter *fmp)
  {
  	int val;
  
  	spin_lock(&fmp->lock);
  	fmeter_update(fmp);
  	val = fmp->val;
  	spin_unlock(&fmp->lock);
  	return val;
  }
57fce0a68   Tejun Heo   cpuset: don't use...
2046
  static struct cpuset *cpuset_attach_old_cs;
5d21cc2db   Tejun Heo   cpuset: replace c...
2047
  /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2048
  static int cpuset_can_attach(struct cgroup_taskset *tset)
f780bdb7c   Ben Blum   cgroups: add per-...
2049
  {
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2050
2051
  	struct cgroup_subsys_state *css;
  	struct cpuset *cs;
bb9d97b6d   Tejun Heo   cgroup: don't use...
2052
2053
  	struct task_struct *task;
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2054

57fce0a68   Tejun Heo   cpuset: don't use...
2055
  	/* used later by cpuset_attach() */
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2056
2057
  	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
  	cs = css_cs(css);
57fce0a68   Tejun Heo   cpuset: don't use...
2058

18695a120   Stephen Dickey   Revert "cgroup/cp...
2059
  	mutex_lock(&cpuset_mutex);
5d21cc2db   Tejun Heo   cpuset: replace c...
2060

aa6ec29be   Tejun Heo   cgroup: remove sa...
2061
  	/* allow moving tasks into an empty cpuset if on default hierarchy */
5d21cc2db   Tejun Heo   cpuset: replace c...
2062
  	ret = -ENOSPC;
b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
2063
  	if (!is_in_v2_mode() &&
88fa523bf   Li Zefan   cpuset: allow to ...
2064
  	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
5d21cc2db   Tejun Heo   cpuset: replace c...
2065
  		goto out_unlock;
9985b0bab   David Rientjes   sched: prevent bo...
2066

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2067
  	cgroup_taskset_for_each(task, css, tset) {
7f51412a4   Juri Lelli   sched/deadline: F...
2068
2069
  		ret = task_can_attach(task, cs->cpus_allowed);
  		if (ret)
5d21cc2db   Tejun Heo   cpuset: replace c...
2070
2071
2072
2073
  			goto out_unlock;
  		ret = security_task_setscheduler(task);
  		if (ret)
  			goto out_unlock;
bb9d97b6d   Tejun Heo   cgroup: don't use...
2074
  	}
f780bdb7c   Ben Blum   cgroups: add per-...
2075

452477fa6   Tejun Heo   cpuset: pin down ...
2076
2077
2078
2079
2080
  	/*
  	 * Mark attach is in progress.  This makes validate_change() fail
  	 * changes which zero cpus/mems_allowed.
  	 */
  	cs->attach_in_progress++;
5d21cc2db   Tejun Heo   cpuset: replace c...
2081
2082
  	ret = 0;
  out_unlock:
18695a120   Stephen Dickey   Revert "cgroup/cp...
2083
  	mutex_unlock(&cpuset_mutex);
5d21cc2db   Tejun Heo   cpuset: replace c...
2084
  	return ret;
8793d854e   Paul Menage   Task Control Grou...
2085
  }
f780bdb7c   Ben Blum   cgroups: add per-...
2086

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2087
  static void cpuset_cancel_attach(struct cgroup_taskset *tset)
452477fa6   Tejun Heo   cpuset: pin down ...
2088
  {
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2089
  	struct cgroup_subsys_state *css;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2090
2091
  
  	cgroup_taskset_first(tset, &css);
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2092

18695a120   Stephen Dickey   Revert "cgroup/cp...
2093
  	mutex_lock(&cpuset_mutex);
eb95419b0   Tejun Heo   cgroup: pass arou...
2094
  	css_cs(css)->attach_in_progress--;
18695a120   Stephen Dickey   Revert "cgroup/cp...
2095
  	mutex_unlock(&cpuset_mutex);
8793d854e   Paul Menage   Task Control Grou...
2096
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2097

4e4c9a140   Tejun Heo   cpuset: cleanup c...
2098
  /*
5d21cc2db   Tejun Heo   cpuset: replace c...
2099
   * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
4e4c9a140   Tejun Heo   cpuset: cleanup c...
2100
2101
2102
2103
   * but we can't allocate it dynamically there.  Define it global and
   * allocate from cpuset_init().
   */
  static cpumask_var_t cpus_attach;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2104
  static void cpuset_attach(struct cgroup_taskset *tset)
8793d854e   Paul Menage   Task Control Grou...
2105
  {
67bd2c598   Li Zefan   cpuset: remove un...
2106
  	/* static buf protected by cpuset_mutex */
4e4c9a140   Tejun Heo   cpuset: cleanup c...
2107
  	static nodemask_t cpuset_attach_nodemask_to;
bb9d97b6d   Tejun Heo   cgroup: don't use...
2108
  	struct task_struct *task;
4530eddb5   Tejun Heo   cgroup, memcg, cp...
2109
  	struct task_struct *leader;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2110
2111
  	struct cgroup_subsys_state *css;
  	struct cpuset *cs;
57fce0a68   Tejun Heo   cpuset: don't use...
2112
  	struct cpuset *oldcs = cpuset_attach_old_cs;
22fb52dd7   David Quigley   [PATCH] SELinux: ...
2113

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2114
2115
  	cgroup_taskset_first(tset, &css);
  	cs = css_cs(css);
18695a120   Stephen Dickey   Revert "cgroup/cp...
2116
  	mutex_lock(&cpuset_mutex);
5d21cc2db   Tejun Heo   cpuset: replace c...
2117

4e4c9a140   Tejun Heo   cpuset: cleanup c...
2118
2119
2120
2121
  	/* prepare for attach */
  	if (cs == &top_cpuset)
  		cpumask_copy(cpus_attach, cpu_possible_mask);
  	else
ae1c80238   Li Zefan   cpuset: apply cs-...
2122
  		guarantee_online_cpus(cs, cpus_attach);
4e4c9a140   Tejun Heo   cpuset: cleanup c...
2123

ae1c80238   Li Zefan   cpuset: apply cs-...
2124
  	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
4e4c9a140   Tejun Heo   cpuset: cleanup c...
2125

1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2126
  	cgroup_taskset_for_each(task, css, tset) {
bb9d97b6d   Tejun Heo   cgroup: don't use...
2127
2128
2129
2130
  		/*
  		 * can_attach beforehand should guarantee that this doesn't
  		 * fail.  TODO: have a better way to handle failure here
  		 */
4d1ac6a16   Satya Durga Srinivasu Prabhala   ANDROID: sched/cp...
2131
  		WARN_ON_ONCE(update_cpus_allowed(cs, task, cpus_attach));
bb9d97b6d   Tejun Heo   cgroup: don't use...
2132
2133
2134
2135
  
  		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
  		cpuset_update_task_spread_flag(cs, task);
  	}
22fb52dd7   David Quigley   [PATCH] SELinux: ...
2136

f780bdb7c   Ben Blum   cgroups: add per-...
2137
  	/*
4530eddb5   Tejun Heo   cgroup, memcg, cp...
2138
2139
  	 * Change mm for all threadgroup leaders. This is expensive and may
  	 * sleep and should be moved outside migration path proper.
f780bdb7c   Ben Blum   cgroups: add per-...
2140
  	 */
ae1c80238   Li Zefan   cpuset: apply cs-...
2141
  	cpuset_attach_nodemask_to = cs->effective_mems;
1f7dd3e5a   Tejun Heo   cgroup: fix handl...
2142
  	cgroup_taskset_for_each_leader(leader, css, tset) {
3df9ca0a2   Tejun Heo   cpuset: migrate m...
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
  		struct mm_struct *mm = get_task_mm(leader);
  
  		if (mm) {
  			mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
  
  			/*
  			 * old_mems_allowed is the same with mems_allowed
  			 * here, except if this task is being moved
  			 * automatically due to hotplug.  In that case
  			 * @mems_allowed has been updated and is empty, so
  			 * @old_mems_allowed is the right nodesets that we
  			 * migrate mm from.
  			 */
e93ad19d0   Tejun Heo   cpuset: make mm m...
2156
  			if (is_memory_migrate(cs))
3df9ca0a2   Tejun Heo   cpuset: migrate m...
2157
2158
  				cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
  						  &cpuset_attach_nodemask_to);
e93ad19d0   Tejun Heo   cpuset: make mm m...
2159
2160
  			else
  				mmput(mm);
f047cecf2   Li Zefan   cpuset: fix to mi...
2161
  		}
4225399a6   Paul Jackson   [PATCH] cpuset: r...
2162
  	}
452477fa6   Tejun Heo   cpuset: pin down ...
2163

33ad801df   Li Zefan   cpuset: record ol...
2164
  	cs->old_mems_allowed = cpuset_attach_nodemask_to;
02bb58637   Tejun Heo   cpuset: schedule ...
2165

452477fa6   Tejun Heo   cpuset: pin down ...
2166
  	cs->attach_in_progress--;
e44193d39   Li Zefan   cpuset: let hotpl...
2167
2168
  	if (!cs->attach_in_progress)
  		wake_up(&cpuset_attach_wq);
5d21cc2db   Tejun Heo   cpuset: replace c...
2169

18695a120   Stephen Dickey   Revert "cgroup/cp...
2170
  	mutex_unlock(&cpuset_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2171
2172
2173
2174
2175
  }
  
  /* The various types of files and directories in a cpuset file system */
  
  typedef enum {
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
2176
  	FILE_MEMORY_MIGRATE,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2177
2178
  	FILE_CPULIST,
  	FILE_MEMLIST,
afd1a8b3e   Li Zefan   cpuset: export ef...
2179
2180
  	FILE_EFFECTIVE_CPULIST,
  	FILE_EFFECTIVE_MEMLIST,
5cf8114d6   Waiman Long   cpuset: Expose cp...
2181
  	FILE_SUBPARTS_CPULIST,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2182
2183
  	FILE_CPU_EXCLUSIVE,
  	FILE_MEM_EXCLUSIVE,
786083667   Paul Menage   Cpuset hardwall f...
2184
  	FILE_MEM_HARDWALL,
029190c51   Paul Jackson   cpuset sched_load...
2185
  	FILE_SCHED_LOAD_BALANCE,
ee8dde0cd   Waiman Long   cpuset: Add new v...
2186
  	FILE_PARTITION_ROOT,
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
2187
  	FILE_SCHED_RELAX_DOMAIN_LEVEL,
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2188
2189
  	FILE_MEMORY_PRESSURE_ENABLED,
  	FILE_MEMORY_PRESSURE,
825a46af5   Paul Jackson   [PATCH] cpuset me...
2190
2191
  	FILE_SPREAD_PAGE,
  	FILE_SPREAD_SLAB,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2192
  } cpuset_filetype_t;
182446d08   Tejun Heo   cgroup: pass arou...
2193
2194
  static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
  			    u64 val)
700fe1ab9   Paul Menage   CGroup API files:...
2195
  {
182446d08   Tejun Heo   cgroup: pass arou...
2196
  	struct cpuset *cs = css_cs(css);
700fe1ab9   Paul Menage   CGroup API files:...
2197
  	cpuset_filetype_t type = cft->private;
a903f0865   Li Zefan   cpuset: fix the r...
2198
  	int retval = 0;
700fe1ab9   Paul Menage   CGroup API files:...
2199

d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2200
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
2201
  	mutex_lock(&cpuset_mutex);
a903f0865   Li Zefan   cpuset: fix the r...
2202
2203
  	if (!is_cpuset_online(cs)) {
  		retval = -ENODEV;
5d21cc2db   Tejun Heo   cpuset: replace c...
2204
  		goto out_unlock;
a903f0865   Li Zefan   cpuset: fix the r...
2205
  	}
700fe1ab9   Paul Menage   CGroup API files:...
2206
2207
  
  	switch (type) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2208
  	case FILE_CPU_EXCLUSIVE:
700fe1ab9   Paul Menage   CGroup API files:...
2209
  		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2210
2211
  		break;
  	case FILE_MEM_EXCLUSIVE:
700fe1ab9   Paul Menage   CGroup API files:...
2212
  		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2213
  		break;
786083667   Paul Menage   Cpuset hardwall f...
2214
2215
2216
  	case FILE_MEM_HARDWALL:
  		retval = update_flag(CS_MEM_HARDWALL, cs, val);
  		break;
029190c51   Paul Jackson   cpuset sched_load...
2217
  	case FILE_SCHED_LOAD_BALANCE:
700fe1ab9   Paul Menage   CGroup API files:...
2218
  		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
2219
  		break;
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
2220
  	case FILE_MEMORY_MIGRATE:
700fe1ab9   Paul Menage   CGroup API files:...
2221
  		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
45b07ef31   Paul Jackson   [PATCH] cpusets: ...
2222
  		break;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2223
  	case FILE_MEMORY_PRESSURE_ENABLED:
700fe1ab9   Paul Menage   CGroup API files:...
2224
  		cpuset_memory_pressure_enabled = !!val;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2225
  		break;
825a46af5   Paul Jackson   [PATCH] cpuset me...
2226
  	case FILE_SPREAD_PAGE:
700fe1ab9   Paul Menage   CGroup API files:...
2227
  		retval = update_flag(CS_SPREAD_PAGE, cs, val);
825a46af5   Paul Jackson   [PATCH] cpuset me...
2228
2229
  		break;
  	case FILE_SPREAD_SLAB:
700fe1ab9   Paul Menage   CGroup API files:...
2230
  		retval = update_flag(CS_SPREAD_SLAB, cs, val);
825a46af5   Paul Jackson   [PATCH] cpuset me...
2231
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2232
2233
  	default:
  		retval = -EINVAL;
700fe1ab9   Paul Menage   CGroup API files:...
2234
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2235
  	}
5d21cc2db   Tejun Heo   cpuset: replace c...
2236
  out_unlock:
18695a120   Stephen Dickey   Revert "cgroup/cp...
2237
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2238
  	put_online_cpus();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2239
2240
  	return retval;
  }
182446d08   Tejun Heo   cgroup: pass arou...
2241
2242
  static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
  			    s64 val)
5be7a4792   Paul Menage   Fix cpuset sched_...
2243
  {
182446d08   Tejun Heo   cgroup: pass arou...
2244
  	struct cpuset *cs = css_cs(css);
5be7a4792   Paul Menage   Fix cpuset sched_...
2245
  	cpuset_filetype_t type = cft->private;
5d21cc2db   Tejun Heo   cpuset: replace c...
2246
  	int retval = -ENODEV;
5be7a4792   Paul Menage   Fix cpuset sched_...
2247

d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2248
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
2249
  	mutex_lock(&cpuset_mutex);
5d21cc2db   Tejun Heo   cpuset: replace c...
2250
2251
  	if (!is_cpuset_online(cs))
  		goto out_unlock;
e37123953   Paul Menage   cgroup files: rem...
2252

5be7a4792   Paul Menage   Fix cpuset sched_...
2253
2254
2255
2256
2257
2258
2259
2260
  	switch (type) {
  	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
  		retval = update_relax_domain_level(cs, val);
  		break;
  	default:
  		retval = -EINVAL;
  		break;
  	}
5d21cc2db   Tejun Heo   cpuset: replace c...
2261
  out_unlock:
18695a120   Stephen Dickey   Revert "cgroup/cp...
2262
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2263
  	put_online_cpus();
5be7a4792   Paul Menage   Fix cpuset sched_...
2264
2265
  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2266
  /*
e37123953   Paul Menage   cgroup files: rem...
2267
2268
   * Common handling for a write to a "cpus" or "mems" file.
   */
451af504d   Tejun Heo   cgroup: replace c...
2269
2270
  static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
  				    char *buf, size_t nbytes, loff_t off)
e37123953   Paul Menage   cgroup files: rem...
2271
  {
451af504d   Tejun Heo   cgroup: replace c...
2272
  	struct cpuset *cs = css_cs(of_css(of));
645fcc9d2   Li Zefan   cpuset: don't all...
2273
  	struct cpuset *trialcs;
5d21cc2db   Tejun Heo   cpuset: replace c...
2274
  	int retval = -ENODEV;
e37123953   Paul Menage   cgroup files: rem...
2275

451af504d   Tejun Heo   cgroup: replace c...
2276
  	buf = strstrip(buf);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
  	/*
  	 * CPU or memory hotunplug may leave @cs w/o any execution
  	 * resources, in which case the hotplug code asynchronously updates
  	 * configuration and transfers all tasks to the nearest ancestor
  	 * which can execute.
  	 *
  	 * As writes to "cpus" or "mems" may restore @cs's execution
  	 * resources, wait for the previously scheduled operations before
  	 * proceeding, so that we don't end up keep removing tasks added
  	 * after execution capability is restored.
76bb5ab8f   Tejun Heo   cpuset: break ker...
2287
2288
2289
2290
2291
2292
2293
2294
  	 *
  	 * cpuset_hotplug_work calls back into cgroup core via
  	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
  	 * operation like this one can lead to a deadlock through kernfs
  	 * active_ref protection.  Let's break the protection.  Losing the
  	 * protection is okay as we check whether @cs is online after
  	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
  	 * hierarchies.
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2295
  	 */
76bb5ab8f   Tejun Heo   cpuset: break ker...
2296
2297
  	css_get(&cs->css);
  	kernfs_break_active_protection(of->kn);
3a5a6d0c2   Tejun Heo   cpuset: don't nes...
2298
  	flush_work(&cpuset_hotplug_work);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2299
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
2300
  	mutex_lock(&cpuset_mutex);
5d21cc2db   Tejun Heo   cpuset: replace c...
2301
2302
  	if (!is_cpuset_online(cs))
  		goto out_unlock;
e37123953   Paul Menage   cgroup files: rem...
2303

645fcc9d2   Li Zefan   cpuset: don't all...
2304
  	trialcs = alloc_trial_cpuset(cs);
b75f38d65   Li Zefan   cpuset: add a mis...
2305
2306
  	if (!trialcs) {
  		retval = -ENOMEM;
5d21cc2db   Tejun Heo   cpuset: replace c...
2307
  		goto out_unlock;
b75f38d65   Li Zefan   cpuset: add a mis...
2308
  	}
645fcc9d2   Li Zefan   cpuset: don't all...
2309

451af504d   Tejun Heo   cgroup: replace c...
2310
  	switch (of_cft(of)->private) {
e37123953   Paul Menage   cgroup files: rem...
2311
  	case FILE_CPULIST:
645fcc9d2   Li Zefan   cpuset: don't all...
2312
  		retval = update_cpumask(cs, trialcs, buf);
e37123953   Paul Menage   cgroup files: rem...
2313
2314
  		break;
  	case FILE_MEMLIST:
645fcc9d2   Li Zefan   cpuset: don't all...
2315
  		retval = update_nodemask(cs, trialcs, buf);
e37123953   Paul Menage   cgroup files: rem...
2316
2317
2318
2319
2320
  		break;
  	default:
  		retval = -EINVAL;
  		break;
  	}
645fcc9d2   Li Zefan   cpuset: don't all...
2321

bf92370c0   Waiman Long   cpuset: Simply al...
2322
  	free_cpuset(trialcs);
5d21cc2db   Tejun Heo   cpuset: replace c...
2323
  out_unlock:
18695a120   Stephen Dickey   Revert "cgroup/cp...
2324
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2325
  	put_online_cpus();
76bb5ab8f   Tejun Heo   cpuset: break ker...
2326
2327
  	kernfs_unbreak_active_protection(of->kn);
  	css_put(&cs->css);
e93ad19d0   Tejun Heo   cpuset: make mm m...
2328
  	flush_workqueue(cpuset_migrate_mm_wq);
451af504d   Tejun Heo   cgroup: replace c...
2329
  	return retval ?: nbytes;
e37123953   Paul Menage   cgroup files: rem...
2330
2331
2332
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2333
2334
2335
2336
2337
2338
   * These ascii lists should be read in a single call, by using a user
   * buffer large enough to hold the entire map.  If read in smaller
   * chunks, there is no guarantee of atomicity.  Since the display format
   * used, list of ranges of sequential numbers, is variable length,
   * and since these maps can change value dynamically, one could read
   * gibberish by doing partial reads while a list was changing.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2339
   */
2da8ca822   Tejun Heo   cgroup: replace c...
2340
  static int cpuset_common_seq_show(struct seq_file *sf, void *v)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2341
  {
2da8ca822   Tejun Heo   cgroup: replace c...
2342
2343
  	struct cpuset *cs = css_cs(seq_css(sf));
  	cpuset_filetype_t type = seq_cft(sf)->private;
51ffe4117   Tejun Heo   cpuset: convert a...
2344
  	int ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2345

8447a0fee   Vladimir Davydov   cpuset: convert c...
2346
  	spin_lock_irq(&callback_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2347
2348
2349
  
  	switch (type) {
  	case FILE_CPULIST:
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
2350
2351
  		seq_printf(sf, "%*pbl
  ", cpumask_pr_args(cs->cpus_requested));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2352
2353
  		break;
  	case FILE_MEMLIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
2354
2355
  		seq_printf(sf, "%*pbl
  ", nodemask_pr_args(&cs->mems_allowed));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2356
  		break;
afd1a8b3e   Li Zefan   cpuset: export ef...
2357
  	case FILE_EFFECTIVE_CPULIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
2358
2359
  		seq_printf(sf, "%*pbl
  ", cpumask_pr_args(cs->effective_cpus));
afd1a8b3e   Li Zefan   cpuset: export ef...
2360
2361
  		break;
  	case FILE_EFFECTIVE_MEMLIST:
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
2362
2363
  		seq_printf(sf, "%*pbl
  ", nodemask_pr_args(&cs->effective_mems));
afd1a8b3e   Li Zefan   cpuset: export ef...
2364
  		break;
5cf8114d6   Waiman Long   cpuset: Expose cp...
2365
2366
2367
2368
  	case FILE_SUBPARTS_CPULIST:
  		seq_printf(sf, "%*pbl
  ", cpumask_pr_args(cs->subparts_cpus));
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2369
  	default:
51ffe4117   Tejun Heo   cpuset: convert a...
2370
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2371
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2372

8447a0fee   Vladimir Davydov   cpuset: convert c...
2373
  	spin_unlock_irq(&callback_lock);
51ffe4117   Tejun Heo   cpuset: convert a...
2374
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2375
  }
182446d08   Tejun Heo   cgroup: pass arou...
2376
  static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
700fe1ab9   Paul Menage   CGroup API files:...
2377
  {
182446d08   Tejun Heo   cgroup: pass arou...
2378
  	struct cpuset *cs = css_cs(css);
700fe1ab9   Paul Menage   CGroup API files:...
2379
2380
2381
2382
2383
2384
  	cpuset_filetype_t type = cft->private;
  	switch (type) {
  	case FILE_CPU_EXCLUSIVE:
  		return is_cpu_exclusive(cs);
  	case FILE_MEM_EXCLUSIVE:
  		return is_mem_exclusive(cs);
786083667   Paul Menage   Cpuset hardwall f...
2385
2386
  	case FILE_MEM_HARDWALL:
  		return is_mem_hardwall(cs);
700fe1ab9   Paul Menage   CGroup API files:...
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
  	case FILE_SCHED_LOAD_BALANCE:
  		return is_sched_load_balance(cs);
  	case FILE_MEMORY_MIGRATE:
  		return is_memory_migrate(cs);
  	case FILE_MEMORY_PRESSURE_ENABLED:
  		return cpuset_memory_pressure_enabled;
  	case FILE_MEMORY_PRESSURE:
  		return fmeter_getrate(&cs->fmeter);
  	case FILE_SPREAD_PAGE:
  		return is_spread_page(cs);
  	case FILE_SPREAD_SLAB:
  		return is_spread_slab(cs);
  	default:
  		BUG();
  	}
cf417141c   Max Krasnyansky   sched, cpuset: re...
2402
2403
2404
  
  	/* Unreachable but makes gcc happy */
  	return 0;
700fe1ab9   Paul Menage   CGroup API files:...
2405
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2406

182446d08   Tejun Heo   cgroup: pass arou...
2407
  static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
5be7a4792   Paul Menage   Fix cpuset sched_...
2408
  {
182446d08   Tejun Heo   cgroup: pass arou...
2409
  	struct cpuset *cs = css_cs(css);
5be7a4792   Paul Menage   Fix cpuset sched_...
2410
2411
2412
2413
2414
2415
2416
  	cpuset_filetype_t type = cft->private;
  	switch (type) {
  	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
  		return cs->relax_domain_level;
  	default:
  		BUG();
  	}
cf417141c   Max Krasnyansky   sched, cpuset: re...
2417
2418
2419
  
  	/* Unrechable but makes gcc happy */
  	return 0;
5be7a4792   Paul Menage   Fix cpuset sched_...
2420
  }
bb5b553c3   Waiman Long   cpuset: Use descr...
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
  static int sched_partition_show(struct seq_file *seq, void *v)
  {
  	struct cpuset *cs = css_cs(seq_css(seq));
  
  	switch (cs->partition_root_state) {
  	case PRS_ENABLED:
  		seq_puts(seq, "root
  ");
  		break;
  	case PRS_DISABLED:
  		seq_puts(seq, "member
  ");
  		break;
  	case PRS_ERROR:
  		seq_puts(seq, "root invalid
  ");
  		break;
  	}
  	return 0;
  }
  
  static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
  				     size_t nbytes, loff_t off)
  {
  	struct cpuset *cs = css_cs(of_css(of));
  	int val;
  	int retval = -ENODEV;
  
  	buf = strstrip(buf);
  
  	/*
b1e3aeb11   Tejun Heo   cpuset: Minor cgr...
2452
  	 * Convert "root" to ENABLED, and convert "member" to DISABLED.
bb5b553c3   Waiman Long   cpuset: Use descr...
2453
  	 */
b1e3aeb11   Tejun Heo   cpuset: Minor cgr...
2454
  	if (!strcmp(buf, "root"))
bb5b553c3   Waiman Long   cpuset: Use descr...
2455
  		val = PRS_ENABLED;
b1e3aeb11   Tejun Heo   cpuset: Minor cgr...
2456
  	else if (!strcmp(buf, "member"))
bb5b553c3   Waiman Long   cpuset: Use descr...
2457
2458
2459
2460
2461
  		val = PRS_DISABLED;
  	else
  		return -EINVAL;
  
  	css_get(&cs->css);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2462
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
2463
  	mutex_lock(&cpuset_mutex);
bb5b553c3   Waiman Long   cpuset: Use descr...
2464
2465
2466
2467
2468
  	if (!is_cpuset_online(cs))
  		goto out_unlock;
  
  	retval = update_prstate(cs, val);
  out_unlock:
18695a120   Stephen Dickey   Revert "cgroup/cp...
2469
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2470
  	put_online_cpus();
bb5b553c3   Waiman Long   cpuset: Use descr...
2471
2472
2473
  	css_put(&cs->css);
  	return retval ?: nbytes;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2474
2475
2476
2477
  
  /*
   * for the common functions, 'private' gives the type of file
   */
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2478
  static struct cftype legacy_files[] = {
addf2c739   Paul Menage   Cpuset hardwall f...
2479
2480
  	{
  		.name = "cpus",
2da8ca822   Tejun Heo   cgroup: replace c...
2481
  		.seq_show = cpuset_common_seq_show,
451af504d   Tejun Heo   cgroup: replace c...
2482
  		.write = cpuset_write_resmask,
e37123953   Paul Menage   cgroup files: rem...
2483
  		.max_write_len = (100U + 6 * NR_CPUS),
addf2c739   Paul Menage   Cpuset hardwall f...
2484
2485
2486
2487
2488
  		.private = FILE_CPULIST,
  	},
  
  	{
  		.name = "mems",
2da8ca822   Tejun Heo   cgroup: replace c...
2489
  		.seq_show = cpuset_common_seq_show,
451af504d   Tejun Heo   cgroup: replace c...
2490
  		.write = cpuset_write_resmask,
e37123953   Paul Menage   cgroup files: rem...
2491
  		.max_write_len = (100U + 6 * MAX_NUMNODES),
addf2c739   Paul Menage   Cpuset hardwall f...
2492
2493
2494
2495
  		.private = FILE_MEMLIST,
  	},
  
  	{
afd1a8b3e   Li Zefan   cpuset: export ef...
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
  		.name = "effective_cpus",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_EFFECTIVE_CPULIST,
  	},
  
  	{
  		.name = "effective_mems",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_EFFECTIVE_MEMLIST,
  	},
  
  	{
addf2c739   Paul Menage   Cpuset hardwall f...
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
  		.name = "cpu_exclusive",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_CPU_EXCLUSIVE,
  	},
  
  	{
  		.name = "mem_exclusive",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEM_EXCLUSIVE,
  	},
  
  	{
786083667   Paul Menage   Cpuset hardwall f...
2522
2523
2524
2525
2526
2527
2528
  		.name = "mem_hardwall",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEM_HARDWALL,
  	},
  
  	{
addf2c739   Paul Menage   Cpuset hardwall f...
2529
2530
2531
2532
2533
2534
2535
2536
  		.name = "sched_load_balance",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_SCHED_LOAD_BALANCE,
  	},
  
  	{
  		.name = "sched_relax_domain_level",
5be7a4792   Paul Menage   Fix cpuset sched_...
2537
2538
  		.read_s64 = cpuset_read_s64,
  		.write_s64 = cpuset_write_s64,
addf2c739   Paul Menage   Cpuset hardwall f...
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
  		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
  	},
  
  	{
  		.name = "memory_migrate",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEMORY_MIGRATE,
  	},
  
  	{
  		.name = "memory_pressure",
  		.read_u64 = cpuset_read_u64,
1c08c22c8   Waiman Long   cpuset: Fix incor...
2552
  		.private = FILE_MEMORY_PRESSURE,
addf2c739   Paul Menage   Cpuset hardwall f...
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
  	},
  
  	{
  		.name = "memory_spread_page",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_SPREAD_PAGE,
  	},
  
  	{
  		.name = "memory_spread_slab",
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_SPREAD_SLAB,
  	},
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2568

4baf6e332   Tejun Heo   cgroup: convert a...
2569
2570
2571
2572
2573
2574
2575
  	{
  		.name = "memory_pressure_enabled",
  		.flags = CFTYPE_ONLY_ON_ROOT,
  		.read_u64 = cpuset_read_u64,
  		.write_u64 = cpuset_write_u64,
  		.private = FILE_MEMORY_PRESSURE_ENABLED,
  	},
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2576

4baf6e332   Tejun Heo   cgroup: convert a...
2577
2578
  	{ }	/* terminate */
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2579
2580
  
  /*
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
   * This is currently a minimal set for the default hierarchy. It can be
   * expanded later on by migrating more features and control files from v1.
   */
  static struct cftype dfl_files[] = {
  	{
  		.name = "cpus",
  		.seq_show = cpuset_common_seq_show,
  		.write = cpuset_write_resmask,
  		.max_write_len = (100U + 6 * NR_CPUS),
  		.private = FILE_CPULIST,
  		.flags = CFTYPE_NOT_ON_ROOT,
  	},
  
  	{
  		.name = "mems",
  		.seq_show = cpuset_common_seq_show,
  		.write = cpuset_write_resmask,
  		.max_write_len = (100U + 6 * MAX_NUMNODES),
  		.private = FILE_MEMLIST,
  		.flags = CFTYPE_NOT_ON_ROOT,
  	},
  
  	{
  		.name = "cpus.effective",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_EFFECTIVE_CPULIST,
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2607
2608
2609
2610
2611
2612
  	},
  
  	{
  		.name = "mems.effective",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_EFFECTIVE_MEMLIST,
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2613
  	},
ee8dde0cd   Waiman Long   cpuset: Add new v...
2614
  	{
b1e3aeb11   Tejun Heo   cpuset: Minor cgr...
2615
  		.name = "cpus.partition",
bb5b553c3   Waiman Long   cpuset: Use descr...
2616
2617
  		.seq_show = sched_partition_show,
  		.write = sched_partition_write,
ee8dde0cd   Waiman Long   cpuset: Add new v...
2618
2619
2620
  		.private = FILE_PARTITION_ROOT,
  		.flags = CFTYPE_NOT_ON_ROOT,
  	},
5cf8114d6   Waiman Long   cpuset: Expose cp...
2621
2622
2623
2624
2625
2626
  	{
  		.name = "cpus.subpartitions",
  		.seq_show = cpuset_common_seq_show,
  		.private = FILE_SUBPARTS_CPULIST,
  		.flags = CFTYPE_DEBUG,
  	},
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2627
2628
2629
2630
2631
  	{ }	/* terminate */
  };
  
  
  /*
92fb97487   Tejun Heo   cgroup: rename ->...
2632
   *	cpuset_css_alloc - allocate a cpuset css
c9e5fe66f   Li Zefan   cpuset: rename @c...
2633
   *	cgrp:	control group that the new cpuset will be part of
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2634
   */
eb95419b0   Tejun Heo   cgroup: pass arou...
2635
2636
  static struct cgroup_subsys_state *
  cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2637
  {
c8f699bb5   Tejun Heo   cpuset: introduce...
2638
  	struct cpuset *cs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2639

eb95419b0   Tejun Heo   cgroup: pass arou...
2640
  	if (!parent_css)
8793d854e   Paul Menage   Task Control Grou...
2641
  		return &top_cpuset.css;
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2642

c8f699bb5   Tejun Heo   cpuset: introduce...
2643
  	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2644
  	if (!cs)
8793d854e   Paul Menage   Task Control Grou...
2645
  		return ERR_PTR(-ENOMEM);
bf92370c0   Waiman Long   cpuset: Simply al...
2646
2647
2648
2649
2650
  
  	if (alloc_cpumasks(cs, NULL)) {
  		kfree(cs);
  		return ERR_PTR(-ENOMEM);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2651

029190c51   Paul Jackson   cpuset sched_load...
2652
  	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
f9a86fcbb   Mike Travis   cpuset: modify cp...
2653
  	nodes_clear(cs->mems_allowed);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2654
  	nodes_clear(cs->effective_mems);
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2655
  	fmeter_init(&cs->fmeter);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
2656
  	cs->relax_domain_level = -1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2657

c8f699bb5   Tejun Heo   cpuset: introduce...
2658
2659
  	return &cs->css;
  }
eb95419b0   Tejun Heo   cgroup: pass arou...
2660
  static int cpuset_css_online(struct cgroup_subsys_state *css)
c8f699bb5   Tejun Heo   cpuset: introduce...
2661
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
2662
  	struct cpuset *cs = css_cs(css);
c431069fe   Tejun Heo   cpuset: remove cp...
2663
  	struct cpuset *parent = parent_cs(cs);
ae8086ce1   Tejun Heo   cpuset: introduce...
2664
  	struct cpuset *tmp_cs;
492eb21b9   Tejun Heo   cgroup: make hier...
2665
  	struct cgroup_subsys_state *pos_css;
c8f699bb5   Tejun Heo   cpuset: introduce...
2666
2667
2668
  
  	if (!parent)
  		return 0;
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2669
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
2670
  	mutex_lock(&cpuset_mutex);
5d21cc2db   Tejun Heo   cpuset: replace c...
2671

efeb77b2f   Tejun Heo   cpuset: introduce...
2672
  	set_bit(CS_ONLINE, &cs->flags);
c8f699bb5   Tejun Heo   cpuset: introduce...
2673
2674
2675
2676
  	if (is_spread_page(parent))
  		set_bit(CS_SPREAD_PAGE, &cs->flags);
  	if (is_spread_slab(parent))
  		set_bit(CS_SPREAD_SLAB, &cs->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2677

664eeddee   Mel Gorman   mm: page_alloc: u...
2678
  	cpuset_inc();
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2679

8447a0fee   Vladimir Davydov   cpuset: convert c...
2680
  	spin_lock_irq(&callback_lock);
b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
2681
  	if (is_in_v2_mode()) {
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2682
2683
  		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
  		cs->effective_mems = parent->effective_mems;
4716909cc   Waiman Long   cpuset: Track cpu...
2684
2685
  		cs->use_parent_ecpus = true;
  		parent->child_ecpus_count++;
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2686
  	}
8447a0fee   Vladimir Davydov   cpuset: convert c...
2687
  	spin_unlock_irq(&callback_lock);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2688

eb95419b0   Tejun Heo   cgroup: pass arou...
2689
  	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
5d21cc2db   Tejun Heo   cpuset: replace c...
2690
  		goto out_unlock;
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
  
  	/*
  	 * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
  	 * set.  This flag handling is implemented in cgroup core for
  	 * histrical reasons - the flag may be specified during mount.
  	 *
  	 * Currently, if any sibling cpusets have exclusive cpus or mem, we
  	 * refuse to clone the configuration - thereby refusing the task to
  	 * be entered, and as a result refusing the sys_unshare() or
  	 * clone() which initiated it.  If this becomes a problem for some
  	 * users who wish to allow that scenario, then this could be
  	 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
  	 * (and likewise for mems) to the new cgroup.
  	 */
ae8086ce1   Tejun Heo   cpuset: introduce...
2705
  	rcu_read_lock();
492eb21b9   Tejun Heo   cgroup: make hier...
2706
  	cpuset_for_each_child(tmp_cs, pos_css, parent) {
ae8086ce1   Tejun Heo   cpuset: introduce...
2707
2708
  		if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
  			rcu_read_unlock();
5d21cc2db   Tejun Heo   cpuset: replace c...
2709
  			goto out_unlock;
ae8086ce1   Tejun Heo   cpuset: introduce...
2710
  		}
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2711
  	}
ae8086ce1   Tejun Heo   cpuset: introduce...
2712
  	rcu_read_unlock();
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2713

8447a0fee   Vladimir Davydov   cpuset: convert c...
2714
  	spin_lock_irq(&callback_lock);
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2715
  	cs->mems_allowed = parent->mems_allowed;
790317e1b   Zefan Li   cpuset: initializ...
2716
  	cs->effective_mems = parent->mems_allowed;
033fa1c5f   Tejun Heo   cgroup, cpuset: r...
2717
  	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
2718
  	cpumask_copy(cs->cpus_requested, parent->cpus_requested);
790317e1b   Zefan Li   cpuset: initializ...
2719
  	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
cea74465e   Dan Carpenter   cpuset: lock vs u...
2720
  	spin_unlock_irq(&callback_lock);
5d21cc2db   Tejun Heo   cpuset: replace c...
2721
  out_unlock:
18695a120   Stephen Dickey   Revert "cgroup/cp...
2722
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2723
  	put_online_cpus();
c8f699bb5   Tejun Heo   cpuset: introduce...
2724
2725
  	return 0;
  }
0b9e6965a   Zhao Hongjiang   cpuset: relocate ...
2726
2727
2728
  /*
   * If the cpuset being removed has its flag 'sched_load_balance'
   * enabled, then simulate turning sched_load_balance off, which
ee8dde0cd   Waiman Long   cpuset: Add new v...
2729
2730
2731
2732
2733
2734
   * will call rebuild_sched_domains_locked(). That is not needed
   * in the default hierarchy where only changes in partition
   * will cause repartitioning.
   *
   * If the cpuset has the 'sched.partition' flag enabled, simulate
   * turning 'sched.partition" off.
0b9e6965a   Zhao Hongjiang   cpuset: relocate ...
2735
   */
eb95419b0   Tejun Heo   cgroup: pass arou...
2736
  static void cpuset_css_offline(struct cgroup_subsys_state *css)
c8f699bb5   Tejun Heo   cpuset: introduce...
2737
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
2738
  	struct cpuset *cs = css_cs(css);
c8f699bb5   Tejun Heo   cpuset: introduce...
2739

d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2740
  	get_online_cpus();
18695a120   Stephen Dickey   Revert "cgroup/cp...
2741
  	mutex_lock(&cpuset_mutex);
c8f699bb5   Tejun Heo   cpuset: introduce...
2742

ee8dde0cd   Waiman Long   cpuset: Add new v...
2743
2744
2745
2746
2747
  	if (is_partition_root(cs))
  		update_prstate(cs, 0);
  
  	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  	    is_sched_load_balance(cs))
c8f699bb5   Tejun Heo   cpuset: introduce...
2748
  		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
4716909cc   Waiman Long   cpuset: Track cpu...
2749
2750
2751
2752
2753
2754
  	if (cs->use_parent_ecpus) {
  		struct cpuset *parent = parent_cs(cs);
  
  		cs->use_parent_ecpus = false;
  		parent->child_ecpus_count--;
  	}
664eeddee   Mel Gorman   mm: page_alloc: u...
2755
  	cpuset_dec();
efeb77b2f   Tejun Heo   cpuset: introduce...
2756
  	clear_bit(CS_ONLINE, &cs->flags);
c8f699bb5   Tejun Heo   cpuset: introduce...
2757

18695a120   Stephen Dickey   Revert "cgroup/cp...
2758
  	mutex_unlock(&cpuset_mutex);
d74b27d63   Juri Lelli   cgroup/cpuset: Ch...
2759
  	put_online_cpus();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2760
  }
eb95419b0   Tejun Heo   cgroup: pass arou...
2761
  static void cpuset_css_free(struct cgroup_subsys_state *css)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2762
  {
eb95419b0   Tejun Heo   cgroup: pass arou...
2763
  	struct cpuset *cs = css_cs(css);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2764

bf92370c0   Waiman Long   cpuset: Simply al...
2765
  	free_cpuset(cs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2766
  }
39bd0d15e   Li Zefan   cpuset: initializ...
2767
2768
  static void cpuset_bind(struct cgroup_subsys_state *root_css)
  {
18695a120   Stephen Dickey   Revert "cgroup/cp...
2769
  	mutex_lock(&cpuset_mutex);
8447a0fee   Vladimir Davydov   cpuset: convert c...
2770
  	spin_lock_irq(&callback_lock);
39bd0d15e   Li Zefan   cpuset: initializ...
2771

b8d1b8ee9   Waiman Long   cpuset: Allow v2 ...
2772
  	if (is_in_v2_mode()) {
39bd0d15e   Li Zefan   cpuset: initializ...
2773
2774
2775
2776
2777
2778
2779
  		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
  		top_cpuset.mems_allowed = node_possible_map;
  	} else {
  		cpumask_copy(top_cpuset.cpus_allowed,
  			     top_cpuset.effective_cpus);
  		top_cpuset.mems_allowed = top_cpuset.effective_mems;
  	}
8447a0fee   Vladimir Davydov   cpuset: convert c...
2780
  	spin_unlock_irq(&callback_lock);
18695a120   Stephen Dickey   Revert "cgroup/cp...
2781
  	mutex_unlock(&cpuset_mutex);
39bd0d15e   Li Zefan   cpuset: initializ...
2782
  }
06f4e9489   Zefan Li   cpuset: make sure...
2783
2784
2785
2786
2787
  /*
   * Make sure the new task conform to the current state of its parent,
   * which could have been changed by cpuset just after it inherits the
   * state from the parent and before it sits on the cgroup's task list.
   */
8a15b8174   Wei Yongjun   cpuset: fix non s...
2788
  static void cpuset_fork(struct task_struct *task)
06f4e9489   Zefan Li   cpuset: make sure...
2789
2790
2791
  {
  	if (task_css_is_root(task, cpuset_cgrp_id))
  		return;
3bd370625   Sebastian Andrzej Siewior   sched/core: Provi...
2792
  	set_cpus_allowed_ptr(task, current->cpus_ptr);
06f4e9489   Zefan Li   cpuset: make sure...
2793
2794
  	task->mems_allowed = current->mems_allowed;
  }
073219e99   Tejun Heo   cgroup: clean up ...
2795
  struct cgroup_subsys cpuset_cgrp_subsys = {
39bd0d15e   Li Zefan   cpuset: initializ...
2796
2797
2798
2799
2800
2801
2802
  	.css_alloc	= cpuset_css_alloc,
  	.css_online	= cpuset_css_online,
  	.css_offline	= cpuset_css_offline,
  	.css_free	= cpuset_css_free,
  	.can_attach	= cpuset_can_attach,
  	.cancel_attach	= cpuset_cancel_attach,
  	.attach		= cpuset_attach,
5cf1cacb4   Tejun Heo   cgroup, cpuset: r...
2803
  	.post_attach	= cpuset_post_attach,
39bd0d15e   Li Zefan   cpuset: initializ...
2804
  	.bind		= cpuset_bind,
06f4e9489   Zefan Li   cpuset: make sure...
2805
  	.fork		= cpuset_fork,
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2806
2807
  	.legacy_cftypes	= legacy_files,
  	.dfl_cftypes	= dfl_files,
b38e42e96   Tejun Heo   cgroup: convert c...
2808
  	.early_init	= true,
4ec22e9c5   Waiman Long   cpuset: Enable cp...
2809
  	.threaded	= true,
8793d854e   Paul Menage   Task Control Grou...
2810
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2811
2812
2813
  /**
   * cpuset_init - initialize cpusets at system boot
   *
d5f68d330   Al Viro   cpuset: move moun...
2814
   * Description: Initialize top_cpuset
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2815
2816
2817
2818
   **/
  
  int __init cpuset_init(void)
  {
75fa8e5d3   Nicholas Mc Guire   cgroup: switch to...
2819
2820
  	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
  	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
bf92370c0   Waiman Long   cpuset: Simply al...
2821
  	BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
2822
  	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL));
58568d2a8   Miao Xie   cpuset,mm: update...
2823

300ed6cbb   Li Zefan   cpuset: convert c...
2824
  	cpumask_setall(top_cpuset.cpus_allowed);
c8dc4422c   Riley Andrews   ANDROID: cpuset: ...
2825
  	cpumask_setall(top_cpuset.cpus_requested);
f9a86fcbb   Mike Travis   cpuset: modify cp...
2826
  	nodes_setall(top_cpuset.mems_allowed);
e2b9a3d7d   Li Zefan   cpuset: add cs->e...
2827
2828
  	cpumask_setall(top_cpuset.effective_cpus);
  	nodes_setall(top_cpuset.effective_mems);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2829

3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
2830
  	fmeter_init(&top_cpuset.fmeter);
029190c51   Paul Jackson   cpuset sched_load...
2831
  	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
1d3504fcf   Hidetoshi Seto   sched, cpuset: cu...
2832
  	top_cpuset.relax_domain_level = -1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2833

75fa8e5d3   Nicholas Mc Guire   cgroup: switch to...
2834
  	BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
2341d1b65   Li Zefan   cpuset: convert c...
2835

8793d854e   Paul Menage   Task Control Grou...
2836
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2837
  }
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2838
  /*
cf417141c   Max Krasnyansky   sched, cpuset: re...
2839
   * If CPU and/or memory hotplug handlers, below, unplug any CPUs
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2840
2841
   * or memory nodes, we need to walk over the cpuset hierarchy,
   * removing that CPU or node from all cpusets.  If this removes the
956db3ca0   Cliff Wickman   hotplug cpu: move...
2842
2843
   * last CPU or node from a cpuset, then move the tasks in the empty
   * cpuset to its next-highest non-empty parent.
b1aac8bb8   Paul Jackson   [PATCH] cpuset: h...
2844
   */
956db3ca0   Cliff Wickman   hotplug cpu: move...
2845
2846
2847
  static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
  {
  	struct cpuset *parent;
c8d9c90c7   Paul Jackson   hotplug cpu: move...
2848
  	/*
956db3ca0   Cliff Wickman   hotplug cpu: move...
2849
2850
2851
  	 * Find its next-highest non-empty parent, (top cpuset
  	 * has online cpus, so can't be empty).
  	 */
c431069fe   Tejun Heo   cpuset: remove cp...
2852
  	parent = parent_cs(cs);
300ed6cbb   Li Zefan   cpuset: convert c...
2853
  	while (cpumask_empty(parent->cpus_allowed) ||
b45012955   Paul Jackson   hotplug cpu move ...
2854
  			nodes_empty(parent->mems_allowed))
c431069fe   Tejun Heo   cpuset: remove cp...
2855
  		parent = parent_cs(parent);
956db3ca0   Cliff Wickman   hotplug cpu: move...
2856

8cc993452   Tejun Heo   cgroup, cpuset: r...
2857
  	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
12d3089c1   Fabian Frederick   kernel/cpuset.c: ...
2858
  		pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
e61734c55   Tejun Heo   cgroup: remove cg...
2859
2860
2861
  		pr_cont_cgroup_name(cs->css.cgroup);
  		pr_cont("
  ");
8cc993452   Tejun Heo   cgroup, cpuset: r...
2862
  	}
956db3ca0   Cliff Wickman   hotplug cpu: move...
2863
  }
be4c9dd7a   Li Zefan   cpuset: enable on...
2864
2865
2866
2867
  static void
  hotplug_update_tasks_legacy(struct cpuset *cs,
  			    struct cpumask *new_cpus, nodemask_t *new_mems,
  			    bool cpus_updated, bool mems_updated)
390a36aad   Li Zefan   cpuset: refactor ...
2868
2869
  {
  	bool is_empty;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2870
  	spin_lock_irq(&callback_lock);
be4c9dd7a   Li Zefan   cpuset: enable on...
2871
2872
2873
2874
  	cpumask_copy(cs->cpus_allowed, new_cpus);
  	cpumask_copy(cs->effective_cpus, new_cpus);
  	cs->mems_allowed = *new_mems;
  	cs->effective_mems = *new_mems;
8447a0fee   Vladimir Davydov   cpuset: convert c...
2875
  	spin_unlock_irq(&callback_lock);
390a36aad   Li Zefan   cpuset: refactor ...
2876
2877
2878
2879
2880
  
  	/*
  	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
  	 * as the tasks will be migratecd to an ancestor.
  	 */
be4c9dd7a   Li Zefan   cpuset: enable on...
2881
  	if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
390a36aad   Li Zefan   cpuset: refactor ...
2882
  		update_tasks_cpumask(cs);
be4c9dd7a   Li Zefan   cpuset: enable on...
2883
  	if (mems_updated && !nodes_empty(cs->mems_allowed))
390a36aad   Li Zefan   cpuset: refactor ...
2884
2885
2886
2887
  		update_tasks_nodemask(cs);
  
  	is_empty = cpumask_empty(cs->cpus_allowed) ||
  		   nodes_empty(cs->mems_allowed);
18695a120   Stephen Dickey   Revert "cgroup/cp...
2888
  	mutex_unlock(&cpuset_mutex);
390a36aad   Li Zefan   cpuset: refactor ...
2889
2890
2891
2892
2893
2894
2895
2896
  
  	/*
  	 * Move tasks to the nearest ancestor with execution resources,
  	 * This is full cgroup operation which will also call back into
  	 * cpuset. Should be done outside any lock.
  	 */
  	if (is_empty)
  		remove_tasks_in_empty_cpuset(cs);
18695a120   Stephen Dickey   Revert "cgroup/cp...
2897
  	mutex_lock(&cpuset_mutex);
390a36aad   Li Zefan   cpuset: refactor ...
2898
  }
be4c9dd7a   Li Zefan   cpuset: enable on...
2899
2900
2901
2902
/*
 * hotplug_update_tasks - apply new effective masks after hot-unplug
 * @cs: cpuset whose effective_cpus/effective_mems are being refreshed
 * @new_cpus: candidate effective cpumask (may be modified in place)
 * @new_mems: candidate effective nodemask (may be modified in place)
 * @cpus_updated: true if the effective cpumask actually changed
 * @mems_updated: true if the effective nodemask actually changed
 *
 * Default-hierarchy (v2) variant chosen by cpuset_hotplug_update_tasks();
 * the legacy hierarchy uses hotplug_update_tasks_legacy() instead.
 */
static void
hotplug_update_tasks(struct cpuset *cs,
		     struct cpumask *new_cpus, nodemask_t *new_mems,
		     bool cpus_updated, bool mems_updated)
{
	/*
	 * If hot-unplug emptied either mask, fall back to the parent's
	 * effective mask so tasks in @cs are never left with nothing.
	 */
	if (cpumask_empty(new_cpus))
		cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
	if (nodes_empty(*new_mems))
		*new_mems = parent_cs(cs)->effective_mems;

	/* Publish the new effective masks under callback_lock. */
	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->effective_cpus, new_cpus);
	cs->effective_mems = *new_mems;
	spin_unlock_irq(&callback_lock);

	/* Propagate only the masks that actually changed to the tasks. */
	if (cpus_updated)
		update_tasks_cpumask(cs);
	if (mems_updated)
		update_tasks_nodemask(cs);
}
4b842da27   Waiman Long   cpuset: Make CPU ...
2918
2919
2920
2921
2922
2923
/*
 * When set, the next run of cpuset_hotplug_workfn() rebuilds the sched
 * domains even if the active cpu list did not change; the flag is cleared
 * there just before rebuild_sched_domains() is called.
 */
static bool force_rebuild;

/* Request an unconditional sched-domain rebuild on the next hotplug pass. */
void cpuset_force_rebuild(void)
{
	force_rebuild = true;
}
deb7aa308   Tejun Heo   cpuset: reorganiz...
2924
/**
 * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
 * @cs: cpuset in interest
 * @tmp: the tmpmasks structure pointer (NULL disables partition handling)
 *
 * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
 * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
 * all its tasks are moved to the nearest ancestor with both resources.
 */
static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
{
	/*
	 * Static scratch masks: every use below happens with cpuset_mutex
	 * held, which serializes concurrent callers.
	 */
	static cpumask_t new_cpus;
	static nodemask_t new_mems;
	bool cpus_updated;
	bool mems_updated;
	struct cpuset *parent;
retry:
	wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);

	mutex_lock(&cpuset_mutex);

	/*
	 * We have raced with task attaching. We wait until attaching
	 * is finished, so we won't attach a task to an empty cpuset.
	 */
	if (cs->attach_in_progress) {
		mutex_unlock(&cpuset_mutex);
		goto retry;
	}

	/* Recompute what @cs may effectively use from its parent. */
	parent = parent_cs(cs);
	compute_effective_cpumask(&new_cpus, cs, parent);
	nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);

	if (cs->nr_subparts_cpus)
		/*
		 * Make sure that CPUs allocated to child partitions
		 * do not show up in effective_cpus.
		 */
		cpumask_andnot(&new_cpus, &new_cpus, cs->subparts_cpus);

	/* Without tmpmasks or a partition root, skip partition handling. */
	if (!tmp || !cs->partition_root_state)
		goto update_tasks;

	/*
	 * In the unlikely event that a partition root has empty
	 * effective_cpus or its parent becomes erroneous, we have to
	 * transition it to the erroneous state.
	 */
	if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
	   (parent->partition_root_state == PRS_ERROR))) {
		if (cs->nr_subparts_cpus) {
			cs->nr_subparts_cpus = 0;
			cpumask_clear(cs->subparts_cpus);
			compute_effective_cpumask(&new_cpus, cs, parent);
		}

		/*
		 * If the effective_cpus is empty because the child
		 * partitions take away all the CPUs, we can keep
		 * the current partition and let the child partitions
		 * fight for available CPUs.
		 */
		if ((parent->partition_root_state == PRS_ERROR) ||
		     cpumask_empty(&new_cpus)) {
			update_parent_subparts_cpumask(cs, partcmd_disable,
						       NULL, tmp);
			cs->partition_root_state = PRS_ERROR;
		}
		cpuset_force_rebuild();
	}

	/*
	 * On the other hand, an erroneous partition root may be transitioned
	 * back to a regular one or a partition root with no CPU allocated
	 * from the parent may change to erroneous.
	 */
	if (is_partition_root(parent) &&
	   ((cs->partition_root_state == PRS_ERROR) ||
	    !cpumask_intersects(&new_cpus, parent->subparts_cpus)) &&
	     update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp))
		cpuset_force_rebuild();

update_tasks:
	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
	mems_updated = !nodes_equal(new_mems, cs->effective_mems);

	/* Pick the update strategy matching the mounted hierarchy mode. */
	if (is_in_v2_mode())
		hotplug_update_tasks(cs, &new_cpus, &new_mems,
				     cpus_updated, mems_updated);
	else
		hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
					    cpus_updated, mems_updated);

	mutex_unlock(&cpuset_mutex);
}
deb7aa308   Tejun Heo   cpuset: reorganiz...
3019
/**
 * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
 * @work: unused work item (scheduled via cpuset_hotplug_work)
 *
 * This function is called after either CPU or memory configuration has
 * changed and updates cpuset accordingly.  The top_cpuset is always
 * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
 * order to make cpusets transparent (of no effect) on systems that are
 * actively using CPU hotplug but making no active use of cpusets.
 *
 * Non-root cpusets are only affected by offlining.  If any CPUs or memory
 * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on
 * all descendants.
 *
 * Note that CPU offlining during suspend is ignored.  We don't modify
 * cpusets across suspend/resume cycles at all.
 */
void cpuset_hotplug_workfn(struct work_struct *work)
{
	/* Static scratch masks; runs are serialized (single work item). */
	static cpumask_t new_cpus;
	static nodemask_t new_mems;
	bool cpus_updated, mems_updated;
	bool on_dfl = is_in_v2_mode();
	struct tmpmasks tmp, *ptmp = NULL;

	/* tmpmasks are only needed for partition handling on v2. */
	if (on_dfl && !alloc_cpumasks(NULL, &tmp))
		ptmp = &tmp;

	mutex_lock(&cpuset_mutex);

	/* fetch the available cpus/mems and find out which changed how */
	cpumask_copy(&new_cpus, cpu_active_mask);
	new_mems = node_states[N_MEMORY];

	/*
	 * If subparts_cpus is populated, it is likely that the check below
	 * will produce a false positive on cpus_updated when the cpu list
	 * isn't changed. It is extra work, but it is better to be safe.
	 */
	cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
	mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);

	/* synchronize cpus_allowed to cpu_active_mask */
	if (cpus_updated) {
		spin_lock_irq(&callback_lock);
		/* On legacy hierarchy, cpus_allowed tracks the active set. */
		if (!on_dfl)
			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
		/*
		 * Make sure that CPUs allocated to child partitions
		 * do not show up in effective_cpus. If no CPU is left,
		 * we clear the subparts_cpus & let the child partitions
		 * fight for the CPUs again.
		 */
		if (top_cpuset.nr_subparts_cpus) {
			if (cpumask_subset(&new_cpus,
					   top_cpuset.subparts_cpus)) {
				top_cpuset.nr_subparts_cpus = 0;
				cpumask_clear(top_cpuset.subparts_cpus);
			} else {
				cpumask_andnot(&new_cpus, &new_cpus,
					       top_cpuset.subparts_cpus);
			}
		}
		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
		spin_unlock_irq(&callback_lock);
		/* we don't mess with cpumasks of tasks in top_cpuset */
	}

	/* synchronize mems_allowed to N_MEMORY */
	if (mems_updated) {
		spin_lock_irq(&callback_lock);
		if (!on_dfl)
			top_cpuset.mems_allowed = new_mems;
		top_cpuset.effective_mems = new_mems;
		spin_unlock_irq(&callback_lock);
		update_tasks_nodemask(&top_cpuset);
	}

	mutex_unlock(&cpuset_mutex);

	/* if cpus or mems changed, we need to propagate to descendants */
	if (cpus_updated || mems_updated) {
		struct cpuset *cs;
		struct cgroup_subsys_state *pos_css;

		rcu_read_lock();
		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
				continue;
			/*
			 * Drop RCU while updating @cs (it may sleep); the
			 * css reference keeps @cs alive across the gap.
			 */
			rcu_read_unlock();

			cpuset_hotplug_update_tasks(cs, ptmp);

			rcu_read_lock();
			css_put(&cs->css);
		}
		rcu_read_unlock();
	}

	/* rebuild sched domains if cpus_allowed has changed */
	if (cpus_updated || force_rebuild) {
		force_rebuild = false;
		rebuild_sched_domains();
	}

	free_cpumasks(NULL, ptmp);
}
30e03acda   Rakib Mullick   cpuset: Remove cp...
3125
/*
 * cpuset_update_active_cpus - CPU hotplug entry point for cpusets.
 * Defers the real work to cpuset_hotplug_workfn() via cpuset_hotplug_work.
 */
void cpuset_update_active_cpus(void)
{
	/*
	 * We're inside cpu hotplug critical region which usually nests
	 * inside cgroup synchronization.  Bounce actual hotplug processing
	 * to a work item to avoid reverse locking order.
	 */
	schedule_work(&cpuset_hotplug_work);
}
4c4d50f7b   Paul Jackson   [PATCH] cpuset: t...
3134

50e766323   Peter Zijlstra   sched/cpuset/pm: ...
3135
3136
3137
3138
/* Wait for any pending cpuset_hotplug_work to finish before returning. */
void cpuset_wait_for_hotplug(void)
{
	flush_work(&cpuset_hotplug_work);
}
38837fc75   Paul Jackson   [PATCH] cpuset: t...
3139
/*
 * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
 * Call this routine anytime after node_states[N_MEMORY] changes.
 * See cpuset_update_active_cpus() for CPU hotplug handling.
 */
static int cpuset_track_online_nodes(struct notifier_block *self,
				unsigned long action, void *arg)
{
	/* Memory-hotplug notifier: bounce the update to the worker. */
	schedule_work(&cpuset_hotplug_work);
	return NOTIFY_OK;
}
d8f10cb3d   Andrew Morton   kernel/cpuset.c: ...
3150
3151
3152
3153
3154
  
/* Notifier registered in cpuset_init_smp() for memory-node hotplug. */
static struct notifier_block cpuset_track_online_nodes_nb = {
	.notifier_call = cpuset_track_online_nodes,
	.priority = 10,		/* ??! — rationale for 10 was never recorded */
};
38837fc75   Paul Jackson   [PATCH] cpuset: t...
3155

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3156
3157
3158
3159
/**
 * cpuset_init_smp - initialize cpus_allowed
 *
 * Description: Finish top cpuset after cpu, node maps are initialized
 */
void __init cpuset_init_smp(void)
{
	/* Seed top_cpuset's allowed masks from the live cpu/node maps. */
	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
	top_cpuset.mems_allowed = node_states[N_MEMORY];
	top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;

	/* Effective masks start identical to the allowed masks. */
	cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
	top_cpuset.effective_mems = node_states[N_MEMORY];

	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);

	/* The mm-migration workqueue is mandatory; boot fails without it. */
	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
	BUG_ON(!cpuset_migrate_mm_wq);
}
  
/**
 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
 *
 * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of cpu_online_mask, even if this means going outside the
 * tasks cpuset.
 **/
void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
	unsigned long flags;

	/*
	 * callback_lock stabilizes the cpuset masks; the RCU read section
	 * keeps task_cs(tsk) valid while we dereference it.
	 */
	spin_lock_irqsave(&callback_lock, flags);
	rcu_read_lock();
	guarantee_online_cpus(task_cs(tsk), pmask);
	rcu_read_unlock();
	spin_unlock_irqrestore(&callback_lock, flags);
}
d477f8c20   Joel Savitz   cpuset: restore s...
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
/**
 * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
 * @tsk: pointer to task_struct with which the scheduler is struggling
 *
 * Description: In the case that the scheduler cannot find an allowed cpu in
 * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
 * mode however, this value is the same as task_cs(tsk)->effective_cpus,
 * which will not contain a sane cpumask during cases such as cpu hotplugging.
 * This is the absolute last resort for the scheduler and it is only used if
 * _every_ other avenue has been traveled.
 **/
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
	/* RCU protects the task_cs(tsk) dereference below. */
	rcu_read_lock();
	do_set_cpus_allowed(tsk, is_in_v2_mode() ?
		task_cs(tsk)->cpus_allowed : cpu_possible_mask);
	rcu_read_unlock();

	/*
	 * We own tsk->cpus_allowed, nobody can change it under us.
	 *
	 * But we used cs && cs->cpus_allowed lockless and thus can
	 * race with cgroup_attach_task() or update_cpumask() and get
	 * the wrong tsk->cpus_allowed. However, both cases imply the
	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
	 * which takes task_rq_lock().
	 *
	 * If we are called after it dropped the lock we must see all
	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
	 * set any mask even if it is not right from task_cs() pov,
	 * the pending set_cpus_allowed_ptr() will fix things.
	 *
	 * select_fallback_rq() will fix things ups and set cpu_possible_mask
	 * if required.
	 */
}
8f4ab07f4   Rasmus Villemoes   kernel/cpuset.c: ...
3231
/* During early boot, let the init task allocate memory on any node. */
void __init cpuset_init_current_mems_allowed(void)
{
	nodes_setall(current->mems_allowed);
}
d9fd8a6d4   Randy Dunlap   [PATCH] kernel/cp...
3235
/**
 * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset.
 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
 *
 * Description: Returns the nodemask_t mems_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of node_states[N_MEMORY], even if this means going outside the
 * tasks cpuset.
 **/
nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
{
	nodemask_t mask;
	unsigned long flags;

	/* Same locking pattern as cpuset_cpus_allowed(): lock + RCU. */
	spin_lock_irqsave(&callback_lock, flags);
	rcu_read_lock();
	guarantee_online_mems(task_cs(tsk), &mask);
	rcu_read_unlock();
	spin_unlock_irqrestore(&callback_lock, flags);

	return mask;
}
  
/**
 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
 * @nodemask: the nodemask to be checked
 *
 * Are any of the nodes in the nodemask allowed in current->mems_allowed?
 */
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
	return nodes_intersects(*nodemask, current->mems_allowed);
}
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
3269
  /*
786083667   Paul Menage   Cpuset hardwall f...
3270
3271
   * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
   * mem_hardwall ancestor to the specified cpuset.  Call holding
8447a0fee   Vladimir Davydov   cpuset: convert c...
3272
   * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
786083667   Paul Menage   Cpuset hardwall f...
3273
   * (an unusual configuration), then returns the root cpuset.
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
3274
   */
c9710d801   Tejun Heo   cpuset: drop "con...
3275
  static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
3276
  {
c431069fe   Tejun Heo   cpuset: remove cp...
3277
3278
  	while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
  		cs = parent_cs(cs);
9bf2229f8   Paul Jackson   [PATCH] cpusets: ...
3279
3280
  	return cs;
  }
d9fd8a6d4   Randy Dunlap   [PATCH] kernel/cp...
3281
/**
 * cpuset_node_allowed - Can we allocate on a memory node?
 * @node: is this an allowed node?
 * @gfp_mask: memory allocation flags
 *
 * If we're in interrupt, yes, we can always allocate.  If @node is set in
 * current's mems_allowed, yes.  If it's not a __GFP_HARDWALL request and this
 * node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
 * yes.  If current has access to memory reserves as an oom victim, yes.
 * Otherwise, no.
 *
 * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
 * and do not allow allocations outside the current tasks cpuset
 * unless the task has been OOM killed.
 * GFP_KERNEL allocations are not so marked, so can escape to the
 * nearest enclosing hardwalled ancestor cpuset.
 *
 * Scanning up parent cpusets requires callback_lock.  The
 * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
 * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
 * current tasks mems_allowed came up empty on the first pass over
 * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
 * cpuset are short of memory, might require taking the callback_lock.
 *
 * The first call here from mm/page_alloc:get_page_from_freelist()
 * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
 * so no allocation on a node outside the cpuset is allowed (unless
 * in interrupt, of course).
 *
 * The second pass through get_page_from_freelist() doesn't even call
 * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
 * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
 * in alloc_flags.  That logic and the checks below have the combined
 * effect that:
 *	in_interrupt - any node ok (current task context irrelevant)
 *	GFP_ATOMIC   - any node ok
 *	tsk_is_oom_victim   - any node ok
 *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
 *	GFP_USER     - only nodes in current tasks mems allowed ok.
 */
bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
	struct cpuset *cs;		/* current cpuset ancestors */
	int allowed;			/* is allocation in zone z allowed? */
	unsigned long flags;

	/* Fast paths that need no locking, in decreasing-priority order. */
	if (in_interrupt())
		return true;
	if (node_isset(node, current->mems_allowed))
		return true;
	/*
	 * Allow tasks that have access to memory reserves because they have
	 * been OOM killed to get memory anywhere.
	 */
	if (unlikely(tsk_is_oom_victim(current)))
		return true;
	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
		return false;

	if (current->flags & PF_EXITING) /* Let dying task have memory */
		return true;

	/* Not hardwall and node outside mems_allowed: scan up cpusets */
	spin_lock_irqsave(&callback_lock, flags);

	rcu_read_lock();
	cs = nearest_hardwall_ancestor(task_cs(current));
	allowed = node_isset(node, cs->mems_allowed);
	rcu_read_unlock();

	spin_unlock_irqrestore(&callback_lock, flags);
	return allowed;
}
ef08e3b49   Paul Jackson   [PATCH] cpusets: ...
3354
/**
 * cpuset_mem_spread_node() - On which node to begin search for a file page
 * cpuset_slab_spread_node() - On which node to begin search for a slab page
 *
 * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
 * tasks in a cpuset with is_spread_page or is_spread_slab set),
 * and if the memory allocation used cpuset_mem_spread_node()
 * to determine on which node to start looking, as it will for
 * certain page cache or slab cache pages such as used for file
 * system buffers and inode caches, then instead of starting on the
 * local node to look for a free page, rather spread the starting
 * node around the tasks mems_allowed nodes.
 *
 * We don't have to worry about the returned node being offline
 * because "it can't happen", and even if it did, it would be ok.
 *
 * The routines calling guarantee_online_mems() are careful to
 * only set nodes in task->mems_allowed that are online.  So it
 * should not be possible for the following code to return an
 * offline node.  But if it did, that would be ok, as this routine
 * is not returning the node where the allocation must be, only
 * the node where the search should start.  The zonelist passed to
 * __alloc_pages() will include all nodes.  If the slab allocator
 * is passed an offline node, it will fall back to the local node.
 * See kmem_cache_alloc_node().
 */
static int cpuset_spread_node(int *rotor)
{
	/* Advance @rotor to the next node in mems_allowed and return it. */
	return *rotor = next_node_in(*rotor, current->mems_allowed);
}
6adef3ebe   Jack Steiner   cpusets: new roun...
3384
3385
3386
  
/* Pick the next node to start a page-cache page search on (see above). */
int cpuset_mem_spread_node(void)
{
	/* Lazily seed this task's page rotor at a random allowed node. */
	if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
		current->cpuset_mem_spread_rotor =
			node_random(&current->mems_allowed);

	return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
}
  
/* Pick the next node to start a slab page search on (see above). */
int cpuset_slab_spread_node(void)
{
	/* Lazily seed this task's slab rotor at a random allowed node. */
	if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
		current->cpuset_slab_spread_rotor =
			node_random(&current->mems_allowed);

	return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
}

EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
  
/**
 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
 * @tsk1: pointer to task_struct of some task.
 * @tsk2: pointer to task_struct of some other task.
 *
 * Description: Return true if @tsk1's mems_allowed intersects the
 * mems_allowed of @tsk2.  Used by the OOM killer to determine if
 * one of the task's memory usage might impact the memory available
 * to the other.
 **/
int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
				   const struct task_struct *tsk2)
{
	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}
75aa19941   David Rientjes   oom: print trigge...
3417
  /**
da39da3a5   David Rientjes   mm, oom: remove t...
3418
   * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
75aa19941   David Rientjes   oom: print trigge...
3419
   *
da39da3a5   David Rientjes   mm, oom: remove t...
3420
   * Description: Prints current's name, cpuset name, and cached copy of its
b8dadcb58   Li Zefan   cpuset: use rcu_r...
3421
   * mems_allowed to the kernel log.
75aa19941   David Rientjes   oom: print trigge...
3422
   */
da39da3a5   David Rientjes   mm, oom: remove t...
3423
  void cpuset_print_current_mems_allowed(void)
75aa19941   David Rientjes   oom: print trigge...
3424
  {
b8dadcb58   Li Zefan   cpuset: use rcu_r...
3425
  	struct cgroup *cgrp;
75aa19941   David Rientjes   oom: print trigge...
3426

b8dadcb58   Li Zefan   cpuset: use rcu_r...
3427
  	rcu_read_lock();
63f43f55c   Li Zefan   cpuset: fix cpuse...
3428

da39da3a5   David Rientjes   mm, oom: remove t...
3429
  	cgrp = task_cs(current)->css.cgroup;
ef8444ea0   yuzhoujian   mm, oom: reorgani...
3430
  	pr_cont(",cpuset=");
e61734c55   Tejun Heo   cgroup: remove cg...
3431
  	pr_cont_cgroup_name(cgrp);
ef8444ea0   yuzhoujian   mm, oom: reorgani...
3432
  	pr_cont(",mems_allowed=%*pbl",
da39da3a5   David Rientjes   mm, oom: remove t...
3433
  		nodemask_pr_args(&current->mems_allowed));
f440d98f8   Li Zefan   cpuset: use cgrou...
3434

cfb5966be   Li Zefan   cpuset: fix RCU l...
3435
  	rcu_read_unlock();
75aa19941   David Rientjes   oom: print trigge...
3436
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3437
  /*
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
3438
3439
3440
3441
   * Collection of memory_pressure is suppressed unless
   * this flag is enabled by writing "1" to the special
   * cpuset file 'memory_pressure_enabled' in the root cpuset.
   */
c5b2aff89   Paul Jackson   [PATCH] cpuset: m...
3442
  int cpuset_memory_pressure_enabled __read_mostly;
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
  
  /**
   * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
   *
   * Keep a running average of the rate of synchronous (direct)
   * page reclaim efforts initiated by tasks in each cpuset.
   *
   * This represents the rate at which some task in the cpuset
   * ran low on memory on all nodes it was allowed to use, and
   * had to enter the kernels page reclaim code in an effort to
   * create more free memory by tossing clean pages or swapping
   * or writing dirty pages.
   *
   * Display to user space in the per-cpuset read-only file
   * "memory_pressure".  Value displayed is an integer
   * representing the recent rate of entry into the synchronous
   * (direct) page reclaim by any task attached to the cpuset.
   **/
  
  void __cpuset_memory_pressure_bump(void)
  {
b8dadcb58   Li Zefan   cpuset: use rcu_r...
3464
  	rcu_read_lock();
8793d854e   Paul Menage   Task Control Grou...
3465
  	fmeter_markevent(&task_cs(current)->fmeter);
b8dadcb58   Li Zefan   cpuset: use rcu_r...
3466
  	rcu_read_unlock();
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
3467
  }
8793d854e   Paul Menage   Task Control Grou...
3468
  #ifdef CONFIG_PROC_PID_CPUSET
3e0d98b9f   Paul Jackson   [PATCH] cpuset: m...
3469
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3470
3471
3472
   * proc_cpuset_show()
   *  - Print tasks cpuset path into seq_file.
   *  - Used for /proc/<pid>/cpuset.
053199edf   Paul Jackson   [PATCH] cpusets: ...
3473
3474
   *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
   *    doesn't really matter if tsk->cpuset changes after we read it,
5d21cc2db   Tejun Heo   cpuset: replace c...
3475
   *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
2df167a30   Paul Menage   cgroups: update c...
3476
   *    anyway.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3477
   */
52de4779f   Zefan Li   cpuset: simplify ...
3478
3479
  int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
  		     struct pid *pid, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3480
  {
4c737b41d   Tejun Heo   cgroup: make cgro...
3481
  	char *buf;
8793d854e   Paul Menage   Task Control Grou...
3482
  	struct cgroup_subsys_state *css;
99f895518   Eric W. Biederman   [PATCH] proc: don...
3483
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3484

99f895518   Eric W. Biederman   [PATCH] proc: don...
3485
  	retval = -ENOMEM;
e61734c55   Tejun Heo   cgroup: remove cg...
3486
  	buf = kmalloc(PATH_MAX, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3487
  	if (!buf)
99f895518   Eric W. Biederman   [PATCH] proc: don...
3488
  		goto out;
a79a908fd   Aditya Kali   cgroup: introduce...
3489
  	css = task_get_css(tsk, cpuset_cgrp_id);
4c737b41d   Tejun Heo   cgroup: make cgro...
3490
3491
  	retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
  				current->nsproxy->cgroup_ns);
a79a908fd   Aditya Kali   cgroup: introduce...
3492
  	css_put(css);
4c737b41d   Tejun Heo   cgroup: make cgro...
3493
  	if (retval >= PATH_MAX)
679a5e3f1   Tejun Heo   cpuset: fix error...
3494
3495
  		retval = -ENAMETOOLONG;
  	if (retval < 0)
52de4779f   Zefan Li   cpuset: simplify ...
3496
  		goto out_free;
4c737b41d   Tejun Heo   cgroup: make cgro...
3497
  	seq_puts(m, buf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3498
3499
  	seq_putc(m, '
  ');
e61734c55   Tejun Heo   cgroup: remove cg...
3500
  	retval = 0;
99f895518   Eric W. Biederman   [PATCH] proc: don...
3501
  out_free:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3502
  	kfree(buf);
99f895518   Eric W. Biederman   [PATCH] proc: don...
3503
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3504
3505
  	return retval;
  }
8793d854e   Paul Menage   Task Control Grou...
3506
  #endif /* CONFIG_PROC_PID_CPUSET */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3507

d01d48278   Heiko Carstens   sched: Always sho...
3508
  /* Display task mems_allowed in /proc/<pid>/status file. */
df5f8314c   Eric W. Biederman   proc: seqfile con...
3509
3510
  void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
  {
e8e6d97c9   Tejun Heo   cpuset: use %*pb[...
3511
3512
3513
3514
3515
3516
  	seq_printf(m, "Mems_allowed:\t%*pb
  ",
  		   nodemask_pr_args(&task->mems_allowed));
  	seq_printf(m, "Mems_allowed_list:\t%*pbl
  ",
  		   nodemask_pr_args(&task->mems_allowed));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3517
  }